diff --git a/venv.bak/bin/activate b/venv.bak/bin/activate
new file mode 100644
index 0000000..b1f97b8
--- /dev/null
+++ b/venv.bak/bin/activate
@@ -0,0 +1,76 @@
+# This file must be used with "source bin/activate" *from bash*
+# you cannot run it directly
+
+deactivate () {
+    # reset old environment variables
+    if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
+        PATH="${_OLD_VIRTUAL_PATH:-}"
+        export PATH
+        unset _OLD_VIRTUAL_PATH
+    fi
+    if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
+        PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
+        export PYTHONHOME
+        unset _OLD_VIRTUAL_PYTHONHOME
+    fi
+
+    # This should detect bash and zsh, which have a hash command that must
+    # be called to get it to forget past commands. Without forgetting
+    # past commands the $PATH changes we made may not be respected
+    if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
+        hash -r
+    fi
+
+    if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
+        PS1="${_OLD_VIRTUAL_PS1:-}"
+        export PS1
+        unset _OLD_VIRTUAL_PS1
+    fi
+
+    unset VIRTUAL_ENV
+    if [ ! "$1" = "nondestructive" ] ; then
+    # Self destruct!
+        unset -f deactivate
+    fi
+}
+
+# unset irrelevant variables
+deactivate nondestructive
+
+VIRTUAL_ENV="/Users/bootje/Desktop/graduation2020/bo-graduation/venv"
+export VIRTUAL_ENV
+
+_OLD_VIRTUAL_PATH="$PATH"
+PATH="$VIRTUAL_ENV/bin:$PATH"
+export PATH
+
+# unset PYTHONHOME if set
+# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
+# could use `if (set -u; : $PYTHONHOME) ;` in bash
+if [ -n "${PYTHONHOME:-}" ] ; then
+    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
+    unset PYTHONHOME
+fi
+
+if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
+    _OLD_VIRTUAL_PS1="${PS1:-}"
+    if [ "x(venv) " != x ] ; then
+        PS1="(venv) ${PS1:-}"
+    else
+    if [ "`basename \"$VIRTUAL_ENV\"`" = "__" ] ; then
+        # special case for Aspen magic directories
+        # see http://www.zetadev.com/software/aspen/
+        PS1="[`basename \`dirname \"$VIRTUAL_ENV\"\``] $PS1"
+    else
+        PS1="(`basename \"$VIRTUAL_ENV\"`)$PS1"
+    fi
+    fi
+    export PS1
+fi
+
+# This should detect bash and zsh, which have a hash command that must
+# be called to get it to forget past commands. Without forgetting
+# past commands the $PATH changes we made may not be respected
+if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
+    hash -r
+fi
diff --git a/venv.bak/bin/activate.csh b/venv.bak/bin/activate.csh
new file mode 100644
index 0000000..4b25abc
--- /dev/null
+++ b/venv.bak/bin/activate.csh
@@ -0,0 +1,37 @@
+# This file must be used with "source bin/activate.csh" *from csh*.
+# You cannot run it directly.
+# Created by Davide Di Blasi.
+# Ported to Python 3.3 venv by Andrew Svetlov
+
+alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; test "\!:*" != "nondestructive" && unalias deactivate'
+
+# Unset irrelevant variables.
+deactivate nondestructive
+
+setenv VIRTUAL_ENV "/Users/bootje/Desktop/graduation2020/bo-graduation/venv"
+
+set _OLD_VIRTUAL_PATH="$PATH"
+setenv PATH "$VIRTUAL_ENV/bin:$PATH"
+
+
+set _OLD_VIRTUAL_PROMPT="$prompt"
+
+if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
+    if ("venv" != "") then
+        set env_name = "venv"
+    else
+        if (`basename "VIRTUAL_ENV"` == "__") then
+            # special case for Aspen magic directories
+            # see http://www.zetadev.com/software/aspen/
+            set env_name = `basename \`dirname "$VIRTUAL_ENV"\``
+        else
+            set env_name = `basename "$VIRTUAL_ENV"`
+        endif
+    endif
+    set prompt = "[$env_name] $prompt"
+    unset env_name
+endif
+
+alias pydoc python -m pydoc
+
+rehash
diff --git a/venv.bak/bin/activate.fish b/venv.bak/bin/activate.fish
new file mode 100644
index 0000000..e638826
--- /dev/null
+++ b/venv.bak/bin/activate.fish
@@ -0,0 +1,75 @@
+# This file must be used with ". bin/activate.fish" *from fish* (http://fishshell.org)
+# you cannot run it directly
+
+function deactivate -d "Exit virtualenv and return to normal shell environment"
+    # reset old environment variables
+    if test -n "$_OLD_VIRTUAL_PATH"
+        set -gx PATH $_OLD_VIRTUAL_PATH
+        set -e _OLD_VIRTUAL_PATH
+    end
+    if test -n "$_OLD_VIRTUAL_PYTHONHOME"
+        set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
+        set -e _OLD_VIRTUAL_PYTHONHOME
+    end
+
+    if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
+        functions -e fish_prompt
+        set -e _OLD_FISH_PROMPT_OVERRIDE
+        functions -c _old_fish_prompt fish_prompt
+        functions -e _old_fish_prompt
+    end
+
+    set -e VIRTUAL_ENV
+    if test "$argv[1]" != "nondestructive"
+        # Self destruct!
+        functions -e deactivate
+    end
+end
+
+# unset irrelevant variables
+deactivate nondestructive
+
+set -gx VIRTUAL_ENV "/Users/bootje/Desktop/graduation2020/bo-graduation/venv"
+
+set -gx _OLD_VIRTUAL_PATH $PATH
+set -gx PATH "$VIRTUAL_ENV/bin" $PATH
+
+# unset PYTHONHOME if set
+if set -q PYTHONHOME
+    set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
+    set -e PYTHONHOME
+end
+
+if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
+    # fish uses a function instead of an env var to generate the prompt.
+
+    # save the current fish_prompt function as the function _old_fish_prompt
+    functions -c fish_prompt _old_fish_prompt
+
+    # with the original prompt function renamed, we can override with our own.
+    function fish_prompt
+        # Save the return status of the last command
+        set -l old_status $status
+
+        # Prompt override?
+        if test -n "(venv) "
+            printf "%s%s" "(venv) " (set_color normal)
+        else
+            # ...Otherwise, prepend env
+            set -l _checkbase (basename "$VIRTUAL_ENV")
+            if test $_checkbase = "__"
+                # special case for Aspen magic directories
+                # see http://www.zetadev.com/software/aspen/
+                printf "%s[%s]%s " (set_color -b blue white) (basename (dirname "$VIRTUAL_ENV")) (set_color normal)
+            else
+                printf "%s(%s)%s" (set_color -b blue white) (basename "$VIRTUAL_ENV") (set_color normal)
+            end
+        end
+
+        # Restore the return status of the previous command.
+        echo "exit $old_status" | .
+        _old_fish_prompt
+    end
+
+    set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
+end
diff --git a/venv.bak/bin/easy_install b/venv.bak/bin/easy_install
new file mode 100755
index 0000000..105da0e
--- /dev/null
+++ b/venv.bak/bin/easy_install
@@ -0,0 +1,10 @@
+#!/Users/bootje/Desktop/graduation2020/bo-graduation/venv/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+
+from setuptools.command.easy_install import main
+
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())
diff --git a/venv.bak/bin/easy_install-3.7 b/venv.bak/bin/easy_install-3.7
new file mode 100755
index 0000000..105da0e
--- /dev/null
+++ b/venv.bak/bin/easy_install-3.7
@@ -0,0 +1,10 @@
+#!/Users/bootje/Desktop/graduation2020/bo-graduation/venv/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+
+from setuptools.command.easy_install import main
+
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())
diff --git a/venv.bak/bin/pip b/venv.bak/bin/pip
new file mode 100755
index 0000000..48dee25
--- /dev/null
+++ b/venv.bak/bin/pip
@@ -0,0 +1,10 @@
+#!/Users/bootje/Desktop/graduation2020/bo-graduation/venv/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+
+from pip._internal import main
+
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())
diff --git a/venv.bak/bin/pip3 b/venv.bak/bin/pip3
new file mode 100755
index 0000000..48dee25
--- /dev/null
+++ b/venv.bak/bin/pip3
@@ -0,0 +1,10 @@
+#!/Users/bootje/Desktop/graduation2020/bo-graduation/venv/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+
+from pip._internal import main
+
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())
diff --git a/venv.bak/bin/pip3.7 b/venv.bak/bin/pip3.7
new file mode 100755
index 0000000..48dee25
--- /dev/null
+++ b/venv.bak/bin/pip3.7
@@ -0,0 +1,10 @@
+#!/Users/bootje/Desktop/graduation2020/bo-graduation/venv/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+
+from pip._internal import main
+
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())
diff --git a/venv.bak/bin/python b/venv.bak/bin/python
new file mode 120000
index 0000000..b8a0adb
--- /dev/null
+++ b/venv.bak/bin/python
@@ -0,0 +1 @@
+python3
\ No newline at end of file
diff --git a/venv.bak/bin/python3 b/venv.bak/bin/python3
new file mode 120000
index 0000000..79ab74b
--- /dev/null
+++ b/venv.bak/bin/python3
@@ -0,0 +1 @@
+/usr/local/bin/python3
\ No newline at end of file
diff --git a/venv/lib/python3.7/site-packages/.DS_Store b/venv.bak/lib/python3.7/site-packages/.DS_Store
similarity index 100%
rename from venv/lib/python3.7/site-packages/.DS_Store
rename to venv.bak/lib/python3.7/site-packages/.DS_Store
diff --git a/venv/lib/python3.7/site-packages/PIL/.dylibs/libXau.6.dylib b/venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libXau.6.dylib
similarity index 100%
rename from venv/lib/python3.7/site-packages/PIL/.dylibs/libXau.6.dylib
rename to venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libXau.6.dylib
diff --git a/venv/lib/python3.7/site-packages/PIL/.dylibs/libXdmcp.6.dylib b/venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libXdmcp.6.dylib
similarity index 100%
rename from venv/lib/python3.7/site-packages/PIL/.dylibs/libXdmcp.6.dylib
rename to venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libXdmcp.6.dylib
diff --git
a/venv/lib/python3.7/site-packages/PIL/.dylibs/libfreetype.6.dylib b/venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libfreetype.6.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/.dylibs/libfreetype.6.dylib rename to venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libfreetype.6.dylib diff --git a/venv/lib/python3.7/site-packages/PIL/.dylibs/libjpeg.9.dylib b/venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libjpeg.9.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/.dylibs/libjpeg.9.dylib rename to venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libjpeg.9.dylib diff --git a/venv/lib/python3.7/site-packages/PIL/.dylibs/liblcms2.2.dylib b/venv.bak/lib/python3.7/site-packages/PIL/.dylibs/liblcms2.2.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/.dylibs/liblcms2.2.dylib rename to venv.bak/lib/python3.7/site-packages/PIL/.dylibs/liblcms2.2.dylib diff --git a/venv/lib/python3.7/site-packages/PIL/.dylibs/liblzma.5.dylib b/venv.bak/lib/python3.7/site-packages/PIL/.dylibs/liblzma.5.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/.dylibs/liblzma.5.dylib rename to venv.bak/lib/python3.7/site-packages/PIL/.dylibs/liblzma.5.dylib diff --git a/venv/lib/python3.7/site-packages/PIL/.dylibs/libopenjp2.2.3.1.dylib b/venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libopenjp2.2.3.1.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/.dylibs/libopenjp2.2.3.1.dylib rename to venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libopenjp2.2.3.1.dylib diff --git a/venv/lib/python3.7/site-packages/PIL/.dylibs/libpng16.16.dylib b/venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libpng16.16.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/.dylibs/libpng16.16.dylib rename to venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libpng16.16.dylib diff --git a/venv/lib/python3.7/site-packages/PIL/.dylibs/libtiff.5.dylib b/venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libtiff.5.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/.dylibs/libtiff.5.dylib rename to venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libtiff.5.dylib diff --git a/venv/lib/python3.7/site-packages/PIL/.dylibs/libwebp.7.dylib b/venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libwebp.7.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/.dylibs/libwebp.7.dylib rename to venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libwebp.7.dylib diff --git a/venv/lib/python3.7/site-packages/PIL/.dylibs/libwebpdemux.2.dylib b/venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libwebpdemux.2.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/.dylibs/libwebpdemux.2.dylib rename to venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libwebpdemux.2.dylib diff --git a/venv/lib/python3.7/site-packages/PIL/.dylibs/libwebpmux.3.dylib b/venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libwebpmux.3.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/.dylibs/libwebpmux.3.dylib rename to venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libwebpmux.3.dylib diff --git a/venv/lib/python3.7/site-packages/PIL/.dylibs/libxcb.1.dylib b/venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libxcb.1.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/.dylibs/libxcb.1.dylib rename to venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libxcb.1.dylib diff --git 
a/venv/lib/python3.7/site-packages/PIL/.dylibs/libz.1.2.11.dylib b/venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libz.1.2.11.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/.dylibs/libz.1.2.11.dylib rename to venv.bak/lib/python3.7/site-packages/PIL/.dylibs/libz.1.2.11.dylib diff --git a/venv/lib/python3.7/site-packages/PIL/BdfFontFile.py b/venv.bak/lib/python3.7/site-packages/PIL/BdfFontFile.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/BdfFontFile.py rename to venv.bak/lib/python3.7/site-packages/PIL/BdfFontFile.py diff --git a/venv/lib/python3.7/site-packages/PIL/BlpImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/BlpImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/BlpImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/BlpImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/BmpImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/BmpImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/BmpImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/BmpImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/BufrStubImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/BufrStubImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/BufrStubImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/BufrStubImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/ContainerIO.py b/venv.bak/lib/python3.7/site-packages/PIL/ContainerIO.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ContainerIO.py rename to venv.bak/lib/python3.7/site-packages/PIL/ContainerIO.py diff --git a/venv/lib/python3.7/site-packages/PIL/CurImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/CurImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/CurImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/CurImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/DcxImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/DcxImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/DcxImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/DcxImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/DdsImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/DdsImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/DdsImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/DdsImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/EpsImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/EpsImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/EpsImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/EpsImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/ExifTags.py b/venv.bak/lib/python3.7/site-packages/PIL/ExifTags.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ExifTags.py rename to venv.bak/lib/python3.7/site-packages/PIL/ExifTags.py diff --git a/venv/lib/python3.7/site-packages/PIL/FitsStubImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/FitsStubImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/FitsStubImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/FitsStubImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/FliImagePlugin.py 
b/venv.bak/lib/python3.7/site-packages/PIL/FliImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/FliImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/FliImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/FontFile.py b/venv.bak/lib/python3.7/site-packages/PIL/FontFile.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/FontFile.py rename to venv.bak/lib/python3.7/site-packages/PIL/FontFile.py diff --git a/venv/lib/python3.7/site-packages/PIL/FpxImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/FpxImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/FpxImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/FpxImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/FtexImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/FtexImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/FtexImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/FtexImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/GbrImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/GbrImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/GbrImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/GbrImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/GdImageFile.py b/venv.bak/lib/python3.7/site-packages/PIL/GdImageFile.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/GdImageFile.py rename to venv.bak/lib/python3.7/site-packages/PIL/GdImageFile.py diff --git a/venv/lib/python3.7/site-packages/PIL/GifImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/GifImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/GifImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/GifImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/GimpGradientFile.py b/venv.bak/lib/python3.7/site-packages/PIL/GimpGradientFile.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/GimpGradientFile.py rename to venv.bak/lib/python3.7/site-packages/PIL/GimpGradientFile.py diff --git a/venv/lib/python3.7/site-packages/PIL/GimpPaletteFile.py b/venv.bak/lib/python3.7/site-packages/PIL/GimpPaletteFile.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/GimpPaletteFile.py rename to venv.bak/lib/python3.7/site-packages/PIL/GimpPaletteFile.py diff --git a/venv/lib/python3.7/site-packages/PIL/GribStubImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/GribStubImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/GribStubImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/GribStubImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/Hdf5StubImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/Hdf5StubImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/Hdf5StubImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/Hdf5StubImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/IcnsImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/IcnsImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/IcnsImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/IcnsImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/IcoImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/IcoImagePlugin.py similarity index 100% 
rename from venv/lib/python3.7/site-packages/PIL/IcoImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/IcoImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/ImImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/Image.py b/venv.bak/lib/python3.7/site-packages/PIL/Image.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/Image.py rename to venv.bak/lib/python3.7/site-packages/PIL/Image.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageChops.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageChops.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageChops.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageChops.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageCms.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageCms.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageCms.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageCms.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageColor.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageColor.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageColor.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageColor.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageDraw.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageDraw.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageDraw.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageDraw.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageDraw2.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageDraw2.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageDraw2.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageDraw2.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageEnhance.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageEnhance.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageEnhance.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageEnhance.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageFile.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageFile.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageFile.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageFile.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageFilter.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageFilter.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageFilter.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageFilter.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageFont.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageFont.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageFont.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageFont.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageGrab.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageGrab.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageGrab.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageGrab.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageMath.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageMath.py similarity index 100% rename from 
venv/lib/python3.7/site-packages/PIL/ImageMath.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageMath.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageMode.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageMode.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageMode.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageMode.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageMorph.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageMorph.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageMorph.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageMorph.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageOps.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageOps.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageOps.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageOps.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImagePalette.py b/venv.bak/lib/python3.7/site-packages/PIL/ImagePalette.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImagePalette.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImagePalette.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImagePath.py b/venv.bak/lib/python3.7/site-packages/PIL/ImagePath.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImagePath.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImagePath.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageQt.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageQt.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageQt.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageQt.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageSequence.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageSequence.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageSequence.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageSequence.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageShow.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageShow.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageShow.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageShow.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageStat.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageStat.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageStat.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageStat.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageTk.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageTk.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageTk.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageTk.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageTransform.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageTransform.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageTransform.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageTransform.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImageWin.py b/venv.bak/lib/python3.7/site-packages/PIL/ImageWin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/ImageWin.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImageWin.py diff --git a/venv/lib/python3.7/site-packages/PIL/ImtImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/ImtImagePlugin.py similarity index 100% rename from 
venv/lib/python3.7/site-packages/PIL/ImtImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/ImtImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/IptcImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/IptcImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/IptcImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/IptcImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/Jpeg2KImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/Jpeg2KImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/Jpeg2KImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/Jpeg2KImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/JpegImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/JpegImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/JpegImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/JpegImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/JpegPresets.py b/venv.bak/lib/python3.7/site-packages/PIL/JpegPresets.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/JpegPresets.py rename to venv.bak/lib/python3.7/site-packages/PIL/JpegPresets.py diff --git a/venv/lib/python3.7/site-packages/PIL/McIdasImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/McIdasImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/McIdasImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/McIdasImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/MicImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/MicImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/MicImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/MicImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/MpegImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/MpegImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/MpegImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/MpegImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/MpoImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/MpoImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/MpoImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/MpoImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/MspImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/MspImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/MspImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/MspImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/PSDraw.py b/venv.bak/lib/python3.7/site-packages/PIL/PSDraw.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/PSDraw.py rename to venv.bak/lib/python3.7/site-packages/PIL/PSDraw.py diff --git a/venv/lib/python3.7/site-packages/PIL/PaletteFile.py b/venv.bak/lib/python3.7/site-packages/PIL/PaletteFile.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/PaletteFile.py rename to venv.bak/lib/python3.7/site-packages/PIL/PaletteFile.py diff --git a/venv/lib/python3.7/site-packages/PIL/PalmImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/PalmImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/PalmImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/PalmImagePlugin.py 
diff --git a/venv/lib/python3.7/site-packages/PIL/PcdImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/PcdImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/PcdImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/PcdImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/PcfFontFile.py b/venv.bak/lib/python3.7/site-packages/PIL/PcfFontFile.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/PcfFontFile.py rename to venv.bak/lib/python3.7/site-packages/PIL/PcfFontFile.py diff --git a/venv/lib/python3.7/site-packages/PIL/PcxImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/PcxImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/PcxImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/PcxImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/PdfImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/PdfImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/PdfImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/PdfImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/PdfParser.py b/venv.bak/lib/python3.7/site-packages/PIL/PdfParser.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/PdfParser.py rename to venv.bak/lib/python3.7/site-packages/PIL/PdfParser.py diff --git a/venv/lib/python3.7/site-packages/PIL/PixarImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/PixarImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/PixarImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/PixarImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/PngImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/PngImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/PngImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/PngImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/PpmImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/PpmImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/PpmImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/PpmImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/PsdImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/PsdImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/PsdImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/PsdImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/PyAccess.py b/venv.bak/lib/python3.7/site-packages/PIL/PyAccess.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/PyAccess.py rename to venv.bak/lib/python3.7/site-packages/PIL/PyAccess.py diff --git a/venv/lib/python3.7/site-packages/PIL/SgiImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/SgiImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/SgiImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/SgiImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/SpiderImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/SpiderImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/SpiderImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/SpiderImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/SunImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/SunImagePlugin.py similarity index 
100% rename from venv/lib/python3.7/site-packages/PIL/SunImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/SunImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/TarIO.py b/venv.bak/lib/python3.7/site-packages/PIL/TarIO.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/TarIO.py rename to venv.bak/lib/python3.7/site-packages/PIL/TarIO.py diff --git a/venv/lib/python3.7/site-packages/PIL/TgaImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/TgaImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/TgaImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/TgaImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/TiffImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/TiffImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/TiffImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/TiffImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/TiffTags.py b/venv.bak/lib/python3.7/site-packages/PIL/TiffTags.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/TiffTags.py rename to venv.bak/lib/python3.7/site-packages/PIL/TiffTags.py diff --git a/venv/lib/python3.7/site-packages/PIL/WalImageFile.py b/venv.bak/lib/python3.7/site-packages/PIL/WalImageFile.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/WalImageFile.py rename to venv.bak/lib/python3.7/site-packages/PIL/WalImageFile.py diff --git a/venv/lib/python3.7/site-packages/PIL/WebPImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/WebPImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/WebPImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/WebPImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/WmfImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/WmfImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/WmfImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/WmfImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/XVThumbImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/XVThumbImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/XVThumbImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/XVThumbImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/XbmImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/XbmImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/XbmImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/XbmImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/XpmImagePlugin.py b/venv.bak/lib/python3.7/site-packages/PIL/XpmImagePlugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/XpmImagePlugin.py rename to venv.bak/lib/python3.7/site-packages/PIL/XpmImagePlugin.py diff --git a/venv/lib/python3.7/site-packages/PIL/__init__.py b/venv.bak/lib/python3.7/site-packages/PIL/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__init__.py rename to venv.bak/lib/python3.7/site-packages/PIL/__init__.py diff --git a/venv/lib/python3.7/site-packages/PIL/__main__.py b/venv.bak/lib/python3.7/site-packages/PIL/__main__.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__main__.py rename to venv.bak/lib/python3.7/site-packages/PIL/__main__.py diff --git 
a/venv/lib/python3.7/site-packages/PIL/__pycache__/BdfFontFile.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/BdfFontFile.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/BdfFontFile.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/BdfFontFile.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/BlpImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/BlpImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/BlpImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/BlpImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/BmpImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/BmpImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/BmpImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/BmpImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/BufrStubImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/BufrStubImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/BufrStubImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/BufrStubImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ContainerIO.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ContainerIO.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ContainerIO.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ContainerIO.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/CurImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/CurImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/CurImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/CurImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/DcxImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/DcxImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/DcxImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/DcxImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/DdsImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/DdsImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/DdsImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/DdsImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/EpsImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/EpsImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/EpsImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/EpsImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ExifTags.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ExifTags.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ExifTags.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ExifTags.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/FitsStubImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/FitsStubImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/FitsStubImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/FitsStubImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/FliImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/FliImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/FliImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/FliImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/FontFile.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/FontFile.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/FontFile.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/FontFile.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/FpxImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/FpxImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/FpxImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/FpxImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/FtexImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/FtexImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/FtexImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/FtexImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/GbrImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/GbrImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/GbrImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/GbrImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/GdImageFile.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/GdImageFile.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/GdImageFile.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/GdImageFile.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/GifImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/GifImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/GifImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/GifImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/GimpGradientFile.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/GimpGradientFile.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/PIL/__pycache__/GimpGradientFile.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/GimpGradientFile.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/GimpPaletteFile.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/GimpPaletteFile.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/GimpPaletteFile.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/GimpPaletteFile.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/GribStubImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/GribStubImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/GribStubImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/GribStubImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/Hdf5StubImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/Hdf5StubImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/Hdf5StubImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/Hdf5StubImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/IcnsImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/IcnsImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/IcnsImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/IcnsImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/IcoImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/IcoImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/IcoImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/IcoImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/Image.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/Image.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/Image.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/Image.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageChops.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageChops.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageChops.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageChops.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageCms.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageCms.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageCms.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageCms.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageColor.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageColor.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageColor.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageColor.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageDraw.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageDraw.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageDraw.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageDraw.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageDraw2.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageDraw2.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageDraw2.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageDraw2.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageEnhance.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageEnhance.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageEnhance.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageEnhance.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageFile.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageFile.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageFile.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageFile.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageFilter.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageFilter.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageFilter.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageFilter.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageFont.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageFont.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageFont.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageFont.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageGrab.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageGrab.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageGrab.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageGrab.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageMath.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageMath.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageMath.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageMath.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageMode.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageMode.cpython-37.pyc similarity 
index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageMode.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageMode.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageMorph.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageMorph.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageMorph.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageMorph.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageOps.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageOps.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageOps.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageOps.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImagePalette.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImagePalette.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImagePalette.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImagePalette.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImagePath.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImagePath.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImagePath.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImagePath.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageQt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageQt.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageQt.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageQt.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageSequence.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageSequence.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageSequence.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageSequence.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageShow.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageShow.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageShow.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageShow.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageStat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageStat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageStat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageStat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageTk.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageTk.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageTk.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageTk.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageTransform.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageTransform.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageTransform.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageTransform.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImageWin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageWin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImageWin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImageWin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/ImtImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImtImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/ImtImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/ImtImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/IptcImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/IptcImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/IptcImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/IptcImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/Jpeg2KImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/Jpeg2KImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/Jpeg2KImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/Jpeg2KImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/JpegImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/JpegImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/JpegImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/JpegImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/JpegPresets.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/JpegPresets.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/JpegPresets.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/JpegPresets.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/McIdasImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/McIdasImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/McIdasImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/McIdasImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/MicImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/MicImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/MicImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/MicImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/MpegImagePlugin.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/MpegImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/MpegImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/MpegImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/MpoImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/MpoImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/MpoImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/MpoImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/MspImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/MspImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/MspImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/MspImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/PSDraw.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PSDraw.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/PSDraw.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PSDraw.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/PaletteFile.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PaletteFile.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/PaletteFile.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PaletteFile.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/PalmImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PalmImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/PalmImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PalmImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/PcdImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PcdImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/PcdImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PcdImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/PcfFontFile.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PcfFontFile.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/PcfFontFile.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PcfFontFile.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/PcxImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PcxImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/PcxImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PcxImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/PdfImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PdfImagePlugin.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/PIL/__pycache__/PdfImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PdfImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/PdfParser.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PdfParser.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/PdfParser.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PdfParser.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/PixarImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PixarImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/PixarImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PixarImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/PngImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PngImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/PngImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PngImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/PpmImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PpmImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/PpmImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PpmImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/PsdImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PsdImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/PsdImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PsdImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/PyAccess.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PyAccess.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/PyAccess.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/PyAccess.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/SgiImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/SgiImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/SgiImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/SgiImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/SpiderImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/SpiderImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/SpiderImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/SpiderImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/SunImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/SunImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/SunImagePlugin.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/PIL/__pycache__/SunImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/TarIO.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/TarIO.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/TarIO.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/TarIO.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/TgaImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/TgaImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/TgaImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/TgaImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/TiffImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/TiffImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/TiffImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/TiffImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/TiffTags.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/TiffTags.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/TiffTags.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/TiffTags.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/WalImageFile.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/WalImageFile.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/WalImageFile.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/WalImageFile.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/WebPImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/WebPImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/WebPImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/WebPImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/WmfImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/WmfImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/WmfImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/WmfImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/XVThumbImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/XVThumbImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/XVThumbImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/XVThumbImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/XbmImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/XbmImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/XbmImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/XbmImagePlugin.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/PIL/__pycache__/XpmImagePlugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/XpmImagePlugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/XpmImagePlugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/XpmImagePlugin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/__main__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/__main__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/__main__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/__main__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/_binary.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/_binary.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/_binary.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/_binary.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/_tkinter_finder.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/_tkinter_finder.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/_tkinter_finder.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/_tkinter_finder.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/_util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/_util.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/_util.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/_util.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/_version.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/_version.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/_version.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/_version.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/__pycache__/features.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/PIL/__pycache__/features.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/__pycache__/features.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/PIL/__pycache__/features.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/PIL/_binary.py b/venv.bak/lib/python3.7/site-packages/PIL/_binary.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/_binary.py rename to venv.bak/lib/python3.7/site-packages/PIL/_binary.py diff --git a/venv/lib/python3.7/site-packages/PIL/_imaging.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/PIL/_imaging.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/_imaging.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/PIL/_imaging.cpython-37m-darwin.so diff --git 
a/venv/lib/python3.7/site-packages/PIL/_imagingcms.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/PIL/_imagingcms.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/_imagingcms.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/PIL/_imagingcms.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/PIL/_imagingft.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/PIL/_imagingft.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/_imagingft.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/PIL/_imagingft.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/PIL/_imagingmath.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/PIL/_imagingmath.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/_imagingmath.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/PIL/_imagingmath.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/PIL/_imagingmorph.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/PIL/_imagingmorph.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/_imagingmorph.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/PIL/_imagingmorph.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/PIL/_imagingtk.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/PIL/_imagingtk.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/_imagingtk.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/PIL/_imagingtk.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/PIL/_tkinter_finder.py b/venv.bak/lib/python3.7/site-packages/PIL/_tkinter_finder.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/_tkinter_finder.py rename to venv.bak/lib/python3.7/site-packages/PIL/_tkinter_finder.py diff --git a/venv/lib/python3.7/site-packages/PIL/_util.py b/venv.bak/lib/python3.7/site-packages/PIL/_util.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/_util.py rename to venv.bak/lib/python3.7/site-packages/PIL/_util.py diff --git a/venv/lib/python3.7/site-packages/PIL/_version.py b/venv.bak/lib/python3.7/site-packages/PIL/_version.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/_version.py rename to venv.bak/lib/python3.7/site-packages/PIL/_version.py diff --git a/venv/lib/python3.7/site-packages/PIL/_webp.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/PIL/_webp.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/_webp.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/PIL/_webp.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/PIL/features.py b/venv.bak/lib/python3.7/site-packages/PIL/features.py similarity index 100% rename from venv/lib/python3.7/site-packages/PIL/features.py rename to venv.bak/lib/python3.7/site-packages/PIL/features.py diff --git a/venv/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/INSTALLER b/venv.bak/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/INSTALLER similarity index 100% rename from venv/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/INSTALLER rename to venv.bak/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/INSTALLER diff --git a/venv/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/LICENSE 
b/venv.bak/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/LICENSE similarity index 100% rename from venv/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/LICENSE rename to venv.bak/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/LICENSE diff --git a/venv/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/METADATA b/venv.bak/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/METADATA similarity index 100% rename from venv/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/METADATA rename to venv.bak/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/METADATA diff --git a/venv/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/RECORD b/venv.bak/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/RECORD similarity index 100% rename from venv/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/RECORD rename to venv.bak/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/RECORD diff --git a/venv/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/WHEEL b/venv.bak/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/WHEEL similarity index 100% rename from venv/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/WHEEL rename to venv.bak/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/WHEEL diff --git a/venv/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/top_level.txt b/venv.bak/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/top_level.txt similarity index 100% rename from venv/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/top_level.txt rename to venv.bak/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/top_level.txt diff --git a/venv/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/zip-safe b/venv.bak/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/zip-safe similarity index 100% rename from venv/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/zip-safe rename to venv.bak/lib/python3.7/site-packages/Pillow-7.1.1.dist-info/zip-safe diff --git a/venv/lib/python3.7/site-packages/__pycache__/cycler.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/__pycache__/cycler.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/__pycache__/cycler.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/__pycache__/cycler.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/__pycache__/easy_install.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/__pycache__/easy_install.cpython-37.pyc new file mode 100644 index 0000000..3d40bde Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/__pycache__/easy_install.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/__pycache__/pylab.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/__pycache__/pylab.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/__pycache__/pylab.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/__pycache__/pylab.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/__pycache__/pyparsing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/__pycache__/pyparsing.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/__pycache__/pyparsing.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/__pycache__/pyparsing.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/__pycache__/six.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/__pycache__/six.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/__pycache__/six.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/__pycache__/six.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/__pycache__/termcolor.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/__pycache__/termcolor.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/__pycache__/termcolor.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/__pycache__/termcolor.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/INSTALLER b/venv.bak/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/INSTALLER similarity index 100% rename from venv/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/INSTALLER rename to venv.bak/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/INSTALLER diff --git a/venv/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/METADATA b/venv.bak/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/METADATA similarity index 100% rename from venv/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/METADATA rename to venv.bak/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/METADATA diff --git a/venv/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/RECORD b/venv.bak/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/RECORD similarity index 100% rename from venv/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/RECORD rename to venv.bak/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/RECORD diff --git a/venv/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/WHEEL b/venv.bak/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/WHEEL similarity index 100% rename from venv/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/WHEEL rename to venv.bak/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/WHEEL diff --git a/venv/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/top_level.txt b/venv.bak/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/top_level.txt similarity index 100% rename from venv/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/top_level.txt rename to venv.bak/lib/python3.7/site-packages/beautifulsoup4-4.9.0.dist-info/top_level.txt diff --git a/venv/lib/python3.7/site-packages/bs4-0.0.1.dist-info/INSTALLER b/venv.bak/lib/python3.7/site-packages/bs4-0.0.1.dist-info/INSTALLER similarity index 100% rename from venv/lib/python3.7/site-packages/bs4-0.0.1.dist-info/INSTALLER rename to venv.bak/lib/python3.7/site-packages/bs4-0.0.1.dist-info/INSTALLER diff --git a/venv/lib/python3.7/site-packages/bs4-0.0.1.dist-info/METADATA b/venv.bak/lib/python3.7/site-packages/bs4-0.0.1.dist-info/METADATA similarity index 100% rename from venv/lib/python3.7/site-packages/bs4-0.0.1.dist-info/METADATA rename to venv.bak/lib/python3.7/site-packages/bs4-0.0.1.dist-info/METADATA diff --git a/venv/lib/python3.7/site-packages/bs4-0.0.1.dist-info/RECORD b/venv.bak/lib/python3.7/site-packages/bs4-0.0.1.dist-info/RECORD similarity index 100% rename from venv/lib/python3.7/site-packages/bs4-0.0.1.dist-info/RECORD rename to venv.bak/lib/python3.7/site-packages/bs4-0.0.1.dist-info/RECORD diff --git a/venv/lib/python3.7/site-packages/bs4-0.0.1.dist-info/WHEEL b/venv.bak/lib/python3.7/site-packages/bs4-0.0.1.dist-info/WHEEL similarity index 100% rename from venv/lib/python3.7/site-packages/bs4-0.0.1.dist-info/WHEEL rename to venv.bak/lib/python3.7/site-packages/bs4-0.0.1.dist-info/WHEEL diff --git a/venv/lib/python3.7/site-packages/bs4-0.0.1.dist-info/top_level.txt b/venv.bak/lib/python3.7/site-packages/bs4-0.0.1.dist-info/top_level.txt similarity index 100% 
rename from venv/lib/python3.7/site-packages/bs4-0.0.1.dist-info/top_level.txt rename to venv.bak/lib/python3.7/site-packages/bs4-0.0.1.dist-info/top_level.txt diff --git a/venv/lib/python3.7/site-packages/bs4/__init__.py b/venv.bak/lib/python3.7/site-packages/bs4/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/__init__.py rename to venv.bak/lib/python3.7/site-packages/bs4/__init__.py diff --git a/venv/lib/python3.7/site-packages/bs4/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/bs4/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/__pycache__/dammit.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/__pycache__/dammit.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/__pycache__/dammit.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/bs4/__pycache__/dammit.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/__pycache__/diagnose.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/__pycache__/diagnose.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/__pycache__/diagnose.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/bs4/__pycache__/diagnose.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/__pycache__/element.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/__pycache__/element.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/__pycache__/element.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/bs4/__pycache__/element.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/__pycache__/formatter.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/__pycache__/formatter.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/__pycache__/formatter.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/bs4/__pycache__/formatter.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/__pycache__/testing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/__pycache__/testing.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/__pycache__/testing.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/bs4/__pycache__/testing.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/builder/__init__.py b/venv.bak/lib/python3.7/site-packages/bs4/builder/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/builder/__init__.py rename to venv.bak/lib/python3.7/site-packages/bs4/builder/__init__.py diff --git a/venv/lib/python3.7/site-packages/bs4/builder/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/builder/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/builder/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/bs4/builder/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/builder/__pycache__/_html5lib.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/builder/__pycache__/_html5lib.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/builder/__pycache__/_html5lib.cpython-37.pyc 
rename to venv.bak/lib/python3.7/site-packages/bs4/builder/__pycache__/_html5lib.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/builder/__pycache__/_htmlparser.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/builder/__pycache__/_htmlparser.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/builder/__pycache__/_htmlparser.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/bs4/builder/__pycache__/_htmlparser.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/builder/__pycache__/_lxml.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/builder/__pycache__/_lxml.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/builder/__pycache__/_lxml.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/bs4/builder/__pycache__/_lxml.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/builder/_html5lib.py b/venv.bak/lib/python3.7/site-packages/bs4/builder/_html5lib.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/builder/_html5lib.py rename to venv.bak/lib/python3.7/site-packages/bs4/builder/_html5lib.py diff --git a/venv/lib/python3.7/site-packages/bs4/builder/_htmlparser.py b/venv.bak/lib/python3.7/site-packages/bs4/builder/_htmlparser.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/builder/_htmlparser.py rename to venv.bak/lib/python3.7/site-packages/bs4/builder/_htmlparser.py diff --git a/venv/lib/python3.7/site-packages/bs4/builder/_lxml.py b/venv.bak/lib/python3.7/site-packages/bs4/builder/_lxml.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/builder/_lxml.py rename to venv.bak/lib/python3.7/site-packages/bs4/builder/_lxml.py diff --git a/venv/lib/python3.7/site-packages/bs4/dammit.py b/venv.bak/lib/python3.7/site-packages/bs4/dammit.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/dammit.py rename to venv.bak/lib/python3.7/site-packages/bs4/dammit.py diff --git a/venv/lib/python3.7/site-packages/bs4/diagnose.py b/venv.bak/lib/python3.7/site-packages/bs4/diagnose.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/diagnose.py rename to venv.bak/lib/python3.7/site-packages/bs4/diagnose.py diff --git a/venv/lib/python3.7/site-packages/bs4/element.py b/venv.bak/lib/python3.7/site-packages/bs4/element.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/element.py rename to venv.bak/lib/python3.7/site-packages/bs4/element.py diff --git a/venv/lib/python3.7/site-packages/bs4/formatter.py b/venv.bak/lib/python3.7/site-packages/bs4/formatter.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/formatter.py rename to venv.bak/lib/python3.7/site-packages/bs4/formatter.py diff --git a/venv/lib/python3.7/site-packages/bs4/testing.py b/venv.bak/lib/python3.7/site-packages/bs4/testing.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/testing.py rename to venv.bak/lib/python3.7/site-packages/bs4/testing.py diff --git a/venv/lib/python3.7/site-packages/bs4/tests/__init__.py b/venv.bak/lib/python3.7/site-packages/bs4/tests/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/tests/__init__.py rename to venv.bak/lib/python3.7/site-packages/bs4/tests/__init__.py diff --git a/venv/lib/python3.7/site-packages/bs4/tests/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/tests/__pycache__/__init__.cpython-37.pyc similarity index 
100% rename from venv/lib/python3.7/site-packages/bs4/tests/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/bs4/tests/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/tests/__pycache__/test_builder_registry.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/tests/__pycache__/test_builder_registry.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/tests/__pycache__/test_builder_registry.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/bs4/tests/__pycache__/test_builder_registry.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/tests/__pycache__/test_docs.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/tests/__pycache__/test_docs.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/tests/__pycache__/test_docs.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/bs4/tests/__pycache__/test_docs.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/tests/__pycache__/test_html5lib.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/tests/__pycache__/test_html5lib.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/tests/__pycache__/test_html5lib.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/bs4/tests/__pycache__/test_html5lib.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/tests/__pycache__/test_htmlparser.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/tests/__pycache__/test_htmlparser.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/tests/__pycache__/test_htmlparser.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/bs4/tests/__pycache__/test_htmlparser.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/tests/__pycache__/test_lxml.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/tests/__pycache__/test_lxml.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/tests/__pycache__/test_lxml.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/bs4/tests/__pycache__/test_lxml.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/tests/__pycache__/test_soup.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/tests/__pycache__/test_soup.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/tests/__pycache__/test_soup.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/bs4/tests/__pycache__/test_soup.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/tests/__pycache__/test_tree.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/bs4/tests/__pycache__/test_tree.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/tests/__pycache__/test_tree.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/bs4/tests/__pycache__/test_tree.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/bs4/tests/test_builder_registry.py b/venv.bak/lib/python3.7/site-packages/bs4/tests/test_builder_registry.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/tests/test_builder_registry.py rename to venv.bak/lib/python3.7/site-packages/bs4/tests/test_builder_registry.py diff --git a/venv/lib/python3.7/site-packages/bs4/tests/test_docs.py b/venv.bak/lib/python3.7/site-packages/bs4/tests/test_docs.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/tests/test_docs.py rename 
to venv.bak/lib/python3.7/site-packages/bs4/tests/test_docs.py diff --git a/venv/lib/python3.7/site-packages/bs4/tests/test_html5lib.py b/venv.bak/lib/python3.7/site-packages/bs4/tests/test_html5lib.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/tests/test_html5lib.py rename to venv.bak/lib/python3.7/site-packages/bs4/tests/test_html5lib.py diff --git a/venv/lib/python3.7/site-packages/bs4/tests/test_htmlparser.py b/venv.bak/lib/python3.7/site-packages/bs4/tests/test_htmlparser.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/tests/test_htmlparser.py rename to venv.bak/lib/python3.7/site-packages/bs4/tests/test_htmlparser.py diff --git a/venv/lib/python3.7/site-packages/bs4/tests/test_lxml.py b/venv.bak/lib/python3.7/site-packages/bs4/tests/test_lxml.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/tests/test_lxml.py rename to venv.bak/lib/python3.7/site-packages/bs4/tests/test_lxml.py diff --git a/venv/lib/python3.7/site-packages/bs4/tests/test_soup.py b/venv.bak/lib/python3.7/site-packages/bs4/tests/test_soup.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/tests/test_soup.py rename to venv.bak/lib/python3.7/site-packages/bs4/tests/test_soup.py diff --git a/venv/lib/python3.7/site-packages/bs4/tests/test_tree.py b/venv.bak/lib/python3.7/site-packages/bs4/tests/test_tree.py similarity index 100% rename from venv/lib/python3.7/site-packages/bs4/tests/test_tree.py rename to venv.bak/lib/python3.7/site-packages/bs4/tests/test_tree.py diff --git a/venv/lib/python3.7/site-packages/cycler-0.10.0.dist-info/DESCRIPTION.rst b/venv.bak/lib/python3.7/site-packages/cycler-0.10.0.dist-info/DESCRIPTION.rst similarity index 100% rename from venv/lib/python3.7/site-packages/cycler-0.10.0.dist-info/DESCRIPTION.rst rename to venv.bak/lib/python3.7/site-packages/cycler-0.10.0.dist-info/DESCRIPTION.rst diff --git a/venv/lib/python3.7/site-packages/cycler-0.10.0.dist-info/INSTALLER b/venv.bak/lib/python3.7/site-packages/cycler-0.10.0.dist-info/INSTALLER similarity index 100% rename from venv/lib/python3.7/site-packages/cycler-0.10.0.dist-info/INSTALLER rename to venv.bak/lib/python3.7/site-packages/cycler-0.10.0.dist-info/INSTALLER diff --git a/venv/lib/python3.7/site-packages/cycler-0.10.0.dist-info/METADATA b/venv.bak/lib/python3.7/site-packages/cycler-0.10.0.dist-info/METADATA similarity index 100% rename from venv/lib/python3.7/site-packages/cycler-0.10.0.dist-info/METADATA rename to venv.bak/lib/python3.7/site-packages/cycler-0.10.0.dist-info/METADATA diff --git a/venv/lib/python3.7/site-packages/cycler-0.10.0.dist-info/RECORD b/venv.bak/lib/python3.7/site-packages/cycler-0.10.0.dist-info/RECORD similarity index 100% rename from venv/lib/python3.7/site-packages/cycler-0.10.0.dist-info/RECORD rename to venv.bak/lib/python3.7/site-packages/cycler-0.10.0.dist-info/RECORD diff --git a/venv/lib/python3.7/site-packages/cycler-0.10.0.dist-info/WHEEL b/venv.bak/lib/python3.7/site-packages/cycler-0.10.0.dist-info/WHEEL similarity index 100% rename from venv/lib/python3.7/site-packages/cycler-0.10.0.dist-info/WHEEL rename to venv.bak/lib/python3.7/site-packages/cycler-0.10.0.dist-info/WHEEL diff --git a/venv/lib/python3.7/site-packages/cycler-0.10.0.dist-info/metadata.json b/venv.bak/lib/python3.7/site-packages/cycler-0.10.0.dist-info/metadata.json similarity index 100% rename from venv/lib/python3.7/site-packages/cycler-0.10.0.dist-info/metadata.json rename to 
venv.bak/lib/python3.7/site-packages/cycler-0.10.0.dist-info/metadata.json diff --git a/venv/lib/python3.7/site-packages/cycler-0.10.0.dist-info/top_level.txt b/venv.bak/lib/python3.7/site-packages/cycler-0.10.0.dist-info/top_level.txt similarity index 100% rename from venv/lib/python3.7/site-packages/cycler-0.10.0.dist-info/top_level.txt rename to venv.bak/lib/python3.7/site-packages/cycler-0.10.0.dist-info/top_level.txt diff --git a/venv/lib/python3.7/site-packages/cycler.py b/venv.bak/lib/python3.7/site-packages/cycler.py similarity index 100% rename from venv/lib/python3.7/site-packages/cycler.py rename to venv.bak/lib/python3.7/site-packages/cycler.py diff --git a/venv/lib/python3.7/site-packages/dateutil/__init__.py b/venv.bak/lib/python3.7/site-packages/dateutil/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/__init__.py rename to venv.bak/lib/python3.7/site-packages/dateutil/__init__.py diff --git a/venv/lib/python3.7/site-packages/dateutil/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/dateutil/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/__pycache__/_common.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/__pycache__/_common.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/__pycache__/_common.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/dateutil/__pycache__/_common.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/__pycache__/_version.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/__pycache__/_version.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/__pycache__/_version.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/dateutil/__pycache__/_version.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/__pycache__/easter.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/__pycache__/easter.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/__pycache__/easter.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/dateutil/__pycache__/easter.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/__pycache__/relativedelta.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/__pycache__/relativedelta.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/__pycache__/relativedelta.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/dateutil/__pycache__/relativedelta.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/__pycache__/rrule.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/__pycache__/rrule.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/__pycache__/rrule.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/dateutil/__pycache__/rrule.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/__pycache__/tzwin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/__pycache__/tzwin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/__pycache__/tzwin.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/dateutil/__pycache__/tzwin.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/__pycache__/utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/__pycache__/utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/__pycache__/utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/dateutil/__pycache__/utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/_common.py b/venv.bak/lib/python3.7/site-packages/dateutil/_common.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/_common.py rename to venv.bak/lib/python3.7/site-packages/dateutil/_common.py diff --git a/venv/lib/python3.7/site-packages/dateutil/_version.py b/venv.bak/lib/python3.7/site-packages/dateutil/_version.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/_version.py rename to venv.bak/lib/python3.7/site-packages/dateutil/_version.py diff --git a/venv/lib/python3.7/site-packages/dateutil/easter.py b/venv.bak/lib/python3.7/site-packages/dateutil/easter.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/easter.py rename to venv.bak/lib/python3.7/site-packages/dateutil/easter.py diff --git a/venv/lib/python3.7/site-packages/dateutil/parser/__init__.py b/venv.bak/lib/python3.7/site-packages/dateutil/parser/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/parser/__init__.py rename to venv.bak/lib/python3.7/site-packages/dateutil/parser/__init__.py diff --git a/venv/lib/python3.7/site-packages/dateutil/parser/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/parser/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/parser/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/dateutil/parser/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/parser/__pycache__/_parser.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/parser/__pycache__/_parser.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/parser/__pycache__/_parser.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/dateutil/parser/__pycache__/_parser.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/parser/__pycache__/isoparser.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/parser/__pycache__/isoparser.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/parser/__pycache__/isoparser.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/dateutil/parser/__pycache__/isoparser.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/parser/_parser.py b/venv.bak/lib/python3.7/site-packages/dateutil/parser/_parser.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/parser/_parser.py rename to venv.bak/lib/python3.7/site-packages/dateutil/parser/_parser.py diff --git a/venv/lib/python3.7/site-packages/dateutil/parser/isoparser.py b/venv.bak/lib/python3.7/site-packages/dateutil/parser/isoparser.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/parser/isoparser.py rename to venv.bak/lib/python3.7/site-packages/dateutil/parser/isoparser.py diff --git a/venv/lib/python3.7/site-packages/dateutil/relativedelta.py 
b/venv.bak/lib/python3.7/site-packages/dateutil/relativedelta.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/relativedelta.py rename to venv.bak/lib/python3.7/site-packages/dateutil/relativedelta.py diff --git a/venv/lib/python3.7/site-packages/dateutil/rrule.py b/venv.bak/lib/python3.7/site-packages/dateutil/rrule.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/rrule.py rename to venv.bak/lib/python3.7/site-packages/dateutil/rrule.py diff --git a/venv/lib/python3.7/site-packages/dateutil/tz/__init__.py b/venv.bak/lib/python3.7/site-packages/dateutil/tz/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/tz/__init__.py rename to venv.bak/lib/python3.7/site-packages/dateutil/tz/__init__.py diff --git a/venv/lib/python3.7/site-packages/dateutil/tz/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/tz/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/tz/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/dateutil/tz/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/tz/__pycache__/_common.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/tz/__pycache__/_common.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/tz/__pycache__/_common.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/dateutil/tz/__pycache__/_common.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/tz/__pycache__/_factories.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/tz/__pycache__/_factories.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/tz/__pycache__/_factories.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/dateutil/tz/__pycache__/_factories.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/tz/__pycache__/tz.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/tz/__pycache__/tz.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/tz/__pycache__/tz.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/dateutil/tz/__pycache__/tz.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/tz/__pycache__/win.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/tz/__pycache__/win.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/tz/__pycache__/win.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/dateutil/tz/__pycache__/win.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/tz/_common.py b/venv.bak/lib/python3.7/site-packages/dateutil/tz/_common.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/tz/_common.py rename to venv.bak/lib/python3.7/site-packages/dateutil/tz/_common.py diff --git a/venv/lib/python3.7/site-packages/dateutil/tz/_factories.py b/venv.bak/lib/python3.7/site-packages/dateutil/tz/_factories.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/tz/_factories.py rename to venv.bak/lib/python3.7/site-packages/dateutil/tz/_factories.py diff --git a/venv/lib/python3.7/site-packages/dateutil/tz/tz.py b/venv.bak/lib/python3.7/site-packages/dateutil/tz/tz.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/tz/tz.py rename to 
venv.bak/lib/python3.7/site-packages/dateutil/tz/tz.py diff --git a/venv/lib/python3.7/site-packages/dateutil/tz/win.py b/venv.bak/lib/python3.7/site-packages/dateutil/tz/win.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/tz/win.py rename to venv.bak/lib/python3.7/site-packages/dateutil/tz/win.py diff --git a/venv/lib/python3.7/site-packages/dateutil/tzwin.py b/venv.bak/lib/python3.7/site-packages/dateutil/tzwin.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/tzwin.py rename to venv.bak/lib/python3.7/site-packages/dateutil/tzwin.py diff --git a/venv/lib/python3.7/site-packages/dateutil/utils.py b/venv.bak/lib/python3.7/site-packages/dateutil/utils.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/utils.py rename to venv.bak/lib/python3.7/site-packages/dateutil/utils.py diff --git a/venv/lib/python3.7/site-packages/dateutil/zoneinfo/__init__.py b/venv.bak/lib/python3.7/site-packages/dateutil/zoneinfo/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/zoneinfo/__init__.py rename to venv.bak/lib/python3.7/site-packages/dateutil/zoneinfo/__init__.py diff --git a/venv/lib/python3.7/site-packages/dateutil/zoneinfo/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/zoneinfo/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/zoneinfo/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/dateutil/zoneinfo/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/zoneinfo/__pycache__/rebuild.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/dateutil/zoneinfo/__pycache__/rebuild.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/zoneinfo/__pycache__/rebuild.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/dateutil/zoneinfo/__pycache__/rebuild.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/dateutil/zoneinfo/dateutil-zoneinfo.tar.gz b/venv.bak/lib/python3.7/site-packages/dateutil/zoneinfo/dateutil-zoneinfo.tar.gz similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/zoneinfo/dateutil-zoneinfo.tar.gz rename to venv.bak/lib/python3.7/site-packages/dateutil/zoneinfo/dateutil-zoneinfo.tar.gz diff --git a/venv/lib/python3.7/site-packages/dateutil/zoneinfo/rebuild.py b/venv.bak/lib/python3.7/site-packages/dateutil/zoneinfo/rebuild.py similarity index 100% rename from venv/lib/python3.7/site-packages/dateutil/zoneinfo/rebuild.py rename to venv.bak/lib/python3.7/site-packages/dateutil/zoneinfo/rebuild.py diff --git a/venv/lib/python3.7/site-packages/original/easy_install.py b/venv.bak/lib/python3.7/site-packages/easy_install.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/easy_install.py rename to venv.bak/lib/python3.7/site-packages/easy_install.py diff --git a/venv/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/DESCRIPTION.rst b/venv.bak/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/DESCRIPTION.rst similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/DESCRIPTION.rst rename to venv.bak/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/DESCRIPTION.rst diff --git a/venv/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/INSTALLER b/venv.bak/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/INSTALLER similarity index 100% rename from 
venv/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/INSTALLER rename to venv.bak/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/INSTALLER diff --git a/venv/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/LICENSE.txt b/venv.bak/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/LICENSE.txt similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/LICENSE.txt rename to venv.bak/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/LICENSE.txt diff --git a/venv/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/METADATA b/venv.bak/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/METADATA similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/METADATA rename to venv.bak/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/METADATA diff --git a/venv/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/RECORD b/venv.bak/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/RECORD similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/RECORD rename to venv.bak/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/RECORD diff --git a/venv/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/WHEEL b/venv.bak/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/WHEEL similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/WHEEL rename to venv.bak/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/WHEEL diff --git a/venv/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/metadata.json b/venv.bak/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/metadata.json similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/metadata.json rename to venv.bak/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/metadata.json diff --git a/venv/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/top_level.txt b/venv.bak/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/top_level.txt similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/top_level.txt rename to venv.bak/lib/python3.7/site-packages/html5lib-1.0.1.dist-info/top_level.txt diff --git a/venv/lib/python3.7/site-packages/html5lib/__init__.py b/venv.bak/lib/python3.7/site-packages/html5lib/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/__init__.py rename to venv.bak/lib/python3.7/site-packages/html5lib/__init__.py diff --git a/venv/lib/python3.7/site-packages/html5lib/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/__pycache__/_ihatexml.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/__pycache__/_ihatexml.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/__pycache__/_ihatexml.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/__pycache__/_ihatexml.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/__pycache__/_inputstream.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/__pycache__/_inputstream.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/__pycache__/_inputstream.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/html5lib/__pycache__/_inputstream.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/__pycache__/_tokenizer.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/__pycache__/_tokenizer.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/__pycache__/_tokenizer.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/__pycache__/_tokenizer.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/__pycache__/_utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/__pycache__/_utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/__pycache__/_utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/__pycache__/_utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/__pycache__/constants.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/__pycache__/constants.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/__pycache__/constants.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/__pycache__/constants.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/__pycache__/html5parser.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/__pycache__/html5parser.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/__pycache__/html5parser.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/__pycache__/html5parser.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/__pycache__/serializer.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/__pycache__/serializer.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/__pycache__/serializer.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/__pycache__/serializer.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/_ihatexml.py b/venv.bak/lib/python3.7/site-packages/html5lib/_ihatexml.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/_ihatexml.py rename to venv.bak/lib/python3.7/site-packages/html5lib/_ihatexml.py diff --git a/venv/lib/python3.7/site-packages/html5lib/_inputstream.py b/venv.bak/lib/python3.7/site-packages/html5lib/_inputstream.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/_inputstream.py rename to venv.bak/lib/python3.7/site-packages/html5lib/_inputstream.py diff --git a/venv/lib/python3.7/site-packages/html5lib/_tokenizer.py b/venv.bak/lib/python3.7/site-packages/html5lib/_tokenizer.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/_tokenizer.py rename to venv.bak/lib/python3.7/site-packages/html5lib/_tokenizer.py diff --git a/venv/lib/python3.7/site-packages/html5lib/_trie/__init__.py b/venv.bak/lib/python3.7/site-packages/html5lib/_trie/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/_trie/__init__.py rename to venv.bak/lib/python3.7/site-packages/html5lib/_trie/__init__.py diff --git a/venv/lib/python3.7/site-packages/html5lib/_trie/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/_trie/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/_trie/__pycache__/__init__.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/html5lib/_trie/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/_trie/__pycache__/_base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/_trie/__pycache__/_base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/_trie/__pycache__/_base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/_trie/__pycache__/_base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/_trie/__pycache__/datrie.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/_trie/__pycache__/datrie.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/_trie/__pycache__/datrie.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/_trie/__pycache__/datrie.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/_trie/__pycache__/py.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/_trie/__pycache__/py.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/_trie/__pycache__/py.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/_trie/__pycache__/py.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/_trie/_base.py b/venv.bak/lib/python3.7/site-packages/html5lib/_trie/_base.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/_trie/_base.py rename to venv.bak/lib/python3.7/site-packages/html5lib/_trie/_base.py diff --git a/venv/lib/python3.7/site-packages/html5lib/_trie/datrie.py b/venv.bak/lib/python3.7/site-packages/html5lib/_trie/datrie.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/_trie/datrie.py rename to venv.bak/lib/python3.7/site-packages/html5lib/_trie/datrie.py diff --git a/venv/lib/python3.7/site-packages/html5lib/_trie/py.py b/venv.bak/lib/python3.7/site-packages/html5lib/_trie/py.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/_trie/py.py rename to venv.bak/lib/python3.7/site-packages/html5lib/_trie/py.py diff --git a/venv/lib/python3.7/site-packages/html5lib/_utils.py b/venv.bak/lib/python3.7/site-packages/html5lib/_utils.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/_utils.py rename to venv.bak/lib/python3.7/site-packages/html5lib/_utils.py diff --git a/venv/lib/python3.7/site-packages/html5lib/constants.py b/venv.bak/lib/python3.7/site-packages/html5lib/constants.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/constants.py rename to venv.bak/lib/python3.7/site-packages/html5lib/constants.py diff --git a/venv/lib/python3.7/site-packages/html5lib/filters/__init__.py b/venv.bak/lib/python3.7/site-packages/html5lib/filters/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/filters/__init__.py rename to venv.bak/lib/python3.7/site-packages/html5lib/filters/__init__.py diff --git a/venv/lib/python3.7/site-packages/html5lib/filters/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/filters/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/filters/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/filters/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/filters/__pycache__/alphabeticalattributes.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/html5lib/filters/__pycache__/alphabeticalattributes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/filters/__pycache__/alphabeticalattributes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/filters/__pycache__/alphabeticalattributes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/filters/__pycache__/base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/filters/__pycache__/base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/filters/__pycache__/base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/filters/__pycache__/base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/filters/__pycache__/inject_meta_charset.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/filters/__pycache__/inject_meta_charset.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/filters/__pycache__/inject_meta_charset.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/filters/__pycache__/inject_meta_charset.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/filters/__pycache__/lint.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/filters/__pycache__/lint.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/filters/__pycache__/lint.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/filters/__pycache__/lint.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/filters/__pycache__/optionaltags.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/filters/__pycache__/optionaltags.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/filters/__pycache__/optionaltags.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/filters/__pycache__/optionaltags.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/filters/__pycache__/sanitizer.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/filters/__pycache__/sanitizer.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/filters/__pycache__/sanitizer.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/filters/__pycache__/sanitizer.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/filters/__pycache__/whitespace.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/filters/__pycache__/whitespace.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/filters/__pycache__/whitespace.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/filters/__pycache__/whitespace.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/filters/alphabeticalattributes.py b/venv.bak/lib/python3.7/site-packages/html5lib/filters/alphabeticalattributes.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/filters/alphabeticalattributes.py rename to venv.bak/lib/python3.7/site-packages/html5lib/filters/alphabeticalattributes.py diff --git a/venv/lib/python3.7/site-packages/html5lib/filters/base.py b/venv.bak/lib/python3.7/site-packages/html5lib/filters/base.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/filters/base.py rename to venv.bak/lib/python3.7/site-packages/html5lib/filters/base.py diff --git 
a/venv/lib/python3.7/site-packages/html5lib/filters/inject_meta_charset.py b/venv.bak/lib/python3.7/site-packages/html5lib/filters/inject_meta_charset.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/filters/inject_meta_charset.py rename to venv.bak/lib/python3.7/site-packages/html5lib/filters/inject_meta_charset.py diff --git a/venv/lib/python3.7/site-packages/html5lib/filters/lint.py b/venv.bak/lib/python3.7/site-packages/html5lib/filters/lint.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/filters/lint.py rename to venv.bak/lib/python3.7/site-packages/html5lib/filters/lint.py diff --git a/venv/lib/python3.7/site-packages/html5lib/filters/optionaltags.py b/venv.bak/lib/python3.7/site-packages/html5lib/filters/optionaltags.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/filters/optionaltags.py rename to venv.bak/lib/python3.7/site-packages/html5lib/filters/optionaltags.py diff --git a/venv/lib/python3.7/site-packages/html5lib/filters/sanitizer.py b/venv.bak/lib/python3.7/site-packages/html5lib/filters/sanitizer.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/filters/sanitizer.py rename to venv.bak/lib/python3.7/site-packages/html5lib/filters/sanitizer.py diff --git a/venv/lib/python3.7/site-packages/html5lib/filters/whitespace.py b/venv.bak/lib/python3.7/site-packages/html5lib/filters/whitespace.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/filters/whitespace.py rename to venv.bak/lib/python3.7/site-packages/html5lib/filters/whitespace.py diff --git a/venv/lib/python3.7/site-packages/html5lib/html5parser.py b/venv.bak/lib/python3.7/site-packages/html5lib/html5parser.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/html5parser.py rename to venv.bak/lib/python3.7/site-packages/html5lib/html5parser.py diff --git a/venv/lib/python3.7/site-packages/html5lib/serializer.py b/venv.bak/lib/python3.7/site-packages/html5lib/serializer.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/serializer.py rename to venv.bak/lib/python3.7/site-packages/html5lib/serializer.py diff --git a/venv/lib/python3.7/site-packages/html5lib/treeadapters/__init__.py b/venv.bak/lib/python3.7/site-packages/html5lib/treeadapters/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treeadapters/__init__.py rename to venv.bak/lib/python3.7/site-packages/html5lib/treeadapters/__init__.py diff --git a/venv/lib/python3.7/site-packages/html5lib/treeadapters/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/treeadapters/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treeadapters/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/treeadapters/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/treeadapters/__pycache__/genshi.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/treeadapters/__pycache__/genshi.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treeadapters/__pycache__/genshi.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/treeadapters/__pycache__/genshi.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/treeadapters/__pycache__/sax.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/html5lib/treeadapters/__pycache__/sax.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treeadapters/__pycache__/sax.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/treeadapters/__pycache__/sax.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/treeadapters/genshi.py b/venv.bak/lib/python3.7/site-packages/html5lib/treeadapters/genshi.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treeadapters/genshi.py rename to venv.bak/lib/python3.7/site-packages/html5lib/treeadapters/genshi.py diff --git a/venv/lib/python3.7/site-packages/html5lib/treeadapters/sax.py b/venv.bak/lib/python3.7/site-packages/html5lib/treeadapters/sax.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treeadapters/sax.py rename to venv.bak/lib/python3.7/site-packages/html5lib/treeadapters/sax.py diff --git a/venv/lib/python3.7/site-packages/html5lib/treebuilders/__init__.py b/venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treebuilders/__init__.py rename to venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/__init__.py diff --git a/venv/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/dom.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/dom.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/dom.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/dom.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/etree.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/etree.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/etree.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/etree.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/etree_lxml.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/etree_lxml.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/etree_lxml.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/__pycache__/etree_lxml.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/treebuilders/base.py b/venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/base.py 
similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treebuilders/base.py rename to venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/base.py diff --git a/venv/lib/python3.7/site-packages/html5lib/treebuilders/dom.py b/venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/dom.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treebuilders/dom.py rename to venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/dom.py diff --git a/venv/lib/python3.7/site-packages/html5lib/treebuilders/etree.py b/venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/etree.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treebuilders/etree.py rename to venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/etree.py diff --git a/venv/lib/python3.7/site-packages/html5lib/treebuilders/etree_lxml.py b/venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/etree_lxml.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treebuilders/etree_lxml.py rename to venv.bak/lib/python3.7/site-packages/html5lib/treebuilders/etree_lxml.py diff --git a/venv/lib/python3.7/site-packages/html5lib/treewalkers/__init__.py b/venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treewalkers/__init__.py rename to venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/__init__.py diff --git a/venv/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/dom.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/dom.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/dom.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/dom.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/etree.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/etree.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/etree.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/etree.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/etree_lxml.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/etree_lxml.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/etree_lxml.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/etree_lxml.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/genshi.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/genshi.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/genshi.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/__pycache__/genshi.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/html5lib/treewalkers/base.py b/venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/base.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treewalkers/base.py rename to venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/base.py diff --git a/venv/lib/python3.7/site-packages/html5lib/treewalkers/dom.py b/venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/dom.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treewalkers/dom.py rename to venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/dom.py diff --git a/venv/lib/python3.7/site-packages/html5lib/treewalkers/etree.py b/venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/etree.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treewalkers/etree.py rename to venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/etree.py diff --git a/venv/lib/python3.7/site-packages/html5lib/treewalkers/etree_lxml.py b/venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/etree_lxml.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treewalkers/etree_lxml.py rename to venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/etree_lxml.py diff --git a/venv/lib/python3.7/site-packages/html5lib/treewalkers/genshi.py b/venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/genshi.py similarity index 100% rename from venv/lib/python3.7/site-packages/html5lib/treewalkers/genshi.py rename to venv.bak/lib/python3.7/site-packages/html5lib/treewalkers/genshi.py diff --git a/venv/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/INSTALLER b/venv.bak/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/INSTALLER similarity index 100% rename from venv/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/INSTALLER rename to venv.bak/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/INSTALLER diff --git a/venv/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/LICENSE b/venv.bak/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/LICENSE similarity index 100% rename from venv/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/LICENSE rename to venv.bak/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/LICENSE diff --git a/venv/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/METADATA b/venv.bak/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/METADATA similarity index 100% rename from venv/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/METADATA rename to venv.bak/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/METADATA diff --git a/venv/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/RECORD b/venv.bak/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/RECORD similarity index 100% rename from venv/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/RECORD rename to venv.bak/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/RECORD diff --git 
a/venv/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/WHEEL b/venv.bak/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/WHEEL similarity index 100% rename from venv/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/WHEEL rename to venv.bak/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/WHEEL diff --git a/venv/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/top_level.txt b/venv.bak/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/top_level.txt similarity index 100% rename from venv/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/top_level.txt rename to venv.bak/lib/python3.7/site-packages/kiwisolver-1.2.0.dist-info/top_level.txt diff --git a/venv/lib/python3.7/site-packages/kiwisolver.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/kiwisolver.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/kiwisolver.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/kiwisolver.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/matplotlib-3.2.1-py3.7-nspkg.pth b/venv.bak/lib/python3.7/site-packages/matplotlib-3.2.1-py3.7-nspkg.pth similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib-3.2.1-py3.7-nspkg.pth rename to venv.bak/lib/python3.7/site-packages/matplotlib-3.2.1-py3.7-nspkg.pth diff --git a/venv/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/INSTALLER b/venv.bak/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/INSTALLER similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/INSTALLER rename to venv.bak/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/INSTALLER diff --git a/venv/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/METADATA b/venv.bak/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/METADATA similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/METADATA rename to venv.bak/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/METADATA diff --git a/venv/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/RECORD b/venv.bak/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/RECORD similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/RECORD rename to venv.bak/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/RECORD diff --git a/venv/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/WHEEL b/venv.bak/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/WHEEL similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/WHEEL rename to venv.bak/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/WHEEL diff --git a/venv/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/namespace_packages.txt b/venv.bak/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/namespace_packages.txt similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/namespace_packages.txt rename to venv.bak/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/namespace_packages.txt diff --git a/venv/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/top_level.txt b/venv.bak/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/top_level.txt similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/top_level.txt rename to venv.bak/lib/python3.7/site-packages/matplotlib-3.2.1.dist-info/top_level.txt diff --git a/venv/lib/python3.7/site-packages/matplotlib/.dylibs/libpng16.16.dylib 
b/venv.bak/lib/python3.7/site-packages/matplotlib/.dylibs/libpng16.16.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/.dylibs/libpng16.16.dylib rename to venv.bak/lib/python3.7/site-packages/matplotlib/.dylibs/libpng16.16.dylib diff --git a/venv/lib/python3.7/site-packages/matplotlib/.dylibs/libz.1.2.10.dylib b/venv.bak/lib/python3.7/site-packages/matplotlib/.dylibs/libz.1.2.10.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/.dylibs/libz.1.2.10.dylib rename to venv.bak/lib/python3.7/site-packages/matplotlib/.dylibs/libz.1.2.10.dylib diff --git a/venv/lib/python3.7/site-packages/matplotlib/__init__.py b/venv.bak/lib/python3.7/site-packages/matplotlib/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__init__.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/__init__.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/_animation_data.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_animation_data.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/_animation_data.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_animation_data.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/_cm.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_cm.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/_cm.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_cm.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/_cm_listed.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_cm_listed.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/_cm_listed.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_cm_listed.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/_color_data.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_color_data.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/_color_data.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_color_data.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/_constrained_layout.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_constrained_layout.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/_constrained_layout.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_constrained_layout.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/_layoutbox.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_layoutbox.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/__pycache__/_layoutbox.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_layoutbox.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/_mathtext_data.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_mathtext_data.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/_mathtext_data.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_mathtext_data.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/_pylab_helpers.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_pylab_helpers.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/_pylab_helpers.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_pylab_helpers.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/_text_layout.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_text_layout.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/_text_layout.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_text_layout.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/_version.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_version.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/_version.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/_version.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/afm.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/afm.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/afm.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/afm.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/animation.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/animation.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/animation.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/animation.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/artist.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/artist.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/artist.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/artist.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/axis.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/axis.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/axis.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/axis.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/backend_bases.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/backend_bases.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/__pycache__/backend_bases.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/backend_bases.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/backend_managers.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/backend_managers.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/backend_managers.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/backend_managers.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/backend_tools.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/backend_tools.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/backend_tools.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/backend_tools.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/bezier.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/bezier.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/bezier.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/bezier.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/blocking_input.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/blocking_input.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/blocking_input.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/blocking_input.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/category.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/category.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/category.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/category.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/cm.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/cm.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/cm.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/cm.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/collections.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/collections.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/collections.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/collections.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/colorbar.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/colorbar.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/colorbar.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/colorbar.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/colors.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/colors.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/__pycache__/colors.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/colors.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/container.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/container.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/container.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/container.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/contour.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/contour.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/contour.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/contour.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/dates.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/dates.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/dates.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/dates.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/docstring.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/docstring.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/docstring.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/docstring.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/dviread.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/dviread.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/dviread.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/dviread.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/figure.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/figure.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/figure.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/figure.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/font_manager.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/font_manager.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/font_manager.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/font_manager.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/fontconfig_pattern.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/fontconfig_pattern.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/fontconfig_pattern.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/fontconfig_pattern.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/gridspec.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/gridspec.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/__pycache__/gridspec.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/gridspec.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/hatch.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/hatch.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/hatch.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/hatch.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/image.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/image.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/image.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/image.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/legend.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/legend.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/legend.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/legend.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/legend_handler.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/legend_handler.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/legend_handler.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/legend_handler.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/lines.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/lines.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/lines.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/lines.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/markers.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/markers.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/markers.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/markers.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/mathtext.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/mathtext.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/mathtext.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/mathtext.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/mlab.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/mlab.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/mlab.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/mlab.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/offsetbox.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/offsetbox.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/offsetbox.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/offsetbox.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/patches.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/patches.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/patches.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/patches.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/path.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/path.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/path.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/path.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/patheffects.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/patheffects.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/patheffects.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/patheffects.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/pylab.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/pylab.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/pylab.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/pylab.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/pyplot.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/pyplot.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/pyplot.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/pyplot.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/quiver.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/quiver.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/quiver.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/quiver.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/rcsetup.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/rcsetup.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/rcsetup.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/rcsetup.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/sankey.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/sankey.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/sankey.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/sankey.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/scale.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/scale.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/scale.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/scale.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/spines.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/spines.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/spines.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/spines.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/stackplot.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/stackplot.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/stackplot.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/stackplot.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/streamplot.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/streamplot.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/streamplot.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/streamplot.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/table.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/table.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/table.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/table.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/texmanager.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/texmanager.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/texmanager.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/texmanager.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/text.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/text.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/text.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/text.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/textpath.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/textpath.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/textpath.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/textpath.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/ticker.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/ticker.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/ticker.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/ticker.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/tight_bbox.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/tight_bbox.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/tight_bbox.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/tight_bbox.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/tight_layout.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/tight_layout.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/tight_layout.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/tight_layout.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/transforms.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/transforms.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/transforms.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/transforms.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/type1font.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/type1font.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/type1font.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/type1font.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/units.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/units.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/units.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/units.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/__pycache__/widgets.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/widgets.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/__pycache__/widgets.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/__pycache__/widgets.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/_animation_data.py b/venv.bak/lib/python3.7/site-packages/matplotlib/_animation_data.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/_animation_data.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/_animation_data.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/_cm.py b/venv.bak/lib/python3.7/site-packages/matplotlib/_cm.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/_cm.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/_cm.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/_cm_listed.py b/venv.bak/lib/python3.7/site-packages/matplotlib/_cm_listed.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/_cm_listed.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/_cm_listed.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/_color_data.py b/venv.bak/lib/python3.7/site-packages/matplotlib/_color_data.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/_color_data.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/_color_data.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/_constrained_layout.py b/venv.bak/lib/python3.7/site-packages/matplotlib/_constrained_layout.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/_constrained_layout.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/_constrained_layout.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/_contour.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/matplotlib/_contour.cpython-37m-darwin.so 
similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/_contour.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/matplotlib/_contour.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/matplotlib/_image.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/matplotlib/_image.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/_image.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/matplotlib/_image.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/matplotlib/_layoutbox.py b/venv.bak/lib/python3.7/site-packages/matplotlib/_layoutbox.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/_layoutbox.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/_layoutbox.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/_mathtext_data.py b/venv.bak/lib/python3.7/site-packages/matplotlib/_mathtext_data.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/_mathtext_data.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/_mathtext_data.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/_path.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/matplotlib/_path.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/_path.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/matplotlib/_path.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/matplotlib/_png.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/matplotlib/_png.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/_png.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/matplotlib/_png.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/matplotlib/_pylab_helpers.py b/venv.bak/lib/python3.7/site-packages/matplotlib/_pylab_helpers.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/_pylab_helpers.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/_pylab_helpers.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/_qhull.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/matplotlib/_qhull.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/_qhull.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/matplotlib/_qhull.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/matplotlib/_text_layout.py b/venv.bak/lib/python3.7/site-packages/matplotlib/_text_layout.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/_text_layout.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/_text_layout.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/_tri.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/matplotlib/_tri.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/_tri.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/matplotlib/_tri.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/matplotlib/_version.py b/venv.bak/lib/python3.7/site-packages/matplotlib/_version.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/_version.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/_version.py diff --git 
a/venv/lib/python3.7/site-packages/matplotlib/afm.py b/venv.bak/lib/python3.7/site-packages/matplotlib/afm.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/afm.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/afm.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/animation.py b/venv.bak/lib/python3.7/site-packages/matplotlib/animation.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/animation.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/animation.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/artist.py b/venv.bak/lib/python3.7/site-packages/matplotlib/artist.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/artist.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/artist.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/axes/__init__.py b/venv.bak/lib/python3.7/site-packages/matplotlib/axes/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/axes/__init__.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/axes/__init__.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/axes/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/axes/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/axes/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/axes/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/axes/__pycache__/_axes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/axes/__pycache__/_axes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/axes/__pycache__/_axes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/axes/__pycache__/_axes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/axes/__pycache__/_base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/axes/__pycache__/_base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/axes/__pycache__/_base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/axes/__pycache__/_base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/axes/__pycache__/_secondary_axes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/axes/__pycache__/_secondary_axes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/axes/__pycache__/_secondary_axes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/axes/__pycache__/_secondary_axes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/axes/__pycache__/_subplots.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/axes/__pycache__/_subplots.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/axes/__pycache__/_subplots.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/axes/__pycache__/_subplots.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/axes/_axes.py b/venv.bak/lib/python3.7/site-packages/matplotlib/axes/_axes.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/axes/_axes.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/axes/_axes.py diff --git 
a/venv/lib/python3.7/site-packages/matplotlib/axes/_base.py b/venv.bak/lib/python3.7/site-packages/matplotlib/axes/_base.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/axes/_base.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/axes/_base.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/axes/_secondary_axes.py b/venv.bak/lib/python3.7/site-packages/matplotlib/axes/_secondary_axes.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/axes/_secondary_axes.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/axes/_secondary_axes.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/axes/_subplots.py b/venv.bak/lib/python3.7/site-packages/matplotlib/axes/_subplots.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/axes/_subplots.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/axes/_subplots.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/axis.py b/venv.bak/lib/python3.7/site-packages/matplotlib/axis.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/axis.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/axis.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backend_bases.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backend_bases.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backend_bases.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backend_bases.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backend_managers.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backend_managers.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backend_managers.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backend_managers.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backend_tools.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backend_tools.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backend_tools.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backend_tools.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__init__.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__init__.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__init__.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/_backend_pdf_ps.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/_backend_pdf_ps.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/_backend_pdf_ps.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/_backend_pdf_ps.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/_backend_tk.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/_backend_tk.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/_backend_tk.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/_backend_tk.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_agg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_agg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_agg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_agg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_cairo.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_cairo.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_cairo.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_cairo.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_gtk3.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_gtk3.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_gtk3.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_gtk3.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_gtk3agg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_gtk3agg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_gtk3agg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_gtk3agg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_gtk3cairo.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_gtk3cairo.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_gtk3cairo.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_gtk3cairo.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_macosx.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_macosx.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_macosx.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_macosx.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_mixed.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_mixed.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_mixed.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_mixed.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_nbagg.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_nbagg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_nbagg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_nbagg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_pdf.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_pdf.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_pdf.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_pdf.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_pgf.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_pgf.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_pgf.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_pgf.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_ps.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_ps.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_ps.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_ps.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt4.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt4.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt4.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt4.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt4agg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt4agg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt4agg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt4agg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt4cairo.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt4cairo.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt4cairo.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt4cairo.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt5.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt5.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt5.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt5.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt5agg.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt5agg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt5agg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt5agg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt5cairo.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt5cairo.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt5cairo.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_qt5cairo.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_svg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_svg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_svg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_svg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_template.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_template.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_template.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_template.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_tkagg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_tkagg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_tkagg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_tkagg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_tkcairo.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_tkcairo.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_tkcairo.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_tkcairo.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_webagg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_webagg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_webagg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_webagg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_webagg_core.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_webagg_core.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_webagg_core.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_webagg_core.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_wx.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_wx.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_wx.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_wx.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_wxagg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_wxagg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_wxagg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_wxagg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_wxcairo.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_wxcairo.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_wxcairo.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/backend_wxcairo.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/qt_compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/qt_compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/__pycache__/qt_compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/__pycache__/qt_compat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/_backend_agg.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/_backend_agg.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/_backend_agg.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/_backend_agg.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/_backend_pdf_ps.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/_backend_pdf_ps.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/_backend_pdf_ps.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/_backend_pdf_ps.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/_backend_tk.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/_backend_tk.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/_backend_tk.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/_backend_tk.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/_macosx.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/_macosx.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/_macosx.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/_macosx.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/_tkagg.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/_tkagg.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/_tkagg.cpython-37m-darwin.so 
rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/_tkagg.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_agg.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_agg.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_agg.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_agg.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_cairo.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_cairo.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_cairo.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_cairo.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_gtk3.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_gtk3.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_gtk3.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_gtk3.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_gtk3agg.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_gtk3agg.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_gtk3agg.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_gtk3agg.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_gtk3cairo.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_gtk3cairo.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_gtk3cairo.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_gtk3cairo.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_macosx.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_macosx.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_macosx.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_macosx.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_mixed.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_mixed.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_mixed.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_mixed.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_nbagg.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_nbagg.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_nbagg.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_nbagg.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_pdf.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_pdf.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_pdf.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_pdf.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_pgf.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_pgf.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_pgf.py rename to 
venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_pgf.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_ps.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_ps.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_ps.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_ps.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_qt4.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_qt4.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_qt4.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_qt4.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_qt4agg.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_qt4agg.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_qt4agg.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_qt4agg.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_qt4cairo.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_qt4cairo.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_qt4cairo.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_qt4cairo.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_qt5.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_qt5.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_qt5.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_qt5.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_qt5agg.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_qt5agg.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_qt5agg.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_qt5agg.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_qt5cairo.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_qt5cairo.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_qt5cairo.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_qt5cairo.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_svg.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_svg.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_svg.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_svg.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_template.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_template.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_template.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_template.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_tkagg.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_tkagg.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_tkagg.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_tkagg.py diff 
--git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_tkcairo.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_tkcairo.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_tkcairo.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_tkcairo.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_webagg.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_webagg.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_webagg.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_webagg.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_webagg_core.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_webagg_core.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_webagg_core.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_webagg_core.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_wx.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_wx.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_wx.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_wx.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_wxagg.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_wxagg.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_wxagg.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_wxagg.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/backend_wxcairo.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_wxcairo.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/backend_wxcairo.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/backend_wxcairo.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/qt_compat.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/qt_compat.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_compat.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__init__.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__init__.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__init__.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/_formlayout.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/_formlayout.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/_formlayout.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/_formlayout.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/figureoptions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/figureoptions.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/figureoptions.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/figureoptions.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/formlayout.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/formlayout.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/formlayout.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/formlayout.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/formsubplottool.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/formsubplottool.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/formsubplottool.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/__pycache__/formsubplottool.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/_formlayout.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/_formlayout.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/_formlayout.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/_formlayout.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/figureoptions.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/figureoptions.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/figureoptions.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/figureoptions.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/formlayout.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/formlayout.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/formlayout.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/formlayout.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/formsubplottool.py b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/formsubplottool.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/qt_editor/formsubplottool.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/qt_editor/formsubplottool.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/all_figures.html b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/all_figures.html similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/all_figures.html rename to 
venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/all_figures.html diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/css/boilerplate.css b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/css/boilerplate.css similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/css/boilerplate.css rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/css/boilerplate.css diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/css/fbm.css b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/css/fbm.css similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/css/fbm.css rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/css/fbm.css diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/css/page.css b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/css/page.css similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/css/page.css rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/css/page.css diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/ipython_inline_figure.html b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/ipython_inline_figure.html similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/ipython_inline_figure.html rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/ipython_inline_figure.html diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/AUTHORS.txt b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/AUTHORS.txt similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/AUTHORS.txt rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/AUTHORS.txt diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/LICENSE.txt b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/LICENSE.txt similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/LICENSE.txt rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/LICENSE.txt diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/external/jquery/jquery.js b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/external/jquery/jquery.js similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/external/jquery/jquery.js rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/external/jquery/jquery.js diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_444444_256x240.png b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_444444_256x240.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_444444_256x240.png rename to 
venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_444444_256x240.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_555555_256x240.png b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_555555_256x240.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_555555_256x240.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_555555_256x240.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_777620_256x240.png b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_777620_256x240.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_777620_256x240.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_777620_256x240.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_777777_256x240.png b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_777777_256x240.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_777777_256x240.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_777777_256x240.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_cc0000_256x240.png b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_cc0000_256x240.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_cc0000_256x240.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_cc0000_256x240.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_ffffff_256x240.png b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_ffffff_256x240.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_ffffff_256x240.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/images/ui-icons_ffffff_256x240.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/index.html b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/index.html similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/index.html rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/index.html diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.css b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.css similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.css rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.css diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.js b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.js similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.js rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.js diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.min.css b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.min.css similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.min.css rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.min.css diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.min.js b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.min.js similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.min.js rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.min.js diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.structure.css b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.structure.css similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.structure.css rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.structure.css diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.structure.min.css b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.structure.min.css similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.structure.min.css rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.structure.min.css diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.theme.css b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.theme.css similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.theme.css rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.theme.css diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.theme.min.css b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.theme.min.css similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.theme.min.css rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/jquery-ui.theme.min.css diff 
--git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/package.json b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/package.json similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/package.json rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/jquery-ui-1.12.1/package.json diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/js/mpl.js b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/js/mpl.js similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/js/mpl.js rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/js/mpl.js diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/js/mpl_tornado.js b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/js/mpl_tornado.js similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/js/mpl_tornado.js rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/js/mpl_tornado.js diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/js/nbagg_mpl.js b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/js/nbagg_mpl.js similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/js/nbagg_mpl.js rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/js/nbagg_mpl.js diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/nbagg_uat.ipynb b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/nbagg_uat.ipynb similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/nbagg_uat.ipynb rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/nbagg_uat.ipynb diff --git a/venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/single_figure.html b/venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/single_figure.html similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/backends/web_backend/single_figure.html rename to venv.bak/lib/python3.7/site-packages/matplotlib/backends/web_backend/single_figure.html diff --git a/venv/lib/python3.7/site-packages/matplotlib/bezier.py b/venv.bak/lib/python3.7/site-packages/matplotlib/bezier.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/bezier.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/bezier.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/blocking_input.py b/venv.bak/lib/python3.7/site-packages/matplotlib/blocking_input.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/blocking_input.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/blocking_input.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/category.py b/venv.bak/lib/python3.7/site-packages/matplotlib/category.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/category.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/category.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/cbook/__init__.py b/venv.bak/lib/python3.7/site-packages/matplotlib/cbook/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/cbook/__init__.py rename to 
venv.bak/lib/python3.7/site-packages/matplotlib/cbook/__init__.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/cbook/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/cbook/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/cbook/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/cbook/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/cbook/__pycache__/deprecation.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/cbook/__pycache__/deprecation.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/cbook/__pycache__/deprecation.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/cbook/__pycache__/deprecation.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/cbook/deprecation.py b/venv.bak/lib/python3.7/site-packages/matplotlib/cbook/deprecation.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/cbook/deprecation.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/cbook/deprecation.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/cm.py b/venv.bak/lib/python3.7/site-packages/matplotlib/cm.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/cm.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/cm.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/collections.py b/venv.bak/lib/python3.7/site-packages/matplotlib/collections.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/collections.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/collections.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/colorbar.py b/venv.bak/lib/python3.7/site-packages/matplotlib/colorbar.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/colorbar.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/colorbar.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/colors.py b/venv.bak/lib/python3.7/site-packages/matplotlib/colors.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/colors.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/colors.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/compat/__init__.py b/venv.bak/lib/python3.7/site-packages/matplotlib/compat/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/compat/__init__.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/compat/__init__.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/compat/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/compat/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/compat/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/compat/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/container.py b/venv.bak/lib/python3.7/site-packages/matplotlib/container.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/container.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/container.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/contour.py b/venv.bak/lib/python3.7/site-packages/matplotlib/contour.py similarity index 100% 
rename from venv/lib/python3.7/site-packages/matplotlib/contour.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/contour.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/dates.py b/venv.bak/lib/python3.7/site-packages/matplotlib/dates.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/dates.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/dates.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/docstring.py b/venv.bak/lib/python3.7/site-packages/matplotlib/docstring.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/docstring.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/docstring.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/dviread.py b/venv.bak/lib/python3.7/site-packages/matplotlib/dviread.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/dviread.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/dviread.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/figure.py b/venv.bak/lib/python3.7/site-packages/matplotlib/figure.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/figure.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/figure.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/font_manager.py b/venv.bak/lib/python3.7/site-packages/matplotlib/font_manager.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/font_manager.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/font_manager.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/fontconfig_pattern.py b/venv.bak/lib/python3.7/site-packages/matplotlib/fontconfig_pattern.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/fontconfig_pattern.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/fontconfig_pattern.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/ft2font.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/matplotlib/ft2font.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/ft2font.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/matplotlib/ft2font.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/matplotlib/gridspec.py b/venv.bak/lib/python3.7/site-packages/matplotlib/gridspec.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/gridspec.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/gridspec.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/hatch.py b/venv.bak/lib/python3.7/site-packages/matplotlib/hatch.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/hatch.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/hatch.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/image.py b/venv.bak/lib/python3.7/site-packages/matplotlib/image.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/image.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/image.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/legend.py b/venv.bak/lib/python3.7/site-packages/matplotlib/legend.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/legend.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/legend.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/legend_handler.py 
b/venv.bak/lib/python3.7/site-packages/matplotlib/legend_handler.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/legend_handler.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/legend_handler.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/lines.py b/venv.bak/lib/python3.7/site-packages/matplotlib/lines.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/lines.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/lines.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/markers.py b/venv.bak/lib/python3.7/site-packages/matplotlib/markers.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/markers.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/markers.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/mathtext.py b/venv.bak/lib/python3.7/site-packages/matplotlib/mathtext.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mathtext.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/mathtext.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/mlab.py b/venv.bak/lib/python3.7/site-packages/matplotlib/mlab.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mlab.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/mlab.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmex10.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmex10.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmex10.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmex10.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmmi10.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmmi10.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmmi10.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmmi10.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmr10.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmr10.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmr10.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmr10.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmsy10.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmsy10.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmsy10.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmsy10.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmtt10.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmtt10.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmtt10.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/cmtt10.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pagd8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pagd8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pagd8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pagd8a.afm diff 
--git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pagdo8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pagdo8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pagdo8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pagdo8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pagk8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pagk8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pagk8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pagk8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pagko8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pagko8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pagko8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pagko8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pbkd8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pbkd8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pbkd8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pbkd8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pbkdi8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pbkdi8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pbkdi8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pbkdi8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pbkl8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pbkl8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pbkl8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pbkl8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pbkli8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pbkli8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pbkli8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pbkli8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pcrb8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pcrb8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pcrb8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pcrb8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pcrbo8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pcrbo8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pcrbo8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pcrbo8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pcrr8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pcrr8a.afm similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pcrr8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pcrr8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pcrro8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pcrro8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pcrro8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pcrro8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvb8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvb8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvb8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvb8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvb8an.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvb8an.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvb8an.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvb8an.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvbo8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvbo8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvbo8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvbo8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvbo8an.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvbo8an.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvbo8an.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvbo8an.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvl8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvl8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvl8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvl8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvlo8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvlo8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvlo8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvlo8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvr8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvr8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvr8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvr8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvr8an.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvr8an.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvr8an.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvr8an.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvro8a.afm 
b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvro8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvro8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvro8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvro8an.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvro8an.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvro8an.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/phvro8an.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pncb8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pncb8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pncb8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pncb8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pncbi8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pncbi8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pncbi8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pncbi8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pncr8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pncr8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pncr8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pncr8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pncri8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pncri8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pncri8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pncri8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pplb8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pplb8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pplb8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pplb8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pplbi8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pplbi8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pplbi8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pplbi8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pplr8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pplr8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pplr8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pplr8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pplri8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pplri8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pplri8a.afm rename to 
venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pplri8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/psyr.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/psyr.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/psyr.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/psyr.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/ptmb8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/ptmb8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/ptmb8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/ptmb8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/ptmbi8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/ptmbi8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/ptmbi8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/ptmbi8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/ptmr8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/ptmr8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/ptmr8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/ptmr8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/ptmri8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/ptmri8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/ptmri8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/ptmri8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/putb8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/putb8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/putb8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/putb8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/putbi8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/putbi8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/putbi8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/putbi8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/putr8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/putr8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/putr8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/putr8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/putri8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/putri8a.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/putri8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/putri8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pzcmi8a.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pzcmi8a.afm similarity index 100% rename 
from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pzcmi8a.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pzcmi8a.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pzdr.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pzdr.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pzdr.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm/pzdr.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Courier-Bold.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Courier-Bold.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Courier-Bold.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Courier-Bold.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Courier-BoldOblique.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Courier-BoldOblique.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Courier-BoldOblique.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Courier-BoldOblique.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Courier-Oblique.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Courier-Oblique.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Courier-Oblique.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Courier-Oblique.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Courier.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Courier.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Courier.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Courier.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Helvetica-Bold.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Helvetica-Bold.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Helvetica-Bold.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Helvetica-Bold.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Helvetica-BoldOblique.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Helvetica-BoldOblique.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Helvetica-BoldOblique.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Helvetica-BoldOblique.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Helvetica-Oblique.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Helvetica-Oblique.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Helvetica-Oblique.afm rename to 
venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Helvetica-Oblique.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Helvetica.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Helvetica.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Helvetica.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Helvetica.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Symbol.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Symbol.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Symbol.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Symbol.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Times-Bold.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Times-Bold.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Times-Bold.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Times-Bold.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Times-BoldItalic.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Times-BoldItalic.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Times-BoldItalic.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Times-BoldItalic.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Times-Italic.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Times-Italic.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Times-Italic.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Times-Italic.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Times-Roman.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Times-Roman.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Times-Roman.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/Times-Roman.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/ZapfDingbats.afm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/ZapfDingbats.afm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/ZapfDingbats.afm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/ZapfDingbats.afm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/readme.txt b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/readme.txt similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/readme.txt rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts/readme.txt diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-Bold.ttf 
b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-Bold.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-Bold.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-Bold.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-BoldOblique.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-BoldOblique.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-BoldOblique.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-BoldOblique.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-Oblique.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-Oblique.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-Oblique.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-Oblique.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansDisplay.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansDisplay.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansDisplay.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansDisplay.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-Bold.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-Bold.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-Bold.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-Bold.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-BoldOblique.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-BoldOblique.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-BoldOblique.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-BoldOblique.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-Oblique.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-Oblique.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-Oblique.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-Oblique.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono.ttf rename to 
venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-Bold.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-Bold.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-Bold.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-Bold.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-BoldItalic.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-BoldItalic.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-BoldItalic.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-BoldItalic.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-Italic.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-Italic.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-Italic.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-Italic.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerifDisplay.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerifDisplay.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerifDisplay.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerifDisplay.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/LICENSE_DEJAVU b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/LICENSE_DEJAVU similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/LICENSE_DEJAVU rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/LICENSE_DEJAVU diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/LICENSE_STIX b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/LICENSE_STIX similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/LICENSE_STIX rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/LICENSE_STIX diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneral.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneral.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneral.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneral.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralBol.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralBol.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralBol.ttf 
rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralBol.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralBolIta.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralBolIta.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralBolIta.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralBolIta.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralItalic.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralItalic.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralItalic.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneralItalic.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUni.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUni.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUni.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUni.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniBol.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniBol.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniBol.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniBol.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniBolIta.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniBolIta.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniBolIta.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniBolIta.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniIta.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniIta.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniIta.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXNonUniIta.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFiveSymReg.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFiveSymReg.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFiveSymReg.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFiveSymReg.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFourSymBol.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFourSymBol.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFourSymBol.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFourSymBol.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFourSymReg.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFourSymReg.ttf similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFourSymReg.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizFourSymReg.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizOneSymBol.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizOneSymBol.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizOneSymBol.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizOneSymBol.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizOneSymReg.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizOneSymReg.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizOneSymReg.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizOneSymReg.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizThreeSymBol.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizThreeSymBol.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizThreeSymBol.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizThreeSymBol.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizThreeSymReg.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizThreeSymReg.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizThreeSymReg.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizThreeSymReg.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizTwoSymBol.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizTwoSymBol.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizTwoSymBol.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizTwoSymBol.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizTwoSymReg.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizTwoSymReg.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizTwoSymReg.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizTwoSymReg.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmb10.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmb10.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmb10.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmb10.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmex10.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmex10.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmex10.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmex10.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmmi10.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmmi10.ttf similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmmi10.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmmi10.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmr10.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmr10.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmr10.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmr10.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmss10.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmss10.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmss10.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmss10.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmsy10.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmsy10.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmsy10.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmsy10.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmtt10.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmtt10.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmtt10.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/cmtt10.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/back.gif b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/back.gif similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/back.gif rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/back.gif diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/back.pdf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/back.pdf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/back.pdf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/back.pdf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/back.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/back.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/back.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/back.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/back.svg b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/back.svg similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/back.svg rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/back.svg diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/back_large.gif b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/back_large.gif similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/back_large.gif rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/back_large.gif diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/back_large.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/back_large.png similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/back_large.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/back_large.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave.gif b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave.gif similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave.gif rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave.gif diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave.pdf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave.pdf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave.pdf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave.pdf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave.svg b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave.svg similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave.svg rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave.svg diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave_large.gif b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave_large.gif similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave_large.gif rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave_large.gif diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave_large.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave_large.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave_large.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/filesave_large.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward.gif b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward.gif similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward.gif rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward.gif diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward.pdf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward.pdf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward.pdf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward.pdf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward.svg 
b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward.svg similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward.svg rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward.svg diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward_large.gif b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward_large.gif similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward_large.gif rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward_large.gif diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward_large.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward_large.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward_large.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/forward_large.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand.gif b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand.gif similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand.gif rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand.gif diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand.pdf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand.pdf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand.pdf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand.pdf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand.svg b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand.svg similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand.svg rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand.svg diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand_large.gif b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand_large.gif similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand_large.gif rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/hand_large.gif diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/help.pdf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/help.pdf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/help.pdf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/help.pdf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/help.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/help.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/help.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/help.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/help.ppm 
b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/help.ppm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/help.ppm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/help.ppm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/help.svg b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/help.svg similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/help.svg rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/help.svg diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/help_large.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/help_large.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/help_large.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/help_large.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/help_large.ppm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/help_large.ppm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/help_large.ppm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/help_large.ppm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/home.gif b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/home.gif similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/home.gif rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/home.gif diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/home.pdf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/home.pdf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/home.pdf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/home.pdf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/home.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/home.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/home.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/home.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/home.svg b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/home.svg similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/home.svg rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/home.svg diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/home_large.gif b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/home_large.gif similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/home_large.gif rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/home_large.gif diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/home_large.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/home_large.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/home_large.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/home_large.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib.pdf 
b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib.pdf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib.pdf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib.pdf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib.svg b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib.svg similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib.svg rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib.svg diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib_128.ppm b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib_128.ppm similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib_128.ppm rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib_128.ppm diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib_large.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib_large.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib_large.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/matplotlib_large.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/move.gif b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/move.gif similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/move.gif rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/move.gif diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/move.pdf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/move.pdf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/move.pdf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/move.pdf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/move.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/move.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/move.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/move.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/move.svg b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/move.svg similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/move.svg rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/move.svg diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/move_large.gif b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/move_large.gif similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/move_large.gif rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/move_large.gif diff --git 
a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/move_large.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/move_large.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/move_large.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/move_large.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/qt4_editor_options.pdf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/qt4_editor_options.pdf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/qt4_editor_options.pdf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/qt4_editor_options.pdf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/qt4_editor_options.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/qt4_editor_options.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/qt4_editor_options.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/qt4_editor_options.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/qt4_editor_options.svg b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/qt4_editor_options.svg similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/qt4_editor_options.svg rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/qt4_editor_options.svg diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/qt4_editor_options_large.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/qt4_editor_options_large.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/qt4_editor_options_large.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/qt4_editor_options_large.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots.gif b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots.gif similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots.gif rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots.gif diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots.pdf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots.pdf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots.pdf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots.pdf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots.svg b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots.svg similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots.svg rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots.svg diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots_large.gif 
b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots_large.gif similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots_large.gif rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots_large.gif diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots_large.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots_large.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots_large.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/subplots_large.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect.gif b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect.gif similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect.gif rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect.gif diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect.pdf b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect.pdf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect.pdf rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect.pdf diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect.svg b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect.svg similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect.svg rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect.svg diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect_large.gif b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect_large.gif similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect_large.gif rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect_large.gif diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect_large.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect_large.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect_large.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/images/zoom_to_rect_large.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/matplotlibrc b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/matplotlibrc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/matplotlibrc rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/matplotlibrc diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/Minduka_Present_Blue_Pack.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/Minduka_Present_Blue_Pack.png similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/Minduka_Present_Blue_Pack.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/Minduka_Present_Blue_Pack.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/None_vs_nearest-pdf.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/None_vs_nearest-pdf.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/None_vs_nearest-pdf.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/None_vs_nearest-pdf.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/README.txt b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/README.txt similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/README.txt rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/README.txt diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/aapl.npz b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/aapl.npz similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/aapl.npz rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/aapl.npz diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/ada.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/ada.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/ada.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/ada.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/axes_grid/bivariate_normal.npy b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/axes_grid/bivariate_normal.npy similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/axes_grid/bivariate_normal.npy rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/axes_grid/bivariate_normal.npy diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/ct.raw.gz b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/ct.raw.gz similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/ct.raw.gz rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/ct.raw.gz diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/data_x_x2_x3.csv b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/data_x_x2_x3.csv similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/data_x_x2_x3.csv rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/data_x_x2_x3.csv diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/demodata.csv b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/demodata.csv similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/demodata.csv rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/demodata.csv diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/eeg.dat b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/eeg.dat similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/eeg.dat rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/eeg.dat diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/embedding_in_wx3.xrc b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/embedding_in_wx3.xrc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/embedding_in_wx3.xrc rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/embedding_in_wx3.xrc diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/goog.npz b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/goog.npz similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/goog.npz rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/goog.npz diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/grace_hopper.jpg b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/grace_hopper.jpg similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/grace_hopper.jpg rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/grace_hopper.jpg diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/grace_hopper.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/grace_hopper.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/grace_hopper.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/grace_hopper.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/jacksboro_fault_dem.npz b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/jacksboro_fault_dem.npz similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/jacksboro_fault_dem.npz rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/jacksboro_fault_dem.npz diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/logo2.png b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/logo2.png similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/logo2.png rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/logo2.png diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/membrane.dat b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/membrane.dat similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/membrane.dat rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/membrane.dat diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/msft.csv b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/msft.csv similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/msft.csv rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/msft.csv diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/percent_bachelors_degrees_women_usa.csv b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/percent_bachelors_degrees_women_usa.csv similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/percent_bachelors_degrees_women_usa.csv rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/percent_bachelors_degrees_women_usa.csv diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/s1045.ima.gz b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/s1045.ima.gz similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/s1045.ima.gz rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/s1045.ima.gz diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/topobathy.npz b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/topobathy.npz similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/topobathy.npz rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/sample_data/topobathy.npz diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/Solarize_Light2.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/Solarize_Light2.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/Solarize_Light2.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/Solarize_Light2.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test_patch.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test_patch.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test_patch.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test_patch.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/bmh.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/bmh.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/bmh.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/bmh.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/classic.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/classic.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/classic.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/classic.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/dark_background.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/dark_background.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/dark_background.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/dark_background.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/fast.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/fast.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/fast.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/fast.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/fivethirtyeight.mplstyle 
b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/fivethirtyeight.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/fivethirtyeight.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/fivethirtyeight.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/ggplot.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/ggplot.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/ggplot.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/ggplot.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/grayscale.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/grayscale.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/grayscale.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/grayscale.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-bright.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-bright.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-bright.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-bright.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-colorblind.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-colorblind.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-colorblind.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-colorblind.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-dark-palette.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-dark-palette.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-dark-palette.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-dark-palette.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-dark.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-dark.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-dark.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-dark.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-darkgrid.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-darkgrid.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-darkgrid.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-darkgrid.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-deep.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-deep.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-deep.mplstyle rename to 
venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-deep.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-muted.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-muted.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-muted.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-muted.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-notebook.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-notebook.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-notebook.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-notebook.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-paper.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-paper.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-paper.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-paper.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-pastel.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-pastel.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-pastel.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-pastel.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-poster.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-poster.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-poster.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-poster.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-talk.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-talk.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-talk.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-talk.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-ticks.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-ticks.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-ticks.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-ticks.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-white.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-white.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-white.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-white.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-whitegrid.mplstyle 
b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-whitegrid.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-whitegrid.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn-whitegrid.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/seaborn.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/tableau-colorblind10.mplstyle b/venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/tableau-colorblind10.mplstyle similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/tableau-colorblind10.mplstyle rename to venv.bak/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/tableau-colorblind10.mplstyle diff --git a/venv/lib/python3.7/site-packages/matplotlib/offsetbox.py b/venv.bak/lib/python3.7/site-packages/matplotlib/offsetbox.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/offsetbox.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/offsetbox.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/patches.py b/venv.bak/lib/python3.7/site-packages/matplotlib/patches.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/patches.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/patches.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/path.py b/venv.bak/lib/python3.7/site-packages/matplotlib/path.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/path.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/path.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/patheffects.py b/venv.bak/lib/python3.7/site-packages/matplotlib/patheffects.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/patheffects.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/patheffects.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/projections/__init__.py b/venv.bak/lib/python3.7/site-packages/matplotlib/projections/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/projections/__init__.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/projections/__init__.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/projections/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/projections/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/projections/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/projections/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/projections/__pycache__/geo.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/projections/__pycache__/geo.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/projections/__pycache__/geo.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/projections/__pycache__/geo.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/matplotlib/projections/__pycache__/polar.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/projections/__pycache__/polar.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/projections/__pycache__/polar.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/projections/__pycache__/polar.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/projections/geo.py b/venv.bak/lib/python3.7/site-packages/matplotlib/projections/geo.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/projections/geo.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/projections/geo.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/projections/polar.py b/venv.bak/lib/python3.7/site-packages/matplotlib/projections/polar.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/projections/polar.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/projections/polar.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/pylab.py b/venv.bak/lib/python3.7/site-packages/matplotlib/pylab.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/pylab.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/pylab.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/pyplot.py b/venv.bak/lib/python3.7/site-packages/matplotlib/pyplot.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/pyplot.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/pyplot.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/quiver.py b/venv.bak/lib/python3.7/site-packages/matplotlib/quiver.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/quiver.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/quiver.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/rcsetup.py b/venv.bak/lib/python3.7/site-packages/matplotlib/rcsetup.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/rcsetup.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/rcsetup.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/sankey.py b/venv.bak/lib/python3.7/site-packages/matplotlib/sankey.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/sankey.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/sankey.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/scale.py b/venv.bak/lib/python3.7/site-packages/matplotlib/scale.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/scale.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/scale.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/sphinxext/__init__.py b/venv.bak/lib/python3.7/site-packages/matplotlib/sphinxext/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/sphinxext/__init__.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/sphinxext/__init__.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/sphinxext/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/sphinxext/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/sphinxext/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/sphinxext/__pycache__/__init__.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/matplotlib/sphinxext/__pycache__/mathmpl.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/sphinxext/__pycache__/mathmpl.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/sphinxext/__pycache__/mathmpl.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/sphinxext/__pycache__/mathmpl.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/sphinxext/__pycache__/plot_directive.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/sphinxext/__pycache__/plot_directive.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/sphinxext/__pycache__/plot_directive.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/sphinxext/__pycache__/plot_directive.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/sphinxext/mathmpl.py b/venv.bak/lib/python3.7/site-packages/matplotlib/sphinxext/mathmpl.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/sphinxext/mathmpl.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/sphinxext/mathmpl.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/sphinxext/plot_directive.py b/venv.bak/lib/python3.7/site-packages/matplotlib/sphinxext/plot_directive.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/sphinxext/plot_directive.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/sphinxext/plot_directive.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/spines.py b/venv.bak/lib/python3.7/site-packages/matplotlib/spines.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/spines.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/spines.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/stackplot.py b/venv.bak/lib/python3.7/site-packages/matplotlib/stackplot.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/stackplot.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/stackplot.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/streamplot.py b/venv.bak/lib/python3.7/site-packages/matplotlib/streamplot.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/streamplot.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/streamplot.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/style/__init__.py b/venv.bak/lib/python3.7/site-packages/matplotlib/style/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/style/__init__.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/style/__init__.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/style/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/style/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/style/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/style/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/style/__pycache__/core.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/style/__pycache__/core.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/style/__pycache__/core.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/style/__pycache__/core.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/matplotlib/style/core.py b/venv.bak/lib/python3.7/site-packages/matplotlib/style/core.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/style/core.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/style/core.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/table.py b/venv.bak/lib/python3.7/site-packages/matplotlib/table.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/table.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/table.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/__init__.py b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/__init__.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/__init__.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/__pycache__/compare.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/__pycache__/compare.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/__pycache__/compare.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/__pycache__/compare.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/__pycache__/conftest.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/__pycache__/conftest.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/__pycache__/conftest.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/__pycache__/conftest.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/__pycache__/decorators.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/__pycache__/decorators.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/__pycache__/decorators.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/__pycache__/decorators.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/__pycache__/disable_internet.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/__pycache__/disable_internet.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/__pycache__/disable_internet.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/__pycache__/disable_internet.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/__pycache__/exceptions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/__pycache__/exceptions.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/__pycache__/exceptions.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/__pycache__/exceptions.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/compare.py 
b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/compare.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/compare.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/compare.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/conftest.py b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/conftest.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/conftest.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/conftest.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/decorators.py b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/decorators.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/decorators.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/decorators.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/disable_internet.py b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/disable_internet.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/disable_internet.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/disable_internet.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/exceptions.py b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/exceptions.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/exceptions.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/exceptions.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/Duration.py b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/Duration.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/Duration.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/Duration.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/Epoch.py b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/Epoch.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/Epoch.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/Epoch.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/EpochConverter.py b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/EpochConverter.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/EpochConverter.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/EpochConverter.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/StrConverter.py b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/StrConverter.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/StrConverter.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/StrConverter.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/UnitDbl.py b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/UnitDbl.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/UnitDbl.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/UnitDbl.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/UnitDblConverter.py 
b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/UnitDblConverter.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/UnitDblConverter.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/UnitDblConverter.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/UnitDblFormatter.py b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/UnitDblFormatter.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/UnitDblFormatter.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/UnitDblFormatter.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__init__.py b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__init__.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__init__.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/Duration.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/Duration.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/Duration.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/Duration.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/Epoch.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/Epoch.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/Epoch.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/Epoch.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/EpochConverter.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/EpochConverter.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/EpochConverter.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/EpochConverter.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/StrConverter.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/StrConverter.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/StrConverter.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/StrConverter.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/UnitDbl.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/UnitDbl.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/UnitDbl.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/UnitDbl.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/UnitDblConverter.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/UnitDblConverter.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/UnitDblConverter.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/UnitDblConverter.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/UnitDblFormatter.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/UnitDblFormatter.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/UnitDblFormatter.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/UnitDblFormatter.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/testing/jpl_units/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__init__.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__init__.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__init__.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/conftest.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/conftest.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/conftest.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/conftest.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_afm.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_afm.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_afm.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_afm.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_agg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_agg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_agg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_agg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_agg_filter.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_agg_filter.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_agg_filter.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_agg_filter.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_animation.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_animation.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_animation.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_animation.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_arrow_patches.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_arrow_patches.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_arrow_patches.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_arrow_patches.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_artist.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_artist.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_artist.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_artist.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_axes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_axes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_axes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_axes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_bases.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_bases.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_bases.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_bases.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_cairo.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_cairo.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_cairo.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_cairo.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_nbagg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_nbagg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_nbagg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_nbagg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_pdf.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_pdf.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_pdf.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_pdf.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_pgf.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_pgf.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_pgf.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_pgf.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_ps.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_ps.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_ps.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_ps.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_qt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_qt.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_qt.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_qt.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_svg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_svg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_svg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_svg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_tk.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_tk.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_tk.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_tk.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_tools.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_tools.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_tools.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_tools.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_webagg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_webagg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_webagg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backend_webagg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backends_interactive.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backends_interactive.cpython-37.pyc similarity index 
100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backends_interactive.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_backends_interactive.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_basic.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_basic.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_basic.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_basic.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_bbox_tight.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_bbox_tight.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_bbox_tight.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_bbox_tight.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_category.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_category.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_category.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_category.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_cbook.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_cbook.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_cbook.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_cbook.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_collections.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_collections.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_collections.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_collections.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_colorbar.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_colorbar.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_colorbar.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_colorbar.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_colors.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_colors.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_colors.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_colors.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_compare_images.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_compare_images.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_compare_images.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_compare_images.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_constrainedlayout.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_constrainedlayout.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_constrainedlayout.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_constrainedlayout.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_container.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_container.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_container.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_container.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_contour.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_contour.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_contour.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_contour.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_cycles.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_cycles.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_cycles.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_cycles.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_dates.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_dates.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_dates.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_dates.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_determinism.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_determinism.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_determinism.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_determinism.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_dviread.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_dviread.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_dviread.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_dviread.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_figure.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_figure.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_figure.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_figure.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_font_manager.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_font_manager.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_font_manager.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_font_manager.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_fontconfig_pattern.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_fontconfig_pattern.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_fontconfig_pattern.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_fontconfig_pattern.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_gridspec.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_gridspec.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_gridspec.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_gridspec.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_image.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_image.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_image.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_image.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_legend.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_legend.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_legend.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_legend.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_lines.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_lines.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_lines.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_lines.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_marker.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_marker.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_marker.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_marker.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_mathtext.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_mathtext.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_mathtext.cpython-37.pyc 
rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_mathtext.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_matplotlib.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_matplotlib.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_matplotlib.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_matplotlib.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_mlab.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_mlab.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_mlab.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_mlab.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_offsetbox.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_offsetbox.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_offsetbox.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_offsetbox.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_patches.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_patches.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_patches.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_patches.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_path.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_path.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_path.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_path.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_patheffects.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_patheffects.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_patheffects.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_patheffects.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_pickle.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_pickle.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_pickle.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_pickle.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_png.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_png.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_png.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_png.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_preprocess_data.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_preprocess_data.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_preprocess_data.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_preprocess_data.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_pyplot.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_pyplot.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_pyplot.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_pyplot.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_quiver.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_quiver.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_quiver.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_quiver.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_rcparams.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_rcparams.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_rcparams.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_rcparams.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_sankey.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_sankey.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_sankey.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_sankey.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_scale.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_scale.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_scale.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_scale.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_simplification.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_simplification.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_simplification.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_simplification.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_skew.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_skew.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_skew.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_skew.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_sphinxext.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_sphinxext.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_sphinxext.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_sphinxext.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_spines.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_spines.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_spines.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_spines.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_streamplot.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_streamplot.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_streamplot.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_streamplot.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_style.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_style.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_style.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_style.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_subplots.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_subplots.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_subplots.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_subplots.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_table.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_table.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_table.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_table.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_testing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_testing.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_testing.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_testing.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_texmanager.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_texmanager.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_texmanager.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_texmanager.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_text.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_text.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_text.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_text.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_ticker.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_ticker.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_ticker.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_ticker.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_tightlayout.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_tightlayout.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_tightlayout.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_tightlayout.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_transforms.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_transforms.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_transforms.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_transforms.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_triangulation.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_triangulation.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_triangulation.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_triangulation.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_ttconv.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_ttconv.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_ttconv.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_ttconv.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_type1font.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_type1font.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_type1font.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_type1font.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_units.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_units.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_units.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_units.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_usetex.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_usetex.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_usetex.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_usetex.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_widgets.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_widgets.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_widgets.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/__pycache__/test_widgets.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/cmr10.pfb b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/cmr10.pfb similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/cmr10.pfb rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/cmr10.pfb diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/conftest.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/conftest.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/conftest.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/conftest.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/mpltest.ttf b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/mpltest.ttf similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/mpltest.ttf rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/mpltest.ttf diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_afm.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_afm.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_afm.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_afm.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_agg.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_agg.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_agg.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_agg.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_agg_filter.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_agg_filter.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_agg_filter.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_agg_filter.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_animation.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_animation.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_animation.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_animation.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_arrow_patches.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_arrow_patches.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_arrow_patches.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_arrow_patches.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_artist.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_artist.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_artist.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_artist.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_axes.py 
b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_axes.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_axes.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_axes.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_bases.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_bases.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_bases.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_bases.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_cairo.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_cairo.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_cairo.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_cairo.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_nbagg.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_nbagg.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_nbagg.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_nbagg.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_pdf.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_pdf.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_pdf.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_pdf.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_pgf.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_pgf.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_pgf.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_pgf.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_ps.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_ps.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_ps.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_ps.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_qt.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_qt.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_qt.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_qt.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_svg.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_svg.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_svg.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_svg.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_tk.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_tk.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_tk.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_tk.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_tools.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_tools.py similarity 
index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_tools.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_tools.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_webagg.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_webagg.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_backend_webagg.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backend_webagg.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_backends_interactive.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backends_interactive.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_backends_interactive.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_backends_interactive.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_basic.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_basic.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_basic.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_basic.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_bbox_tight.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_bbox_tight.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_bbox_tight.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_bbox_tight.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_category.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_category.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_category.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_category.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_cbook.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_cbook.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_cbook.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_cbook.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_collections.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_collections.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_collections.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_collections.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_colorbar.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_colorbar.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_colorbar.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_colorbar.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_colors.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_colors.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_colors.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_colors.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_compare_images.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_compare_images.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_compare_images.py rename to 
venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_compare_images.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_constrainedlayout.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_constrainedlayout.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_constrainedlayout.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_constrainedlayout.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_container.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_container.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_container.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_container.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_contour.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_contour.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_contour.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_contour.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_cycles.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_cycles.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_cycles.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_cycles.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_dates.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_dates.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_dates.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_dates.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_determinism.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_determinism.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_determinism.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_determinism.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_dviread.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_dviread.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_dviread.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_dviread.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_figure.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_figure.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_figure.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_figure.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_font_manager.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_font_manager.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_font_manager.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_font_manager.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_fontconfig_pattern.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_fontconfig_pattern.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_fontconfig_pattern.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_fontconfig_pattern.py diff --git 
a/venv/lib/python3.7/site-packages/matplotlib/tests/test_gridspec.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_gridspec.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_gridspec.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_gridspec.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_image.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_image.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_image.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_image.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_legend.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_legend.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_legend.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_legend.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_lines.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_lines.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_lines.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_lines.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_marker.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_marker.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_marker.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_marker.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_mathtext.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_mathtext.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_mathtext.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_mathtext.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_matplotlib.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_matplotlib.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_matplotlib.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_matplotlib.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_mlab.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_mlab.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_mlab.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_mlab.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_offsetbox.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_offsetbox.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_offsetbox.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_offsetbox.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_patches.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_patches.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_patches.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_patches.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_path.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_path.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_path.py rename to 
venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_path.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_patheffects.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_patheffects.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_patheffects.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_patheffects.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_pickle.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_pickle.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_pickle.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_pickle.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_png.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_png.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_png.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_png.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_preprocess_data.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_preprocess_data.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_preprocess_data.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_preprocess_data.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_pyplot.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_pyplot.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_pyplot.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_pyplot.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_quiver.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_quiver.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_quiver.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_quiver.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_rcparams.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_rcparams.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_rcparams.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_rcparams.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_sankey.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_sankey.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_sankey.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_sankey.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_scale.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_scale.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_scale.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_scale.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_simplification.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_simplification.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_simplification.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_simplification.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_skew.py 
b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_skew.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_skew.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_skew.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_sphinxext.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_sphinxext.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_sphinxext.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_sphinxext.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_spines.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_spines.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_spines.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_spines.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_streamplot.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_streamplot.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_streamplot.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_streamplot.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_style.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_style.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_style.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_style.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_subplots.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_subplots.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_subplots.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_subplots.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_table.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_table.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_table.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_table.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_testing.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_testing.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_testing.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_testing.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_texmanager.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_texmanager.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_texmanager.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_texmanager.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_text.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_text.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_text.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_text.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_ticker.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_ticker.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_ticker.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_ticker.py 
diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_tightlayout.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_tightlayout.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_tightlayout.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_tightlayout.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_transforms.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_transforms.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_transforms.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_transforms.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_triangulation.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_triangulation.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_triangulation.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_triangulation.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_ttconv.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_ttconv.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_ttconv.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_ttconv.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_type1font.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_type1font.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_type1font.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_type1font.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_units.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_units.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_units.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_units.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_usetex.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_usetex.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_usetex.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_usetex.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/test_widgets.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_widgets.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/test_widgets.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/test_widgets.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/.gitignore b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/.gitignore similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/.gitignore rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/.gitignore diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/README.md b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/README.md similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/README.md rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/README.md diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/__pycache__/conf.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/__pycache__/conf.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/__pycache__/conf.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/__pycache__/conf.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/__pycache__/range4.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/__pycache__/range4.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/__pycache__/range4.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/__pycache__/range4.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/__pycache__/range6.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/__pycache__/range6.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/__pycache__/range6.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/__pycache__/range6.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/_static/.gitignore b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/_static/.gitignore similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/_static/.gitignore rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/_static/.gitignore diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/_static/README.txt b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/_static/README.txt similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/_static/README.txt rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/_static/README.txt diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/conf.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/conf.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/conf.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/conf.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/index.rst b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/index.rst similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/index.rst rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/index.rst diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/range4.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/range4.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/range4.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/range4.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/range6.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/range6.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/range6.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/range6.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/some_plots.rst b/venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/some_plots.rst similarity index 100% 
rename from venv/lib/python3.7/site-packages/matplotlib/tests/tinypages/some_plots.rst rename to venv.bak/lib/python3.7/site-packages/matplotlib/tests/tinypages/some_plots.rst diff --git a/venv/lib/python3.7/site-packages/matplotlib/texmanager.py b/venv.bak/lib/python3.7/site-packages/matplotlib/texmanager.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/texmanager.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/texmanager.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/text.py b/venv.bak/lib/python3.7/site-packages/matplotlib/text.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/text.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/text.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/textpath.py b/venv.bak/lib/python3.7/site-packages/matplotlib/textpath.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/textpath.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/textpath.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/ticker.py b/venv.bak/lib/python3.7/site-packages/matplotlib/ticker.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/ticker.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/ticker.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tight_bbox.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tight_bbox.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tight_bbox.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tight_bbox.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tight_layout.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tight_layout.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tight_layout.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tight_layout.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/transforms.py b/venv.bak/lib/python3.7/site-packages/matplotlib/transforms.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/transforms.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/transforms.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/__init__.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/__init__.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tri/__init__.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/triangulation.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/triangulation.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/triangulation.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/triangulation.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/tricontour.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/tricontour.cpython-37.pyc 
similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/tricontour.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/tricontour.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/trifinder.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/trifinder.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/trifinder.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/trifinder.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/triinterpolate.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/triinterpolate.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/triinterpolate.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/triinterpolate.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/tripcolor.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/tripcolor.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/tripcolor.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/tripcolor.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/triplot.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/triplot.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/triplot.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/triplot.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/trirefine.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/trirefine.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/trirefine.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/trirefine.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/tritools.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/tritools.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/__pycache__/tritools.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/matplotlib/tri/__pycache__/tritools.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/triangulation.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/triangulation.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/triangulation.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tri/triangulation.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/tricontour.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/tricontour.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/tricontour.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tri/tricontour.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/trifinder.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/trifinder.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/trifinder.py rename 
to venv.bak/lib/python3.7/site-packages/matplotlib/tri/trifinder.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/triinterpolate.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/triinterpolate.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/triinterpolate.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tri/triinterpolate.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/tripcolor.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/tripcolor.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/tripcolor.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tri/tripcolor.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/triplot.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/triplot.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/triplot.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tri/triplot.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/trirefine.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/trirefine.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/trirefine.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tri/trirefine.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/tri/tritools.py b/venv.bak/lib/python3.7/site-packages/matplotlib/tri/tritools.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/tri/tritools.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/tri/tritools.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/ttconv.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/matplotlib/ttconv.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/ttconv.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/matplotlib/ttconv.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/matplotlib/type1font.py b/venv.bak/lib/python3.7/site-packages/matplotlib/type1font.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/type1font.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/type1font.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/units.py b/venv.bak/lib/python3.7/site-packages/matplotlib/units.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/units.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/units.py diff --git a/venv/lib/python3.7/site-packages/matplotlib/widgets.py b/venv.bak/lib/python3.7/site-packages/matplotlib/widgets.py similarity index 100% rename from venv/lib/python3.7/site-packages/matplotlib/widgets.py rename to venv.bak/lib/python3.7/site-packages/matplotlib/widgets.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__init__.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__init__.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__init__.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/__init__.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/anchored_artists.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/anchored_artists.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/anchored_artists.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/anchored_artists.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/angle_helper.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/angle_helper.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/angle_helper.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/angle_helper.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axes_divider.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axes_divider.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axes_divider.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axes_divider.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axes_grid.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axes_grid.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axes_grid.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axes_grid.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axes_rgb.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axes_rgb.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axes_rgb.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axes_rgb.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axes_size.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axes_size.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axes_size.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axes_size.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axis_artist.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axis_artist.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axis_artist.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axis_artist.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axisline_style.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axisline_style.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axisline_style.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axisline_style.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axislines.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axislines.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axislines.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/axislines.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/clip_path.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/clip_path.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/clip_path.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/clip_path.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/colorbar.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/colorbar.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/colorbar.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/colorbar.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/floating_axes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/floating_axes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/floating_axes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/floating_axes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/grid_finder.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/grid_finder.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/grid_finder.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/grid_finder.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/grid_helper_curvelinear.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/grid_helper_curvelinear.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/grid_helper_curvelinear.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/grid_helper_curvelinear.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/inset_locator.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/inset_locator.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/inset_locator.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/inset_locator.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/parasite_axes.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/parasite_axes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/parasite_axes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/__pycache__/parasite_axes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/anchored_artists.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/anchored_artists.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/anchored_artists.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/anchored_artists.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/angle_helper.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/angle_helper.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/angle_helper.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/angle_helper.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axes_divider.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axes_divider.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axes_divider.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axes_divider.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axes_grid.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axes_grid.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axes_grid.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axes_grid.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axes_rgb.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axes_rgb.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axes_rgb.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axes_rgb.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axes_size.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axes_size.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axes_size.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axes_size.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axis_artist.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axis_artist.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axis_artist.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axis_artist.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axisline_style.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axisline_style.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axisline_style.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axisline_style.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axislines.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axislines.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axislines.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/axislines.py diff --git 
a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/clip_path.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/clip_path.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/clip_path.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/clip_path.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/colorbar.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/colorbar.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/colorbar.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/colorbar.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/floating_axes.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/floating_axes.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/floating_axes.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/floating_axes.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/grid_finder.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/grid_finder.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/grid_finder.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/grid_finder.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/grid_helper_curvelinear.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/grid_helper_curvelinear.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/grid_helper_curvelinear.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/grid_helper_curvelinear.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/inset_locator.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/inset_locator.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/inset_locator.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/inset_locator.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/parasite_axes.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/parasite_axes.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid/parasite_axes.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid/parasite_axes.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__init__.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__init__.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__init__.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/anchored_artists.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/anchored_artists.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/anchored_artists.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/anchored_artists.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/axes_divider.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/axes_divider.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/axes_divider.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/axes_divider.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/axes_grid.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/axes_grid.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/axes_grid.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/axes_grid.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/axes_rgb.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/axes_rgb.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/axes_rgb.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/axes_rgb.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/axes_size.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/axes_size.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/axes_size.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/axes_size.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/colorbar.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/colorbar.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/colorbar.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/colorbar.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/inset_locator.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/inset_locator.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/inset_locator.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/inset_locator.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/mpl_axes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/mpl_axes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/mpl_axes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/mpl_axes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/parasite_axes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/parasite_axes.cpython-37.pyc 
similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/parasite_axes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/__pycache__/parasite_axes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/anchored_artists.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/anchored_artists.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/anchored_artists.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/anchored_artists.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/axes_divider.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/axes_divider.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/axes_divider.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/axes_divider.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/axes_grid.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/axes_grid.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/axes_grid.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/axes_grid.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/axes_rgb.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/axes_rgb.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/axes_rgb.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/axes_rgb.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/axes_size.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/axes_size.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/axes_size.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/axes_size.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/colorbar.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/colorbar.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/colorbar.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/colorbar.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/inset_locator.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/inset_locator.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/inset_locator.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/inset_locator.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/mpl_axes.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/mpl_axes.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/mpl_axes.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/mpl_axes.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/parasite_axes.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/parasite_axes.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/parasite_axes.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axes_grid1/parasite_axes.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__init__.py 
b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__init__.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__init__.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/angle_helper.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/angle_helper.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/angle_helper.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/angle_helper.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axes_divider.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axes_divider.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axes_divider.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axes_divider.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axes_grid.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axes_grid.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axes_grid.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axes_grid.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axes_rgb.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axes_rgb.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axes_rgb.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axes_rgb.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axis_artist.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axis_artist.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axis_artist.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axis_artist.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axisline_style.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axisline_style.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axisline_style.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axisline_style.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axislines.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axislines.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axislines.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/axislines.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/clip_path.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/clip_path.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/clip_path.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/clip_path.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/floating_axes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/floating_axes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/floating_axes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/floating_axes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/grid_finder.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/grid_finder.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/grid_finder.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/grid_finder.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/grid_helper_curvelinear.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/grid_helper_curvelinear.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/grid_helper_curvelinear.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/grid_helper_curvelinear.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/parasite_axes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/parasite_axes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/parasite_axes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/__pycache__/parasite_axes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/angle_helper.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/angle_helper.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/angle_helper.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/angle_helper.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/axes_divider.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/axes_divider.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/axes_divider.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/axes_divider.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/axes_grid.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/axes_grid.py similarity index 100% rename 
from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/axes_grid.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/axes_grid.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/axes_rgb.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/axes_rgb.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/axes_rgb.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/axes_rgb.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/axis_artist.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/axis_artist.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/axis_artist.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/axis_artist.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/axisline_style.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/axisline_style.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/axisline_style.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/axisline_style.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/axislines.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/axislines.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/axislines.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/axislines.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/clip_path.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/clip_path.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/clip_path.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/clip_path.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/floating_axes.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/floating_axes.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/floating_axes.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/floating_axes.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/grid_finder.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/grid_finder.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/grid_finder.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/grid_finder.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/grid_helper_curvelinear.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/grid_helper_curvelinear.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/grid_helper_curvelinear.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/grid_helper_curvelinear.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/parasite_axes.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/parasite_axes.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/axisartist/parasite_axes.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/axisartist/parasite_axes.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__init__.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__init__.py 
similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__init__.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__init__.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/art3d.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/art3d.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/art3d.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/art3d.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/axes3d.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/axes3d.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/axes3d.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/axes3d.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/axis3d.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/axis3d.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/axis3d.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/axis3d.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/proj3d.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/proj3d.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/proj3d.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/__pycache__/proj3d.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/art3d.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/art3d.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/art3d.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/art3d.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/axes3d.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/axes3d.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/axes3d.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/axes3d.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/axis3d.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/axis3d.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/axis3d.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/axis3d.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/proj3d.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/proj3d.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/mplot3d/proj3d.py rename to 
venv.bak/lib/python3.7/site-packages/mpl_toolkits/mplot3d/proj3d.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/__init__.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/__init__.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__init__.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/conftest.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/conftest.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/conftest.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/conftest.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axes_grid.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axes_grid.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axes_grid.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axes_grid.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axes_grid1.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axes_grid1.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axes_grid1.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axes_grid1.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_angle_helper.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_angle_helper.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_angle_helper.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_angle_helper.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_axis_artist.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_axis_artist.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_axis_artist.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_axis_artist.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_axislines.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_axislines.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_axislines.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_axislines.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_clip_path.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_clip_path.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_clip_path.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_clip_path.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_floating_axes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_floating_axes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_floating_axes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_floating_axes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_grid_finder.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_grid_finder.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_grid_finder.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_grid_finder.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_grid_helper_curvelinear.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_grid_helper_curvelinear.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_grid_helper_curvelinear.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_axisartist_grid_helper_curvelinear.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_mplot3d.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_mplot3d.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_mplot3d.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/__pycache__/test_mplot3d.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid/imagegrid_cbar_mode.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid/imagegrid_cbar_mode.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid/imagegrid_cbar_mode.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid/imagegrid_cbar_mode.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/anchored_direction_arrows.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/anchored_direction_arrows.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/anchored_direction_arrows.png rename to 
venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/anchored_direction_arrows.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/anchored_direction_arrows_many_args.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/anchored_direction_arrows_many_args.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/anchored_direction_arrows_many_args.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/anchored_direction_arrows_many_args.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/divider_append_axes.pdf b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/divider_append_axes.pdf similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/divider_append_axes.pdf rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/divider_append_axes.pdf diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/divider_append_axes.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/divider_append_axes.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/divider_append_axes.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/divider_append_axes.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/divider_append_axes.svg b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/divider_append_axes.svg similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/divider_append_axes.svg rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/divider_append_axes.svg diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/fill_facecolor.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/fill_facecolor.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/fill_facecolor.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/fill_facecolor.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/image_grid.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/image_grid.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/image_grid.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/image_grid.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/inset_axes.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/inset_axes.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/inset_axes.png rename to 
venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/inset_axes.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/inset_locator.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/inset_locator.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/inset_locator.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/inset_locator.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/inverted_zoomed_axes.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/inverted_zoomed_axes.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/inverted_zoomed_axes.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/inverted_zoomed_axes.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/twin_axes_empty_and_removed.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/twin_axes_empty_and_removed.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/twin_axes_empty_and_removed.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/twin_axes_empty_and_removed.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/zoomed_axes.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/zoomed_axes.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/zoomed_axes.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axes_grid1/zoomed_axes.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axis_artist/axis_artist.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axis_artist/axis_artist.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axis_artist/axis_artist.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axis_artist/axis_artist.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axis_artist/axis_artist_labelbase.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axis_artist/axis_artist_labelbase.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axis_artist/axis_artist_labelbase.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axis_artist/axis_artist_labelbase.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axis_artist/axis_artist_ticklabels.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axis_artist/axis_artist_ticklabels.png similarity index 100% rename from 
venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axis_artist/axis_artist_ticklabels.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axis_artist/axis_artist_ticklabels.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axis_artist/axis_artist_ticks.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axis_artist/axis_artist_ticks.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axis_artist/axis_artist_ticks.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axis_artist/axis_artist_ticks.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axislines/ParasiteAxesAuxTrans_meshplot.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axislines/ParasiteAxesAuxTrans_meshplot.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axislines/ParasiteAxesAuxTrans_meshplot.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axislines/ParasiteAxesAuxTrans_meshplot.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axislines/Subplot.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axislines/Subplot.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axislines/Subplot.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axislines/Subplot.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axislines/SubplotZero.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axislines/SubplotZero.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axislines/SubplotZero.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_axislines/SubplotZero.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_clip_path/clip_path.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_clip_path/clip_path.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_clip_path/clip_path.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_clip_path/clip_path.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_floating_axes/curvelinear3.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_floating_axes/curvelinear3.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_floating_axes/curvelinear3.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_floating_axes/curvelinear3.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_floating_axes/curvelinear4.png 
b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_floating_axes/curvelinear4.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_floating_axes/curvelinear4.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_floating_axes/curvelinear4.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_grid_helper_curvelinear/axis_direction.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_grid_helper_curvelinear/axis_direction.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_grid_helper_curvelinear/axis_direction.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_grid_helper_curvelinear/axis_direction.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_grid_helper_curvelinear/custom_transform.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_grid_helper_curvelinear/custom_transform.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_grid_helper_curvelinear/custom_transform.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_grid_helper_curvelinear/custom_transform.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_grid_helper_curvelinear/polar_box.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_grid_helper_curvelinear/polar_box.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_grid_helper_curvelinear/polar_box.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_axisartist_grid_helper_curvelinear/polar_box.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/axes3d_cla.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/axes3d_cla.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/axes3d_cla.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/axes3d_cla.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/axes3d_labelpad.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/axes3d_labelpad.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/axes3d_labelpad.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/axes3d_labelpad.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/axes3d_ortho.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/axes3d_ortho.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/axes3d_ortho.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/axes3d_ortho.png diff --git 
a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/bar3d.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/bar3d.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/bar3d.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/bar3d.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/bar3d_notshaded.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/bar3d_notshaded.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/bar3d_notshaded.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/bar3d_notshaded.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/bar3d_shaded.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/bar3d_shaded.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/bar3d_shaded.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/bar3d_shaded.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/contour3d.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/contour3d.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/contour3d.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/contour3d.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/contourf3d.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/contourf3d.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/contourf3d.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/contourf3d.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/contourf3d_fill.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/contourf3d_fill.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/contourf3d_fill.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/contourf3d_fill.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/lines3d.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/lines3d.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/lines3d.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/lines3d.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/mixedsubplot.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/mixedsubplot.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/mixedsubplot.png rename to 
venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/mixedsubplot.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/plot_3d_from_2d.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/plot_3d_from_2d.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/plot_3d_from_2d.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/plot_3d_from_2d.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/poly3dcollection_alpha.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/poly3dcollection_alpha.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/poly3dcollection_alpha.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/poly3dcollection_alpha.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/poly3dcollection_closed.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/poly3dcollection_closed.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/poly3dcollection_closed.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/poly3dcollection_closed.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/proj3d_axes_cube.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/proj3d_axes_cube.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/proj3d_axes_cube.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/proj3d_axes_cube.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/proj3d_axes_cube_ortho.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/proj3d_axes_cube_ortho.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/proj3d_axes_cube_ortho.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/proj3d_axes_cube_ortho.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/proj3d_lines_dists.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/proj3d_lines_dists.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/proj3d_lines_dists.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/proj3d_lines_dists.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d.png diff --git 
a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d_empty.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d_empty.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d_empty.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d_empty.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d_masked.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d_masked.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d_masked.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d_masked.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d_pivot_middle.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d_pivot_middle.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d_pivot_middle.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d_pivot_middle.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d_pivot_tail.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d_pivot_tail.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d_pivot_tail.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/quiver3d_pivot_tail.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/scatter3d.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/scatter3d.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/scatter3d.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/scatter3d.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/scatter3d_color.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/scatter3d_color.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/scatter3d_color.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/scatter3d_color.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/surface3d.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/surface3d.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/surface3d.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/surface3d.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/surface3d_shaded.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/surface3d_shaded.png similarity index 100% rename from 
venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/surface3d_shaded.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/surface3d_shaded.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/text3d.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/text3d.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/text3d.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/text3d.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/tricontour.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/tricontour.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/tricontour.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/tricontour.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/trisurf3d.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/trisurf3d.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/trisurf3d.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/trisurf3d.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/trisurf3d_shaded.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/trisurf3d_shaded.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/trisurf3d_shaded.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/trisurf3d_shaded.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-alpha.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-alpha.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-alpha.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-alpha.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-edge-style.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-edge-style.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-edge-style.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-edge-style.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-named-colors.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-named-colors.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-named-colors.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-named-colors.png diff --git 
a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-rgb-data.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-rgb-data.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-rgb-data.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-rgb-data.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-simple.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-simple.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-simple.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-simple.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-xyz.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-xyz.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-xyz.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/voxels-xyz.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/wireframe3d.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/wireframe3d.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/wireframe3d.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/wireframe3d.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/wireframe3dzerocstride.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/wireframe3dzerocstride.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/wireframe3dzerocstride.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/wireframe3dzerocstride.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/wireframe3dzerorstride.png b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/wireframe3dzerorstride.png similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/wireframe3dzerorstride.png rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/baseline_images/test_mplot3d/wireframe3dzerorstride.png diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/conftest.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/conftest.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/conftest.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/conftest.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axes_grid.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axes_grid.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axes_grid.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axes_grid.py diff --git 
a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axes_grid1.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axes_grid1.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axes_grid1.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axes_grid1.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_angle_helper.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_angle_helper.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_angle_helper.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_angle_helper.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_axis_artist.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_axis_artist.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_axis_artist.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_axis_artist.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_axislines.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_axislines.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_axislines.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_axislines.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_clip_path.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_clip_path.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_clip_path.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_clip_path.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_floating_axes.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_floating_axes.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_floating_axes.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_floating_axes.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_grid_finder.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_grid_finder.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_grid_finder.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_grid_finder.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_grid_helper_curvelinear.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_grid_helper_curvelinear.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_grid_helper_curvelinear.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_axisartist_grid_helper_curvelinear.py diff --git a/venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_mplot3d.py b/venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_mplot3d.py similarity index 100% rename from venv/lib/python3.7/site-packages/mpl_toolkits/tests/test_mplot3d.py rename to venv.bak/lib/python3.7/site-packages/mpl_toolkits/tests/test_mplot3d.py diff --git 
a/venv/lib/python3.7/site-packages/nltk-3.4.5.dist-info/INSTALLER b/venv.bak/lib/python3.7/site-packages/nltk-3.4.5.dist-info/INSTALLER similarity index 100% rename from venv/lib/python3.7/site-packages/nltk-3.4.5.dist-info/INSTALLER rename to venv.bak/lib/python3.7/site-packages/nltk-3.4.5.dist-info/INSTALLER diff --git a/venv/lib/python3.7/site-packages/nltk-3.4.5.dist-info/LICENSE.txt b/venv.bak/lib/python3.7/site-packages/nltk-3.4.5.dist-info/LICENSE.txt similarity index 100% rename from venv/lib/python3.7/site-packages/nltk-3.4.5.dist-info/LICENSE.txt rename to venv.bak/lib/python3.7/site-packages/nltk-3.4.5.dist-info/LICENSE.txt diff --git a/venv/lib/python3.7/site-packages/nltk-3.4.5.dist-info/METADATA b/venv.bak/lib/python3.7/site-packages/nltk-3.4.5.dist-info/METADATA similarity index 100% rename from venv/lib/python3.7/site-packages/nltk-3.4.5.dist-info/METADATA rename to venv.bak/lib/python3.7/site-packages/nltk-3.4.5.dist-info/METADATA diff --git a/venv/lib/python3.7/site-packages/nltk-3.4.5.dist-info/RECORD b/venv.bak/lib/python3.7/site-packages/nltk-3.4.5.dist-info/RECORD similarity index 100% rename from venv/lib/python3.7/site-packages/nltk-3.4.5.dist-info/RECORD rename to venv.bak/lib/python3.7/site-packages/nltk-3.4.5.dist-info/RECORD diff --git a/venv/lib/python3.7/site-packages/nltk-3.4.5.dist-info/WHEEL b/venv.bak/lib/python3.7/site-packages/nltk-3.4.5.dist-info/WHEEL similarity index 100% rename from venv/lib/python3.7/site-packages/nltk-3.4.5.dist-info/WHEEL rename to venv.bak/lib/python3.7/site-packages/nltk-3.4.5.dist-info/WHEEL diff --git a/venv/lib/python3.7/site-packages/nltk-3.4.5.dist-info/top_level.txt b/venv.bak/lib/python3.7/site-packages/nltk-3.4.5.dist-info/top_level.txt similarity index 100% rename from venv/lib/python3.7/site-packages/nltk-3.4.5.dist-info/top_level.txt rename to venv.bak/lib/python3.7/site-packages/nltk-3.4.5.dist-info/top_level.txt diff --git a/venv.bak/lib/python3.7/site-packages/nltk/VERSION b/venv.bak/lib/python3.7/site-packages/nltk/VERSION new file mode 100644 index 0000000..4f5e697 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/VERSION @@ -0,0 +1 @@ +3.4.5 diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/__init__.py new file mode 100644 index 0000000..cd14254 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/__init__.py @@ -0,0 +1,205 @@ +# Natural Language Toolkit (NLTK) +# +# Copyright (C) 2001-2019 NLTK Project +# Authors: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +The Natural Language Toolkit (NLTK) is an open source Python library +for Natural Language Processing. A free online book is available. +(If you use the library for academic research, please cite the book.) + +Steven Bird, Ewan Klein, and Edward Loper (2009). +Natural Language Processing with Python. O'Reilly Media Inc. +http://nltk.org/book +""" +from __future__ import print_function, absolute_import + +import os + +# ////////////////////////////////////////////////////// +# Metadata +# ////////////////////////////////////////////////////// + +# Version. For each new release, the version number should be updated +# in the file VERSION. +try: + # If a VERSION file exists, use it! 
+ version_file = os.path.join(os.path.dirname(__file__), 'VERSION') + with open(version_file, 'r') as infile: + __version__ = infile.read().strip() +except NameError: + __version__ = 'unknown (running code interactively?)' +except IOError as ex: + __version__ = "unknown (%s)" % ex + +if __doc__ is not None: # fix for the ``python -OO`` + __doc__ += '\n@version: ' + __version__ + + +# Copyright notice +__copyright__ = """\ +Copyright (C) 2001-2019 NLTK Project. + +Distributed and Licensed under the Apache License, Version 2.0, +which is included by reference. +""" + +__license__ = "Apache License, Version 2.0" +# Description of the toolkit, keywords, and the project's primary URL. +__longdescr__ = """\ +The Natural Language Toolkit (NLTK) is a Python package for +natural language processing. NLTK requires Python 2.6 or higher.""" +__keywords__ = [ + 'NLP', + 'CL', + 'natural language processing', + 'computational linguistics', + 'parsing', + 'tagging', + 'tokenizing', + 'syntax', + 'linguistics', + 'language', + 'natural language', + 'text analytics', +] +__url__ = "http://nltk.org/" + +# Maintainer, contributors, etc. +__maintainer__ = "Steven Bird, Edward Loper, Ewan Klein" +__maintainer_email__ = "stevenbird1@gmail.com" +__author__ = __maintainer__ +__author_email__ = __maintainer_email__ + +# "Trove" classifiers for Python Package Index. +__classifiers__ = [ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Intended Audience :: Education', + 'Intended Audience :: Information Technology', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: Apache Software License', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2.7', + 'Topic :: Scientific/Engineering', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + 'Topic :: Scientific/Engineering :: Human Machine Interfaces', + 'Topic :: Scientific/Engineering :: Information Analysis', + 'Topic :: Text Processing', + 'Topic :: Text Processing :: Filters', + 'Topic :: Text Processing :: General', + 'Topic :: Text Processing :: Indexing', + 'Topic :: Text Processing :: Linguistic', +] + +from nltk.internals import config_java + +# support numpy from pypy +try: + import numpypy +except ImportError: + pass + +# Override missing methods on environments where it cannot be used like GAE. 
+import subprocess + +if not hasattr(subprocess, 'PIPE'): + + def _fake_PIPE(*args, **kwargs): + raise NotImplementedError('subprocess.PIPE is not supported.') + + subprocess.PIPE = _fake_PIPE +if not hasattr(subprocess, 'Popen'): + + def _fake_Popen(*args, **kwargs): + raise NotImplementedError('subprocess.Popen is not supported.') + + subprocess.Popen = _fake_Popen + +########################################################### +# TOP-LEVEL MODULES +########################################################### + +# Import top-level functionality into top-level namespace + +from nltk.collocations import * +from nltk.decorators import decorator, memoize +from nltk.featstruct import * +from nltk.grammar import * +from nltk.probability import * +from nltk.text import * +from nltk.tree import * +from nltk.util import * +from nltk.jsontags import * + +########################################################### +# PACKAGES +########################################################### + +from nltk.chunk import * +from nltk.classify import * +from nltk.inference import * +from nltk.metrics import * +from nltk.parse import * +from nltk.tag import * +from nltk.tokenize import * +from nltk.translate import * +from nltk.sem import * +from nltk.stem import * + +# Packages which can be lazily imported +# (a) we don't import * +# (b) they're slow to import or have run-time dependencies +# that can safely fail at run time + +from nltk import lazyimport + +app = lazyimport.LazyModule('nltk.app', locals(), globals()) +chat = lazyimport.LazyModule('nltk.chat', locals(), globals()) +corpus = lazyimport.LazyModule('nltk.corpus', locals(), globals()) +draw = lazyimport.LazyModule('nltk.draw', locals(), globals()) +toolbox = lazyimport.LazyModule('nltk.toolbox', locals(), globals()) + +# Optional loading + +try: + import numpy +except ImportError: + pass +else: + from nltk import cluster + +from nltk.downloader import download, download_shell + +try: + from six.moves import tkinter +except ImportError: + pass +else: + try: + from nltk.downloader import download_gui + except RuntimeError as e: + import warnings + + warnings.warn( + "Corpus downloader GUI not loaded " + "(RuntimeError during import: %s)" % str(e) + ) + +# explicitly import all top-level modules (ensuring +# they override the same names inadvertently imported +# from a subpackage) + +from nltk import ccg, chunk, classify, collocations +from nltk import data, featstruct, grammar, help, inference, metrics +from nltk import misc, parse, probability, sem, stem, wsd +from nltk import tag, tbl, text, tokenize, translate, tree, treetransforms, util + + +# FIXME: override any accidentally imported demo, see https://github.com/nltk/nltk/issues/2116 +def demo(): + print("To run the demo code for a module, type nltk.module.demo()") diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..dbbdf98 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/book.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/book.cpython-37.pyc new file mode 100644 index 0000000..aa2ee72 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/book.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/collections.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/collections.cpython-37.pyc new file mode 100644 index 0000000..7aa5bcd Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/collections.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/collocations.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/collocations.cpython-37.pyc new file mode 100644 index 0000000..1cc439c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/collocations.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/compat.cpython-37.pyc new file mode 100644 index 0000000..b92fe3f Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/compat.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/data.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/data.cpython-37.pyc new file mode 100644 index 0000000..8696671 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/data.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/decorators.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/decorators.cpython-37.pyc new file mode 100644 index 0000000..932af83 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/decorators.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/downloader.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/downloader.cpython-37.pyc new file mode 100644 index 0000000..6c5fb1d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/downloader.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/featstruct.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/featstruct.cpython-37.pyc new file mode 100644 index 0000000..052305e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/featstruct.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/grammar.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/grammar.cpython-37.pyc new file mode 100644 index 0000000..67084f8 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/grammar.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/help.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/help.cpython-37.pyc new file mode 100644 index 0000000..09f26de Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/help.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/internals.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/internals.cpython-37.pyc new file mode 100644 index 0000000..d2cb7bb Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/internals.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/jsontags.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/jsontags.cpython-37.pyc new file mode 100644 index 0000000..4a02b41 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/jsontags.cpython-37.pyc differ diff --git 
a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/lazyimport.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/lazyimport.cpython-37.pyc new file mode 100644 index 0000000..18ded5d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/lazyimport.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/probability.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/probability.cpython-37.pyc new file mode 100644 index 0000000..3344f6a Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/probability.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/text.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/text.cpython-37.pyc new file mode 100644 index 0000000..7615acf Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/text.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/tgrep.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/tgrep.cpython-37.pyc new file mode 100644 index 0000000..d5e9afd Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/tgrep.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/toolbox.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/toolbox.cpython-37.pyc new file mode 100644 index 0000000..89eb56c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/toolbox.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/tree.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/tree.cpython-37.pyc new file mode 100644 index 0000000..07add07 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/tree.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/treeprettyprinter.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/treeprettyprinter.cpython-37.pyc new file mode 100644 index 0000000..1efb033 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/treeprettyprinter.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/treetransforms.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/treetransforms.cpython-37.pyc new file mode 100644 index 0000000..01f6b8f Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/treetransforms.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000..56f36fd Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/util.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/wsd.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/wsd.cpython-37.pyc new file mode 100644 index 0000000..34d1439 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/__pycache__/wsd.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/app/__init__.py new file mode 100644 index 0000000..19157ac --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/app/__init__.py @@ -0,0 +1,55 @@ +# 
Natural Language Toolkit: Applications package +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird +# URL: +# For license information, see LICENSE.TXT + +""" +Interactive NLTK Applications: + +chartparser: Chart Parser +chunkparser: Regular-Expression Chunk Parser +collocations: Find collocations in text +concordance: Part-of-speech concordancer +nemo: Finding (and Replacing) Nemo regular expression tool +rdparser: Recursive Descent Parser +srparser: Shift-Reduce Parser +wordnet: WordNet Browser +""" + + +# Import Tkinter-based modules if Tkinter is installed +try: + from six.moves import tkinter +except ImportError: + import warnings + + warnings.warn("nltk.app package not loaded " "(please install Tkinter library).") +else: + from nltk.app.chartparser_app import app as chartparser + from nltk.app.chunkparser_app import app as chunkparser + from nltk.app.collocations_app import app as collocations + from nltk.app.concordance_app import app as concordance + from nltk.app.nemo_app import app as nemo + from nltk.app.rdparser_app import app as rdparser + from nltk.app.srparser_app import app as srparser + from nltk.app.wordnet_app import app as wordnet + + try: + from matplotlib import pylab + except ImportError: + import warnings + + warnings.warn( + "nltk.app.wordfreq not loaded " "(requires the matplotlib library)." + ) + else: + from nltk.app.wordfreq_app import app as wordfreq + +# skip doctests from this package +def setup_module(module): + from nose import SkipTest + + raise SkipTest("nltk.app examples are not doctests") diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..5ceae46 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/chartparser_app.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/chartparser_app.cpython-37.pyc new file mode 100644 index 0000000..67c9508 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/chartparser_app.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/chunkparser_app.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/chunkparser_app.cpython-37.pyc new file mode 100644 index 0000000..80dd5b8 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/chunkparser_app.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/collocations_app.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/collocations_app.cpython-37.pyc new file mode 100644 index 0000000..77daec1 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/collocations_app.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/concordance_app.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/concordance_app.cpython-37.pyc new file mode 100644 index 0000000..76455b2 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/concordance_app.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/nemo_app.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/nemo_app.cpython-37.pyc 
new file mode 100644 index 0000000..89a472e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/nemo_app.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/rdparser_app.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/rdparser_app.cpython-37.pyc new file mode 100644 index 0000000..dc61749 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/rdparser_app.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/srparser_app.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/srparser_app.cpython-37.pyc new file mode 100644 index 0000000..3fc0af7 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/srparser_app.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/wordfreq_app.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/wordfreq_app.cpython-37.pyc new file mode 100644 index 0000000..a7f550b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/wordfreq_app.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/wordnet_app.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/wordnet_app.cpython-37.pyc new file mode 100644 index 0000000..cf2ee64 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/app/__pycache__/wordnet_app.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/chartparser_app.py b/venv.bak/lib/python3.7/site-packages/nltk/app/chartparser_app.py new file mode 100644 index 0000000..d42095e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/app/chartparser_app.py @@ -0,0 +1,2576 @@ +# Natural Language Toolkit: Chart Parser Application +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Jean Mark Gawron +# Steven Bird +# URL: +# For license information, see LICENSE.TXT + +""" +A graphical tool for exploring chart parsing. + +Chart parsing is a flexible parsing algorithm that uses a data +structure called a "chart" to record hypotheses about syntactic +constituents. Each hypothesis is represented by a single "edge" on +the chart. A set of "chart rules" determine when new edges can be +added to the chart. This set of rules controls the overall behavior +of the parser (e.g. whether it parses top-down or bottom-up). + +The chart parsing tool demonstrates the process of parsing a single +sentence, with a given grammar and lexicon. Its display is divided +into three sections: the bottom section displays the chart; the middle +section displays the sentence; and the top section displays the +partial syntax tree corresponding to the selected edge. Buttons along +the bottom of the window are used to control the execution of the +algorithm. + +The chart parsing tool allows for flexible control of the parsing +algorithm. At each step of the algorithm, you can select which rule +or strategy you wish to apply. This allows you to experiment with +mixing different strategies (e.g. top-down and bottom-up). You can +exercise fine-grained control over the algorithm by selecting which +edge you wish to apply a rule to. +""" + +# At some point, we should rewrite this tool to use the new canvas +# widget system. 
+ + +from __future__ import division +import pickle +import os.path + +from six.moves.tkinter import ( + Button, + Canvas, + Checkbutton, + Frame, + IntVar, + Label, + Menu, + Scrollbar, + Tk, + Toplevel, +) +from six.moves.tkinter_font import Font +from six.moves.tkinter_messagebox import showerror, showinfo +from six.moves.tkinter_tkfiledialog import asksaveasfilename, askopenfilename + +from nltk.parse.chart import ( + BottomUpPredictCombineRule, + BottomUpPredictRule, + Chart, + LeafEdge, + LeafInitRule, + SingleEdgeFundamentalRule, + SteppingChartParser, + TopDownInitRule, + TopDownPredictRule, + TreeEdge, +) +from nltk.tree import Tree +from nltk.grammar import Nonterminal, CFG +from nltk.util import in_idle +from nltk.draw.util import ( + CanvasFrame, + ColorizedList, + EntryDialog, + MutableOptionMenu, + ShowText, + SymbolWidget, +) +from nltk.draw import CFGEditor, tree_to_treesegment, TreeSegmentWidget + +# Known bug: ChartView doesn't handle edges generated by epsilon +# productions (e.g., [Production: PP -> ]) very well. + +####################################################################### +# Edge List +####################################################################### + + +class EdgeList(ColorizedList): + ARROW = SymbolWidget.SYMBOLS['rightarrow'] + + def _init_colortags(self, textwidget, options): + textwidget.tag_config('terminal', foreground='#006000') + textwidget.tag_config('arrow', font='symbol', underline='0') + textwidget.tag_config('dot', foreground='#000000') + textwidget.tag_config( + 'nonterminal', foreground='blue', font=('helvetica', -12, 'bold') + ) + + def _item_repr(self, item): + contents = [] + contents.append(('%s\t' % item.lhs(), 'nonterminal')) + contents.append((self.ARROW, 'arrow')) + for i, elt in enumerate(item.rhs()): + if i == item.dot(): + contents.append((' *', 'dot')) + if isinstance(elt, Nonterminal): + contents.append((' %s' % elt.symbol(), 'nonterminal')) + else: + contents.append((' %r' % elt, 'terminal')) + if item.is_complete(): + contents.append((' *', 'dot')) + return contents + + +####################################################################### +# Chart Matrix View +####################################################################### + + +class ChartMatrixView(object): + """ + A view of a chart that displays the contents of the corresponding matrix. 
+ """ + + def __init__( + self, parent, chart, toplevel=True, title='Chart Matrix', show_numedges=False + ): + self._chart = chart + self._cells = [] + self._marks = [] + + self._selected_cell = None + + if toplevel: + self._root = Toplevel(parent) + self._root.title(title) + self._root.bind('', self.destroy) + self._init_quit(self._root) + else: + self._root = Frame(parent) + + self._init_matrix(self._root) + self._init_list(self._root) + if show_numedges: + self._init_numedges(self._root) + else: + self._numedges_label = None + + self._callbacks = {} + + self._num_edges = 0 + + self.draw() + + def _init_quit(self, root): + quit = Button(root, text='Quit', command=self.destroy) + quit.pack(side='bottom', expand=0, fill='none') + + def _init_matrix(self, root): + cframe = Frame(root, border=2, relief='sunken') + cframe.pack(expand=0, fill='none', padx=1, pady=3, side='top') + self._canvas = Canvas(cframe, width=200, height=200, background='white') + self._canvas.pack(expand=0, fill='none') + + def _init_numedges(self, root): + self._numedges_label = Label(root, text='0 edges') + self._numedges_label.pack(expand=0, fill='none', side='top') + + def _init_list(self, root): + self._list = EdgeList(root, [], width=20, height=5) + self._list.pack(side='top', expand=1, fill='both', pady=3) + + def cb(edge, self=self): + self._fire_callbacks('select', edge) + + self._list.add_callback('select', cb) + self._list.focus() + + def destroy(self, *e): + if self._root is None: + return + try: + self._root.destroy() + except: + pass + self._root = None + + def set_chart(self, chart): + if chart is not self._chart: + self._chart = chart + self._num_edges = 0 + self.draw() + + def update(self): + if self._root is None: + return + + # Count the edges in each cell + N = len(self._cells) + cell_edges = [[0 for i in range(N)] for j in range(N)] + for edge in self._chart: + cell_edges[edge.start()][edge.end()] += 1 + + # Color the cells correspondingly. + for i in range(N): + for j in range(i, N): + if cell_edges[i][j] == 0: + color = 'gray20' + else: + color = '#00%02x%02x' % ( + min(255, 50 + 128 * cell_edges[i][j] / 10), + max(0, 128 - 128 * cell_edges[i][j] / 10), + ) + cell_tag = self._cells[i][j] + self._canvas.itemconfig(cell_tag, fill=color) + if (i, j) == self._selected_cell: + self._canvas.itemconfig(cell_tag, outline='#00ffff', width=3) + self._canvas.tag_raise(cell_tag) + else: + self._canvas.itemconfig(cell_tag, outline='black', width=1) + + # Update the edge list. + edges = list(self._chart.select(span=self._selected_cell)) + self._list.set(edges) + + # Update our edge count. + self._num_edges = self._chart.num_edges() + if self._numedges_label is not None: + self._numedges_label['text'] = '%d edges' % self._num_edges + + def activate(self): + self._canvas.itemconfig('inactivebox', state='hidden') + self.update() + + def inactivate(self): + self._canvas.itemconfig('inactivebox', state='normal') + self.update() + + def add_callback(self, event, func): + self._callbacks.setdefault(event, {})[func] = 1 + + def remove_callback(self, event, func=None): + if func is None: + del self._callbacks[event] + else: + try: + del self._callbacks[event][func] + except: + pass + + def _fire_callbacks(self, event, *args): + if event not in self._callbacks: + return + for cb_func in list(self._callbacks[event].keys()): + cb_func(*args) + + def select_cell(self, i, j): + if self._root is None: + return + + # If the cell is already selected (and the chart contents + # haven't changed), then do nothing. 
+ if (i, j) == self._selected_cell and self._chart.num_edges() == self._num_edges: + return + + self._selected_cell = (i, j) + self.update() + + # Fire the callback. + self._fire_callbacks('select_cell', i, j) + + def deselect_cell(self): + if self._root is None: + return + self._selected_cell = None + self._list.set([]) + self.update() + + def _click_cell(self, i, j): + if self._selected_cell == (i, j): + self.deselect_cell() + else: + self.select_cell(i, j) + + def view_edge(self, edge): + self.select_cell(*edge.span()) + self._list.view(edge) + + def mark_edge(self, edge): + if self._root is None: + return + self.select_cell(*edge.span()) + self._list.mark(edge) + + def unmark_edge(self, edge=None): + if self._root is None: + return + self._list.unmark(edge) + + def markonly_edge(self, edge): + if self._root is None: + return + self.select_cell(*edge.span()) + self._list.markonly(edge) + + def draw(self): + if self._root is None: + return + LEFT_MARGIN = BOT_MARGIN = 15 + TOP_MARGIN = 5 + c = self._canvas + c.delete('all') + N = self._chart.num_leaves() + 1 + dx = (int(c['width']) - LEFT_MARGIN) / N + dy = (int(c['height']) - TOP_MARGIN - BOT_MARGIN) / N + + c.delete('all') + + # Labels and dotted lines + for i in range(N): + c.create_text( + LEFT_MARGIN - 2, i * dy + dy / 2 + TOP_MARGIN, text=repr(i), anchor='e' + ) + c.create_text( + i * dx + dx / 2 + LEFT_MARGIN, + N * dy + TOP_MARGIN + 1, + text=repr(i), + anchor='n', + ) + c.create_line( + LEFT_MARGIN, + dy * (i + 1) + TOP_MARGIN, + dx * N + LEFT_MARGIN, + dy * (i + 1) + TOP_MARGIN, + dash='.', + ) + c.create_line( + dx * i + LEFT_MARGIN, + TOP_MARGIN, + dx * i + LEFT_MARGIN, + dy * N + TOP_MARGIN, + dash='.', + ) + + # A box around the whole thing + c.create_rectangle( + LEFT_MARGIN, TOP_MARGIN, LEFT_MARGIN + dx * N, dy * N + TOP_MARGIN, width=2 + ) + + # Cells + self._cells = [[None for i in range(N)] for j in range(N)] + for i in range(N): + for j in range(i, N): + t = c.create_rectangle( + j * dx + LEFT_MARGIN, + i * dy + TOP_MARGIN, + (j + 1) * dx + LEFT_MARGIN, + (i + 1) * dy + TOP_MARGIN, + fill='gray20', + ) + self._cells[i][j] = t + + def cb(event, self=self, i=i, j=j): + self._click_cell(i, j) + + c.tag_bind(t, '', cb) + + # Inactive box + xmax, ymax = int(c['width']), int(c['height']) + t = c.create_rectangle( + -100, + -100, + xmax + 100, + ymax + 100, + fill='gray50', + state='hidden', + tag='inactivebox', + ) + c.tag_lower(t) + + # Update the cells. + self.update() + + def pack(self, *args, **kwargs): + self._root.pack(*args, **kwargs) + + +####################################################################### +# Chart Results View +####################################################################### + + +class ChartResultsView(object): + def __init__(self, parent, chart, grammar, toplevel=True): + self._chart = chart + self._grammar = grammar + self._trees = [] + self._y = 10 + self._treewidgets = [] + self._selection = None + self._selectbox = None + + if toplevel: + self._root = Toplevel(parent) + self._root.title('Chart Parser Application: Results') + self._root.bind('', self.destroy) + else: + self._root = Frame(parent) + + # Buttons + if toplevel: + buttons = Frame(self._root) + buttons.pack(side='bottom', expand=0, fill='x') + Button(buttons, text='Quit', command=self.destroy).pack(side='right') + Button(buttons, text='Print All', command=self.print_all).pack(side='left') + Button(buttons, text='Print Selection', command=self.print_selection).pack( + side='left' + ) + + # Canvas frame. 
+ self._cframe = CanvasFrame(self._root, closeenough=20) + self._cframe.pack(side='top', expand=1, fill='both') + + # Initial update + self.update() + + def update(self, edge=None): + if self._root is None: + return + # If the edge isn't a parse edge, do nothing. + if edge is not None: + if edge.lhs() != self._grammar.start(): + return + if edge.span() != (0, self._chart.num_leaves()): + return + + for parse in self._chart.parses(self._grammar.start()): + if parse not in self._trees: + self._add(parse) + + def _add(self, parse): + # Add it to self._trees. + self._trees.append(parse) + + # Create a widget for it. + c = self._cframe.canvas() + treewidget = tree_to_treesegment(c, parse) + + # Add it to the canvas frame. + self._treewidgets.append(treewidget) + self._cframe.add_widget(treewidget, 10, self._y) + + # Register callbacks. + treewidget.bind_click(self._click) + + # Update y. + self._y = treewidget.bbox()[3] + 10 + + def _click(self, widget): + c = self._cframe.canvas() + if self._selection is not None: + c.delete(self._selectbox) + self._selection = widget + (x1, y1, x2, y2) = widget.bbox() + self._selectbox = c.create_rectangle(x1, y1, x2, y2, width=2, outline='#088') + + def _color(self, treewidget, color): + treewidget.label()['color'] = color + for child in treewidget.subtrees(): + if isinstance(child, TreeSegmentWidget): + self._color(child, color) + else: + child['color'] = color + + def print_all(self, *e): + if self._root is None: + return + self._cframe.print_to_file() + + def print_selection(self, *e): + if self._root is None: + return + if self._selection is None: + showerror('Print Error', 'No tree selected') + else: + c = self._cframe.canvas() + for widget in self._treewidgets: + if widget is not self._selection: + self._cframe.destroy_widget(widget) + c.delete(self._selectbox) + (x1, y1, x2, y2) = self._selection.bbox() + self._selection.move(10 - x1, 10 - y1) + c['scrollregion'] = '0 0 %s %s' % (x2 - x1 + 20, y2 - y1 + 20) + self._cframe.print_to_file() + + # Restore our state. + self._treewidgets = [self._selection] + self.clear() + self.update() + + def clear(self): + if self._root is None: + return + for treewidget in self._treewidgets: + self._cframe.destroy_widget(treewidget) + self._trees = [] + self._treewidgets = [] + if self._selection is not None: + self._cframe.canvas().delete(self._selectbox) + self._selection = None + self._y = 10 + + def set_chart(self, chart): + self.clear() + self._chart = chart + self.update() + + def set_grammar(self, grammar): + self.clear() + self._grammar = grammar + self.update() + + def destroy(self, *e): + if self._root is None: + return + try: + self._root.destroy() + except: + pass + self._root = None + + def pack(self, *args, **kwargs): + self._root.pack(*args, **kwargs) + + +####################################################################### +# Chart Comparer +####################################################################### + + +class ChartComparer(object): + """ + + :ivar _root: The root window + + :ivar _charts: A dictionary mapping names to charts. When + charts are loaded, they are added to this dictionary. + + :ivar _left_chart: The left ``Chart``. + :ivar _left_name: The name ``_left_chart`` (derived from filename) + :ivar _left_matrix: The ``ChartMatrixView`` for ``_left_chart`` + :ivar _left_selector: The drop-down ``MutableOptionsMenu`` used + to select ``_left_chart``. + + :ivar _right_chart: The right ``Chart``. 
+ :ivar _right_name: The name ``_right_chart`` (derived from filename) + :ivar _right_matrix: The ``ChartMatrixView`` for ``_right_chart`` + :ivar _right_selector: The drop-down ``MutableOptionsMenu`` used + to select ``_right_chart``. + + :ivar _out_chart: The out ``Chart``. + :ivar _out_name: The name ``_out_chart`` (derived from filename) + :ivar _out_matrix: The ``ChartMatrixView`` for ``_out_chart`` + :ivar _out_label: The label for ``_out_chart``. + + :ivar _op_label: A Label containing the most recent operation. + """ + + _OPSYMBOL = { + '-': '-', + 'and': SymbolWidget.SYMBOLS['intersection'], + 'or': SymbolWidget.SYMBOLS['union'], + } + + def __init__(self, *chart_filenames): + # This chart is displayed when we don't have a value (eg + # before any chart is loaded). + faketok = [''] * 8 + self._emptychart = Chart(faketok) + + # The left & right charts start out empty. + self._left_name = 'None' + self._right_name = 'None' + self._left_chart = self._emptychart + self._right_chart = self._emptychart + + # The charts that have been loaded. + self._charts = {'None': self._emptychart} + + # The output chart. + self._out_chart = self._emptychart + + # The most recent operation + self._operator = None + + # Set up the root window. + self._root = Tk() + self._root.title('Chart Comparison') + self._root.bind('', self.destroy) + self._root.bind('', self.destroy) + + # Initialize all widgets, etc. + self._init_menubar(self._root) + self._init_chartviews(self._root) + self._init_divider(self._root) + self._init_buttons(self._root) + self._init_bindings(self._root) + + # Load any specified charts. + for filename in chart_filenames: + self.load_chart(filename) + + def destroy(self, *e): + if self._root is None: + return + try: + self._root.destroy() + except: + pass + self._root = None + + def mainloop(self, *args, **kwargs): + return + self._root.mainloop(*args, **kwargs) + + # //////////////////////////////////////////////////////////// + # Initialization + # //////////////////////////////////////////////////////////// + + def _init_menubar(self, root): + menubar = Menu(root) + + # File menu + filemenu = Menu(menubar, tearoff=0) + filemenu.add_command( + label='Load Chart', + accelerator='Ctrl-o', + underline=0, + command=self.load_chart_dialog, + ) + filemenu.add_command( + label='Save Output', + accelerator='Ctrl-s', + underline=0, + command=self.save_chart_dialog, + ) + filemenu.add_separator() + filemenu.add_command( + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-x' + ) + menubar.add_cascade(label='File', underline=0, menu=filemenu) + + # Compare menu + opmenu = Menu(menubar, tearoff=0) + opmenu.add_command( + label='Intersection', command=self._intersection, accelerator='+' + ) + opmenu.add_command(label='Union', command=self._union, accelerator='*') + opmenu.add_command( + label='Difference', command=self._difference, accelerator='-' + ) + opmenu.add_separator() + opmenu.add_command(label='Swap Charts', command=self._swapcharts) + menubar.add_cascade(label='Compare', underline=0, menu=opmenu) + + # Add the menu + self._root.config(menu=menubar) + + def _init_divider(self, root): + divider = Frame(root, border=2, relief='sunken') + divider.pack(side='top', fill='x', ipady=2) + + def _init_chartviews(self, root): + opfont = ('symbol', -36) # Font for operator. + eqfont = ('helvetica', -36) # Font for equals sign. + + frame = Frame(root, background='#c0c0c0') + frame.pack(side='top', expand=1, fill='both') + + # The left matrix. 
+ cv1_frame = Frame(frame, border=3, relief='groove') + cv1_frame.pack(side='left', padx=8, pady=7, expand=1, fill='both') + self._left_selector = MutableOptionMenu( + cv1_frame, list(self._charts.keys()), command=self._select_left + ) + self._left_selector.pack(side='top', pady=5, fill='x') + self._left_matrix = ChartMatrixView( + cv1_frame, self._emptychart, toplevel=False, show_numedges=True + ) + self._left_matrix.pack(side='bottom', padx=5, pady=5, expand=1, fill='both') + self._left_matrix.add_callback('select', self.select_edge) + self._left_matrix.add_callback('select_cell', self.select_cell) + self._left_matrix.inactivate() + + # The operator. + self._op_label = Label( + frame, text=' ', width=3, background='#c0c0c0', font=opfont + ) + self._op_label.pack(side='left', padx=5, pady=5) + + # The right matrix. + cv2_frame = Frame(frame, border=3, relief='groove') + cv2_frame.pack(side='left', padx=8, pady=7, expand=1, fill='both') + self._right_selector = MutableOptionMenu( + cv2_frame, list(self._charts.keys()), command=self._select_right + ) + self._right_selector.pack(side='top', pady=5, fill='x') + self._right_matrix = ChartMatrixView( + cv2_frame, self._emptychart, toplevel=False, show_numedges=True + ) + self._right_matrix.pack(side='bottom', padx=5, pady=5, expand=1, fill='both') + self._right_matrix.add_callback('select', self.select_edge) + self._right_matrix.add_callback('select_cell', self.select_cell) + self._right_matrix.inactivate() + + # The equals sign + Label(frame, text='=', width=3, background='#c0c0c0', font=eqfont).pack( + side='left', padx=5, pady=5 + ) + + # The output matrix. + out_frame = Frame(frame, border=3, relief='groove') + out_frame.pack(side='left', padx=8, pady=7, expand=1, fill='both') + self._out_label = Label(out_frame, text='Output') + self._out_label.pack(side='top', pady=9) + self._out_matrix = ChartMatrixView( + out_frame, self._emptychart, toplevel=False, show_numedges=True + ) + self._out_matrix.pack(side='bottom', padx=5, pady=5, expand=1, fill='both') + self._out_matrix.add_callback('select', self.select_edge) + self._out_matrix.add_callback('select_cell', self.select_cell) + self._out_matrix.inactivate() + + def _init_buttons(self, root): + buttons = Frame(root) + buttons.pack(side='bottom', pady=5, fill='x', expand=0) + Button(buttons, text='Intersection', command=self._intersection).pack( + side='left' + ) + Button(buttons, text='Union', command=self._union).pack(side='left') + Button(buttons, text='Difference', command=self._difference).pack(side='left') + Frame(buttons, width=20).pack(side='left') + Button(buttons, text='Swap Charts', command=self._swapcharts).pack(side='left') + + Button(buttons, text='Detatch Output', command=self._detatch_out).pack( + side='right' + ) + + def _init_bindings(self, root): + # root.bind('', self.save_chart) + root.bind('', self.load_chart_dialog) + # root.bind('', self.reset) + + # //////////////////////////////////////////////////////////// + # Input Handling + # //////////////////////////////////////////////////////////// + + def _select_left(self, name): + self._left_name = name + self._left_chart = self._charts[name] + self._left_matrix.set_chart(self._left_chart) + if name == 'None': + self._left_matrix.inactivate() + self._apply_op() + + def _select_right(self, name): + self._right_name = name + self._right_chart = self._charts[name] + self._right_matrix.set_chart(self._right_chart) + if name == 'None': + self._right_matrix.inactivate() + self._apply_op() + + def _apply_op(self): + if 
self._operator == '-': + self._difference() + elif self._operator == 'or': + self._union() + elif self._operator == 'and': + self._intersection() + + # //////////////////////////////////////////////////////////// + # File + # //////////////////////////////////////////////////////////// + CHART_FILE_TYPES = [('Pickle file', '.pickle'), ('All files', '*')] + + def save_chart_dialog(self, *args): + filename = asksaveasfilename( + filetypes=self.CHART_FILE_TYPES, defaultextension='.pickle' + ) + if not filename: + return + try: + with open(filename, 'wb') as outfile: + pickle.dump(self._out_chart, outfile) + except Exception as e: + showerror( + 'Error Saving Chart', 'Unable to open file: %r\n%s' % (filename, e) + ) + + def load_chart_dialog(self, *args): + filename = askopenfilename( + filetypes=self.CHART_FILE_TYPES, defaultextension='.pickle' + ) + if not filename: + return + try: + self.load_chart(filename) + except Exception as e: + showerror( + 'Error Loading Chart', 'Unable to open file: %r\n%s' % (filename, e) + ) + + def load_chart(self, filename): + with open(filename, 'rb') as infile: + chart = pickle.load(infile) + name = os.path.basename(filename) + if name.endswith('.pickle'): + name = name[:-7] + if name.endswith('.chart'): + name = name[:-6] + self._charts[name] = chart + self._left_selector.add(name) + self._right_selector.add(name) + + # If either left_matrix or right_matrix is empty, then + # display the new chart. + if self._left_chart is self._emptychart: + self._left_selector.set(name) + elif self._right_chart is self._emptychart: + self._right_selector.set(name) + + def _update_chartviews(self): + self._left_matrix.update() + self._right_matrix.update() + self._out_matrix.update() + + # //////////////////////////////////////////////////////////// + # Selection + # //////////////////////////////////////////////////////////// + + def select_edge(self, edge): + if edge in self._left_chart: + self._left_matrix.markonly_edge(edge) + else: + self._left_matrix.unmark_edge() + if edge in self._right_chart: + self._right_matrix.markonly_edge(edge) + else: + self._right_matrix.unmark_edge() + if edge in self._out_chart: + self._out_matrix.markonly_edge(edge) + else: + self._out_matrix.unmark_edge() + + def select_cell(self, i, j): + self._left_matrix.select_cell(i, j) + self._right_matrix.select_cell(i, j) + self._out_matrix.select_cell(i, j) + + # //////////////////////////////////////////////////////////// + # Operations + # //////////////////////////////////////////////////////////// + + def _difference(self): + if not self._checkcompat(): + return + + out_chart = Chart(self._left_chart.tokens()) + for edge in self._left_chart: + if edge not in self._right_chart: + out_chart.insert(edge, []) + + self._update('-', out_chart) + + def _intersection(self): + if not self._checkcompat(): + return + + out_chart = Chart(self._left_chart.tokens()) + for edge in self._left_chart: + if edge in self._right_chart: + out_chart.insert(edge, []) + + self._update('and', out_chart) + + def _union(self): + if not self._checkcompat(): + return + + out_chart = Chart(self._left_chart.tokens()) + for edge in self._left_chart: + out_chart.insert(edge, []) + for edge in self._right_chart: + out_chart.insert(edge, []) + + self._update('or', out_chart) + + def _swapcharts(self): + left, right = self._left_name, self._right_name + self._left_selector.set(right) + self._right_selector.set(left) + + def _checkcompat(self): + if ( + self._left_chart.tokens() != self._right_chart.tokens() + or 
self._left_chart.property_names() != self._right_chart.property_names() + or self._left_chart == self._emptychart + or self._right_chart == self._emptychart + ): + # Clear & inactivate the output chart. + self._out_chart = self._emptychart + self._out_matrix.set_chart(self._out_chart) + self._out_matrix.inactivate() + self._out_label['text'] = 'Output' + # Issue some other warning? + return False + else: + return True + + def _update(self, operator, out_chart): + self._operator = operator + self._op_label['text'] = self._OPSYMBOL[operator] + self._out_chart = out_chart + self._out_matrix.set_chart(out_chart) + self._out_label['text'] = '%s %s %s' % ( + self._left_name, + self._operator, + self._right_name, + ) + + def _clear_out_chart(self): + self._out_chart = self._emptychart + self._out_matrix.set_chart(self._out_chart) + self._op_label['text'] = ' ' + self._out_matrix.inactivate() + + def _detatch_out(self): + ChartMatrixView(self._root, self._out_chart, title=self._out_label['text']) + + +####################################################################### +# Chart View +####################################################################### + + +class ChartView(object): + """ + A component for viewing charts. This is used by ``ChartParserApp`` to + allow students to interactively experiment with various chart + parsing techniques. It is also used by ``Chart.draw()``. + + :ivar _chart: The chart that we are giving a view of. This chart + may be modified; after it is modified, you should call + ``update``. + :ivar _sentence: The list of tokens that the chart spans. + + :ivar _root: The root window. + :ivar _chart_canvas: The canvas we're using to display the chart + itself. + :ivar _tree_canvas: The canvas we're using to display the tree + that each edge spans. May be None, if we're not displaying + trees. + :ivar _sentence_canvas: The canvas we're using to display the sentence + text. May be None, if we're not displaying the sentence text. + :ivar _edgetags: A dictionary mapping from edges to the tags of + the canvas elements (lines, etc) used to display that edge. + The values of this dictionary have the form + ``(linetag, rhstag1, dottag, rhstag2, lhstag)``. + :ivar _treetags: A list of all the tags that make up the tree; + used to erase the tree (without erasing the loclines). + :ivar _chart_height: The height of the chart canvas. + :ivar _sentence_height: The height of the sentence canvas. + :ivar _tree_height: The height of the tree + + :ivar _text_height: The height of a text string (in the normal + font). + + :ivar _edgelevels: A list of edges at each level of the chart (the + top level is the 0th element). This list is used to remember + where edges should be drawn; and to make sure that no edges + are overlapping on the chart view. + + :ivar _unitsize: Pixel size of one unit (from the location). This + is determined by the span of the chart's location, and the + width of the chart display canvas. + + :ivar _fontsize: The current font size + + :ivar _marks: A dictionary from edges to marks. Marks are + strings, specifying colors (e.g. 'green'). + """ + + _LEAF_SPACING = 10 + _MARGIN = 10 + _TREE_LEVEL_SIZE = 12 + _CHART_LEVEL_SIZE = 40 + + def __init__(self, chart, root=None, **kw): + """ + Construct a new ``Chart`` display. + """ + # Process keyword args. + draw_tree = kw.get('draw_tree', 0) + draw_sentence = kw.get('draw_sentence', 1) + self._fontsize = kw.get('fontsize', -12) + + # The chart! 
+ self._chart = chart + + # Callback functions + self._callbacks = {} + + # Keep track of drawn edges + self._edgelevels = [] + self._edgetags = {} + + # Keep track of which edges are marked. + self._marks = {} + + # These are used to keep track of the set of tree tokens + # currently displayed in the tree canvas. + self._treetoks = [] + self._treetoks_edge = None + self._treetoks_index = 0 + + # Keep track of the tags used to draw the tree + self._tree_tags = [] + + # Put multiple edges on each level? + self._compact = 0 + + # If they didn't provide a main window, then set one up. + if root is None: + top = Tk() + top.title('Chart View') + + def destroy1(e, top=top): + top.destroy() + + def destroy2(top=top): + top.destroy() + + top.bind('q', destroy1) + b = Button(top, text='Done', command=destroy2) + b.pack(side='bottom') + self._root = top + else: + self._root = root + + # Create some fonts. + self._init_fonts(root) + + # Create the chart canvas. + (self._chart_sb, self._chart_canvas) = self._sb_canvas(self._root) + self._chart_canvas['height'] = 300 + self._chart_canvas['closeenough'] = 15 + + # Create the sentence canvas. + if draw_sentence: + cframe = Frame(self._root, relief='sunk', border=2) + cframe.pack(fill='both', side='bottom') + self._sentence_canvas = Canvas(cframe, height=50) + self._sentence_canvas['background'] = '#e0e0e0' + self._sentence_canvas.pack(fill='both') + # self._sentence_canvas['height'] = self._sentence_height + else: + self._sentence_canvas = None + + # Create the tree canvas. + if draw_tree: + (sb, canvas) = self._sb_canvas(self._root, 'n', 'x') + (self._tree_sb, self._tree_canvas) = (sb, canvas) + self._tree_canvas['height'] = 200 + else: + self._tree_canvas = None + + # Do some analysis to figure out how big the window should be + self._analyze() + self.draw() + self._resize() + self._grow() + + # Set up the configure callback, which will be called whenever + # the window is resized. + self._chart_canvas.bind('', self._configure) + + def _init_fonts(self, root): + self._boldfont = Font(family='helvetica', weight='bold', size=self._fontsize) + self._font = Font(family='helvetica', size=self._fontsize) + # See: + self._sysfont = Font(font=Button()["font"]) + root.option_add("*Font", self._sysfont) + + def _sb_canvas(self, root, expand='y', fill='both', side='bottom'): + """ + Helper for __init__: construct a canvas with a scrollbar. + """ + cframe = Frame(root, relief='sunk', border=2) + cframe.pack(fill=fill, expand=expand, side=side) + canvas = Canvas(cframe, background='#e0e0e0') + + # Give the canvas a scrollbar. + sb = Scrollbar(cframe, orient='vertical') + sb.pack(side='right', fill='y') + canvas.pack(side='left', fill=fill, expand='yes') + + # Connect the scrollbars to the canvas. + sb['command'] = canvas.yview + canvas['yscrollcommand'] = sb.set + + return (sb, canvas) + + def scroll_up(self, *e): + self._chart_canvas.yview('scroll', -1, 'units') + + def scroll_down(self, *e): + self._chart_canvas.yview('scroll', 1, 'units') + + def page_up(self, *e): + self._chart_canvas.yview('scroll', -1, 'pages') + + def page_down(self, *e): + self._chart_canvas.yview('scroll', 1, 'pages') + + def _grow(self): + """ + Grow the window, if necessary + """ + # Grow, if need-be + N = self._chart.num_leaves() + width = max( + int(self._chart_canvas['width']), N * self._unitsize + ChartView._MARGIN * 2 + ) + + # It won't resize without the second (height) line, but I + # don't understand why not. 
+ self._chart_canvas.configure(width=width) + self._chart_canvas.configure(height=self._chart_canvas['height']) + + self._unitsize = (width - 2 * ChartView._MARGIN) / N + + # Reset the height for the sentence window. + if self._sentence_canvas is not None: + self._sentence_canvas['height'] = self._sentence_height + + def set_font_size(self, size): + self._font.configure(size=-abs(size)) + self._boldfont.configure(size=-abs(size)) + self._sysfont.configure(size=-abs(size)) + self._analyze() + self._grow() + self.draw() + + def get_font_size(self): + return abs(self._fontsize) + + def _configure(self, e): + """ + The configure callback. This is called whenever the window is + resized. It is also called when the window is first mapped. + It figures out the unit size, and redraws the contents of each + canvas. + """ + N = self._chart.num_leaves() + self._unitsize = (e.width - 2 * ChartView._MARGIN) / N + self.draw() + + def update(self, chart=None): + """ + Draw any edges that have not been drawn. This is typically + called when a after modifies the canvas that a CanvasView is + displaying. ``update`` will cause any edges that have been + added to the chart to be drawn. + + If update is given a ``chart`` argument, then it will replace + the current chart with the given chart. + """ + if chart is not None: + self._chart = chart + self._edgelevels = [] + self._marks = {} + self._analyze() + self._grow() + self.draw() + self.erase_tree() + self._resize() + else: + for edge in self._chart: + if edge not in self._edgetags: + self._add_edge(edge) + self._resize() + + def _edge_conflict(self, edge, lvl): + """ + Return True if the given edge overlaps with any edge on the given + level. This is used by _add_edge to figure out what level a + new edge should be added to. + """ + (s1, e1) = edge.span() + for otheredge in self._edgelevels[lvl]: + (s2, e2) = otheredge.span() + if (s1 <= s2 < e1) or (s2 <= s1 < e2) or (s1 == s2 == e1 == e2): + return True + return False + + def _analyze_edge(self, edge): + """ + Given a new edge, recalculate: + + - _text_height + - _unitsize (if the edge text is too big for the current + _unitsize, then increase _unitsize) + """ + c = self._chart_canvas + + if isinstance(edge, TreeEdge): + lhs = edge.lhs() + rhselts = [] + for elt in edge.rhs(): + if isinstance(elt, Nonterminal): + rhselts.append(str(elt.symbol())) + else: + rhselts.append(repr(elt)) + rhs = " ".join(rhselts) + else: + lhs = edge.lhs() + rhs = '' + + for s in (lhs, rhs): + tag = c.create_text( + 0, 0, text=s, font=self._boldfont, anchor='nw', justify='left' + ) + bbox = c.bbox(tag) + c.delete(tag) + width = bbox[2] # + ChartView._LEAF_SPACING + edgelen = max(edge.length(), 1) + self._unitsize = max(self._unitsize, width / edgelen) + self._text_height = max(self._text_height, bbox[3] - bbox[1]) + + def _add_edge(self, edge, minlvl=0): + """ + Add a single edge to the ChartView: + + - Call analyze_edge to recalculate display parameters + - Find an available level + - Call _draw_edge + """ + # Do NOT show leaf edges in the chart. + if isinstance(edge, LeafEdge): + return + + if edge in self._edgetags: + return + self._analyze_edge(edge) + self._grow() + + if not self._compact: + self._edgelevels.append([edge]) + lvl = len(self._edgelevels) - 1 + self._draw_edge(edge, lvl) + self._resize() + return + + # Figure out what level to draw the edge on. + lvl = 0 + while True: + # If this level doesn't exist yet, create it. 
+ while lvl >= len(self._edgelevels): + self._edgelevels.append([]) + self._resize() + + # Check if we can fit the edge in this level. + if lvl >= minlvl and not self._edge_conflict(edge, lvl): + # Go ahead and draw it. + self._edgelevels[lvl].append(edge) + break + + # Try the next level. + lvl += 1 + + self._draw_edge(edge, lvl) + + def view_edge(self, edge): + level = None + for i in range(len(self._edgelevels)): + if edge in self._edgelevels[i]: + level = i + break + if level is None: + return + # Try to view the new edge.. + y = (level + 1) * self._chart_level_size + dy = self._text_height + 10 + self._chart_canvas.yview('moveto', 1.0) + if self._chart_height != 0: + self._chart_canvas.yview('moveto', (y - dy) / self._chart_height) + + def _draw_edge(self, edge, lvl): + """ + Draw a single edge on the ChartView. + """ + c = self._chart_canvas + + # Draw the arrow. + x1 = edge.start() * self._unitsize + ChartView._MARGIN + x2 = edge.end() * self._unitsize + ChartView._MARGIN + if x2 == x1: + x2 += max(4, self._unitsize / 5) + y = (lvl + 1) * self._chart_level_size + linetag = c.create_line(x1, y, x2, y, arrow='last', width=3) + + # Draw a label for the edge. + if isinstance(edge, TreeEdge): + rhs = [] + for elt in edge.rhs(): + if isinstance(elt, Nonterminal): + rhs.append(str(elt.symbol())) + else: + rhs.append(repr(elt)) + pos = edge.dot() + else: + rhs = [] + pos = 0 + + rhs1 = " ".join(rhs[:pos]) + rhs2 = " ".join(rhs[pos:]) + rhstag1 = c.create_text(x1 + 3, y, text=rhs1, font=self._font, anchor='nw') + dotx = c.bbox(rhstag1)[2] + 6 + doty = (c.bbox(rhstag1)[1] + c.bbox(rhstag1)[3]) / 2 + dottag = c.create_oval(dotx - 2, doty - 2, dotx + 2, doty + 2) + rhstag2 = c.create_text(dotx + 6, y, text=rhs2, font=self._font, anchor='nw') + lhstag = c.create_text( + (x1 + x2) / 2, y, text=str(edge.lhs()), anchor='s', font=self._boldfont + ) + + # Keep track of the edge's tags. + self._edgetags[edge] = (linetag, rhstag1, dottag, rhstag2, lhstag) + + # Register a callback for clicking on the edge. + def cb(event, self=self, edge=edge): + self._fire_callbacks('select', edge) + + c.tag_bind(rhstag1, '', cb) + c.tag_bind(rhstag2, '', cb) + c.tag_bind(linetag, '', cb) + c.tag_bind(dottag, '', cb) + c.tag_bind(lhstag, '', cb) + + self._color_edge(edge) + + def _color_edge(self, edge, linecolor=None, textcolor=None): + """ + Color in an edge with the given colors. + If no colors are specified, use intelligent defaults + (dependent on selection, etc.) 
+ """ + if edge not in self._edgetags: + return + c = self._chart_canvas + + if linecolor is not None and textcolor is not None: + if edge in self._marks: + linecolor = self._marks[edge] + tags = self._edgetags[edge] + c.itemconfig(tags[0], fill=linecolor) + c.itemconfig(tags[1], fill=textcolor) + c.itemconfig(tags[2], fill=textcolor, outline=textcolor) + c.itemconfig(tags[3], fill=textcolor) + c.itemconfig(tags[4], fill=textcolor) + return + else: + N = self._chart.num_leaves() + if edge in self._marks: + self._color_edge(self._marks[edge]) + if edge.is_complete() and edge.span() == (0, N): + self._color_edge(edge, '#084', '#042') + elif isinstance(edge, LeafEdge): + self._color_edge(edge, '#48c', '#246') + else: + self._color_edge(edge, '#00f', '#008') + + def mark_edge(self, edge, mark='#0df'): + """ + Mark an edge + """ + self._marks[edge] = mark + self._color_edge(edge) + + def unmark_edge(self, edge=None): + """ + Unmark an edge (or all edges) + """ + if edge is None: + old_marked_edges = list(self._marks.keys()) + self._marks = {} + for edge in old_marked_edges: + self._color_edge(edge) + else: + del self._marks[edge] + self._color_edge(edge) + + def markonly_edge(self, edge, mark='#0df'): + self.unmark_edge() + self.mark_edge(edge, mark) + + def _analyze(self): + """ + Analyze the sentence string, to figure out how big a unit needs + to be, How big the tree should be, etc. + """ + # Figure out the text height and the unit size. + unitsize = 70 # min unitsize + text_height = 0 + c = self._chart_canvas + + # Check against all tokens + for leaf in self._chart.leaves(): + tag = c.create_text( + 0, 0, text=repr(leaf), font=self._font, anchor='nw', justify='left' + ) + bbox = c.bbox(tag) + c.delete(tag) + width = bbox[2] + ChartView._LEAF_SPACING + unitsize = max(width, unitsize) + text_height = max(text_height, bbox[3] - bbox[1]) + + self._unitsize = unitsize + self._text_height = text_height + self._sentence_height = self._text_height + 2 * ChartView._MARGIN + + # Check against edges. + for edge in self._chart.edges(): + self._analyze_edge(edge) + + # Size of chart levels + self._chart_level_size = self._text_height * 2 + + # Default tree size.. + self._tree_height = 3 * (ChartView._TREE_LEVEL_SIZE + self._text_height) + + # Resize the scrollregions. + self._resize() + + def _resize(self): + """ + Update the scroll-regions for each canvas. This ensures that + everything is within a scroll-region, so the user can use the + scrollbars to view the entire display. This does *not* + resize the window. + """ + c = self._chart_canvas + + # Reset the chart scroll region + width = self._chart.num_leaves() * self._unitsize + ChartView._MARGIN * 2 + + levels = len(self._edgelevels) + self._chart_height = (levels + 2) * self._chart_level_size + c['scrollregion'] = (0, 0, width, self._chart_height) + + # Reset the tree scroll region + if self._tree_canvas: + self._tree_canvas['scrollregion'] = (0, 0, width, self._tree_height) + + def _draw_loclines(self): + """ + Draw location lines. These are vertical gridlines used to + show where each location unit is. 
+ """ + BOTTOM = 50000 + c1 = self._tree_canvas + c2 = self._sentence_canvas + c3 = self._chart_canvas + margin = ChartView._MARGIN + self._loclines = [] + for i in range(0, self._chart.num_leaves() + 1): + x = i * self._unitsize + margin + + if c1: + t1 = c1.create_line(x, 0, x, BOTTOM) + c1.tag_lower(t1) + if c2: + t2 = c2.create_line(x, 0, x, self._sentence_height) + c2.tag_lower(t2) + t3 = c3.create_line(x, 0, x, BOTTOM) + c3.tag_lower(t3) + t4 = c3.create_text(x + 2, 0, text=repr(i), anchor='nw', font=self._font) + c3.tag_lower(t4) + # if i % 4 == 0: + # if c1: c1.itemconfig(t1, width=2, fill='gray60') + # if c2: c2.itemconfig(t2, width=2, fill='gray60') + # c3.itemconfig(t3, width=2, fill='gray60') + if i % 2 == 0: + if c1: + c1.itemconfig(t1, fill='gray60') + if c2: + c2.itemconfig(t2, fill='gray60') + c3.itemconfig(t3, fill='gray60') + else: + if c1: + c1.itemconfig(t1, fill='gray80') + if c2: + c2.itemconfig(t2, fill='gray80') + c3.itemconfig(t3, fill='gray80') + + def _draw_sentence(self): + """Draw the sentence string.""" + if self._chart.num_leaves() == 0: + return + c = self._sentence_canvas + margin = ChartView._MARGIN + y = ChartView._MARGIN + + for i, leaf in enumerate(self._chart.leaves()): + x1 = i * self._unitsize + margin + x2 = x1 + self._unitsize + x = (x1 + x2) / 2 + tag = c.create_text( + x, y, text=repr(leaf), font=self._font, anchor='n', justify='left' + ) + bbox = c.bbox(tag) + rt = c.create_rectangle( + x1 + 2, + bbox[1] - (ChartView._LEAF_SPACING / 2), + x2 - 2, + bbox[3] + (ChartView._LEAF_SPACING / 2), + fill='#f0f0f0', + outline='#f0f0f0', + ) + c.tag_lower(rt) + + def erase_tree(self): + for tag in self._tree_tags: + self._tree_canvas.delete(tag) + self._treetoks = [] + self._treetoks_edge = None + self._treetoks_index = 0 + + def draw_tree(self, edge=None): + if edge is None and self._treetoks_edge is None: + return + if edge is None: + edge = self._treetoks_edge + + # If it's a new edge, then get a new list of treetoks. + if self._treetoks_edge != edge: + self._treetoks = [t for t in self._chart.trees(edge) if isinstance(t, Tree)] + self._treetoks_edge = edge + self._treetoks_index = 0 + + # Make sure there's something to draw. + if len(self._treetoks) == 0: + return + + # Erase the old tree. + for tag in self._tree_tags: + self._tree_canvas.delete(tag) + + # Draw the new tree. + tree = self._treetoks[self._treetoks_index] + self._draw_treetok(tree, edge.start()) + + # Show how many trees are available for the edge. + self._draw_treecycle() + + # Update the scroll region. + w = self._chart.num_leaves() * self._unitsize + 2 * ChartView._MARGIN + h = tree.height() * (ChartView._TREE_LEVEL_SIZE + self._text_height) + self._tree_canvas['scrollregion'] = (0, 0, w, h) + + def cycle_tree(self): + self._treetoks_index = (self._treetoks_index + 1) % len(self._treetoks) + self.draw_tree(self._treetoks_edge) + + def _draw_treecycle(self): + if len(self._treetoks) <= 1: + return + + # Draw the label. + label = '%d Trees' % len(self._treetoks) + c = self._tree_canvas + margin = ChartView._MARGIN + right = self._chart.num_leaves() * self._unitsize + margin - 2 + tag = c.create_text(right, 2, anchor='ne', text=label, font=self._boldfont) + self._tree_tags.append(tag) + _, _, _, y = c.bbox(tag) + + # Draw the triangles. 
+ for i in range(len(self._treetoks)): + x = right - 20 * (len(self._treetoks) - i - 1) + if i == self._treetoks_index: + fill = '#084' + else: + fill = '#fff' + tag = c.create_polygon( + x, y + 10, x - 5, y, x - 10, y + 10, fill=fill, outline='black' + ) + self._tree_tags.append(tag) + + # Set up a callback: show the tree if they click on its + # triangle. + def cb(event, self=self, i=i): + self._treetoks_index = i + self.draw_tree() + + c.tag_bind(tag, '', cb) + + def _draw_treetok(self, treetok, index, depth=0): + """ + :param index: The index of the first leaf in the tree. + :return: The index of the first leaf after the tree. + """ + c = self._tree_canvas + margin = ChartView._MARGIN + + # Draw the children + child_xs = [] + for child in treetok: + if isinstance(child, Tree): + child_x, index = self._draw_treetok(child, index, depth + 1) + child_xs.append(child_x) + else: + child_xs.append((2 * index + 1) * self._unitsize / 2 + margin) + index += 1 + + # If we have children, then get the node's x by averaging their + # node x's. Otherwise, make room for ourselves. + if child_xs: + nodex = sum(child_xs) / len(child_xs) + else: + # [XX] breaks for null productions. + nodex = (2 * index + 1) * self._unitsize / 2 + margin + index += 1 + + # Draw the node + nodey = depth * (ChartView._TREE_LEVEL_SIZE + self._text_height) + tag = c.create_text( + nodex, + nodey, + anchor='n', + justify='center', + text=str(treetok.label()), + fill='#042', + font=self._boldfont, + ) + self._tree_tags.append(tag) + + # Draw lines to the children. + childy = nodey + ChartView._TREE_LEVEL_SIZE + self._text_height + for childx, child in zip(child_xs, treetok): + if isinstance(child, Tree) and child: + # A "real" tree token: + tag = c.create_line( + nodex, + nodey + self._text_height, + childx, + childy, + width=2, + fill='#084', + ) + self._tree_tags.append(tag) + if isinstance(child, Tree) and not child: + # An unexpanded tree token: + tag = c.create_line( + nodex, + nodey + self._text_height, + childx, + childy, + width=2, + fill='#048', + dash='2 3', + ) + self._tree_tags.append(tag) + if not isinstance(child, Tree): + # A leaf: + tag = c.create_line( + nodex, + nodey + self._text_height, + childx, + 10000, + width=2, + fill='#084', + ) + self._tree_tags.append(tag) + + return nodex, index + + def draw(self): + """ + Draw everything (from scratch). + """ + if self._tree_canvas: + self._tree_canvas.delete('all') + self.draw_tree() + + if self._sentence_canvas: + self._sentence_canvas.delete('all') + self._draw_sentence() + + self._chart_canvas.delete('all') + self._edgetags = {} + + # Redraw any edges we erased. + for lvl in range(len(self._edgelevels)): + for edge in self._edgelevels[lvl]: + self._draw_edge(edge, lvl) + + for edge in self._chart: + self._add_edge(edge) + + self._draw_loclines() + + def add_callback(self, event, func): + self._callbacks.setdefault(event, {})[func] = 1 + + def remove_callback(self, event, func=None): + if func is None: + del self._callbacks[event] + else: + try: + del self._callbacks[event][func] + except: + pass + + def _fire_callbacks(self, event, *args): + if event not in self._callbacks: + return + for cb_func in list(self._callbacks[event].keys()): + cb_func(*args) + + +####################################################################### +# Edge Rules +####################################################################### +# These version of the chart rules only apply to a specific edge. +# This lets the user select an edge, and then apply a rule. 
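+ # NOTE (editor's illustrative sketch, not part of the original NLTK source):
+ # each EdgeRule subclass below curries a user-selected edge into one of the
+ # standard chart rules.  Assuming ``selected_edge``, ``chart`` and ``grammar``
+ # are placeholder names for the currently selected edge, the active chart and
+ # the active CFG, applying a rule to just that edge would look roughly like:
+ #
+ #     rule = TopDownPredictEdgeRule(selected_edge)
+ #     for new_edge in rule.apply(chart, grammar):
+ #         print(new_edge)
+ #
+ # The base-class order matters: EdgeRule.__init__ reads
+ # ``self.__class__.__bases__[1]`` to locate the underlying rule class.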
+ + +class EdgeRule(object): + """ + To create an edge rule, make an empty base class that uses + EdgeRule as the first base class, and the basic rule as the + second base class. (Order matters!) + """ + + def __init__(self, edge): + super = self.__class__.__bases__[1] + self._edge = edge + self.NUM_EDGES = super.NUM_EDGES - 1 + + def apply(self, chart, grammar, *edges): + super = self.__class__.__bases__[1] + edges += (self._edge,) + for e in super.apply(self, chart, grammar, *edges): + yield e + + def __str__(self): + super = self.__class__.__bases__[1] + return super.__str__(self) + + +class TopDownPredictEdgeRule(EdgeRule, TopDownPredictRule): + pass + + +class BottomUpEdgeRule(EdgeRule, BottomUpPredictRule): + pass + + +class BottomUpLeftCornerEdgeRule(EdgeRule, BottomUpPredictCombineRule): + pass + + +class FundamentalEdgeRule(EdgeRule, SingleEdgeFundamentalRule): + pass + + +####################################################################### +# Chart Parser Application +####################################################################### + + +class ChartParserApp(object): + def __init__(self, grammar, tokens, title='Chart Parser Application'): + # Initialize the parser + self._init_parser(grammar, tokens) + + self._root = None + try: + # Create the root window. + self._root = Tk() + self._root.title(title) + self._root.bind('', self.destroy) + + # Set up some frames. + frame3 = Frame(self._root) + frame2 = Frame(self._root) + frame1 = Frame(self._root) + frame3.pack(side='bottom', fill='none') + frame2.pack(side='bottom', fill='x') + frame1.pack(side='bottom', fill='both', expand=1) + + self._init_fonts(self._root) + self._init_animation() + self._init_chartview(frame1) + self._init_rulelabel(frame2) + self._init_buttons(frame3) + self._init_menubar() + + self._matrix = None + self._results = None + + # Set up keyboard bindings. + self._init_bindings() + + except: + print('Error creating Tree View') + self.destroy() + raise + + def destroy(self, *args): + if self._root is None: + return + self._root.destroy() + self._root = None + + def mainloop(self, *args, **kwargs): + """ + Enter the Tkinter mainloop. This function must be called if + this demo is created from a non-interactive program (e.g. + from a secript); otherwise, the demo will close as soon as + the script completes. + """ + if in_idle(): + return + self._root.mainloop(*args, **kwargs) + + # //////////////////////////////////////////////////////////// + # Initialization Helpers + # //////////////////////////////////////////////////////////// + + def _init_parser(self, grammar, tokens): + self._grammar = grammar + self._tokens = tokens + self._reset_parser() + + def _reset_parser(self): + self._cp = SteppingChartParser(self._grammar) + self._cp.initialize(self._tokens) + self._chart = self._cp.chart() + + # Insert LeafEdges before the parsing starts. 
+ for _new_edge in LeafInitRule().apply(self._chart, self._grammar): + pass + + # The step iterator -- use this to generate new edges + self._cpstep = self._cp.step() + + # The currently selected edge + self._selection = None + + def _init_fonts(self, root): + # See: + self._sysfont = Font(font=Button()["font"]) + root.option_add("*Font", self._sysfont) + + # TWhat's our font size (default=same as sysfont) + self._size = IntVar(root) + self._size.set(self._sysfont.cget('size')) + + self._boldfont = Font(family='helvetica', weight='bold', size=self._size.get()) + self._font = Font(family='helvetica', size=self._size.get()) + + def _init_animation(self): + # Are we stepping? (default=yes) + self._step = IntVar(self._root) + self._step.set(1) + + # What's our animation speed (default=fast) + self._animate = IntVar(self._root) + self._animate.set(3) # Default speed = fast + + # Are we currently animating? + self._animating = 0 + + def _init_chartview(self, parent): + self._cv = ChartView(self._chart, parent, draw_tree=1, draw_sentence=1) + self._cv.add_callback('select', self._click_cv_edge) + + def _init_rulelabel(self, parent): + ruletxt = 'Last edge generated by:' + + self._rulelabel1 = Label(parent, text=ruletxt, font=self._boldfont) + self._rulelabel2 = Label( + parent, width=40, relief='groove', anchor='w', font=self._boldfont + ) + self._rulelabel1.pack(side='left') + self._rulelabel2.pack(side='left') + step = Checkbutton(parent, variable=self._step, text='Step') + step.pack(side='right') + + def _init_buttons(self, parent): + frame1 = Frame(parent) + frame2 = Frame(parent) + frame1.pack(side='bottom', fill='x') + frame2.pack(side='top', fill='none') + + Button( + frame1, + text='Reset\nParser', + background='#90c0d0', + foreground='black', + command=self.reset, + ).pack(side='right') + # Button(frame1, text='Pause', + # background='#90c0d0', foreground='black', + # command=self.pause).pack(side='left') + + Button( + frame1, + text='Top Down\nStrategy', + background='#90c0d0', + foreground='black', + command=self.top_down_strategy, + ).pack(side='left') + Button( + frame1, + text='Bottom Up\nStrategy', + background='#90c0d0', + foreground='black', + command=self.bottom_up_strategy, + ).pack(side='left') + Button( + frame1, + text='Bottom Up\nLeft-Corner Strategy', + background='#90c0d0', + foreground='black', + command=self.bottom_up_leftcorner_strategy, + ).pack(side='left') + + Button( + frame2, + text='Top Down Init\nRule', + background='#90f090', + foreground='black', + command=self.top_down_init, + ).pack(side='left') + Button( + frame2, + text='Top Down Predict\nRule', + background='#90f090', + foreground='black', + command=self.top_down_predict, + ).pack(side='left') + Frame(frame2, width=20).pack(side='left') + + Button( + frame2, + text='Bottom Up Predict\nRule', + background='#90f090', + foreground='black', + command=self.bottom_up, + ).pack(side='left') + Frame(frame2, width=20).pack(side='left') + + Button( + frame2, + text='Bottom Up Left-Corner\nPredict Rule', + background='#90f090', + foreground='black', + command=self.bottom_up_leftcorner, + ).pack(side='left') + Frame(frame2, width=20).pack(side='left') + + Button( + frame2, + text='Fundamental\nRule', + background='#90f090', + foreground='black', + command=self.fundamental, + ).pack(side='left') + + def _init_bindings(self): + self._root.bind('', self._cv.scroll_up) + self._root.bind('', self._cv.scroll_down) + self._root.bind('', self._cv.page_up) + self._root.bind('', self._cv.page_down) + self._root.bind('', 
self.destroy) + self._root.bind('', self.destroy) + self._root.bind('', self.help) + + self._root.bind('', self.save_chart) + self._root.bind('', self.load_chart) + self._root.bind('', self.reset) + + self._root.bind('t', self.top_down_strategy) + self._root.bind('b', self.bottom_up_strategy) + self._root.bind('c', self.bottom_up_leftcorner_strategy) + self._root.bind('', self._stop_animation) + + self._root.bind('', self.edit_grammar) + self._root.bind('', self.edit_sentence) + + # Animation speed control + self._root.bind('-', lambda e, a=self._animate: a.set(1)) + self._root.bind('=', lambda e, a=self._animate: a.set(2)) + self._root.bind('+', lambda e, a=self._animate: a.set(3)) + + # Step control + self._root.bind('s', lambda e, s=self._step: s.set(not s.get())) + + def _init_menubar(self): + menubar = Menu(self._root) + + filemenu = Menu(menubar, tearoff=0) + filemenu.add_command( + label='Save Chart', + underline=0, + command=self.save_chart, + accelerator='Ctrl-s', + ) + filemenu.add_command( + label='Load Chart', + underline=0, + command=self.load_chart, + accelerator='Ctrl-o', + ) + filemenu.add_command( + label='Reset Chart', underline=0, command=self.reset, accelerator='Ctrl-r' + ) + filemenu.add_separator() + filemenu.add_command(label='Save Grammar', command=self.save_grammar) + filemenu.add_command(label='Load Grammar', command=self.load_grammar) + filemenu.add_separator() + filemenu.add_command( + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-x' + ) + menubar.add_cascade(label='File', underline=0, menu=filemenu) + + editmenu = Menu(menubar, tearoff=0) + editmenu.add_command( + label='Edit Grammar', + underline=5, + command=self.edit_grammar, + accelerator='Ctrl-g', + ) + editmenu.add_command( + label='Edit Text', + underline=5, + command=self.edit_sentence, + accelerator='Ctrl-t', + ) + menubar.add_cascade(label='Edit', underline=0, menu=editmenu) + + viewmenu = Menu(menubar, tearoff=0) + viewmenu.add_command( + label='Chart Matrix', underline=6, command=self.view_matrix + ) + viewmenu.add_command(label='Results', underline=0, command=self.view_results) + menubar.add_cascade(label='View', underline=0, menu=viewmenu) + + rulemenu = Menu(menubar, tearoff=0) + rulemenu.add_command( + label='Top Down Strategy', + underline=0, + command=self.top_down_strategy, + accelerator='t', + ) + rulemenu.add_command( + label='Bottom Up Strategy', + underline=0, + command=self.bottom_up_strategy, + accelerator='b', + ) + rulemenu.add_command( + label='Bottom Up Left-Corner Strategy', + underline=0, + command=self.bottom_up_leftcorner_strategy, + accelerator='c', + ) + rulemenu.add_separator() + rulemenu.add_command(label='Bottom Up Rule', command=self.bottom_up) + rulemenu.add_command( + label='Bottom Up Left-Corner Rule', command=self.bottom_up_leftcorner + ) + rulemenu.add_command(label='Top Down Init Rule', command=self.top_down_init) + rulemenu.add_command( + label='Top Down Predict Rule', command=self.top_down_predict + ) + rulemenu.add_command(label='Fundamental Rule', command=self.fundamental) + menubar.add_cascade(label='Apply', underline=0, menu=rulemenu) + + animatemenu = Menu(menubar, tearoff=0) + animatemenu.add_checkbutton( + label="Step", underline=0, variable=self._step, accelerator='s' + ) + animatemenu.add_separator() + animatemenu.add_radiobutton( + label="No Animation", underline=0, variable=self._animate, value=0 + ) + animatemenu.add_radiobutton( + label="Slow Animation", + underline=0, + variable=self._animate, + value=1, + accelerator='-', + ) + 
animatemenu.add_radiobutton( + label="Normal Animation", + underline=0, + variable=self._animate, + value=2, + accelerator='=', + ) + animatemenu.add_radiobutton( + label="Fast Animation", + underline=0, + variable=self._animate, + value=3, + accelerator='+', + ) + menubar.add_cascade(label="Animate", underline=1, menu=animatemenu) + + zoommenu = Menu(menubar, tearoff=0) + zoommenu.add_radiobutton( + label='Tiny', + variable=self._size, + underline=0, + value=10, + command=self.resize, + ) + zoommenu.add_radiobutton( + label='Small', + variable=self._size, + underline=0, + value=12, + command=self.resize, + ) + zoommenu.add_radiobutton( + label='Medium', + variable=self._size, + underline=0, + value=14, + command=self.resize, + ) + zoommenu.add_radiobutton( + label='Large', + variable=self._size, + underline=0, + value=18, + command=self.resize, + ) + zoommenu.add_radiobutton( + label='Huge', + variable=self._size, + underline=0, + value=24, + command=self.resize, + ) + menubar.add_cascade(label='Zoom', underline=0, menu=zoommenu) + + helpmenu = Menu(menubar, tearoff=0) + helpmenu.add_command(label='About', underline=0, command=self.about) + helpmenu.add_command( + label='Instructions', underline=0, command=self.help, accelerator='F1' + ) + menubar.add_cascade(label='Help', underline=0, menu=helpmenu) + + self._root.config(menu=menubar) + + # //////////////////////////////////////////////////////////// + # Selection Handling + # //////////////////////////////////////////////////////////// + + def _click_cv_edge(self, edge): + if edge != self._selection: + # Clicking on a new edge selects it. + self._select_edge(edge) + else: + # Repeated clicks on one edge cycle its trees. + self._cv.cycle_tree() + # [XX] this can get confused if animation is running + # faster than the callbacks... + + def _select_matrix_edge(self, edge): + self._select_edge(edge) + self._cv.view_edge(edge) + + def _select_edge(self, edge): + self._selection = edge + # Update the chart view. + self._cv.markonly_edge(edge, '#f00') + self._cv.draw_tree(edge) + # Update the matrix view. + if self._matrix: + self._matrix.markonly_edge(edge) + if self._matrix: + self._matrix.view_edge(edge) + + def _deselect_edge(self): + self._selection = None + # Update the chart view. + self._cv.unmark_edge() + self._cv.erase_tree() + # Update the matrix view + if self._matrix: + self._matrix.unmark_edge() + + def _show_new_edge(self, edge): + self._display_rule(self._cp.current_chartrule()) + # Update the chart view. + self._cv.update() + self._cv.draw_tree(edge) + self._cv.markonly_edge(edge, '#0df') + self._cv.view_edge(edge) + # Update the matrix view. + if self._matrix: + self._matrix.update() + if self._matrix: + self._matrix.markonly_edge(edge) + if self._matrix: + self._matrix.view_edge(edge) + # Update the results view. + if self._results: + self._results.update(edge) + + # //////////////////////////////////////////////////////////// + # Help/usage + # //////////////////////////////////////////////////////////// + + def help(self, *e): + self._animating = 0 + # The default font's not very legible; try using 'fixed' instead. 
+ try: + ShowText( + self._root, + 'Help: Chart Parser Application', + (__doc__ or '').strip(), + width=75, + font='fixed', + ) + except: + ShowText( + self._root, + 'Help: Chart Parser Application', + (__doc__ or '').strip(), + width=75, + ) + + def about(self, *e): + ABOUT = "NLTK Chart Parser Application\n" + "Written by Edward Loper" + showinfo('About: Chart Parser Application', ABOUT) + + # //////////////////////////////////////////////////////////// + # File Menu + # //////////////////////////////////////////////////////////// + + CHART_FILE_TYPES = [('Pickle file', '.pickle'), ('All files', '*')] + GRAMMAR_FILE_TYPES = [ + ('Plaintext grammar file', '.cfg'), + ('Pickle file', '.pickle'), + ('All files', '*'), + ] + + def load_chart(self, *args): + "Load a chart from a pickle file" + filename = askopenfilename( + filetypes=self.CHART_FILE_TYPES, defaultextension='.pickle' + ) + if not filename: + return + try: + with open(filename, 'rb') as infile: + chart = pickle.load(infile) + self._chart = chart + self._cv.update(chart) + if self._matrix: + self._matrix.set_chart(chart) + if self._matrix: + self._matrix.deselect_cell() + if self._results: + self._results.set_chart(chart) + self._cp.set_chart(chart) + except Exception as e: + raise + showerror('Error Loading Chart', 'Unable to open file: %r' % filename) + + def save_chart(self, *args): + "Save a chart to a pickle file" + filename = asksaveasfilename( + filetypes=self.CHART_FILE_TYPES, defaultextension='.pickle' + ) + if not filename: + return + try: + with open(filename, 'wb') as outfile: + pickle.dump(self._chart, outfile) + except Exception as e: + raise + showerror('Error Saving Chart', 'Unable to open file: %r' % filename) + + def load_grammar(self, *args): + "Load a grammar from a pickle file" + filename = askopenfilename( + filetypes=self.GRAMMAR_FILE_TYPES, defaultextension='.cfg' + ) + if not filename: + return + try: + if filename.endswith('.pickle'): + with open(filename, 'rb') as infile: + grammar = pickle.load(infile) + else: + with open(filename, 'r') as infile: + grammar = CFG.fromstring(infile.read()) + self.set_grammar(grammar) + except Exception as e: + showerror('Error Loading Grammar', 'Unable to open file: %r' % filename) + + def save_grammar(self, *args): + filename = asksaveasfilename( + filetypes=self.GRAMMAR_FILE_TYPES, defaultextension='.cfg' + ) + if not filename: + return + try: + if filename.endswith('.pickle'): + with open(filename, 'wb') as outfile: + pickle.dump((self._chart, self._tokens), outfile) + else: + with open(filename, 'w') as outfile: + prods = self._grammar.productions() + start = [p for p in prods if p.lhs() == self._grammar.start()] + rest = [p for p in prods if p.lhs() != self._grammar.start()] + for prod in start: + outfile.write('%s\n' % prod) + for prod in rest: + outfile.write('%s\n' % prod) + except Exception as e: + showerror('Error Saving Grammar', 'Unable to open file: %r' % filename) + + def reset(self, *args): + self._animating = 0 + self._reset_parser() + self._cv.update(self._chart) + if self._matrix: + self._matrix.set_chart(self._chart) + if self._matrix: + self._matrix.deselect_cell() + if self._results: + self._results.set_chart(self._chart) + + # //////////////////////////////////////////////////////////// + # Edit + # //////////////////////////////////////////////////////////// + + def edit_grammar(self, *e): + CFGEditor(self._root, self._grammar, self.set_grammar) + + def set_grammar(self, grammar): + self._grammar = grammar + self._cp.set_grammar(grammar) + if 
self._results: + self._results.set_grammar(grammar) + + def edit_sentence(self, *e): + sentence = " ".join(self._tokens) + title = 'Edit Text' + instr = 'Enter a new sentence to parse.' + EntryDialog(self._root, sentence, instr, self.set_sentence, title) + + def set_sentence(self, sentence): + self._tokens = list(sentence.split()) + self.reset() + + # //////////////////////////////////////////////////////////// + # View Menu + # //////////////////////////////////////////////////////////// + + def view_matrix(self, *e): + if self._matrix is not None: + self._matrix.destroy() + self._matrix = ChartMatrixView(self._root, self._chart) + self._matrix.add_callback('select', self._select_matrix_edge) + + def view_results(self, *e): + if self._results is not None: + self._results.destroy() + self._results = ChartResultsView(self._root, self._chart, self._grammar) + + # //////////////////////////////////////////////////////////// + # Zoom Menu + # //////////////////////////////////////////////////////////// + + def resize(self): + self._animating = 0 + self.set_font_size(self._size.get()) + + def set_font_size(self, size): + self._cv.set_font_size(size) + self._font.configure(size=-abs(size)) + self._boldfont.configure(size=-abs(size)) + self._sysfont.configure(size=-abs(size)) + + def get_font_size(self): + return abs(self._size.get()) + + # //////////////////////////////////////////////////////////// + # Parsing + # //////////////////////////////////////////////////////////// + + def apply_strategy(self, strategy, edge_strategy=None): + # If we're animating, then stop. + if self._animating: + self._animating = 0 + return + + # Clear the rule display & mark. + self._display_rule(None) + # self._cv.unmark_edge() + + if self._step.get(): + selection = self._selection + if (selection is not None) and (edge_strategy is not None): + # Apply the given strategy to the selected edge. + self._cp.set_strategy([edge_strategy(selection)]) + newedge = self._apply_strategy() + + # If it failed, then clear the selection. 
+ if newedge is None: + self._cv.unmark_edge() + self._selection = None + else: + self._cp.set_strategy(strategy) + self._apply_strategy() + + else: + self._cp.set_strategy(strategy) + if self._animate.get(): + self._animating = 1 + self._animate_strategy() + else: + for edge in self._cpstep: + if edge is None: + break + self._cv.update() + if self._matrix: + self._matrix.update() + if self._results: + self._results.update() + + def _stop_animation(self, *e): + self._animating = 0 + + def _animate_strategy(self, speed=1): + if self._animating == 0: + return + if self._apply_strategy() is not None: + if self._animate.get() == 0 or self._step.get() == 1: + return + if self._animate.get() == 1: + self._root.after(3000, self._animate_strategy) + elif self._animate.get() == 2: + self._root.after(1000, self._animate_strategy) + else: + self._root.after(20, self._animate_strategy) + + def _apply_strategy(self): + new_edge = next(self._cpstep) + + if new_edge is not None: + self._show_new_edge(new_edge) + return new_edge + + def _display_rule(self, rule): + if rule is None: + self._rulelabel2['text'] = '' + else: + name = str(rule) + self._rulelabel2['text'] = name + size = self._cv.get_font_size() + + # //////////////////////////////////////////////////////////// + # Parsing Strategies + # //////////////////////////////////////////////////////////// + + # Basic rules: + _TD_INIT = [TopDownInitRule()] + _TD_PREDICT = [TopDownPredictRule()] + _BU_RULE = [BottomUpPredictRule()] + _BU_LC_RULE = [BottomUpPredictCombineRule()] + _FUNDAMENTAL = [SingleEdgeFundamentalRule()] + + # Complete strategies: + _TD_STRATEGY = _TD_INIT + _TD_PREDICT + _FUNDAMENTAL + _BU_STRATEGY = _BU_RULE + _FUNDAMENTAL + _BU_LC_STRATEGY = _BU_LC_RULE + _FUNDAMENTAL + + # Button callback functions: + def top_down_init(self, *e): + self.apply_strategy(self._TD_INIT, None) + + def top_down_predict(self, *e): + self.apply_strategy(self._TD_PREDICT, TopDownPredictEdgeRule) + + def bottom_up(self, *e): + self.apply_strategy(self._BU_RULE, BottomUpEdgeRule) + + def bottom_up_leftcorner(self, *e): + self.apply_strategy(self._BU_LC_RULE, BottomUpLeftCornerEdgeRule) + + def fundamental(self, *e): + self.apply_strategy(self._FUNDAMENTAL, FundamentalEdgeRule) + + def bottom_up_strategy(self, *e): + self.apply_strategy(self._BU_STRATEGY, BottomUpEdgeRule) + + def bottom_up_leftcorner_strategy(self, *e): + self.apply_strategy(self._BU_LC_STRATEGY, BottomUpLeftCornerEdgeRule) + + def top_down_strategy(self, *e): + self.apply_strategy(self._TD_STRATEGY, TopDownPredictEdgeRule) + + +def app(): + grammar = CFG.fromstring( + """ + # Grammatical productions. + S -> NP VP + VP -> VP PP | V NP | V + NP -> Det N | NP PP + PP -> P NP + # Lexical productions. 
+ NP -> 'John' | 'I' + Det -> 'the' | 'my' | 'a' + N -> 'dog' | 'cookie' | 'table' | 'cake' | 'fork' + V -> 'ate' | 'saw' + P -> 'on' | 'under' | 'with' + """ + ) + + sent = 'John ate the cake on the table with a fork' + sent = 'John ate the cake on the table' + tokens = list(sent.split()) + + print('grammar= (') + for rule in grammar.productions(): + print((' ', repr(rule) + ',')) + print(')') + print(('tokens = %r' % tokens)) + print('Calling "ChartParserApp(grammar, tokens)"...') + ChartParserApp(grammar, tokens).mainloop() + + +if __name__ == '__main__': + app() + + # Chart comparer: + # charts = ['/tmp/earley.pickle', + # '/tmp/topdown.pickle', + # '/tmp/bottomup.pickle'] + # ChartComparer(*charts).mainloop() + + # import profile + # profile.run('demo2()', '/tmp/profile.out') + # import pstats + # p = pstats.Stats('/tmp/profile.out') + # p.strip_dirs().sort_stats('time', 'cum').print_stats(60) + # p.strip_dirs().sort_stats('cum', 'time').print_stats(60) + +__all__ = ['app'] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/chunkparser_app.py b/venv.bak/lib/python3.7/site-packages/nltk/app/chunkparser_app.py new file mode 100644 index 0000000..2aeca10 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/app/chunkparser_app.py @@ -0,0 +1,1504 @@ +# Natural Language Toolkit: Regexp Chunk Parser Application +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +A graphical tool for exploring the regular expression based chunk +parser ``nltk.chunk.RegexpChunkParser``. +""" + +# Todo: Add a way to select the development set from the menubar. This +# might just need to be a selection box (conll vs treebank etc) plus +# configuration parameters to select what's being chunked (eg VP vs NP) +# and what part of the data is being used as the development set. + +from __future__ import division +import time +import textwrap +import re +import random + +from six.moves.tkinter import ( + Button, + Canvas, + Checkbutton, + Frame, + IntVar, + Label, + Menu, + Scrollbar, + Text, + Tk, +) +from six.moves.tkinter_tkfiledialog import askopenfilename, asksaveasfilename +from six.moves.tkinter_font import Font + +from nltk.tree import Tree +from nltk.util import in_idle +from nltk.draw.util import ShowText +from nltk.corpus import conll2000, treebank_chunk +from nltk.chunk import ChunkScore, RegexpChunkParser +from nltk.chunk.regexp import RegexpChunkRule + + +class RegexpChunkApp(object): + """ + A graphical tool for exploring the regular expression based chunk + parser ``nltk.chunk.RegexpChunkParser``. + + See ``HELP`` for instructional text. + """ + + ##///////////////////////////////////////////////////////////////// + ## Help Text + ##///////////////////////////////////////////////////////////////// + + #: A dictionary mapping from part of speech tags to descriptions, + #: which is used in the help text. (This should probably live with + #: the conll and/or treebank corpus instead.) 
+ TAGSET = { + 'CC': 'Coordinating conjunction', + 'PRP$': 'Possessive pronoun', + 'CD': 'Cardinal number', + 'RB': 'Adverb', + 'DT': 'Determiner', + 'RBR': 'Adverb, comparative', + 'EX': 'Existential there', + 'RBS': 'Adverb, superlative', + 'FW': 'Foreign word', + 'RP': 'Particle', + 'JJ': 'Adjective', + 'TO': 'to', + 'JJR': 'Adjective, comparative', + 'UH': 'Interjection', + 'JJS': 'Adjective, superlative', + 'VB': 'Verb, base form', + 'LS': 'List item marker', + 'VBD': 'Verb, past tense', + 'MD': 'Modal', + 'NNS': 'Noun, plural', + 'NN': 'Noun, singular or masps', + 'VBN': 'Verb, past participle', + 'VBZ': 'Verb,3rd ps. sing. present', + 'NNP': 'Proper noun, singular', + 'NNPS': 'Proper noun plural', + 'WDT': 'wh-determiner', + 'PDT': 'Predeterminer', + 'WP': 'wh-pronoun', + 'POS': 'Possessive ending', + 'WP$': 'Possessive wh-pronoun', + 'PRP': 'Personal pronoun', + 'WRB': 'wh-adverb', + '(': 'open parenthesis', + ')': 'close parenthesis', + '``': 'open quote', + ',': 'comma', + "''": 'close quote', + '.': 'period', + '#': 'pound sign (currency marker)', + '$': 'dollar sign (currency marker)', + 'IN': 'Preposition/subord. conjunction', + 'SYM': 'Symbol (mathematical or scientific)', + 'VBG': 'Verb, gerund/present participle', + 'VBP': 'Verb, non-3rd ps. sing. present', + ':': 'colon', + } + + #: Contents for the help box. This is a list of tuples, one for + #: each help page, where each tuple has four elements: + #: - A title (displayed as a tab) + #: - A string description of tabstops (see Tkinter.Text for details) + #: - The text contents for the help page. You can use expressions + #: like ... to colorize the text; see ``HELP_AUTOTAG`` + #: for a list of tags you can use for colorizing. + HELP = [ + ( + 'Help', + '20', + "Welcome to the regular expression chunk-parser grammar editor. " + "You can use this editor to develop and test chunk parser grammars " + "based on NLTK's RegexpChunkParser class.\n\n" + # Help box. + "Use this box ('Help') to learn more about the editor; click on the " + "tabs for help on specific topics:" + "\n" + "Rules: grammar rule types\n" + "Regexps: regular expression syntax\n" + "Tags: part of speech tags\n\n" + # Grammar. + "Use the upper-left box ('Grammar') to edit your grammar. " + "Each line of your grammar specifies a single 'rule', " + "which performs an action such as creating a chunk or merging " + "two chunks.\n\n" + # Dev set. + "The lower-left box ('Development Set') runs your grammar on the " + "development set, and displays the results. " + "Your grammar's chunks are highlighted, and " + "the correct (gold standard) chunks are " + "underlined. If they " + "match, they are displayed in green; otherwise, " + "they are displayed in red. The box displays a single " + "sentence from the development set at a time; use the scrollbar or " + "the next/previous buttons view additional sentences.\n\n" + # Performance + "The lower-right box ('Evaluation') tracks the performance of " + "your grammar on the development set. The 'precision' axis " + "indicates how many of your grammar's chunks are correct; and " + "the 'recall' axis indicates how many of the gold standard " + "chunks your system generated. Typically, you should try to " + "design a grammar that scores high on both metrics. The " + "exact precision and recall of the current grammar, as well " + "as their harmonic mean (the 'f-score'), are displayed in " + "the status bar at the bottom of the window.", + ), + ( + 'Rules', + '10', + "
<h1>{...regexp...}</h1>"
+ "\nChunk rule: creates new chunks from words matching "
+ "regexp.\n\n"
+ "<h1>}...regexp...{</h1>"
+ "\nChink rule: removes words matching regexp from existing "
+ "chunks.\n\n"
+ "<h1>...regexp1...}{...regexp2...</h1>"
+ "\nSplit rule: splits chunks that match regexp1 followed by "
+ "regexp2 in two.\n\n"
+ "<h1>...regexp...{}...regexp...</h1>"
+ "\nMerge rule: joins consecutive chunks that match regexp1 "
+ "and regexp2\n",
+ ),
+ (
+ 'Regexps',
+ '10 60',
+ # "Regular Expression Syntax Summary:\n\n"
+ "<h1>Pattern\t\tMatches...</h1>\n"
+ "<hangindent>"
+ "\t<<var>T</var>>\ta word with tag <var>T</var> "
+ "(where <var>T</var> may be a regexp).\n"
+ "\t<var>x</var>?\tan optional <var>x</var>\n"
+ "\t<var>x</var>+\ta sequence of 1 or more <var>x</var>'s\n"
+ "\t<var>x</var>*\ta sequence of 0 or more <var>x</var>'s\n"
+ "\t<var>x</var>|<var>y</var>\t<var>x</var> or <var>y</var>\n"
+ "\t.\tmatches any character\n"
+ "\t(<var>x</var>)\tTreats <var>x</var> as a group\n"
+ "\t# <var>x...</var>\tTreats <var>x...</var> "
+ "(to the end of the line) as a comment\n"
+ "\t\\<var>C</var>\tmatches character <var>C</var> "
+ "(useful when <var>C</var> is a special character "
+ "like + or #)\n"
+ "</hangindent>"
+ "\n<h1>Examples:</h1>\n"
+ "<hangindent>"
+ '\t<regexp><NN></regexp>\n'
+ '\t\tMatches <match>"cow/NN"</match>\n'
+ '\t\tMatches <match>"green/NN"</match>\n'
+ '\t<regexp><VB.*></regexp>\n'
+ '\t\tMatches <match>"eating/VBG"</match>\n'
+ '\t\tMatches <match>"ate/VBD"</match>\n'
+ '\t<regexp><IN><DT><NN></regexp>\n'
+ '\t\tMatches <match>"on/IN the/DT car/NN"</match>\n'
+ '\t<regexp><RB>?<VBD></regexp>\n'
+ '\t\tMatches <match>"ran/VBD"</match>\n'
+ '\t\tMatches <match>"slowly/RB ate/VBD"</match>\n'
+ '\t<regexp><\#><CD></regexp> # This is a comment...\n'
+ '\t\tMatches <match>"#/# 100/CD"</match>\n'
+ "</hangindent>",
+ ),
+ (
+ 'Tags',
+ '10 60',
+ "<h1>Part of Speech Tags:</h1>
\n" + + '' + + '<>' + + '\n', # this gets auto-substituted w/ self.TAGSET + ), + ] + + HELP_AUTOTAG = [ + ('red', dict(foreground='#a00')), + ('green', dict(foreground='#080')), + ('highlight', dict(background='#ddd')), + ('underline', dict(underline=True)), + ('h1', dict(underline=True)), + ('indent', dict(lmargin1=20, lmargin2=20)), + ('hangindent', dict(lmargin1=0, lmargin2=60)), + ('var', dict(foreground='#88f')), + ('regexp', dict(foreground='#ba7')), + ('match', dict(foreground='#6a6')), + ] + + ##///////////////////////////////////////////////////////////////// + ## Config Parmeters + ##///////////////////////////////////////////////////////////////// + + _EVAL_DELAY = 1 + """If the user has not pressed any key for this amount of time (in + seconds), and the current grammar has not been evaluated, then + the eval demon will evaluate it.""" + + _EVAL_CHUNK = 15 + """The number of sentences that should be evaluated by the eval + demon each time it runs.""" + _EVAL_FREQ = 0.2 + """The frequency (in seconds) at which the eval demon is run""" + _EVAL_DEMON_MIN = 0.02 + """The minimum amount of time that the eval demon should take each time + it runs -- if it takes less than this time, _EVAL_CHUNK will be + modified upwards.""" + _EVAL_DEMON_MAX = 0.04 + """The maximum amount of time that the eval demon should take each time + it runs -- if it takes more than this time, _EVAL_CHUNK will be + modified downwards.""" + + _GRAMMARBOX_PARAMS = dict( + width=40, + height=12, + background='#efe', + highlightbackground='#efe', + highlightthickness=1, + relief='groove', + border=2, + wrap='word', + ) + _HELPBOX_PARAMS = dict( + width=15, + height=15, + background='#efe', + highlightbackground='#efe', + foreground='#555', + highlightthickness=1, + relief='groove', + border=2, + wrap='word', + ) + _DEVSETBOX_PARAMS = dict( + width=70, + height=10, + background='#eef', + highlightbackground='#eef', + highlightthickness=1, + relief='groove', + border=2, + wrap='word', + tabs=(30,), + ) + _STATUS_PARAMS = dict(background='#9bb', relief='groove', border=2) + _FONT_PARAMS = dict(family='helvetica', size=-20) + _FRAME_PARAMS = dict(background='#777', padx=2, pady=2, border=3) + _EVALBOX_PARAMS = dict( + background='#eef', + highlightbackground='#eef', + highlightthickness=1, + relief='groove', + border=2, + width=300, + height=280, + ) + _BUTTON_PARAMS = dict( + background='#777', activebackground='#777', highlightbackground='#777' + ) + _HELPTAB_BG_COLOR = '#aba' + _HELPTAB_FG_COLOR = '#efe' + + _HELPTAB_FG_PARAMS = dict(background='#efe') + _HELPTAB_BG_PARAMS = dict(background='#aba') + _HELPTAB_SPACER = 6 + + def normalize_grammar(self, grammar): + # Strip comments + grammar = re.sub(r'((\\.|[^#])*)(#.*)?', r'\1', grammar) + # Normalize whitespace + grammar = re.sub(' +', ' ', grammar) + grammar = re.sub('\n\s+', '\n', grammar) + grammar = grammar.strip() + # [xx] Hack: automatically backslash $! + grammar = re.sub(r'([^\\])\$', r'\1\\$', grammar) + return grammar + + def __init__( + self, + devset_name='conll2000', + devset=None, + grammar='', + chunk_label='NP', + tagset=None, + ): + """ + :param devset_name: The name of the development set; used for + display & for save files. If either the name 'treebank' + or the name 'conll2000' is used, and devset is None, then + devset will be set automatically. + :param devset: A list of chunked sentences + :param grammar: The initial grammar to display. + :param tagset: Dictionary from tags to string descriptions, used + for the help page. 
Defaults to ``self.TAGSET``. + """ + self._chunk_label = chunk_label + + if tagset is None: + tagset = self.TAGSET + self.tagset = tagset + + # Named development sets: + if devset is None: + if devset_name == 'conll2000': + devset = conll2000.chunked_sents('train.txt') # [:100] + elif devset == 'treebank': + devset = treebank_chunk.chunked_sents() # [:100] + else: + raise ValueError('Unknown development set %s' % devset_name) + + self.chunker = None + """The chunker built from the grammar string""" + + self.grammar = grammar + """The unparsed grammar string""" + + self.normalized_grammar = None + """A normalized version of ``self.grammar``.""" + + self.grammar_changed = 0 + """The last time() that the grammar was changed.""" + + self.devset = devset + """The development set -- a list of chunked sentences.""" + + self.devset_name = devset_name + """The name of the development set (for save files).""" + + self.devset_index = -1 + """The index into the development set of the first instance + that's currently being viewed.""" + + self._last_keypress = 0 + """The time() when a key was most recently pressed""" + + self._history = [] + """A list of (grammar, precision, recall, fscore) tuples for + grammars that the user has already tried.""" + + self._history_index = 0 + """When the user is scrolling through previous grammars, this + is used to keep track of which grammar they're looking at.""" + + self._eval_grammar = None + """The grammar that is being currently evaluated by the eval + demon.""" + + self._eval_normalized_grammar = None + """A normalized copy of ``_eval_grammar``.""" + + self._eval_index = 0 + """The index of the next sentence in the development set that + should be looked at by the eval demon.""" + + self._eval_score = ChunkScore(chunk_label=chunk_label) + """The ``ChunkScore`` object that's used to keep track of the score + of the current grammar on the development set.""" + + # Set up the main window. + top = self.top = Tk() + top.geometry('+50+50') + top.title('Regexp Chunk Parser App') + top.bind('', self.destroy) + + # Varaible that restricts how much of the devset we look at. + self._devset_size = IntVar(top) + self._devset_size.set(100) + + # Set up all the tkinter widgets + self._init_fonts(top) + self._init_widgets(top) + self._init_bindings(top) + self._init_menubar(top) + self.grammarbox.focus() + + # If a grammar was given, then display it. 
+        if grammar:
+            self.grammarbox.insert('end', grammar + '\n')
+            self.grammarbox.mark_set('insert', '1.0')
+
+        # Display the first item in the development set
+        self.show_devset(0)
+        self.update()
+
+    def _init_bindings(self, top):
+        top.bind('<Control-n>', self._devset_next)
+        top.bind('<Control-p>', self._devset_prev)
+        top.bind('<Control-t>', self.toggle_show_trace)
+        top.bind('<KeyPress>', self.update)
+        top.bind('<Control-s>', lambda e: self.save_grammar())
+        top.bind('<Control-o>', lambda e: self.load_grammar())
+        self.grammarbox.bind('<Control-t>', self.toggle_show_trace)
+        self.grammarbox.bind('<Control-n>', self._devset_next)
+        self.grammarbox.bind('<Control-p>', self._devset_prev)
+
+        # Redraw the eval graph when the window size changes
+        self.evalbox.bind('<Configure>', self._eval_plot)
+
+    def _init_fonts(self, top):
+        # What's our font size (default=same as sysfont)
+        self._size = IntVar(top)
+        self._size.set(20)
+        self._font = Font(family='helvetica', size=-self._size.get())
+        self._smallfont = Font(
+            family='helvetica', size=-(int(self._size.get() * 14 // 20))
+        )
+
+    def _init_menubar(self, parent):
+        menubar = Menu(parent)
+
+        filemenu = Menu(menubar, tearoff=0)
+        filemenu.add_command(label='Reset Application', underline=0, command=self.reset)
+        filemenu.add_command(
+            label='Save Current Grammar',
+            underline=0,
+            accelerator='Ctrl-s',
+            command=self.save_grammar,
+        )
+        filemenu.add_command(
+            label='Load Grammar',
+            underline=0,
+            accelerator='Ctrl-o',
+            command=self.load_grammar,
+        )
+
+        filemenu.add_command(
+            label='Save Grammar History', underline=13, command=self.save_history
+        )
+
+        filemenu.add_command(
+            label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-q'
+        )
+        menubar.add_cascade(label='File', underline=0, menu=filemenu)
+
+        viewmenu = Menu(menubar, tearoff=0)
+        viewmenu.add_radiobutton(
+            label='Tiny',
+            variable=self._size,
+            underline=0,
+            value=10,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label='Small',
+            variable=self._size,
+            underline=0,
+            value=16,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label='Medium',
+            variable=self._size,
+            underline=0,
+            value=20,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label='Large',
+            variable=self._size,
+            underline=0,
+            value=24,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label='Huge',
+            variable=self._size,
+            underline=0,
+            value=34,
+            command=self.resize,
+        )
+        menubar.add_cascade(label='View', underline=0, menu=viewmenu)
+
+        devsetmenu = Menu(menubar, tearoff=0)
+        devsetmenu.add_radiobutton(
+            label='50 sentences',
+            variable=self._devset_size,
+            value=50,
+            command=self.set_devset_size,
+        )
+        devsetmenu.add_radiobutton(
+            label='100 sentences',
+            variable=self._devset_size,
+            value=100,
+            command=self.set_devset_size,
+        )
+        devsetmenu.add_radiobutton(
+            label='200 sentences',
+            variable=self._devset_size,
+            value=200,
+            command=self.set_devset_size,
+        )
+        devsetmenu.add_radiobutton(
+            label='500 sentences',
+            variable=self._devset_size,
+            value=500,
+            command=self.set_devset_size,
+        )
+        menubar.add_cascade(label='Development-Set', underline=0, menu=devsetmenu)
+
+        helpmenu = Menu(menubar, tearoff=0)
+        helpmenu.add_command(label='About', underline=0, command=self.about)
+        menubar.add_cascade(label='Help', underline=0, menu=helpmenu)
+
+        parent.config(menu=menubar)
+
+    def toggle_show_trace(self, *e):
+        if self._showing_trace:
+            self.show_devset()
+        else:
+            self.show_trace()
+        return 'break'
+
+    _SCALE_N = 5  # center on the last 5 examples.
+ _DRAW_LINES = False + + def _eval_plot(self, *e, **config): + width = config.get('width', self.evalbox.winfo_width()) + height = config.get('height', self.evalbox.winfo_height()) + + # Clear the canvas + self.evalbox.delete('all') + + # Draw the precision & recall labels. + tag = self.evalbox.create_text( + 10, height // 2 - 10, justify='left', anchor='w', text='Precision' + ) + left, right = self.evalbox.bbox(tag)[2] + 5, width - 10 + tag = self.evalbox.create_text( + left + (width - left) // 2, + height - 10, + anchor='s', + text='Recall', + justify='center', + ) + top, bot = 10, self.evalbox.bbox(tag)[1] - 10 + + # Draw masks for clipping the plot. + bg = self._EVALBOX_PARAMS['background'] + self.evalbox.lower( + self.evalbox.create_rectangle(0, 0, left - 1, 5000, fill=bg, outline=bg) + ) + self.evalbox.lower( + self.evalbox.create_rectangle(0, bot + 1, 5000, 5000, fill=bg, outline=bg) + ) + + # Calculate the plot's scale. + if self._autoscale.get() and len(self._history) > 1: + max_precision = max_recall = 0 + min_precision = min_recall = 1 + for i in range(1, min(len(self._history), self._SCALE_N + 1)): + grammar, precision, recall, fmeasure = self._history[-i] + min_precision = min(precision, min_precision) + min_recall = min(recall, min_recall) + max_precision = max(precision, max_precision) + max_recall = max(recall, max_recall) + # if max_precision-min_precision > max_recall-min_recall: + # min_recall -= (max_precision-min_precision)/2 + # max_recall += (max_precision-min_precision)/2 + # else: + # min_precision -= (max_recall-min_recall)/2 + # max_precision += (max_recall-min_recall)/2 + # if min_recall < 0: + # max_recall -= min_recall + # min_recall = 0 + # if min_precision < 0: + # max_precision -= min_precision + # min_precision = 0 + min_precision = max(min_precision - 0.01, 0) + min_recall = max(min_recall - 0.01, 0) + max_precision = min(max_precision + 0.01, 1) + max_recall = min(max_recall + 0.01, 1) + else: + min_precision = min_recall = 0 + max_precision = max_recall = 1 + + # Draw the axis lines & grid lines + for i in range(11): + x = left + (right - left) * ( + (i / 10.0 - min_recall) / (max_recall - min_recall) + ) + y = bot - (bot - top) * ( + (i / 10.0 - min_precision) / (max_precision - min_precision) + ) + if left < x < right: + self.evalbox.create_line(x, top, x, bot, fill='#888') + if top < y < bot: + self.evalbox.create_line(left, y, right, y, fill='#888') + self.evalbox.create_line(left, top, left, bot) + self.evalbox.create_line(left, bot, right, bot) + + # Display the plot's scale + self.evalbox.create_text( + left - 3, + bot, + justify='right', + anchor='se', + text='%d%%' % (100 * min_precision), + ) + self.evalbox.create_text( + left - 3, + top, + justify='right', + anchor='ne', + text='%d%%' % (100 * max_precision), + ) + self.evalbox.create_text( + left, + bot + 3, + justify='center', + anchor='nw', + text='%d%%' % (100 * min_recall), + ) + self.evalbox.create_text( + right, + bot + 3, + justify='center', + anchor='ne', + text='%d%%' % (100 * max_recall), + ) + + # Display the scores. 
+ prev_x = prev_y = None + for i, (_, precision, recall, fscore) in enumerate(self._history): + x = left + (right - left) * ( + (recall - min_recall) / (max_recall - min_recall) + ) + y = bot - (bot - top) * ( + (precision - min_precision) / (max_precision - min_precision) + ) + if i == self._history_index: + self.evalbox.create_oval( + x - 2, y - 2, x + 2, y + 2, fill='#0f0', outline='#000' + ) + self.status['text'] = ( + 'Precision: %.2f%%\t' % (precision * 100) + + 'Recall: %.2f%%\t' % (recall * 100) + + 'F-score: %.2f%%' % (fscore * 100) + ) + else: + self.evalbox.lower( + self.evalbox.create_oval( + x - 2, y - 2, x + 2, y + 2, fill='#afa', outline='#8c8' + ) + ) + if prev_x is not None and self._eval_lines.get(): + self.evalbox.lower( + self.evalbox.create_line(prev_x, prev_y, x, y, fill='#8c8') + ) + prev_x, prev_y = x, y + + _eval_demon_running = False + + def _eval_demon(self): + if self.top is None: + return + if self.chunker is None: + self._eval_demon_running = False + return + + # Note our starting time. + t0 = time.time() + + # If are still typing, then wait for them to finish. + if ( + time.time() - self._last_keypress < self._EVAL_DELAY + and self.normalized_grammar != self._eval_normalized_grammar + ): + self._eval_demon_running = True + return self.top.after(int(self._EVAL_FREQ * 1000), self._eval_demon) + + # If the grammar changed, restart the evaluation. + if self.normalized_grammar != self._eval_normalized_grammar: + # Check if we've seen this grammar already. If so, then + # just use the old evaluation values. + for (g, p, r, f) in self._history: + if self.normalized_grammar == self.normalize_grammar(g): + self._history.append((g, p, r, f)) + self._history_index = len(self._history) - 1 + self._eval_plot() + self._eval_demon_running = False + self._eval_normalized_grammar = None + return + self._eval_index = 0 + self._eval_score = ChunkScore(chunk_label=self._chunk_label) + self._eval_grammar = self.grammar + self._eval_normalized_grammar = self.normalized_grammar + + # If the grammar is empty, the don't bother evaluating it, or + # recording it in history -- the score will just be 0. + if self.normalized_grammar.strip() == '': + # self._eval_index = self._devset_size.get() + self._eval_demon_running = False + return + + # Score the next set of examples + for gold in self.devset[ + self._eval_index : min( + self._eval_index + self._EVAL_CHUNK, self._devset_size.get() + ) + ]: + guess = self._chunkparse(gold.leaves()) + self._eval_score.score(gold, guess) + + # update our index in the devset. + self._eval_index += self._EVAL_CHUNK + + # Check if we're done + if self._eval_index >= self._devset_size.get(): + self._history.append( + ( + self._eval_grammar, + self._eval_score.precision(), + self._eval_score.recall(), + self._eval_score.f_measure(), + ) + ) + self._history_index = len(self._history) - 1 + self._eval_plot() + self._eval_demon_running = False + self._eval_normalized_grammar = None + else: + progress = 100 * self._eval_index / self._devset_size.get() + self.status['text'] = 'Evaluating on Development Set (%d%%)' % progress + self._eval_demon_running = True + self._adaptively_modify_eval_chunk(time.time() - t0) + self.top.after(int(self._EVAL_FREQ * 1000), self._eval_demon) + + def _adaptively_modify_eval_chunk(self, t): + """ + Modify _EVAL_CHUNK to try to keep the amount of time that the + eval demon takes between _EVAL_DEMON_MIN and _EVAL_DEMON_MAX. + + :param t: The amount of time that the eval demon took. 
+ """ + if t > self._EVAL_DEMON_MAX and self._EVAL_CHUNK > 5: + self._EVAL_CHUNK = min( + self._EVAL_CHUNK - 1, + max( + int(self._EVAL_CHUNK * (self._EVAL_DEMON_MAX / t)), + self._EVAL_CHUNK - 10, + ), + ) + elif t < self._EVAL_DEMON_MIN: + self._EVAL_CHUNK = max( + self._EVAL_CHUNK + 1, + min( + int(self._EVAL_CHUNK * (self._EVAL_DEMON_MIN / t)), + self._EVAL_CHUNK + 10, + ), + ) + + def _init_widgets(self, top): + frame0 = Frame(top, **self._FRAME_PARAMS) + frame0.grid_columnconfigure(0, weight=4) + frame0.grid_columnconfigure(3, weight=2) + frame0.grid_rowconfigure(1, weight=1) + frame0.grid_rowconfigure(5, weight=1) + + # The grammar + self.grammarbox = Text(frame0, font=self._font, **self._GRAMMARBOX_PARAMS) + self.grammarlabel = Label( + frame0, + font=self._font, + text='Grammar:', + highlightcolor='black', + background=self._GRAMMARBOX_PARAMS['background'], + ) + self.grammarlabel.grid(column=0, row=0, sticky='SW') + self.grammarbox.grid(column=0, row=1, sticky='NEWS') + + # Scroll bar for grammar + grammar_scrollbar = Scrollbar(frame0, command=self.grammarbox.yview) + grammar_scrollbar.grid(column=1, row=1, sticky='NWS') + self.grammarbox.config(yscrollcommand=grammar_scrollbar.set) + + # grammar buttons + bg = self._FRAME_PARAMS['background'] + frame3 = Frame(frame0, background=bg) + frame3.grid(column=0, row=2, sticky='EW') + Button( + frame3, + text='Prev Grammar', + command=self._history_prev, + **self._BUTTON_PARAMS + ).pack(side='left') + Button( + frame3, + text='Next Grammar', + command=self._history_next, + **self._BUTTON_PARAMS + ).pack(side='left') + + # Help box + self.helpbox = Text(frame0, font=self._smallfont, **self._HELPBOX_PARAMS) + self.helpbox.grid(column=3, row=1, sticky='NEWS') + self.helptabs = {} + bg = self._FRAME_PARAMS['background'] + helptab_frame = Frame(frame0, background=bg) + helptab_frame.grid(column=3, row=0, sticky='SW') + for i, (tab, tabstops, text) in enumerate(self.HELP): + label = Label(helptab_frame, text=tab, font=self._smallfont) + label.grid(column=i * 2, row=0, sticky='S') + # help_frame.grid_columnconfigure(i, weight=1) + # label.pack(side='left') + label.bind('', lambda e, tab=tab: self.show_help(tab)) + self.helptabs[tab] = label + Frame( + helptab_frame, height=1, width=self._HELPTAB_SPACER, background=bg + ).grid(column=i * 2 + 1, row=0) + self.helptabs[self.HELP[0][0]].configure(font=self._font) + self.helpbox.tag_config('elide', elide=True) + for (tag, params) in self.HELP_AUTOTAG: + self.helpbox.tag_config('tag-%s' % tag, **params) + self.show_help(self.HELP[0][0]) + + # Scroll bar for helpbox + help_scrollbar = Scrollbar(frame0, command=self.helpbox.yview) + self.helpbox.config(yscrollcommand=help_scrollbar.set) + help_scrollbar.grid(column=4, row=1, sticky='NWS') + + # The dev set + frame4 = Frame(frame0, background=self._FRAME_PARAMS['background']) + self.devsetbox = Text(frame4, font=self._font, **self._DEVSETBOX_PARAMS) + self.devsetbox.pack(expand=True, fill='both') + self.devsetlabel = Label( + frame0, + font=self._font, + text='Development Set:', + justify='right', + background=self._DEVSETBOX_PARAMS['background'], + ) + self.devsetlabel.grid(column=0, row=4, sticky='SW') + frame4.grid(column=0, row=5, sticky='NEWS') + + # dev set scrollbars + self.devset_scroll = Scrollbar(frame0, command=self._devset_scroll) + self.devset_scroll.grid(column=1, row=5, sticky='NWS') + self.devset_xscroll = Scrollbar( + frame4, command=self.devsetbox.xview, orient='horiz' + ) + self.devsetbox['xscrollcommand'] = self.devset_xscroll.set + 
self.devset_xscroll.pack(side='bottom', fill='x') + + # dev set buttons + bg = self._FRAME_PARAMS['background'] + frame1 = Frame(frame0, background=bg) + frame1.grid(column=0, row=7, sticky='EW') + Button( + frame1, + text='Prev Example (Ctrl-p)', + command=self._devset_prev, + **self._BUTTON_PARAMS + ).pack(side='left') + Button( + frame1, + text='Next Example (Ctrl-n)', + command=self._devset_next, + **self._BUTTON_PARAMS + ).pack(side='left') + self.devset_button = Button( + frame1, + text='Show example', + command=self.show_devset, + state='disabled', + **self._BUTTON_PARAMS + ) + self.devset_button.pack(side='right') + self.trace_button = Button( + frame1, text='Show trace', command=self.show_trace, **self._BUTTON_PARAMS + ) + self.trace_button.pack(side='right') + + # evaluation box + self.evalbox = Canvas(frame0, **self._EVALBOX_PARAMS) + label = Label( + frame0, + font=self._font, + text='Evaluation:', + justify='right', + background=self._EVALBOX_PARAMS['background'], + ) + label.grid(column=3, row=4, sticky='SW') + self.evalbox.grid(column=3, row=5, sticky='NEWS', columnspan=2) + + # evaluation box buttons + bg = self._FRAME_PARAMS['background'] + frame2 = Frame(frame0, background=bg) + frame2.grid(column=3, row=7, sticky='EW') + self._autoscale = IntVar(self.top) + self._autoscale.set(False) + Checkbutton( + frame2, + variable=self._autoscale, + command=self._eval_plot, + text='Zoom', + **self._BUTTON_PARAMS + ).pack(side='left') + self._eval_lines = IntVar(self.top) + self._eval_lines.set(False) + Checkbutton( + frame2, + variable=self._eval_lines, + command=self._eval_plot, + text='Lines', + **self._BUTTON_PARAMS + ).pack(side='left') + Button(frame2, text='History', **self._BUTTON_PARAMS).pack(side='right') + + # The status label + self.status = Label(frame0, font=self._font, **self._STATUS_PARAMS) + self.status.grid(column=0, row=9, sticky='NEW', padx=3, pady=2, columnspan=5) + + # Help box & devset box can't be edited. + self.helpbox['state'] = 'disabled' + self.devsetbox['state'] = 'disabled' + + # Spacers + bg = self._FRAME_PARAMS['background'] + Frame(frame0, height=10, width=0, background=bg).grid(column=0, row=3) + Frame(frame0, height=0, width=10, background=bg).grid(column=2, row=0) + Frame(frame0, height=6, width=0, background=bg).grid(column=0, row=8) + + # pack the frame. 
+ frame0.pack(fill='both', expand=True) + + # Set up colors for the devset box + self.devsetbox.tag_config('true-pos', background='#afa', underline='True') + self.devsetbox.tag_config('false-neg', underline='True', foreground='#800') + self.devsetbox.tag_config('false-pos', background='#faa') + self.devsetbox.tag_config('trace', foreground='#666', wrap='none') + self.devsetbox.tag_config('wrapindent', lmargin2=30, wrap='none') + self.devsetbox.tag_config('error', foreground='#800') + + # And for the grammarbox + self.grammarbox.tag_config('error', background='#fec') + self.grammarbox.tag_config('comment', foreground='#840') + self.grammarbox.tag_config('angle', foreground='#00f') + self.grammarbox.tag_config('brace', foreground='#0a0') + self.grammarbox.tag_config('hangindent', lmargin1=0, lmargin2=40) + + _showing_trace = False + + def show_trace(self, *e): + self._showing_trace = True + self.trace_button['state'] = 'disabled' + self.devset_button['state'] = 'normal' + + self.devsetbox['state'] = 'normal' + # self.devsetbox['wrap'] = 'none' + self.devsetbox.delete('1.0', 'end') + self.devsetlabel['text'] = 'Development Set (%d/%d)' % ( + (self.devset_index + 1, self._devset_size.get()) + ) + + if self.chunker is None: + self.devsetbox.insert('1.0', 'Trace: waiting for a valid grammar.') + self.devsetbox.tag_add('error', '1.0', 'end') + return # can't do anything more + + gold_tree = self.devset[self.devset_index] + rules = self.chunker.rules() + + # Calculate the tag sequence + tagseq = '\t' + charnum = [1] + for wordnum, (word, pos) in enumerate(gold_tree.leaves()): + tagseq += '%s ' % pos + charnum.append(len(tagseq)) + self.charnum = dict( + ((i, j), charnum[j]) + for i in range(len(rules) + 1) + for j in range(len(charnum)) + ) + self.linenum = dict((i, i * 2 + 2) for i in range(len(rules) + 1)) + + for i in range(len(rules) + 1): + if i == 0: + self.devsetbox.insert('end', 'Start:\n') + self.devsetbox.tag_add('trace', 'end -2c linestart', 'end -2c') + else: + self.devsetbox.insert('end', 'Apply %s:\n' % rules[i - 1]) + self.devsetbox.tag_add('trace', 'end -2c linestart', 'end -2c') + # Display the tag sequence. + self.devsetbox.insert('end', tagseq + '\n') + self.devsetbox.tag_add('wrapindent', 'end -2c linestart', 'end -2c') + # Run a partial parser, and extract gold & test chunks + chunker = RegexpChunkParser(rules[:i]) + test_tree = self._chunkparse(gold_tree.leaves()) + gold_chunks = self._chunks(gold_tree) + test_chunks = self._chunks(test_tree) + # Compare them. + for chunk in gold_chunks.intersection(test_chunks): + self._color_chunk(i, chunk, 'true-pos') + for chunk in gold_chunks - test_chunks: + self._color_chunk(i, chunk, 'false-neg') + for chunk in test_chunks - gold_chunks: + self._color_chunk(i, chunk, 'false-pos') + self.devsetbox.insert('end', 'Finished.\n') + self.devsetbox.tag_add('trace', 'end -2c linestart', 'end -2c') + + # This is a hack, because the x-scrollbar isn't updating its + # position right -- I'm not sure what the underlying cause is + # though. 
(This is on OS X w/ python 2.5) + self.top.after(100, self.devset_xscroll.set, 0, 0.3) + + def show_help(self, tab): + self.helpbox['state'] = 'normal' + self.helpbox.delete('1.0', 'end') + for (name, tabstops, text) in self.HELP: + if name == tab: + text = text.replace( + '<>', + '\n'.join( + ( + '\t%s\t%s' % item + for item in sorted( + list(self.tagset.items()), + key=lambda t_w: re.match('\w+', t_w[0]) + and (0, t_w[0]) + or (1, t_w[0]), + ) + ) + ), + ) + + self.helptabs[name].config(**self._HELPTAB_FG_PARAMS) + self.helpbox.config(tabs=tabstops) + self.helpbox.insert('1.0', text + '\n' * 20) + C = '1.0 + %d chars' + for (tag, params) in self.HELP_AUTOTAG: + pattern = '(?s)(<%s>)(.*?)()' % (tag, tag) + for m in re.finditer(pattern, text): + self.helpbox.tag_add('elide', C % m.start(1), C % m.end(1)) + self.helpbox.tag_add( + 'tag-%s' % tag, C % m.start(2), C % m.end(2) + ) + self.helpbox.tag_add('elide', C % m.start(3), C % m.end(3)) + else: + self.helptabs[name].config(**self._HELPTAB_BG_PARAMS) + self.helpbox['state'] = 'disabled' + + def _history_prev(self, *e): + self._view_history(self._history_index - 1) + return 'break' + + def _history_next(self, *e): + self._view_history(self._history_index + 1) + return 'break' + + def _view_history(self, index): + # Bounds & sanity checking: + index = max(0, min(len(self._history) - 1, index)) + if not self._history: + return + # Already viewing the requested history item? + if index == self._history_index: + return + # Show the requested grammar. It will get added to _history + # only if they edit it (causing self.update() to get run.) + self.grammarbox['state'] = 'normal' + self.grammarbox.delete('1.0', 'end') + self.grammarbox.insert('end', self._history[index][0]) + self.grammarbox.mark_set('insert', '1.0') + self._history_index = index + self._syntax_highlight_grammar(self._history[index][0]) + # Record the normalized grammar & regenerate the chunker. + self.normalized_grammar = self.normalize_grammar(self._history[index][0]) + if self.normalized_grammar: + rules = [ + RegexpChunkRule.fromstring(line) + for line in self.normalized_grammar.split('\n') + ] + else: + rules = [] + self.chunker = RegexpChunkParser(rules) + # Show the score. + self._eval_plot() + # Update the devset box + self._highlight_devset() + if self._showing_trace: + self.show_trace() + # Update the grammar label + if self._history_index < len(self._history) - 1: + self.grammarlabel['text'] = 'Grammar %s/%s:' % ( + self._history_index + 1, + len(self._history), + ) + else: + self.grammarlabel['text'] = 'Grammar:' + + def _devset_next(self, *e): + self._devset_scroll('scroll', 1, 'page') + return 'break' + + def _devset_prev(self, *e): + self._devset_scroll('scroll', -1, 'page') + return 'break' + + def destroy(self, *e): + if self.top is None: + return + self.top.destroy() + self.top = None + + def _devset_scroll(self, command, *args): + N = 1 # size of a page -- one sentence. 
+ showing_trace = self._showing_trace + if command == 'scroll' and args[1].startswith('unit'): + self.show_devset(self.devset_index + int(args[0])) + elif command == 'scroll' and args[1].startswith('page'): + self.show_devset(self.devset_index + N * int(args[0])) + elif command == 'moveto': + self.show_devset(int(float(args[0]) * self._devset_size.get())) + else: + assert 0, 'bad scroll command %s %s' % (command, args) + if showing_trace: + self.show_trace() + + def show_devset(self, index=None): + if index is None: + index = self.devset_index + + # Bounds checking + index = min(max(0, index), self._devset_size.get() - 1) + + if index == self.devset_index and not self._showing_trace: + return + self.devset_index = index + + self._showing_trace = False + self.trace_button['state'] = 'normal' + self.devset_button['state'] = 'disabled' + + # Clear the text box. + self.devsetbox['state'] = 'normal' + self.devsetbox['wrap'] = 'word' + self.devsetbox.delete('1.0', 'end') + self.devsetlabel['text'] = 'Development Set (%d/%d)' % ( + (self.devset_index + 1, self._devset_size.get()) + ) + + # Add the sentences + sample = self.devset[self.devset_index : self.devset_index + 1] + self.charnum = {} + self.linenum = {0: 1} + for sentnum, sent in enumerate(sample): + linestr = '' + for wordnum, (word, pos) in enumerate(sent.leaves()): + self.charnum[sentnum, wordnum] = len(linestr) + linestr += '%s/%s ' % (word, pos) + self.charnum[sentnum, wordnum + 1] = len(linestr) + self.devsetbox.insert('end', linestr[:-1] + '\n\n') + + # Highlight chunks in the dev set + if self.chunker is not None: + self._highlight_devset() + self.devsetbox['state'] = 'disabled' + + # Update the scrollbar + first = self.devset_index / self._devset_size.get() + last = (self.devset_index + 2) / self._devset_size.get() + self.devset_scroll.set(first, last) + + def _chunks(self, tree): + chunks = set() + wordnum = 0 + for child in tree: + if isinstance(child, Tree): + if child.label() == self._chunk_label: + chunks.add((wordnum, wordnum + len(child))) + wordnum += len(child) + else: + wordnum += 1 + return chunks + + def _syntax_highlight_grammar(self, grammar): + if self.top is None: + return + self.grammarbox.tag_remove('comment', '1.0', 'end') + self.grammarbox.tag_remove('angle', '1.0', 'end') + self.grammarbox.tag_remove('brace', '1.0', 'end') + self.grammarbox.tag_add('hangindent', '1.0', 'end') + for lineno, line in enumerate(grammar.split('\n')): + if not line.strip(): + continue + m = re.match(r'(\\.|[^#])*(#.*)?', line) + comment_start = None + if m.group(2): + comment_start = m.start(2) + s = '%d.%d' % (lineno + 1, m.start(2)) + e = '%d.%d' % (lineno + 1, m.end(2)) + self.grammarbox.tag_add('comment', s, e) + for m in re.finditer('[<>{}]', line): + if comment_start is not None and m.start() >= comment_start: + break + s = '%d.%d' % (lineno + 1, m.start()) + e = '%d.%d' % (lineno + 1, m.end()) + if m.group() in '<>': + self.grammarbox.tag_add('angle', s, e) + else: + self.grammarbox.tag_add('brace', s, e) + + def _grammarcheck(self, grammar): + if self.top is None: + return + self.grammarbox.tag_remove('error', '1.0', 'end') + self._grammarcheck_errs = [] + for lineno, line in enumerate(grammar.split('\n')): + line = re.sub(r'((\\.|[^#])*)(#.*)?', r'\1', line) + line = line.strip() + if line: + try: + RegexpChunkRule.fromstring(line) + except ValueError as e: + self.grammarbox.tag_add( + 'error', '%s.0' % (lineno + 1), '%s.0 lineend' % (lineno + 1) + ) + self.status['text'] = '' + + def update(self, *event): + # Record when 
update was called (for grammarcheck) + if event: + self._last_keypress = time.time() + + # Read the grammar from the Text box. + self.grammar = grammar = self.grammarbox.get('1.0', 'end') + + # If the grammar hasn't changed, do nothing: + normalized_grammar = self.normalize_grammar(grammar) + if normalized_grammar == self.normalized_grammar: + return + else: + self.normalized_grammar = normalized_grammar + + # If the grammar has changed, and we're looking at history, + # then stop looking at history. + if self._history_index < len(self._history) - 1: + self.grammarlabel['text'] = 'Grammar:' + + self._syntax_highlight_grammar(grammar) + + # The grammar has changed; try parsing it. If it doesn't + # parse, do nothing. (flag error location?) + try: + # Note: the normalized grammar has no blank lines. + if normalized_grammar: + rules = [ + RegexpChunkRule.fromstring(line) + for line in normalized_grammar.split('\n') + ] + else: + rules = [] + except ValueError as e: + # Use the un-normalized grammar for error highlighting. + self._grammarcheck(grammar) + self.chunker = None + return + + self.chunker = RegexpChunkParser(rules) + self.grammarbox.tag_remove('error', '1.0', 'end') + self.grammar_changed = time.time() + # Display the results + if self._showing_trace: + self.show_trace() + else: + self._highlight_devset() + # Start the eval demon + if not self._eval_demon_running: + self._eval_demon() + + def _highlight_devset(self, sample=None): + if sample is None: + sample = self.devset[self.devset_index : self.devset_index + 1] + + self.devsetbox.tag_remove('true-pos', '1.0', 'end') + self.devsetbox.tag_remove('false-neg', '1.0', 'end') + self.devsetbox.tag_remove('false-pos', '1.0', 'end') + + # Run the grammar on the test cases. + for sentnum, gold_tree in enumerate(sample): + # Run the chunk parser + test_tree = self._chunkparse(gold_tree.leaves()) + # Extract gold & test chunks + gold_chunks = self._chunks(gold_tree) + test_chunks = self._chunks(test_tree) + # Compare them. + for chunk in gold_chunks.intersection(test_chunks): + self._color_chunk(sentnum, chunk, 'true-pos') + for chunk in gold_chunks - test_chunks: + self._color_chunk(sentnum, chunk, 'false-neg') + for chunk in test_chunks - gold_chunks: + self._color_chunk(sentnum, chunk, 'false-pos') + + def _chunkparse(self, words): + try: + return self.chunker.parse(words) + except (ValueError, IndexError) as e: + # There's an error somewhere in the grammar, but we're not sure + # exactly where, so just mark the whole grammar as bad. + # E.g., this is caused by: "({})" + self.grammarbox.tag_add('error', '1.0', 'end') + # Treat it as tagging nothing: + return words + + def _color_chunk(self, sentnum, chunk, tag): + start, end = chunk + self.devsetbox.tag_add( + tag, + '%s.%s' % (self.linenum[sentnum], self.charnum[sentnum, start]), + '%s.%s' % (self.linenum[sentnum], self.charnum[sentnum, end] - 1), + ) + + def reset(self): + # Clear various variables + self.chunker = None + self.grammar = None + self.normalized_grammar = None + self.grammar_changed = 0 + self._history = [] + self._history_index = 0 + # Update the on-screen display. 
+ self.grammarbox.delete('1.0', 'end') + self.show_devset(0) + self.update() + # self._eval_plot() + + SAVE_GRAMMAR_TEMPLATE = ( + '# Regexp Chunk Parsing Grammar\n' + '# Saved %(date)s\n' + '#\n' + '# Development set: %(devset)s\n' + '# Precision: %(precision)s\n' + '# Recall: %(recall)s\n' + '# F-score: %(fscore)s\n\n' + '%(grammar)s\n' + ) + + def save_grammar(self, filename=None): + if not filename: + ftypes = [('Chunk Gramamr', '.chunk'), ('All files', '*')] + filename = asksaveasfilename(filetypes=ftypes, defaultextension='.chunk') + if not filename: + return + if self._history and self.normalized_grammar == self.normalize_grammar( + self._history[-1][0] + ): + precision, recall, fscore = [ + '%.2f%%' % (100 * v) for v in self._history[-1][1:] + ] + elif self.chunker is None: + precision = recall = fscore = 'Grammar not well formed' + else: + precision = recall = fscore = 'Not finished evaluation yet' + + with open(filename, 'w') as outfile: + outfile.write( + self.SAVE_GRAMMAR_TEMPLATE + % dict( + date=time.ctime(), + devset=self.devset_name, + precision=precision, + recall=recall, + fscore=fscore, + grammar=self.grammar.strip(), + ) + ) + + def load_grammar(self, filename=None): + if not filename: + ftypes = [('Chunk Gramamr', '.chunk'), ('All files', '*')] + filename = askopenfilename(filetypes=ftypes, defaultextension='.chunk') + if not filename: + return + self.grammarbox.delete('1.0', 'end') + self.update() + with open(filename, 'r') as infile: + grammar = infile.read() + grammar = re.sub( + '^\# Regexp Chunk Parsing Grammar[\s\S]*' 'F-score:.*\n', '', grammar + ).lstrip() + self.grammarbox.insert('1.0', grammar) + self.update() + + def save_history(self, filename=None): + if not filename: + ftypes = [('Chunk Gramamr History', '.txt'), ('All files', '*')] + filename = asksaveasfilename(filetypes=ftypes, defaultextension='.txt') + if not filename: + return + + with open(filename, 'w') as outfile: + outfile.write('# Regexp Chunk Parsing Grammar History\n') + outfile.write('# Saved %s\n' % time.ctime()) + outfile.write('# Development set: %s\n' % self.devset_name) + for i, (g, p, r, f) in enumerate(self._history): + hdr = ( + 'Grammar %d/%d (precision=%.2f%%, recall=%.2f%%, ' + 'fscore=%.2f%%)' + % (i + 1, len(self._history), p * 100, r * 100, f * 100) + ) + outfile.write('\n%s\n' % hdr) + outfile.write(''.join(' %s\n' % line for line in g.strip().split())) + + if not ( + self._history + and self.normalized_grammar + == self.normalize_grammar(self._history[-1][0]) + ): + if self.chunker is None: + outfile.write('\nCurrent Grammar (not well-formed)\n') + else: + outfile.write('\nCurrent Grammar (not evaluated)\n') + outfile.write( + ''.join(' %s\n' % line for line in self.grammar.strip().split()) + ) + + def about(self, *e): + ABOUT = "NLTK RegExp Chunk Parser Application\n" + "Written by Edward Loper" + TITLE = 'About: Regular Expression Chunk Parser Application' + try: + from six.moves.tkinter_messagebox import Message + + Message(message=ABOUT, title=TITLE).show() + except: + ShowText(self.top, TITLE, ABOUT) + + def set_devset_size(self, size=None): + if size is not None: + self._devset_size.set(size) + self._devset_size.set(min(len(self.devset), self._devset_size.get())) + self.show_devset(1) + self.show_devset(0) + # what about history? Evaluated at diff dev set sizes! 
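# Aside (not part of the patched file): a minimal sketch of the evaluation loop that
# the eval demon and save_grammar above automate -- parse a chunk grammar, run it over
# part of the conll2000 development set, and report precision/recall/F-score. The
# grammar string below is only a hypothetical example, and the slice size of 100
# sentences simply mirrors the app's default devset size.
from nltk.chunk import RegexpParser
from nltk.chunk.util import ChunkScore
from nltk.corpus import conll2000

grammar = r'NP: {<DT>?<JJ>*<NN.*>+}'   # illustrative NP rule, not shipped with the app
chunker = RegexpParser(grammar)
score = ChunkScore(chunk_label='NP')

for gold in conll2000.chunked_sents('train.txt', chunk_types=['NP'])[:100]:
    # leaves() yields the (word, tag) pairs; parse() re-chunks them with our grammar
    guess = chunker.parse(gold.leaves())
    score.score(gold, guess)

print('Precision: %.2f%%' % (100 * score.precision()))
print('Recall:    %.2f%%' % (100 * score.recall()))
print('F-score:   %.2f%%' % (100 * score.f_measure()))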
+ + def resize(self, size=None): + if size is not None: + self._size.set(size) + size = self._size.get() + self._font.configure(size=-(abs(size))) + self._smallfont.configure(size=min(-10, -(abs(size)) * 14 // 20)) + + def mainloop(self, *args, **kwargs): + """ + Enter the Tkinter mainloop. This function must be called if + this demo is created from a non-interactive program (e.g. + from a secript); otherwise, the demo will close as soon as + the script completes. + """ + if in_idle(): + return + self.top.mainloop(*args, **kwargs) + + +def app(): + RegexpChunkApp().mainloop() + + +if __name__ == '__main__': + app() + +__all__ = ['app'] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/collocations_app.py b/venv.bak/lib/python3.7/site-packages/nltk/app/collocations_app.py new file mode 100644 index 0000000..b2165e9 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/app/collocations_app.py @@ -0,0 +1,442 @@ +# Natural Language Toolkit: Collocations Application +# Much of the GUI code is imported from concordance.py; We intend to merge these tools together +# Copyright (C) 2001-2019 NLTK Project +# Author: Sumukh Ghodke +# URL: +# For license information, see LICENSE.TXT +# + + +from __future__ import division + +import threading + +from six.moves import queue as q +from six.moves.tkinter_font import Font +from six.moves.tkinter import ( + Button, + END, + Frame, + IntVar, + LEFT, + Label, + Menu, + OptionMenu, + SUNKEN, + Scrollbar, + StringVar, + Text, + Tk, +) + +from nltk.corpus import ( + cess_cat, + brown, + nps_chat, + treebank, + sinica_treebank, + alpino, + indian, + floresta, + mac_morpho, + machado, + cess_esp, +) +from nltk.util import in_idle +from nltk.probability import FreqDist + + +CORPUS_LOADED_EVENT = '<>' +ERROR_LOADING_CORPUS_EVENT = '<>' +POLL_INTERVAL = 100 + +_DEFAULT = 'English: Brown Corpus (Humor)' +_CORPORA = { + 'Catalan: CESS-CAT Corpus': lambda: cess_cat.words(), + 'English: Brown Corpus': lambda: brown.words(), + 'English: Brown Corpus (Press)': lambda: brown.words( + categories=['news', 'editorial', 'reviews'] + ), + 'English: Brown Corpus (Religion)': lambda: brown.words(categories='religion'), + 'English: Brown Corpus (Learned)': lambda: brown.words(categories='learned'), + 'English: Brown Corpus (Science Fiction)': lambda: brown.words( + categories='science_fiction' + ), + 'English: Brown Corpus (Romance)': lambda: brown.words(categories='romance'), + 'English: Brown Corpus (Humor)': lambda: brown.words(categories='humor'), + 'English: NPS Chat Corpus': lambda: nps_chat.words(), + 'English: Wall Street Journal Corpus': lambda: treebank.words(), + 'Chinese: Sinica Corpus': lambda: sinica_treebank.words(), + 'Dutch: Alpino Corpus': lambda: alpino.words(), + 'Hindi: Indian Languages Corpus': lambda: indian.words(files='hindi.pos'), + 'Portuguese: Floresta Corpus (Portugal)': lambda: floresta.words(), + 'Portuguese: MAC-MORPHO Corpus (Brazil)': lambda: mac_morpho.words(), + 'Portuguese: Machado Corpus (Brazil)': lambda: machado.words(), + 'Spanish: CESS-ESP Corpus': lambda: cess_esp.words(), +} + + +class CollocationsView: + _BACKGROUND_COLOUR = '#FFF' # white + + def __init__(self): + self.queue = q.Queue() + self.model = CollocationsModel(self.queue) + self.top = Tk() + self._init_top(self.top) + self._init_menubar() + self._init_widgets(self.top) + self.load_corpus(self.model.DEFAULT_CORPUS) + self.after = self.top.after(POLL_INTERVAL, self._poll) + + def _init_top(self, top): + top.geometry('550x650+50+50') + top.title('NLTK Collocations 
List') + top.bind('', self.destroy) + top.protocol('WM_DELETE_WINDOW', self.destroy) + top.minsize(550, 650) + + def _init_widgets(self, parent): + self.main_frame = Frame( + parent, dict(background=self._BACKGROUND_COLOUR, padx=1, pady=1, border=1) + ) + self._init_corpus_select(self.main_frame) + self._init_results_box(self.main_frame) + self._init_paging(self.main_frame) + self._init_status(self.main_frame) + self.main_frame.pack(fill='both', expand=True) + + def _init_corpus_select(self, parent): + innerframe = Frame(parent, background=self._BACKGROUND_COLOUR) + self.var = StringVar(innerframe) + self.var.set(self.model.DEFAULT_CORPUS) + Label( + innerframe, + justify=LEFT, + text=' Corpus: ', + background=self._BACKGROUND_COLOUR, + padx=2, + pady=1, + border=0, + ).pack(side='left') + + other_corpora = list(self.model.CORPORA.keys()).remove( + self.model.DEFAULT_CORPUS + ) + om = OptionMenu( + innerframe, + self.var, + self.model.DEFAULT_CORPUS, + command=self.corpus_selected, + *self.model.non_default_corpora() + ) + om['borderwidth'] = 0 + om['highlightthickness'] = 1 + om.pack(side='left') + innerframe.pack(side='top', fill='x', anchor='n') + + def _init_status(self, parent): + self.status = Label( + parent, + justify=LEFT, + relief=SUNKEN, + background=self._BACKGROUND_COLOUR, + border=0, + padx=1, + pady=0, + ) + self.status.pack(side='top', anchor='sw') + + def _init_menubar(self): + self._result_size = IntVar(self.top) + menubar = Menu(self.top) + + filemenu = Menu(menubar, tearoff=0, borderwidth=0) + filemenu.add_command( + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-q' + ) + menubar.add_cascade(label='File', underline=0, menu=filemenu) + + editmenu = Menu(menubar, tearoff=0) + rescntmenu = Menu(editmenu, tearoff=0) + rescntmenu.add_radiobutton( + label='20', + variable=self._result_size, + underline=0, + value=20, + command=self.set_result_size, + ) + rescntmenu.add_radiobutton( + label='50', + variable=self._result_size, + underline=0, + value=50, + command=self.set_result_size, + ) + rescntmenu.add_radiobutton( + label='100', + variable=self._result_size, + underline=0, + value=100, + command=self.set_result_size, + ) + rescntmenu.invoke(1) + editmenu.add_cascade(label='Result Count', underline=0, menu=rescntmenu) + + menubar.add_cascade(label='Edit', underline=0, menu=editmenu) + self.top.config(menu=menubar) + + def set_result_size(self, **kwargs): + self.model.result_count = self._result_size.get() + + def _init_results_box(self, parent): + innerframe = Frame(parent) + i1 = Frame(innerframe) + i2 = Frame(innerframe) + vscrollbar = Scrollbar(i1, borderwidth=1) + hscrollbar = Scrollbar(i2, borderwidth=1, orient='horiz') + self.results_box = Text( + i1, + font=Font(family='courier', size='16'), + state='disabled', + borderwidth=1, + yscrollcommand=vscrollbar.set, + xscrollcommand=hscrollbar.set, + wrap='none', + width='40', + height='20', + exportselection=1, + ) + self.results_box.pack(side='left', fill='both', expand=True) + vscrollbar.pack(side='left', fill='y', anchor='e') + vscrollbar.config(command=self.results_box.yview) + hscrollbar.pack(side='left', fill='x', expand=True, anchor='w') + hscrollbar.config(command=self.results_box.xview) + # there is no other way of avoiding the overlap of scrollbars while using pack layout manager!!! 
+ Label(i2, text=' ', background=self._BACKGROUND_COLOUR).pack( + side='left', anchor='e' + ) + i1.pack(side='top', fill='both', expand=True, anchor='n') + i2.pack(side='bottom', fill='x', anchor='s') + innerframe.pack(side='top', fill='both', expand=True) + + def _init_paging(self, parent): + innerframe = Frame(parent, background=self._BACKGROUND_COLOUR) + self.prev = prev = Button( + innerframe, + text='Previous', + command=self.previous, + width='10', + borderwidth=1, + highlightthickness=1, + state='disabled', + ) + prev.pack(side='left', anchor='center') + self.next = next = Button( + innerframe, + text='Next', + command=self.__next__, + width='10', + borderwidth=1, + highlightthickness=1, + state='disabled', + ) + next.pack(side='right', anchor='center') + innerframe.pack(side='top', fill='y') + self.reset_current_page() + + def reset_current_page(self): + self.current_page = -1 + + def _poll(self): + try: + event = self.queue.get(block=False) + except q.Empty: + pass + else: + if event == CORPUS_LOADED_EVENT: + self.handle_corpus_loaded(event) + elif event == ERROR_LOADING_CORPUS_EVENT: + self.handle_error_loading_corpus(event) + self.after = self.top.after(POLL_INTERVAL, self._poll) + + def handle_error_loading_corpus(self, event): + self.status['text'] = 'Error in loading ' + self.var.get() + self.unfreeze_editable() + self.clear_results_box() + self.freeze_editable() + self.reset_current_page() + + def handle_corpus_loaded(self, event): + self.status['text'] = self.var.get() + ' is loaded' + self.unfreeze_editable() + self.clear_results_box() + self.reset_current_page() + # self.next() + collocations = self.model.next(self.current_page + 1) + self.write_results(collocations) + self.current_page += 1 + + def corpus_selected(self, *args): + new_selection = self.var.get() + self.load_corpus(new_selection) + + def previous(self): + self.freeze_editable() + collocations = self.model.prev(self.current_page - 1) + self.current_page = self.current_page - 1 + self.clear_results_box() + self.write_results(collocations) + self.unfreeze_editable() + + def __next__(self): + self.freeze_editable() + collocations = self.model.next(self.current_page + 1) + self.clear_results_box() + self.write_results(collocations) + self.current_page += 1 + self.unfreeze_editable() + + def load_corpus(self, selection): + if self.model.selected_corpus != selection: + self.status['text'] = 'Loading ' + selection + '...' 
+ self.freeze_editable() + self.model.load_corpus(selection) + + def freeze_editable(self): + self.prev['state'] = 'disabled' + self.next['state'] = 'disabled' + + def clear_results_box(self): + self.results_box['state'] = 'normal' + self.results_box.delete("1.0", END) + self.results_box['state'] = 'disabled' + + def fire_event(self, event): + # Firing an event so that rendering of widgets happen in the mainloop thread + self.top.event_generate(event, when='tail') + + def destroy(self, *e): + if self.top is None: + return + self.top.after_cancel(self.after) + self.top.destroy() + self.top = None + + def mainloop(self, *args, **kwargs): + if in_idle(): + return + self.top.mainloop(*args, **kwargs) + + def unfreeze_editable(self): + self.set_paging_button_states() + + def set_paging_button_states(self): + if self.current_page == -1 or self.current_page == 0: + self.prev['state'] = 'disabled' + else: + self.prev['state'] = 'normal' + if self.model.is_last_page(self.current_page): + self.next['state'] = 'disabled' + else: + self.next['state'] = 'normal' + + def write_results(self, results): + self.results_box['state'] = 'normal' + row = 1 + for each in results: + self.results_box.insert(str(row) + '.0', each[0] + " " + each[1] + "\n") + row += 1 + self.results_box['state'] = 'disabled' + + +class CollocationsModel: + def __init__(self, queue): + self.result_count = None + self.selected_corpus = None + self.collocations = None + self.CORPORA = _CORPORA + self.DEFAULT_CORPUS = _DEFAULT + self.queue = queue + self.reset_results() + + def reset_results(self): + self.result_pages = [] + self.results_returned = 0 + + def load_corpus(self, name): + self.selected_corpus = name + self.collocations = None + runner_thread = self.LoadCorpus(name, self) + runner_thread.start() + self.reset_results() + + def non_default_corpora(self): + copy = [] + copy.extend(list(self.CORPORA.keys())) + copy.remove(self.DEFAULT_CORPUS) + copy.sort() + return copy + + def is_last_page(self, number): + if number < len(self.result_pages): + return False + return self.results_returned + ( + number - len(self.result_pages) + ) * self.result_count >= len(self.collocations) + + def next(self, page): + if (len(self.result_pages) - 1) < page: + for i in range(page - (len(self.result_pages) - 1)): + self.result_pages.append( + self.collocations[ + self.results_returned : self.results_returned + + self.result_count + ] + ) + self.results_returned += self.result_count + return self.result_pages[page] + + def prev(self, page): + if page == -1: + return [] + return self.result_pages[page] + + class LoadCorpus(threading.Thread): + def __init__(self, name, model): + threading.Thread.__init__(self) + self.model, self.name = model, name + + def run(self): + try: + words = self.model.CORPORA[self.name]() + from operator import itemgetter + + text = [w for w in words if len(w) > 2] + fd = FreqDist(tuple(text[i : i + 2]) for i in range(len(text) - 1)) + vocab = FreqDist(text) + scored = [ + ((w1, w2), fd[(w1, w2)] ** 3 / (vocab[w1] * vocab[w2])) + for w1, w2 in fd + ] + scored.sort(key=itemgetter(1), reverse=True) + self.model.collocations = list(map(itemgetter(0), scored)) + self.model.queue.put(CORPUS_LOADED_EVENT) + except Exception as e: + print(e) + self.model.queue.put(ERROR_LOADING_CORPUS_EVENT) + + +# def collocations(): +# colloc_strings = [w1 + ' ' + w2 for w1, w2 in self._collocations[:num]] + + +def app(): + c = CollocationsView() + c.mainloop() + + +if __name__ == '__main__': + app() + +__all__ = ['app'] diff --git 
a/venv.bak/lib/python3.7/site-packages/nltk/app/concordance_app.py b/venv.bak/lib/python3.7/site-packages/nltk/app/concordance_app.py new file mode 100644 index 0000000..a7f55d3 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/app/concordance_app.py @@ -0,0 +1,711 @@ +# Natural Language Toolkit: Concordance Application +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Sumukh Ghodke +# URL: +# For license information, see LICENSE.TXT + +import re +import threading + +from six.moves import queue as q +from six.moves.tkinter_font import Font +from six.moves.tkinter import ( + Tk, + Button, + END, + Entry, + Frame, + IntVar, + LEFT, + Label, + Menu, + OptionMenu, + SUNKEN, + Scrollbar, + StringVar, + Text, +) + +import nltk.compat +from nltk.corpus import ( + cess_cat, + brown, + nps_chat, + treebank, + sinica_treebank, + alpino, + indian, + floresta, + mac_morpho, + cess_esp, +) +from nltk.util import in_idle +from nltk.draw.util import ShowText + +WORD_OR_TAG = '[^/ ]+' +BOUNDARY = r'\b' + +CORPUS_LOADED_EVENT = '<>' +SEARCH_TERMINATED_EVENT = '<>' +SEARCH_ERROR_EVENT = '<>' +ERROR_LOADING_CORPUS_EVENT = '<>' + +POLL_INTERVAL = 50 + +# NB All corpora must be specified in a lambda expression so as not to be +# loaded when the module is imported. + +_DEFAULT = 'English: Brown Corpus (Humor, simplified)' +_CORPORA = { + 'Catalan: CESS-CAT Corpus (simplified)': lambda: cess_cat.tagged_sents( + tagset='universal' + ), + 'English: Brown Corpus': lambda: brown.tagged_sents(), + 'English: Brown Corpus (simplified)': lambda: brown.tagged_sents( + tagset='universal' + ), + 'English: Brown Corpus (Press, simplified)': lambda: brown.tagged_sents( + categories=['news', 'editorial', 'reviews'], tagset='universal' + ), + 'English: Brown Corpus (Religion, simplified)': lambda: brown.tagged_sents( + categories='religion', tagset='universal' + ), + 'English: Brown Corpus (Learned, simplified)': lambda: brown.tagged_sents( + categories='learned', tagset='universal' + ), + 'English: Brown Corpus (Science Fiction, simplified)': lambda: brown.tagged_sents( + categories='science_fiction', tagset='universal' + ), + 'English: Brown Corpus (Romance, simplified)': lambda: brown.tagged_sents( + categories='romance', tagset='universal' + ), + 'English: Brown Corpus (Humor, simplified)': lambda: brown.tagged_sents( + categories='humor', tagset='universal' + ), + 'English: NPS Chat Corpus': lambda: nps_chat.tagged_posts(), + 'English: NPS Chat Corpus (simplified)': lambda: nps_chat.tagged_posts( + tagset='universal' + ), + 'English: Wall Street Journal Corpus': lambda: treebank.tagged_sents(), + 'English: Wall Street Journal Corpus (simplified)': lambda: treebank.tagged_sents( + tagset='universal' + ), + 'Chinese: Sinica Corpus': lambda: sinica_treebank.tagged_sents(), + 'Chinese: Sinica Corpus (simplified)': lambda: sinica_treebank.tagged_sents( + tagset='universal' + ), + 'Dutch: Alpino Corpus': lambda: alpino.tagged_sents(), + 'Dutch: Alpino Corpus (simplified)': lambda: alpino.tagged_sents( + tagset='universal' + ), + 'Hindi: Indian Languages Corpus': lambda: indian.tagged_sents(files='hindi.pos'), + 'Hindi: Indian Languages Corpus (simplified)': lambda: indian.tagged_sents( + files='hindi.pos', tagset='universal' + ), + 'Portuguese: Floresta Corpus (Portugal)': lambda: floresta.tagged_sents(), + 'Portuguese: Floresta Corpus (Portugal, simplified)': lambda: floresta.tagged_sents( + tagset='universal' + ), + 'Portuguese: MAC-MORPHO Corpus (Brazil)': lambda: mac_morpho.tagged_sents(), + 'Portuguese: 
MAC-MORPHO Corpus (Brazil, simplified)': lambda: mac_morpho.tagged_sents( + tagset='universal' + ), + 'Spanish: CESS-ESP Corpus (simplified)': lambda: cess_esp.tagged_sents( + tagset='universal' + ), +} + + +class ConcordanceSearchView(object): + _BACKGROUND_COLOUR = '#FFF' # white + + # Colour of highlighted results + _HIGHLIGHT_WORD_COLOUR = '#F00' # red + _HIGHLIGHT_WORD_TAG = 'HL_WRD_TAG' + + _HIGHLIGHT_LABEL_COLOUR = '#C0C0C0' # dark grey + _HIGHLIGHT_LABEL_TAG = 'HL_LBL_TAG' + + # Percentage of text left of the scrollbar position + _FRACTION_LEFT_TEXT = 0.30 + + def __init__(self): + self.queue = q.Queue() + self.model = ConcordanceSearchModel(self.queue) + self.top = Tk() + self._init_top(self.top) + self._init_menubar() + self._init_widgets(self.top) + self.load_corpus(self.model.DEFAULT_CORPUS) + self.after = self.top.after(POLL_INTERVAL, self._poll) + + def _init_top(self, top): + top.geometry('950x680+50+50') + top.title('NLTK Concordance Search') + top.bind('', self.destroy) + top.protocol('WM_DELETE_WINDOW', self.destroy) + top.minsize(950, 680) + + def _init_widgets(self, parent): + self.main_frame = Frame( + parent, dict(background=self._BACKGROUND_COLOUR, padx=1, pady=1, border=1) + ) + self._init_corpus_select(self.main_frame) + self._init_query_box(self.main_frame) + self._init_results_box(self.main_frame) + self._init_paging(self.main_frame) + self._init_status(self.main_frame) + self.main_frame.pack(fill='both', expand=True) + + def _init_menubar(self): + self._result_size = IntVar(self.top) + self._cntx_bf_len = IntVar(self.top) + self._cntx_af_len = IntVar(self.top) + menubar = Menu(self.top) + + filemenu = Menu(menubar, tearoff=0, borderwidth=0) + filemenu.add_command( + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-q' + ) + menubar.add_cascade(label='File', underline=0, menu=filemenu) + + editmenu = Menu(menubar, tearoff=0) + rescntmenu = Menu(editmenu, tearoff=0) + rescntmenu.add_radiobutton( + label='20', + variable=self._result_size, + underline=0, + value=20, + command=self.set_result_size, + ) + rescntmenu.add_radiobutton( + label='50', + variable=self._result_size, + underline=0, + value=50, + command=self.set_result_size, + ) + rescntmenu.add_radiobutton( + label='100', + variable=self._result_size, + underline=0, + value=100, + command=self.set_result_size, + ) + rescntmenu.invoke(1) + editmenu.add_cascade(label='Result Count', underline=0, menu=rescntmenu) + + cntxmenu = Menu(editmenu, tearoff=0) + cntxbfmenu = Menu(cntxmenu, tearoff=0) + cntxbfmenu.add_radiobutton( + label='60 characters', + variable=self._cntx_bf_len, + underline=0, + value=60, + command=self.set_cntx_bf_len, + ) + cntxbfmenu.add_radiobutton( + label='80 characters', + variable=self._cntx_bf_len, + underline=0, + value=80, + command=self.set_cntx_bf_len, + ) + cntxbfmenu.add_radiobutton( + label='100 characters', + variable=self._cntx_bf_len, + underline=0, + value=100, + command=self.set_cntx_bf_len, + ) + cntxbfmenu.invoke(1) + cntxmenu.add_cascade(label='Before', underline=0, menu=cntxbfmenu) + + cntxafmenu = Menu(cntxmenu, tearoff=0) + cntxafmenu.add_radiobutton( + label='70 characters', + variable=self._cntx_af_len, + underline=0, + value=70, + command=self.set_cntx_af_len, + ) + cntxafmenu.add_radiobutton( + label='90 characters', + variable=self._cntx_af_len, + underline=0, + value=90, + command=self.set_cntx_af_len, + ) + cntxafmenu.add_radiobutton( + label='110 characters', + variable=self._cntx_af_len, + underline=0, + value=110, + 
command=self.set_cntx_af_len, + ) + cntxafmenu.invoke(1) + cntxmenu.add_cascade(label='After', underline=0, menu=cntxafmenu) + + editmenu.add_cascade(label='Context', underline=0, menu=cntxmenu) + + menubar.add_cascade(label='Edit', underline=0, menu=editmenu) + + self.top.config(menu=menubar) + + def set_result_size(self, **kwargs): + self.model.result_count = self._result_size.get() + + def set_cntx_af_len(self, **kwargs): + self._char_after = self._cntx_af_len.get() + + def set_cntx_bf_len(self, **kwargs): + self._char_before = self._cntx_bf_len.get() + + def _init_corpus_select(self, parent): + innerframe = Frame(parent, background=self._BACKGROUND_COLOUR) + self.var = StringVar(innerframe) + self.var.set(self.model.DEFAULT_CORPUS) + Label( + innerframe, + justify=LEFT, + text=' Corpus: ', + background=self._BACKGROUND_COLOUR, + padx=2, + pady=1, + border=0, + ).pack(side='left') + + other_corpora = list(self.model.CORPORA.keys()).remove( + self.model.DEFAULT_CORPUS + ) + om = OptionMenu( + innerframe, + self.var, + self.model.DEFAULT_CORPUS, + command=self.corpus_selected, + *self.model.non_default_corpora() + ) + om['borderwidth'] = 0 + om['highlightthickness'] = 1 + om.pack(side='left') + innerframe.pack(side='top', fill='x', anchor='n') + + def _init_status(self, parent): + self.status = Label( + parent, + justify=LEFT, + relief=SUNKEN, + background=self._BACKGROUND_COLOUR, + border=0, + padx=1, + pady=0, + ) + self.status.pack(side='top', anchor='sw') + + def _init_query_box(self, parent): + innerframe = Frame(parent, background=self._BACKGROUND_COLOUR) + another = Frame(innerframe, background=self._BACKGROUND_COLOUR) + self.query_box = Entry(another, width=60) + self.query_box.pack(side='left', fill='x', pady=25, anchor='center') + self.search_button = Button( + another, + text='Search', + command=self.search, + borderwidth=1, + highlightthickness=1, + ) + self.search_button.pack(side='left', fill='x', pady=25, anchor='center') + self.query_box.bind('', self.search_enter_keypress_handler) + another.pack() + innerframe.pack(side='top', fill='x', anchor='n') + + def search_enter_keypress_handler(self, *event): + self.search() + + def _init_results_box(self, parent): + innerframe = Frame(parent) + i1 = Frame(innerframe) + i2 = Frame(innerframe) + vscrollbar = Scrollbar(i1, borderwidth=1) + hscrollbar = Scrollbar(i2, borderwidth=1, orient='horiz') + self.results_box = Text( + i1, + font=Font(family='courier', size='16'), + state='disabled', + borderwidth=1, + yscrollcommand=vscrollbar.set, + xscrollcommand=hscrollbar.set, + wrap='none', + width='40', + height='20', + exportselection=1, + ) + self.results_box.pack(side='left', fill='both', expand=True) + self.results_box.tag_config( + self._HIGHLIGHT_WORD_TAG, foreground=self._HIGHLIGHT_WORD_COLOUR + ) + self.results_box.tag_config( + self._HIGHLIGHT_LABEL_TAG, foreground=self._HIGHLIGHT_LABEL_COLOUR + ) + vscrollbar.pack(side='left', fill='y', anchor='e') + vscrollbar.config(command=self.results_box.yview) + hscrollbar.pack(side='left', fill='x', expand=True, anchor='w') + hscrollbar.config(command=self.results_box.xview) + # there is no other way of avoiding the overlap of scrollbars while using pack layout manager!!! 
+ Label(i2, text=' ', background=self._BACKGROUND_COLOUR).pack( + side='left', anchor='e' + ) + i1.pack(side='top', fill='both', expand=True, anchor='n') + i2.pack(side='bottom', fill='x', anchor='s') + innerframe.pack(side='top', fill='both', expand=True) + + def _init_paging(self, parent): + innerframe = Frame(parent, background=self._BACKGROUND_COLOUR) + self.prev = prev = Button( + innerframe, + text='Previous', + command=self.previous, + width='10', + borderwidth=1, + highlightthickness=1, + state='disabled', + ) + prev.pack(side='left', anchor='center') + self.next = next = Button( + innerframe, + text='Next', + command=self.__next__, + width='10', + borderwidth=1, + highlightthickness=1, + state='disabled', + ) + next.pack(side='right', anchor='center') + innerframe.pack(side='top', fill='y') + self.current_page = 0 + + def previous(self): + self.clear_results_box() + self.freeze_editable() + self.model.prev(self.current_page - 1) + + def __next__(self): + self.clear_results_box() + self.freeze_editable() + self.model.next(self.current_page + 1) + + def about(self, *e): + ABOUT = "NLTK Concordance Search Demo\n" + TITLE = 'About: NLTK Concordance Search Demo' + try: + from six.moves.tkinter_messagebox import Message + + Message(message=ABOUT, title=TITLE, parent=self.main_frame).show() + except: + ShowText(self.top, TITLE, ABOUT) + + def _bind_event_handlers(self): + self.top.bind(CORPUS_LOADED_EVENT, self.handle_corpus_loaded) + self.top.bind(SEARCH_TERMINATED_EVENT, self.handle_search_terminated) + self.top.bind(SEARCH_ERROR_EVENT, self.handle_search_error) + self.top.bind(ERROR_LOADING_CORPUS_EVENT, self.handle_error_loading_corpus) + + def _poll(self): + try: + event = self.queue.get(block=False) + except q.Empty: + pass + else: + if event == CORPUS_LOADED_EVENT: + self.handle_corpus_loaded(event) + elif event == SEARCH_TERMINATED_EVENT: + self.handle_search_terminated(event) + elif event == SEARCH_ERROR_EVENT: + self.handle_search_error(event) + elif event == ERROR_LOADING_CORPUS_EVENT: + self.handle_error_loading_corpus(event) + self.after = self.top.after(POLL_INTERVAL, self._poll) + + def handle_error_loading_corpus(self, event): + self.status['text'] = 'Error in loading ' + self.var.get() + self.unfreeze_editable() + self.clear_all() + self.freeze_editable() + + def handle_corpus_loaded(self, event): + self.status['text'] = self.var.get() + ' is loaded' + self.unfreeze_editable() + self.clear_all() + self.query_box.focus_set() + + def handle_search_terminated(self, event): + # todo: refactor the model such that it is less state sensitive + results = self.model.get_results() + self.write_results(results) + self.status['text'] = '' + if len(results) == 0: + self.status['text'] = 'No results found for ' + self.model.query + else: + self.current_page = self.model.last_requested_page + self.unfreeze_editable() + self.results_box.xview_moveto(self._FRACTION_LEFT_TEXT) + + def handle_search_error(self, event): + self.status['text'] = 'Error in query ' + self.model.query + self.unfreeze_editable() + + def corpus_selected(self, *args): + new_selection = self.var.get() + self.load_corpus(new_selection) + + def load_corpus(self, selection): + if self.model.selected_corpus != selection: + self.status['text'] = 'Loading ' + selection + '...' 
+ self.freeze_editable() + self.model.load_corpus(selection) + + def search(self): + self.current_page = 0 + self.clear_results_box() + self.model.reset_results() + query = self.query_box.get() + if len(query.strip()) == 0: + return + self.status['text'] = 'Searching for ' + query + self.freeze_editable() + self.model.search(query, self.current_page + 1) + + def write_results(self, results): + self.results_box['state'] = 'normal' + row = 1 + for each in results: + sent, pos1, pos2 = each[0].strip(), each[1], each[2] + if len(sent) != 0: + if pos1 < self._char_before: + sent, pos1, pos2 = self.pad(sent, pos1, pos2) + sentence = sent[pos1 - self._char_before : pos1 + self._char_after] + if not row == len(results): + sentence += '\n' + self.results_box.insert(str(row) + '.0', sentence) + word_markers, label_markers = self.words_and_labels(sent, pos1, pos2) + for marker in word_markers: + self.results_box.tag_add( + self._HIGHLIGHT_WORD_TAG, + str(row) + '.' + str(marker[0]), + str(row) + '.' + str(marker[1]), + ) + for marker in label_markers: + self.results_box.tag_add( + self._HIGHLIGHT_LABEL_TAG, + str(row) + '.' + str(marker[0]), + str(row) + '.' + str(marker[1]), + ) + row += 1 + self.results_box['state'] = 'disabled' + + def words_and_labels(self, sentence, pos1, pos2): + search_exp = sentence[pos1:pos2] + words, labels = [], [] + labeled_words = search_exp.split(' ') + index = 0 + for each in labeled_words: + if each == '': + index += 1 + else: + word, label = each.split('/') + words.append( + (self._char_before + index, self._char_before + index + len(word)) + ) + index += len(word) + 1 + labels.append( + (self._char_before + index, self._char_before + index + len(label)) + ) + index += len(label) + index += 1 + return words, labels + + def pad(self, sent, hstart, hend): + if hstart >= self._char_before: + return sent, hstart, hend + d = self._char_before - hstart + sent = ''.join([' '] * d) + sent + return sent, hstart + d, hend + d + + def destroy(self, *e): + if self.top is None: + return + self.top.after_cancel(self.after) + self.top.destroy() + self.top = None + + def clear_all(self): + self.query_box.delete(0, END) + self.model.reset_query() + self.clear_results_box() + + def clear_results_box(self): + self.results_box['state'] = 'normal' + self.results_box.delete("1.0", END) + self.results_box['state'] = 'disabled' + + def freeze_editable(self): + self.query_box['state'] = 'disabled' + self.search_button['state'] = 'disabled' + self.prev['state'] = 'disabled' + self.next['state'] = 'disabled' + + def unfreeze_editable(self): + self.query_box['state'] = 'normal' + self.search_button['state'] = 'normal' + self.set_paging_button_states() + + def set_paging_button_states(self): + if self.current_page == 0 or self.current_page == 1: + self.prev['state'] = 'disabled' + else: + self.prev['state'] = 'normal' + if self.model.has_more_pages(self.current_page): + self.next['state'] = 'normal' + else: + self.next['state'] = 'disabled' + + def fire_event(self, event): + # Firing an event so that rendering of widgets happen in the mainloop thread + self.top.event_generate(event, when='tail') + + def mainloop(self, *args, **kwargs): + if in_idle(): + return + self.top.mainloop(*args, **kwargs) + + +class ConcordanceSearchModel(object): + def __init__(self, queue): + self.queue = queue + self.CORPORA = _CORPORA + self.DEFAULT_CORPUS = _DEFAULT + self.selected_corpus = None + self.reset_query() + self.reset_results() + self.result_count = None + self.last_sent_searched = 0 + + def 
non_default_corpora(self): + copy = [] + copy.extend(list(self.CORPORA.keys())) + copy.remove(self.DEFAULT_CORPUS) + copy.sort() + return copy + + def load_corpus(self, name): + self.selected_corpus = name + self.tagged_sents = [] + runner_thread = self.LoadCorpus(name, self) + runner_thread.start() + + def search(self, query, page): + self.query = query + self.last_requested_page = page + self.SearchCorpus(self, page, self.result_count).start() + + def next(self, page): + self.last_requested_page = page + if len(self.results) < page: + self.search(self.query, page) + else: + self.queue.put(SEARCH_TERMINATED_EVENT) + + def prev(self, page): + self.last_requested_page = page + self.queue.put(SEARCH_TERMINATED_EVENT) + + def reset_results(self): + self.last_sent_searched = 0 + self.results = [] + self.last_page = None + + def reset_query(self): + self.query = None + + def set_results(self, page, resultset): + self.results.insert(page - 1, resultset) + + def get_results(self): + return self.results[self.last_requested_page - 1] + + def has_more_pages(self, page): + if self.results == [] or self.results[0] == []: + return False + if self.last_page is None: + return True + return page < self.last_page + + class LoadCorpus(threading.Thread): + def __init__(self, name, model): + threading.Thread.__init__(self) + self.model, self.name = model, name + + def run(self): + try: + ts = self.model.CORPORA[self.name]() + self.model.tagged_sents = [ + ' '.join(w + '/' + t for (w, t) in sent) for sent in ts + ] + self.model.queue.put(CORPUS_LOADED_EVENT) + except Exception as e: + print(e) + self.model.queue.put(ERROR_LOADING_CORPUS_EVENT) + + class SearchCorpus(threading.Thread): + def __init__(self, model, page, count): + self.model, self.count, self.page = model, count, page + threading.Thread.__init__(self) + + def run(self): + q = self.processed_query() + sent_pos, i, sent_count = [], 0, 0 + for sent in self.model.tagged_sents[self.model.last_sent_searched :]: + try: + m = re.search(q, sent) + except re.error: + self.model.reset_results() + self.model.queue.put(SEARCH_ERROR_EVENT) + return + if m: + sent_pos.append((sent, m.start(), m.end())) + i += 1 + if i > self.count: + self.model.last_sent_searched += sent_count - 1 + break + sent_count += 1 + if self.count >= len(sent_pos): + self.model.last_sent_searched += sent_count - 1 + self.model.last_page = self.page + self.model.set_results(self.page, sent_pos) + else: + self.model.set_results(self.page, sent_pos[:-1]) + self.model.queue.put(SEARCH_TERMINATED_EVENT) + + def processed_query(self): + new = [] + for term in self.model.query.split(): + term = re.sub(r'\.', r'[^/ ]', term) + if re.match('[A-Z]+$', term): + new.append(BOUNDARY + WORD_OR_TAG + '/' + term + BOUNDARY) + elif '/' in term: + new.append(BOUNDARY + term + BOUNDARY) + else: + new.append(BOUNDARY + term + '/' + WORD_OR_TAG + BOUNDARY) + return ' '.join(new) + + +def app(): + d = ConcordanceSearchView() + d.mainloop() + + +if __name__ == '__main__': + app() + +__all__ = ['app'] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/nemo_app.py b/venv.bak/lib/python3.7/site-packages/nltk/app/nemo_app.py new file mode 100644 index 0000000..639f767 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/app/nemo_app.py @@ -0,0 +1,174 @@ +# Finding (and Replacing) Nemo, Version 1.1, Aristide Grange 2006/06/06 +# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/496783 + +""" +Finding (and Replacing) Nemo + +Instant Regular Expressions +Created by Aristide Grange +""" +import 
re +import itertools + +from six.moves.tkinter import ( + Frame, + Label, + PhotoImage, + Scrollbar, + Text, + Tk, + SEL_FIRST, + SEL_LAST, +) + + +windowTitle = "Finding (and Replacing) Nemo" +initialFind = r"n(.*?)e(.*?)m(.*?)o" +initialRepl = r"M\1A\2K\3I" +initialText = """\ +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +""" +images = { + "FIND": "R0lGODlhMAAiAPcAMf/////37//35//n1v97Off///f/9/f37/fexvfOvfeEQvd7QvdrQvdrKfdaKfdSMfdSIe/v9+/v7+/v5+/n3u/e1u/Wxu/Gre+1lO+tnO+thO+Ua+97Y+97Oe97Me9rOe9rMe9jOe9jMe9jIe9aMefe5+fe3ufezuece+eEWudzQudaIedSIedKMedKIedCKedCId7e1t7Wzt7Oxt7Gvd69vd69rd61pd6ljN6UjN6Ue96EY95zY95rUt5rQt5jMd5SId5KIdbn59be3tbGztbGvda1rdaEa9Z7a9Z7WtZzQtZzOdZzMdZjMdZaQtZSOdZSMdZKMdZCKdZCGNY5Ic7W1s7Oxs7Gtc69xs69tc69rc6tpc6llM6clM6cjM6Ue86EY85zWs5rSs5SKc5KKc5KGMa1tcatrcalvcalnMaUpcZ7c8ZzMcZrUsZrOcZrMcZaQsZSOcZSMcZKMcZCKcZCGMYxIcYxGL3Gxr21tb21rb2lpb2crb2cjL2UnL2UlL2UhL2Ec717Wr17Ur1zWr1rMb1jUr1KMb1KIb1CIb0xGLWlrbWlpbWcnLWEe7V7c7VzY7VzUrVSKbVKMbVCMbVCIbU5KbUxIbUxEK2lta2lpa2clK2UjK2MnK2MlK2Ea617e61za61rY61rMa1jSq1aUq1aSq1SQq1KKa0xEKWlnKWcnKWUnKWUhKWMjKWEa6Vza6VrWqVjMaVaUqVaKaVSMaVCMaU5KaUxIaUxGJyclJyMe5yElJyEhJx7e5x7c5xrOZxaQpxSOZxKQpw5IZSMhJSEjJR7c5Rre5RrY5RrUpRSQpRSKZRCOZRCKZQxKZQxIYyEhIx7hIxza4xzY4xrc4xjUoxaa4xaUoxSSoxKQoxCMYw5GIR7c4Rzc4Rre4RjY4RjWoRaa4RSWoRSUoRSMYRKQoRCOYQ5KYQxIXtra3taY3taSntKOXtCMXtCKXNCMXM5MXMxIWtSUmtKSmtKQmtCOWs5MWs5KWs5IWNCKWMxIVIxKUIQCDkhGAAAACH+AS4ALAAAAAAwACIAAAj/AAEIHEiwoMGDCBMqXMiwoUOHMqxIeEiRoZVp7cpZ29WrF4WKIAd208dGAQEVbiTVChUjZMU9+pYQmPmBZpxgvVw+nDdKwQICNVcIXQEkTgKdDdUJ+/nggVAXK1xI3TEA6UIr2uJ8iBqka1cXXTlkqGoVYRZ7iLyqBSs0iiEtZQVKiDGxBI1u3NR6lUpGDKg8MSgEQCphU7Z22vhg0dILXRCpYLuSCcYJT4wqXASBQaBzU7klHxC127OHD7ZDJFpERqRt0x5OnwQpmZmCLEhrbgg4WIHO1RY+nbQ9WRGEDJlmnXwJ+9FBgXMCIzYMVijBBgYMFxIMqJBMSc0Ht7qh/+Gjpte2rnYsYeNlasWIBgQ6yCewIoPCCp/cyP/wgUGbXVu0QcADZNBDnh98gHMLGXYQUw02w61QU3wdbNWDbQVVIIhMMwFF1DaZiPLBAy7E04kafrjSizaK3LFNNc0AAYRQDsAHHQlJ2IDQJ2zE1+EKDjiAijShkECCC8Qgw4cr7ZgyzC2WaHPNLWWoNeNWPiRAw0QFWQFMhz8C+QQ20yAiVSrY+MGOJCsccsst2GCzoHFxxEGGC+8hgs0MB2kyCpgzrUDCbs1Es41UdtATHFFkWELMOtsoQsYcgvRRQw5RSDgGOjZMR1AvPQIq6KCo9AKOJWDd48owQlHR4DXEKP9iyRrK+DNNBTu4RwIPFeTAGUG7hAomkA84gEg1m6ADljy9PBKGGJY4ig0xlsTBRSn98FOFDUC8pwQOPkgHbCGAzhTkA850s0c7j6Hjix9+gBIrMXLeAccWXUCyiRBcBEECdEJ98KtAqtBCYQc/OvDENnl4gYpUxISCIjjzylkGGV9okYUVNogRhAOBuuAEhjG08wOgDYzAgA5bCjIoCe5uwUk80RKTTSppPREGGGCIISOQ9AXBg6cC6WIywvCpoMHAocRBwhP4bHLFLujYkV42xNxBRhAyGrc113EgYtRBerDDDHMoDCyQEL5sE083EkgwQyBhxGFHMM206DUixGxmE0wssbQjCQ4JCaFKFwgQTVAVVhQUwAVPIFJKrHfYYRwi6OCDzzuIJIFhXAD0EccPsYRiSyqKSDpFcWSMIcZRoBMkQyA2BGZDIKSYcggih8TRRg4VxM5QABVYYLxgwiev/PLMCxQQADs=", + "find": 
"R0lGODlhMAAiAPQAMf////f39+/v7+fn597e3tbW1s7OzsbGxr29vbW1ta2traWlpZycnJSUlIyMjISEhHt7e3Nzc2tra2NjY1paWlJSUkpKSkJCQjk5OSkpKRgYGAAAAAAAAAAAAAAAAAAAACH+AS4ALAAAAAAwACIAAAX/ICCOZGmeaKquY2AGLiuvMCAUBuHWc48Kh0iFInEYCb4kSQCxPBiMxkMigRQEgJiSFVBYHNGG0RiZOHjblWAiiY4fkDhEYoBp06dAWfyAQyKAgAwDaHgnB0RwgYASgQ0IhDuGJDAIFhMRVFSLEX8QCJJ4AQM5AgQHTZqqjBAOCQQEkWkCDRMUFQsICQ4Vm5maEwwHOAsPDTpKMAsUDlO4CssTcb+2DAp8YGCyNFoCEsZwFQ3QDRTTVBRS0g1QbgsCd5QAAwgIBwYFAwStzQ8UEdCKVchky0yVBw7YuXkAKt4IAg74vXHVagqFBRgXSCAyYWAVCH0SNhDTitCJfSL5/4RbAPKPhQYYjVCYYAvCP0BxEDaD8CheAAHNwqh8MMGPSwgLeJWhwHSjqkYI+xg4MMCEgQjtRvZ7UAYCpghMF7CxONOWJkYR+rCpY4JlVpVxKDwYWEactKW9mhYRtqCTgwgWEMArERSK1j5q//6T8KXonFsShpiJkAECgQYVjykooCVA0JGHEWNiYCHThTFeb3UkoiCCBgwGEKQ1kuAJlhFwhA71h5SukwUM5qqeCSGBgicEWkfNiWSERtBad4JNIBaQBaQah1ToyGZBAnsIuIJs1qnqiAIVjIE2gnAB1T5x0icgzXT79ipgMOOEH6HBbREBMJCeGEY08IoLAkzB1YYFwjxwSUGSNULQJnNUwRYlCcyEkALIxECAP9cNMMABYpRhy3ZsSLDaR70oUAiABGCkAxowCGCAAfDYIQACXoElGRsdXWDBdg2Y90IWktDYGYAB9PWHP0PMdFZaF07SQgAFNDAMAQg0QA1UC8xoZQl22JGFPgWkOUCOL1pZQyhjxinnnCWEAAA7", + "REPL": "R0lGODlhMAAjAPcAMf/////3//+lOf+UKf+MEPf///f39/f35/fv7/ecQvecOfecKfeUIfeUGPeUEPeUCPeMAO/37+/v9+/v3u/n3u/n1u+9jO+9c++1hO+ta++tY++tWu+tUu+tSu+lUu+lQu+lMe+UMe+UKe+UGO+UEO+UAO+MCOfv5+fvxufn7+fn5+fnzue9lOe9c+e1jOe1e+e1c+e1a+etWuetUuelQuecOeeUUueUCN7e597e3t7e1t7ezt7evd7Wzt7Oxt7Ovd7Otd7Opd7OnN7Gtd7Gpd69lN61hN6ta96lStbextberdbW3tbWztbWxtbOvdbOrda1hNalUtaECM7W1s7Ozs7Oxs7Otc7Gxs7Gvc69tc69rc69pc61jM6lc8bWlMbOvcbGxsbGpca9tca9pca1nMaMAL3OhL3Gtb21vb21tb2tpb2tnL2tlLW9tbW9pbW9e7W1pbWtjLWcKa21nK2tra2tnK2tlK2lpa2llK2ljK2le6WlnKWljKWUe6WUc6WUY5y1QpyclJycjJychJyUc5yMY5StY5SUe5SMhJSMe5SMc5SMWpSEa5SESoyUe4yMhIyEY4SlKYScWoSMe4SEe4SEa4R7c4R7Y3uMY3uEe3t7e3t7c3tza3tzY3trKXtjIXOcAHOUMXOEY3Nzc3NzWnNrSmulCGuUMWuMGGtzWmtrY2taMWtaGGOUOWOMAGNzUmNjWmNjSmNaUmNaQmNaOWNaIWNSCFqcAFpjUlpSMVpSIVpSEFpKKVKMAFJSUlJSSlJSMVJKMVJKGFJKAFI5CEqUAEqEAEpzQkpKIUpCQkpCGEpCAEo5EEoxAEJjOUJCOUJCAEI5IUIxADl7ADlaITlCOTkxMTkxKTkxEDkhADFzADFrGDE5OTExADEpEClrCCkxKSkpKSkpISkpACkhCCkhACkYACFzACFrACEhCCEYGBhjEBhjABghABgYCBgYABgQEBgQABAQABAIAAhjAAhSAAhKAAgIEAgICABaAABCAAAhAAAQAAAIAAAAAAAAACH+AS4ALAAAAAAwACMAAAj/AAEIHEiwoMGDCBMqXMiwocOHAA4cgEixIIIJO3JMmAjADIqKFU/8MHIkg5EgYXx4iaTkI0iHE6wE2TCggYILQayEAgXIy8uGCKz8sDCAQAMRG3iEcXULlJkJPwli3OFjh9UdYYLE6NBhA04UXHoVA2XoTZgfPKBWlOBDphAWOdfMcfMDLloeO3hIMjbWVCQ5Fn6E2UFxgpsgFjYIEBADrZU6luqEEfqjTqpt54z1uuWqTIcgWAk7PECGzIUQDRosDmxlUrVJkwQJkqVuX71v06YZcyUlROAdbnLAJKPFyAYFAhoMwFlnEh0rWkpz8raPHm7dqKKc/KFFkBUrVn1M/ziBcEIeLUEQI8/AYk0i9Be4sqjsrN66c9/OnbobhpR3HkIUoZ0WVnBE0AGLFKKFD0HAFUQe77HQgQI1hRBDEHMcY0899bBzihZuCPILJD8EccEGGzwAQhFaUHHQH82sUkgeNHISDBk8WCCCcsqFUEQWmOyzjz3sUGNNOO5Y48YOEgowAAQhnBScQV00k82V47jzjy9CXZBcjziFoco//4CDiSOyhPMPLkJZkEBqJmRQxA9uZGEQD8Ncmc044/zzDF2IZQBCCDYE8QMZz/iiCSx0neHGI7BIhhhNn+1gxRpokEcQAp7seWU7/PwTyxqG/iCEEVzQmUombnDRxRExzP9nBR2PCKLFD3UJwcMPa/SRqUGNWJmNOVn+M44ukMRB4KGcWDNLVhuUMEIJAlzwA3DJBHMJIXm4sQYhqyxCRQQGLSIsn1qac2UzysQSyzX/hLMGD0F0IMCODYAQBA9W/PKPOcRiw0wzwxTiokF9dLMnuv/Mo+fCZF7jBr0xbDDCACWEYKgb1vzjDp/jZNOMLX0IZxAKq2TZTjtaOjwOsXyG+s8sZJTIQsUdIGHoJPf8w487QI/TDSt5mGwQFZxc406o8HiDJchk/ltLHpSlJwSvz5DpTjvmuGNOM57koelBOaAhiCaaPBLL0wwbm003peRBnBZqJMJL1ECz/HXYYx/NdAIOOVCxQyLorswymU93o0wuwfAiTDNR/xz0MLXU0XdCE+UwSTRZAq2lsSATu+4wkGvt+TjNzPLrQyegAUku2Hij5cd8LhxyM8QIg4w18HgcdC6BTBFSDmfQqsovttveDcG7lFLHI75cE841sARCxeWsnxC4G9HADPK6ywzDCRqBo0EHHWhMgT1IJzziNci1N7PMKnSYfML96/90AiJKey/0KtbLX1QK0rrNnQ541xugQ7SHhkXBghN0SKACWRc4KlAhBwKcIOYymJCAAAA7", + "repl": 
"R0lGODlhMAAjAPQAMf////f39+/v7+fn597e3tbW1s7OzsbGxr29vbW1ta2traWlpZycnJSUlIyMjISEhHt7e3Nzc2tra2NjY1paWlJSUkpKSkJCQjk5OTExMSkpKSEhIRgYGBAQEAgICAAAACH+AS4ALAAAAAAwACMAAAX/ICCOZGmeaKqubOu+gCDANBkIQ1EMQhAghFptYEAkEgjEwXBo7ISvweGgWCwUysPjwTgEoCafTySYIhYMxgLBjEQgCULvCw0QdAZdoVhUIJUFChISEAxYeQM1N1OMTAp+UwZ5eA4TEhFbDWYFdC4ECVMJjwl5BwsQa0umEhUVlhESDgqlBp0rAn5nVpBMDxeZDRQbHBgWFBSWDgtLBnFjKwRYCI9VqQsPs0YKEcMXFq0UEalFDWx4BAO2IwPjppAKDkrTWKYUGd7fEJJFEZpM00cOzCgh4EE8SaoWxKNixQooBRMyZMBwAYIRBhUgLDGS4MoBJeoANMhAgQsaCRZm/5lqaCUJhA4cNHjDoKEDBlJUHqkBlYBTiQUZNGjYMMxDhY3VWk6R4MEDBoMUak5AqoYBqANIBo4wcGGDUKIeLlzVZmWJggsVIkwAZaQSA3kdZzlKkIiEAAlDvW5oOkEBs488JTw44oeUIwdvVTFTUK7uiAAPgubt8GFDhQepqETAQCFU1UMGzlqAgFhUsAcCS0AO6lUDhw8xNRSbENGDhgWSHjWUe6ACbKITizmopZoBa6KvOwj9uuHDhwxyj3xekgDDhw5EvWKo0IB4iQLCOCC/njc7ZQ8UeGvza+ABZZgcxJNc4FO1gc0cOsCUrHevc8tdIMTIAhc4F198G2Qwwd8CBIQUAwEINABBBJUwR9R5wElgVRLwWODBBx4cGB8GEzDQIAo33CGJA8gh+JoH/clUgQU0YvDhdfmJdwEFC6Sjgg8yEPAABsPkh2F22cl2AQbn6QdTghTQ5eAJAQyQAAQV0MSBB9gRVZ4GE1mw5JZOAmiAVi1UWcAZDrDyZXYTeaOhA/bIVuIBPtKQ4h7ViYekUPdcEAEbzTzCRp5CADmAAwj+ORGPBcgwAAHo9ABGCYtm0ChwFHShlRiXhmHlkAcCiOeUodqQw5W0oXLAiamy4MOkjOyAaqxUymApDCEAADs=", +} +colors = ["#FF7B39", "#80F121"] +emphColors = ["#DAFC33", "#F42548"] +fieldParams = { + "height": 3, + "width": 70, + "font": ("monaco", 14), + "highlightthickness": 0, + "borderwidth": 0, + "background": "white", +} +textParams = { + "bg": "#F7E0D4", + "fg": "#2321F1", + "highlightthickness": 0, + "width": 1, + "height": 10, + "font": ("verdana", 16), + "wrap": "word", +} + + +class Zone: + def __init__(self, image, initialField, initialText): + frm = Frame(root) + frm.config(background="white") + self.image = PhotoImage(format='gif', data=images[image.upper()]) + self.imageDimmed = PhotoImage(format='gif', data=images[image]) + self.img = Label(frm) + self.img.config(borderwidth=0) + self.img.pack(side="left") + self.fld = Text(frm, **fieldParams) + self.initScrollText(frm, self.fld, initialField) + frm = Frame(root) + self.txt = Text(frm, **textParams) + self.initScrollText(frm, self.txt, initialText) + for i in range(2): + self.txt.tag_config(colors[i], background=colors[i]) + self.txt.tag_config("emph" + colors[i], foreground=emphColors[i]) + + def initScrollText(self, frm, txt, contents): + scl = Scrollbar(frm) + scl.config(command=txt.yview) + scl.pack(side="right", fill="y") + txt.pack(side="left", expand=True, fill="x") + txt.config(yscrollcommand=scl.set) + txt.insert("1.0", contents) + frm.pack(fill="x") + Frame(height=2, bd=1, relief="ridge").pack(fill="x") + + def refresh(self): + self.colorCycle = itertools.cycle(colors) + try: + self.substitute() + self.img.config(image=self.image) + except re.error: + self.img.config(image=self.imageDimmed) + + +class FindZone(Zone): + def addTags(self, m): + color = next(self.colorCycle) + self.txt.tag_add(color, "1.0+%sc" % m.start(), "1.0+%sc" % m.end()) + try: + self.txt.tag_add( + "emph" + color, "1.0+%sc" % m.start("emph"), "1.0+%sc" % m.end("emph") + ) + except: + pass + + def substitute(self, *args): + for color in colors: + self.txt.tag_remove(color, "1.0", "end") + self.txt.tag_remove("emph" + color, "1.0", "end") + self.rex = re.compile("") # default value in case of misformed regexp + self.rex = re.compile(self.fld.get("1.0", "end")[:-1], re.MULTILINE) + try: + re.compile("(?P%s)" % self.fld.get(SEL_FIRST, SEL_LAST)) + self.rexSel = re.compile( + "%s(?P%s)%s" + % ( + self.fld.get("1.0", SEL_FIRST), + self.fld.get(SEL_FIRST, SEL_LAST), + self.fld.get(SEL_LAST, "end")[:-1], + ), + 
re.MULTILINE, + ) + except: + self.rexSel = self.rex + self.rexSel.sub(self.addTags, self.txt.get("1.0", "end")) + + +class ReplaceZone(Zone): + def addTags(self, m): + s = sz.rex.sub(self.repl, m.group()) + self.txt.delete( + "1.0+%sc" % (m.start() + self.diff), "1.0+%sc" % (m.end() + self.diff) + ) + self.txt.insert("1.0+%sc" % (m.start() + self.diff), s, next(self.colorCycle)) + self.diff += len(s) - (m.end() - m.start()) + + def substitute(self): + self.txt.delete("1.0", "end") + self.txt.insert("1.0", sz.txt.get("1.0", "end")[:-1]) + self.diff = 0 + self.repl = rex0.sub(r"\\g<\1>", self.fld.get("1.0", "end")[:-1]) + sz.rex.sub(self.addTags, sz.txt.get("1.0", "end")[:-1]) + + +def launchRefresh(_): + sz.fld.after_idle(sz.refresh) + rz.fld.after_idle(rz.refresh) + + +def app(): + global root, sz, rz, rex0 + root = Tk() + root.resizable(height=False, width=True) + root.title(windowTitle) + root.minsize(width=250, height=0) + sz = FindZone("find", initialFind, initialText) + sz.fld.bind("", launchRefresh) + sz.fld.bind("", launchRefresh) + sz.fld.bind("", launchRefresh) + sz.rexSel = re.compile("") + rz = ReplaceZone("repl", initialRepl, "") + rex0 = re.compile(r"(?", launchRefresh) + launchRefresh(None) + root.mainloop() + + +if __name__ == '__main__': + app() + +__all__ = ['app'] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/rdparser_app.py b/venv.bak/lib/python3.7/site-packages/nltk/app/rdparser_app.py new file mode 100644 index 0000000..5d3054d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/app/rdparser_app.py @@ -0,0 +1,1053 @@ +# Natural Language Toolkit: Recursive Descent Parser Application +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +A graphical tool for exploring the recursive descent parser. + +The recursive descent parser maintains a tree, which records the +structure of the portion of the text that has been parsed. It uses +CFG productions to expand the fringe of the tree, and matches its +leaves against the text. Initially, the tree contains the start +symbol ("S"). It is shown in the main canvas, to the right of the +list of available expansions. + +The parser builds up a tree structure for the text using three +operations: + + - "expand" uses a CFG production to add children to a node on the + fringe of the tree. + - "match" compares a leaf in the tree to a text token. + - "backtrack" returns the tree to its state before the most recent + expand or match operation. + +The parser maintains a list of tree locations called a "frontier" to +remember which nodes have not yet been expanded and which leaves have +not yet been matched against the text. The leftmost frontier node is +shown in green, and the other frontier nodes are shown in blue. The +parser always performs expand and match operations on the leftmost +element of the frontier. + +You can control the parser's operation by using the "expand," "match," +and "backtrack" buttons; or you can use the "step" button to let the +parser automatically decide which operation to apply. The parser uses +the following rules to decide which operation to apply: + + - If the leftmost frontier element is a token, try matching it. + - If the leftmost frontier element is a node, try expanding it with + the first untried expansion. + - Otherwise, backtrack. + +The "expand" button applies the untried expansion whose CFG production +is listed earliest in the grammar. 
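The expand / match / backtrack cycle described above can also be driven without the GUI, directly on the SteppingRecursiveDescentParser that this application wraps. A minimal sketch of the same step priority (expand, then match, then backtrack), using a toy grammar chosen purely for illustration:

    from nltk.grammar import CFG
    from nltk.parse import SteppingRecursiveDescentParser

    grammar = CFG.fromstring("""
        S -> NP VP
        NP -> 'the' N
        VP -> V NP
        N -> 'dog' | 'man'
        V -> 'saw'
    """)

    parser = SteppingRecursiveDescentParser(grammar)
    parser.initialize('the dog saw the man'.split())

    # Same priority the application uses: expand, then match, then backtrack.
    while not parser.currently_complete():
        if parser.expand() is not None:
            continue
        if parser.untried_match() and parser.match() is not None:
            continue
        if not parser.backtrack():
            break                      # no (further) parse is possible

    print(parser.tree())
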
To manually choose which expansion +to apply, click on a CFG production from the list of available +expansions, on the left side of the main window. + +The "autostep" button will let the parser continue applying +applications to the tree until it reaches a complete parse. You can +cancel an autostep in progress at any time by clicking on the +"autostep" button again. + +Keyboard Shortcuts:: + [Space]\t Perform the next expand, match, or backtrack operation + [a]\t Step through operations until the next complete parse + [e]\t Perform an expand operation + [m]\t Perform a match operation + [b]\t Perform a backtrack operation + [Delete]\t Reset the parser + [g]\t Show/hide available expansions list + [h]\t Help + [Ctrl-p]\t Print + [q]\t Quit +""" +from __future__ import division + +from six.moves.tkinter_font import Font +from six.moves.tkinter import Listbox, IntVar, Button, Frame, Label, Menu, Scrollbar, Tk + +from nltk.tree import Tree +from nltk.util import in_idle +from nltk.parse import SteppingRecursiveDescentParser +from nltk.draw.util import TextWidget, ShowText, CanvasFrame, EntryDialog +from nltk.draw import CFGEditor, TreeSegmentWidget, tree_to_treesegment + + +class RecursiveDescentApp(object): + """ + A graphical tool for exploring the recursive descent parser. The tool + displays the parser's tree and the remaining text, and allows the + user to control the parser's operation. In particular, the user + can expand subtrees on the frontier, match tokens on the frontier + against the text, and backtrack. A "step" button simply steps + through the parsing process, performing the operations that + ``RecursiveDescentParser`` would use. + """ + + def __init__(self, grammar, sent, trace=0): + self._sent = sent + self._parser = SteppingRecursiveDescentParser(grammar, trace) + + # Set up the main window. + self._top = Tk() + self._top.title('Recursive Descent Parser Application') + + # Set up key bindings. + self._init_bindings() + + # Initialize the fonts. + self._init_fonts(self._top) + + # Animations. animating_lock is a lock to prevent the demo + # from performing new operations while it's animating. + self._animation_frames = IntVar(self._top) + self._animation_frames.set(5) + self._animating_lock = 0 + self._autostep = 0 + + # The user can hide the grammar. + self._show_grammar = IntVar(self._top) + self._show_grammar.set(1) + + # Create the basic frames. + self._init_menubar(self._top) + self._init_buttons(self._top) + self._init_feedback(self._top) + self._init_grammar(self._top) + self._init_canvas(self._top) + + # Initialize the parser. + self._parser.initialize(self._sent) + + # Resize callback + self._canvas.bind('', self._configure) + + ######################################### + ## Initialization Helpers + ######################################### + + def _init_fonts(self, root): + # See: + self._sysfont = Font(font=Button()["font"]) + root.option_add("*Font", self._sysfont) + + # TWhat's our font size (default=same as sysfont) + self._size = IntVar(root) + self._size.set(self._sysfont.cget('size')) + + self._boldfont = Font(family='helvetica', weight='bold', size=self._size.get()) + self._font = Font(family='helvetica', size=self._size.get()) + if self._size.get() < 0: + big = self._size.get() - 2 + else: + big = self._size.get() + 2 + self._bigfont = Font(family='helvetica', weight='bold', size=big) + + def _init_grammar(self, parent): + # Grammar view. 
+ self._prodframe = listframe = Frame(parent) + self._prodframe.pack(fill='both', side='left', padx=2) + self._prodlist_label = Label( + self._prodframe, font=self._boldfont, text='Available Expansions' + ) + self._prodlist_label.pack() + self._prodlist = Listbox( + self._prodframe, + selectmode='single', + relief='groove', + background='white', + foreground='#909090', + font=self._font, + selectforeground='#004040', + selectbackground='#c0f0c0', + ) + + self._prodlist.pack(side='right', fill='both', expand=1) + + self._productions = list(self._parser.grammar().productions()) + for production in self._productions: + self._prodlist.insert('end', (' %s' % production)) + self._prodlist.config(height=min(len(self._productions), 25)) + + # Add a scrollbar if there are more than 25 productions. + if len(self._productions) > 25: + listscroll = Scrollbar(self._prodframe, orient='vertical') + self._prodlist.config(yscrollcommand=listscroll.set) + listscroll.config(command=self._prodlist.yview) + listscroll.pack(side='left', fill='y') + + # If they select a production, apply it. + self._prodlist.bind('<>', self._prodlist_select) + + def _init_bindings(self): + # Key bindings are a good thing. + self._top.bind('', self.destroy) + self._top.bind('', self.destroy) + self._top.bind('', self.destroy) + self._top.bind('e', self.expand) + # self._top.bind('', self.expand) + # self._top.bind('', self.expand) + self._top.bind('m', self.match) + self._top.bind('', self.match) + self._top.bind('', self.match) + self._top.bind('b', self.backtrack) + self._top.bind('', self.backtrack) + self._top.bind('', self.backtrack) + self._top.bind('', self.backtrack) + self._top.bind('', self.backtrack) + self._top.bind('a', self.autostep) + # self._top.bind('', self.autostep) + self._top.bind('', self.autostep) + self._top.bind('', self.cancel_autostep) + self._top.bind('', self.step) + self._top.bind('', self.reset) + self._top.bind('', self.postscript) + # self._top.bind('', self.help) + # self._top.bind('', self.help) + self._top.bind('', self.help) + self._top.bind('', self.help) + # self._top.bind('', self.toggle_grammar) + # self._top.bind('', self.toggle_grammar) + # self._top.bind('', self.toggle_grammar) + self._top.bind('', self.edit_grammar) + self._top.bind('', self.edit_sentence) + + def _init_buttons(self, parent): + # Set up the frames. + self._buttonframe = buttonframe = Frame(parent) + buttonframe.pack(fill='none', side='bottom', padx=3, pady=2) + Button( + buttonframe, + text='Step', + background='#90c0d0', + foreground='black', + command=self.step, + ).pack(side='left') + Button( + buttonframe, + text='Autostep', + background='#90c0d0', + foreground='black', + command=self.autostep, + ).pack(side='left') + Button( + buttonframe, + text='Expand', + underline=0, + background='#90f090', + foreground='black', + command=self.expand, + ).pack(side='left') + Button( + buttonframe, + text='Match', + underline=0, + background='#90f090', + foreground='black', + command=self.match, + ).pack(side='left') + Button( + buttonframe, + text='Backtrack', + underline=0, + background='#f0a0a0', + foreground='black', + command=self.backtrack, + ).pack(side='left') + # Replace autostep... 
+ + # self._autostep_button = Button(buttonframe, text='Autostep', + # underline=0, command=self.autostep) + # self._autostep_button.pack(side='left') + + def _configure(self, event): + self._autostep = 0 + (x1, y1, x2, y2) = self._cframe.scrollregion() + y2 = event.height - 6 + self._canvas['scrollregion'] = '%d %d %d %d' % (x1, y1, x2, y2) + self._redraw() + + def _init_feedback(self, parent): + self._feedbackframe = feedbackframe = Frame(parent) + feedbackframe.pack(fill='x', side='bottom', padx=3, pady=3) + self._lastoper_label = Label( + feedbackframe, text='Last Operation:', font=self._font + ) + self._lastoper_label.pack(side='left') + lastoperframe = Frame(feedbackframe, relief='sunken', border=1) + lastoperframe.pack(fill='x', side='right', expand=1, padx=5) + self._lastoper1 = Label( + lastoperframe, foreground='#007070', background='#f0f0f0', font=self._font + ) + self._lastoper2 = Label( + lastoperframe, + anchor='w', + width=30, + foreground='#004040', + background='#f0f0f0', + font=self._font, + ) + self._lastoper1.pack(side='left') + self._lastoper2.pack(side='left', fill='x', expand=1) + + def _init_canvas(self, parent): + self._cframe = CanvasFrame( + parent, + background='white', + # width=525, height=250, + closeenough=10, + border=2, + relief='sunken', + ) + self._cframe.pack(expand=1, fill='both', side='top', pady=2) + canvas = self._canvas = self._cframe.canvas() + + # Initially, there's no tree or text + self._tree = None + self._textwidgets = [] + self._textline = None + + def _init_menubar(self, parent): + menubar = Menu(parent) + + filemenu = Menu(menubar, tearoff=0) + filemenu.add_command( + label='Reset Parser', underline=0, command=self.reset, accelerator='Del' + ) + filemenu.add_command( + label='Print to Postscript', + underline=0, + command=self.postscript, + accelerator='Ctrl-p', + ) + filemenu.add_command( + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-x' + ) + menubar.add_cascade(label='File', underline=0, menu=filemenu) + + editmenu = Menu(menubar, tearoff=0) + editmenu.add_command( + label='Edit Grammar', + underline=5, + command=self.edit_grammar, + accelerator='Ctrl-g', + ) + editmenu.add_command( + label='Edit Text', + underline=5, + command=self.edit_sentence, + accelerator='Ctrl-t', + ) + menubar.add_cascade(label='Edit', underline=0, menu=editmenu) + + rulemenu = Menu(menubar, tearoff=0) + rulemenu.add_command( + label='Step', underline=1, command=self.step, accelerator='Space' + ) + rulemenu.add_separator() + rulemenu.add_command( + label='Match', underline=0, command=self.match, accelerator='Ctrl-m' + ) + rulemenu.add_command( + label='Expand', underline=0, command=self.expand, accelerator='Ctrl-e' + ) + rulemenu.add_separator() + rulemenu.add_command( + label='Backtrack', underline=0, command=self.backtrack, accelerator='Ctrl-b' + ) + menubar.add_cascade(label='Apply', underline=0, menu=rulemenu) + + viewmenu = Menu(menubar, tearoff=0) + viewmenu.add_checkbutton( + label="Show Grammar", + underline=0, + variable=self._show_grammar, + command=self._toggle_grammar, + ) + viewmenu.add_separator() + viewmenu.add_radiobutton( + label='Tiny', + variable=self._size, + underline=0, + value=10, + command=self.resize, + ) + viewmenu.add_radiobutton( + label='Small', + variable=self._size, + underline=0, + value=12, + command=self.resize, + ) + viewmenu.add_radiobutton( + label='Medium', + variable=self._size, + underline=0, + value=14, + command=self.resize, + ) + viewmenu.add_radiobutton( + label='Large', + variable=self._size, + 
underline=0, + value=18, + command=self.resize, + ) + viewmenu.add_radiobutton( + label='Huge', + variable=self._size, + underline=0, + value=24, + command=self.resize, + ) + menubar.add_cascade(label='View', underline=0, menu=viewmenu) + + animatemenu = Menu(menubar, tearoff=0) + animatemenu.add_radiobutton( + label="No Animation", underline=0, variable=self._animation_frames, value=0 + ) + animatemenu.add_radiobutton( + label="Slow Animation", + underline=0, + variable=self._animation_frames, + value=10, + accelerator='-', + ) + animatemenu.add_radiobutton( + label="Normal Animation", + underline=0, + variable=self._animation_frames, + value=5, + accelerator='=', + ) + animatemenu.add_radiobutton( + label="Fast Animation", + underline=0, + variable=self._animation_frames, + value=2, + accelerator='+', + ) + menubar.add_cascade(label="Animate", underline=1, menu=animatemenu) + + helpmenu = Menu(menubar, tearoff=0) + helpmenu.add_command(label='About', underline=0, command=self.about) + helpmenu.add_command( + label='Instructions', underline=0, command=self.help, accelerator='F1' + ) + menubar.add_cascade(label='Help', underline=0, menu=helpmenu) + + parent.config(menu=menubar) + + ######################################### + ## Helper + ######################################### + + def _get(self, widget, treeloc): + for i in treeloc: + widget = widget.subtrees()[i] + if isinstance(widget, TreeSegmentWidget): + widget = widget.label() + return widget + + ######################################### + ## Main draw procedure + ######################################### + + def _redraw(self): + canvas = self._canvas + + # Delete the old tree, widgets, etc. + if self._tree is not None: + self._cframe.destroy_widget(self._tree) + for twidget in self._textwidgets: + self._cframe.destroy_widget(twidget) + if self._textline is not None: + self._canvas.delete(self._textline) + + # Draw the tree. + helv = ('helvetica', -self._size.get()) + bold = ('helvetica', -self._size.get(), 'bold') + attribs = { + 'tree_color': '#000000', + 'tree_width': 2, + 'node_font': bold, + 'leaf_font': helv, + } + tree = self._parser.tree() + self._tree = tree_to_treesegment(canvas, tree, **attribs) + self._cframe.add_widget(self._tree, 30, 5) + + # Draw the text. + helv = ('helvetica', -self._size.get()) + bottom = y = self._cframe.scrollregion()[3] + self._textwidgets = [ + TextWidget(canvas, word, font=self._font) for word in self._sent + ] + for twidget in self._textwidgets: + self._cframe.add_widget(twidget, 0, 0) + twidget.move(0, bottom - twidget.bbox()[3] - 5) + y = min(y, twidget.bbox()[1]) + + # Draw a line over the text, to separate it from the tree. + self._textline = canvas.create_line(-5000, y - 5, 5000, y - 5, dash='.') + + # Highlight appropriate nodes. + self._highlight_nodes() + self._highlight_prodlist() + + # Make sure the text lines up. + self._position_text() + + def _redraw_quick(self): + # This should be more-or-less sufficient after an animation. + self._highlight_nodes() + self._highlight_prodlist() + self._position_text() + + def _highlight_nodes(self): + # Highlight the list of nodes to be checked. + bold = ('helvetica', -self._size.get(), 'bold') + for treeloc in self._parser.frontier()[:1]: + self._get(self._tree, treeloc)['color'] = '#20a050' + self._get(self._tree, treeloc)['font'] = bold + for treeloc in self._parser.frontier()[1:]: + self._get(self._tree, treeloc)['color'] = '#008080' + + def _highlight_prodlist(self): + # Highlight the productions that can be expanded. 
+ # Boy, too bad tkinter doesn't implement Listbox.itemconfig; + # that would be pretty useful here. + self._prodlist.delete(0, 'end') + expandable = self._parser.expandable_productions() + untried = self._parser.untried_expandable_productions() + productions = self._productions + for index in range(len(productions)): + if productions[index] in expandable: + if productions[index] in untried: + self._prodlist.insert(index, ' %s' % productions[index]) + else: + self._prodlist.insert(index, ' %s (TRIED)' % productions[index]) + self._prodlist.selection_set(index) + else: + self._prodlist.insert(index, ' %s' % productions[index]) + + def _position_text(self): + # Line up the text widgets that are matched against the tree + numwords = len(self._sent) + num_matched = numwords - len(self._parser.remaining_text()) + leaves = self._tree_leaves()[:num_matched] + xmax = self._tree.bbox()[0] + for i in range(0, len(leaves)): + widget = self._textwidgets[i] + leaf = leaves[i] + widget['color'] = '#006040' + leaf['color'] = '#006040' + widget.move(leaf.bbox()[0] - widget.bbox()[0], 0) + xmax = widget.bbox()[2] + 10 + + # Line up the text widgets that are not matched against the tree. + for i in range(len(leaves), numwords): + widget = self._textwidgets[i] + widget['color'] = '#a0a0a0' + widget.move(xmax - widget.bbox()[0], 0) + xmax = widget.bbox()[2] + 10 + + # If we have a complete parse, make everything green :) + if self._parser.currently_complete(): + for twidget in self._textwidgets: + twidget['color'] = '#00a000' + + # Move the matched leaves down to the text. + for i in range(0, len(leaves)): + widget = self._textwidgets[i] + leaf = leaves[i] + dy = widget.bbox()[1] - leaf.bbox()[3] - 10.0 + dy = max(dy, leaf.parent().label().bbox()[3] - leaf.bbox()[3] + 10) + leaf.move(0, dy) + + def _tree_leaves(self, tree=None): + if tree is None: + tree = self._tree + if isinstance(tree, TreeSegmentWidget): + leaves = [] + for child in tree.subtrees(): + leaves += self._tree_leaves(child) + return leaves + else: + return [tree] + + ######################################### + ## Button Callbacks + ######################################### + + def destroy(self, *e): + self._autostep = 0 + if self._top is None: + return + self._top.destroy() + self._top = None + + def reset(self, *e): + self._autostep = 0 + self._parser.initialize(self._sent) + self._lastoper1['text'] = 'Reset Application' + self._lastoper2['text'] = '' + self._redraw() + + def autostep(self, *e): + if self._animation_frames.get() == 0: + self._animation_frames.set(2) + if self._autostep: + self._autostep = 0 + else: + self._autostep = 1 + self._step() + + def cancel_autostep(self, *e): + # self._autostep_button['text'] = 'Autostep' + self._autostep = 0 + + # Make sure to stop auto-stepping if we get any user input. + def step(self, *e): + self._autostep = 0 + self._step() + + def match(self, *e): + self._autostep = 0 + self._match() + + def expand(self, *e): + self._autostep = 0 + self._expand() + + def backtrack(self, *e): + self._autostep = 0 + self._backtrack() + + def _step(self): + if self._animating_lock: + return + + # Try expanding, matching, and backtracking (in that order) + if self._expand(): + pass + elif self._parser.untried_match() and self._match(): + pass + elif self._backtrack(): + pass + else: + self._lastoper1['text'] = 'Finished' + self._lastoper2['text'] = '' + self._autostep = 0 + + # Check if we just completed a parse. 
+ if self._parser.currently_complete(): + self._autostep = 0 + self._lastoper2['text'] += ' [COMPLETE PARSE]' + + def _expand(self, *e): + if self._animating_lock: + return + old_frontier = self._parser.frontier() + rv = self._parser.expand() + if rv is not None: + self._lastoper1['text'] = 'Expand:' + self._lastoper2['text'] = rv + self._prodlist.selection_clear(0, 'end') + index = self._productions.index(rv) + self._prodlist.selection_set(index) + self._animate_expand(old_frontier[0]) + return True + else: + self._lastoper1['text'] = 'Expand:' + self._lastoper2['text'] = '(all expansions tried)' + return False + + def _match(self, *e): + if self._animating_lock: + return + old_frontier = self._parser.frontier() + rv = self._parser.match() + if rv is not None: + self._lastoper1['text'] = 'Match:' + self._lastoper2['text'] = rv + self._animate_match(old_frontier[0]) + return True + else: + self._lastoper1['text'] = 'Match:' + self._lastoper2['text'] = '(failed)' + return False + + def _backtrack(self, *e): + if self._animating_lock: + return + if self._parser.backtrack(): + elt = self._parser.tree() + for i in self._parser.frontier()[0]: + elt = elt[i] + self._lastoper1['text'] = 'Backtrack' + self._lastoper2['text'] = '' + if isinstance(elt, Tree): + self._animate_backtrack(self._parser.frontier()[0]) + else: + self._animate_match_backtrack(self._parser.frontier()[0]) + return True + else: + self._autostep = 0 + self._lastoper1['text'] = 'Finished' + self._lastoper2['text'] = '' + return False + + def about(self, *e): + ABOUT = ( + "NLTK Recursive Descent Parser Application\n" + "Written by Edward Loper" + ) + TITLE = 'About: Recursive Descent Parser Application' + try: + from six.moves.tkinter_messagebox import Message + + Message(message=ABOUT, title=TITLE).show() + except: + ShowText(self._top, TITLE, ABOUT) + + def help(self, *e): + self._autostep = 0 + # The default font's not very legible; try using 'fixed' instead. + try: + ShowText( + self._top, + 'Help: Recursive Descent Parser Application', + (__doc__ or '').strip(), + width=75, + font='fixed', + ) + except: + ShowText( + self._top, + 'Help: Recursive Descent Parser Application', + (__doc__ or '').strip(), + width=75, + ) + + def postscript(self, *e): + self._autostep = 0 + self._cframe.print_to_file() + + def mainloop(self, *args, **kwargs): + """ + Enter the Tkinter mainloop. This function must be called if + this demo is created from a non-interactive program (e.g. + from a secript); otherwise, the demo will close as soon as + the script completes. 
+ """ + if in_idle(): + return + self._top.mainloop(*args, **kwargs) + + def resize(self, size=None): + if size is not None: + self._size.set(size) + size = self._size.get() + self._font.configure(size=-(abs(size))) + self._boldfont.configure(size=-(abs(size))) + self._sysfont.configure(size=-(abs(size))) + self._bigfont.configure(size=-(abs(size + 2))) + self._redraw() + + ######################################### + ## Expand Production Selection + ######################################### + + def _toggle_grammar(self, *e): + if self._show_grammar.get(): + self._prodframe.pack( + fill='both', side='left', padx=2, after=self._feedbackframe + ) + self._lastoper1['text'] = 'Show Grammar' + else: + self._prodframe.pack_forget() + self._lastoper1['text'] = 'Hide Grammar' + self._lastoper2['text'] = '' + + # def toggle_grammar(self, *e): + # self._show_grammar = not self._show_grammar + # if self._show_grammar: + # self._prodframe.pack(fill='both', expand='y', side='left', + # after=self._feedbackframe) + # self._lastoper1['text'] = 'Show Grammar' + # else: + # self._prodframe.pack_forget() + # self._lastoper1['text'] = 'Hide Grammar' + # self._lastoper2['text'] = '' + + def _prodlist_select(self, event): + selection = self._prodlist.curselection() + if len(selection) != 1: + return + index = int(selection[0]) + old_frontier = self._parser.frontier() + production = self._parser.expand(self._productions[index]) + + if production: + self._lastoper1['text'] = 'Expand:' + self._lastoper2['text'] = production + self._prodlist.selection_clear(0, 'end') + self._prodlist.selection_set(index) + self._animate_expand(old_frontier[0]) + else: + # Reset the production selections. + self._prodlist.selection_clear(0, 'end') + for prod in self._parser.expandable_productions(): + index = self._productions.index(prod) + self._prodlist.selection_set(index) + + ######################################### + ## Animation + ######################################### + + def _animate_expand(self, treeloc): + oldwidget = self._get(self._tree, treeloc) + oldtree = oldwidget.parent() + top = not isinstance(oldtree.parent(), TreeSegmentWidget) + + tree = self._parser.tree() + for i in treeloc: + tree = tree[i] + + widget = tree_to_treesegment( + self._canvas, + tree, + node_font=self._boldfont, + leaf_color='white', + tree_width=2, + tree_color='white', + node_color='white', + leaf_font=self._font, + ) + widget.label()['color'] = '#20a050' + + (oldx, oldy) = oldtree.label().bbox()[:2] + (newx, newy) = widget.label().bbox()[:2] + widget.move(oldx - newx, oldy - newy) + + if top: + self._cframe.add_widget(widget, 0, 5) + widget.move(30 - widget.label().bbox()[0], 0) + self._tree = widget + else: + oldtree.parent().replace_child(oldtree, widget) + + # Move the children over so they don't overlap. + # Line the children up in a strange way. + if widget.subtrees(): + dx = ( + oldx + + widget.label().width() / 2 + - widget.subtrees()[0].bbox()[0] / 2 + - widget.subtrees()[0].bbox()[2] / 2 + ) + for subtree in widget.subtrees(): + subtree.move(dx, 0) + + self._makeroom(widget) + + if top: + self._cframe.destroy_widget(oldtree) + else: + oldtree.destroy() + + colors = [ + 'gray%d' % (10 * int(10 * x / self._animation_frames.get())) + for x in range(self._animation_frames.get(), 0, -1) + ] + + # Move the text string down, if necessary. 
+ dy = widget.bbox()[3] + 30 - self._canvas.coords(self._textline)[1] + if dy > 0: + for twidget in self._textwidgets: + twidget.move(0, dy) + self._canvas.move(self._textline, 0, dy) + + self._animate_expand_frame(widget, colors) + + def _makeroom(self, treeseg): + """ + Make sure that no sibling tree bbox's overlap. + """ + parent = treeseg.parent() + if not isinstance(parent, TreeSegmentWidget): + return + + index = parent.subtrees().index(treeseg) + + # Handle siblings to the right + rsiblings = parent.subtrees()[index + 1 :] + if rsiblings: + dx = treeseg.bbox()[2] - rsiblings[0].bbox()[0] + 10 + for sibling in rsiblings: + sibling.move(dx, 0) + + # Handle siblings to the left + if index > 0: + lsibling = parent.subtrees()[index - 1] + dx = max(0, lsibling.bbox()[2] - treeseg.bbox()[0] + 10) + treeseg.move(dx, 0) + + # Keep working up the tree. + self._makeroom(parent) + + def _animate_expand_frame(self, widget, colors): + if len(colors) > 0: + self._animating_lock = 1 + widget['color'] = colors[0] + for subtree in widget.subtrees(): + if isinstance(subtree, TreeSegmentWidget): + subtree.label()['color'] = colors[0] + else: + subtree['color'] = colors[0] + self._top.after(50, self._animate_expand_frame, widget, colors[1:]) + else: + widget['color'] = 'black' + for subtree in widget.subtrees(): + if isinstance(subtree, TreeSegmentWidget): + subtree.label()['color'] = 'black' + else: + subtree['color'] = 'black' + self._redraw_quick() + widget.label()['color'] = 'black' + self._animating_lock = 0 + if self._autostep: + self._step() + + def _animate_backtrack(self, treeloc): + # Flash red first, if we're animating. + if self._animation_frames.get() == 0: + colors = [] + else: + colors = ['#a00000', '#000000', '#a00000'] + colors += [ + 'gray%d' % (10 * int(10 * x / (self._animation_frames.get()))) + for x in range(1, self._animation_frames.get() + 1) + ] + + widgets = [self._get(self._tree, treeloc).parent()] + for subtree in widgets[0].subtrees(): + if isinstance(subtree, TreeSegmentWidget): + widgets.append(subtree.label()) + else: + widgets.append(subtree) + + self._animate_backtrack_frame(widgets, colors) + + def _animate_backtrack_frame(self, widgets, colors): + if len(colors) > 0: + self._animating_lock = 1 + for widget in widgets: + widget['color'] = colors[0] + self._top.after(50, self._animate_backtrack_frame, widgets, colors[1:]) + else: + for widget in widgets[0].subtrees(): + widgets[0].remove_child(widget) + widget.destroy() + self._redraw_quick() + self._animating_lock = 0 + if self._autostep: + self._step() + + def _animate_match_backtrack(self, treeloc): + widget = self._get(self._tree, treeloc) + node = widget.parent().label() + dy = (node.bbox()[3] - widget.bbox()[1] + 14) / max( + 1, self._animation_frames.get() + ) + self._animate_match_backtrack_frame(self._animation_frames.get(), widget, dy) + + def _animate_match(self, treeloc): + widget = self._get(self._tree, treeloc) + + dy = (self._textwidgets[0].bbox()[1] - widget.bbox()[3] - 10.0) / max( + 1, self._animation_frames.get() + ) + self._animate_match_frame(self._animation_frames.get(), widget, dy) + + def _animate_match_frame(self, frame, widget, dy): + if frame > 0: + self._animating_lock = 1 + widget.move(0, dy) + self._top.after(10, self._animate_match_frame, frame - 1, widget, dy) + else: + widget['color'] = '#006040' + self._redraw_quick() + self._animating_lock = 0 + if self._autostep: + self._step() + + def _animate_match_backtrack_frame(self, frame, widget, dy): + if frame > 0: + self._animating_lock = 1 
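All of the _animate_* helpers here follow the same Tk idiom: apply one visual increment, hold the animating lock, and schedule the next frame with after() instead of sleeping in the mainloop. A stripped-down sketch of that idiom (the canvas item, colour ramp, and lock variable are illustrative):

    import tkinter as tk

    root = tk.Tk()
    canvas = tk.Canvas(root, width=200, height=60)
    canvas.pack()
    box = canvas.create_rectangle(10, 10, 50, 50, fill='gray90')

    animating = [False]               # stand-in for self._animating_lock

    def animate(colors):
        # Recolour one frame, then reschedule; never block the mainloop.
        if colors:
            animating[0] = True
            canvas.itemconfig(box, fill=colors[0])
            root.after(50, animate, colors[1:])
        else:
            animating[0] = False      # unlock so user actions are honoured again

    animate(['gray%d' % (10 * i) for i in range(9, 0, -1)] + ['black'])
    root.mainloop()
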
+ widget.move(0, dy) + self._top.after( + 10, self._animate_match_backtrack_frame, frame - 1, widget, dy + ) + else: + widget.parent().remove_child(widget) + widget.destroy() + self._animating_lock = 0 + if self._autostep: + self._step() + + def edit_grammar(self, *e): + CFGEditor(self._top, self._parser.grammar(), self.set_grammar) + + def set_grammar(self, grammar): + self._parser.set_grammar(grammar) + self._productions = list(grammar.productions()) + self._prodlist.delete(0, 'end') + for production in self._productions: + self._prodlist.insert('end', (' %s' % production)) + + def edit_sentence(self, *e): + sentence = " ".join(self._sent) + title = 'Edit Text' + instr = 'Enter a new sentence to parse.' + EntryDialog(self._top, sentence, instr, self.set_sentence, title) + + def set_sentence(self, sentence): + self._sent = sentence.split() # [XX] use tagged? + self.reset() + + +def app(): + """ + Create a recursive descent parser demo, using a simple grammar and + text. + """ + from nltk.grammar import CFG + + grammar = CFG.fromstring( + """ + # Grammatical productions. + S -> NP VP + NP -> Det N PP | Det N + VP -> V NP PP | V NP | V + PP -> P NP + # Lexical productions. + NP -> 'I' + Det -> 'the' | 'a' + N -> 'man' | 'park' | 'dog' | 'telescope' + V -> 'ate' | 'saw' + P -> 'in' | 'under' | 'with' + """ + ) + + sent = 'the dog saw a man in the park'.split() + + RecursiveDescentApp(grammar, sent).mainloop() + + +if __name__ == '__main__': + app() + +__all__ = ['app'] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/srparser_app.py b/venv.bak/lib/python3.7/site-packages/nltk/app/srparser_app.py new file mode 100644 index 0000000..1f11427 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/app/srparser_app.py @@ -0,0 +1,937 @@ +# Natural Language Toolkit: Shift-Reduce Parser Application +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +A graphical tool for exploring the shift-reduce parser. + +The shift-reduce parser maintains a stack, which records the structure +of the portion of the text that has been parsed. The stack is +initially empty. Its contents are shown on the left side of the main +canvas. + +On the right side of the main canvas is the remaining text. This is +the portion of the text which has not yet been considered by the +parser. + +The parser builds up a tree structure for the text using two +operations: + + - "shift" moves the first token from the remaining text to the top + of the stack. In the demo, the top of the stack is its right-hand + side. + - "reduce" uses a grammar production to combine the rightmost stack + elements into a single tree token. + +You can control the parser's operation by using the "shift" and +"reduce" buttons; or you can use the "step" button to let the parser +automatically decide which operation to apply. The parser uses the +following rules to decide which operation to apply: + + - Only shift if no reductions are available. + - If multiple reductions are available, then apply the reduction + whose CFG production is listed earliest in the grammar. + +The "reduce" button applies the reduction whose CFG production is +listed earliest in the grammar. There are two ways to manually choose +which reduction to apply: + + - Click on a CFG production from the list of available reductions, + on the left side of the main window. The reduction based on that + production will be applied to the top of the stack. + - Click on one of the stack elements. 
A popup window will appear, + containing all available reductions. Select one, and it will be + applied to the top of the stack. + +Note that reductions can only be applied to the top of the stack. + +Keyboard Shortcuts:: + [Space]\t Perform the next shift or reduce operation + [s]\t Perform a shift operation + [r]\t Perform a reduction operation + [Ctrl-z]\t Undo most recent operation + [Delete]\t Reset the parser + [g]\t Show/hide available production list + [Ctrl-a]\t Toggle animations + [h]\t Help + [Ctrl-p]\t Print + [q]\t Quit + +""" + +from six.moves.tkinter_font import Font +from six.moves.tkinter import IntVar, Listbox, Button, Frame, Label, Menu, Scrollbar, Tk + +from nltk.tree import Tree +from nltk.parse import SteppingShiftReduceParser +from nltk.util import in_idle +from nltk.draw.util import CanvasFrame, EntryDialog, ShowText, TextWidget +from nltk.draw import CFGEditor, TreeSegmentWidget, tree_to_treesegment + +""" +Possible future improvements: + - button/window to change and/or select text. Just pop up a window + with an entry, and let them modify the text; and then retokenize + it? Maybe give a warning if it contains tokens whose types are + not in the grammar. + - button/window to change and/or select grammar. Select from + several alternative grammars? Or actually change the grammar? If + the later, then I'd want to define nltk.draw.cfg, which would be + responsible for that. +""" + + +class ShiftReduceApp(object): + """ + A graphical tool for exploring the shift-reduce parser. The tool + displays the parser's stack and the remaining text, and allows the + user to control the parser's operation. In particular, the user + can shift tokens onto the stack, and can perform reductions on the + top elements of the stack. A "step" button simply steps through + the parsing process, performing the operations that + ``nltk.parse.ShiftReduceParser`` would use. + """ + + def __init__(self, grammar, sent, trace=0): + self._sent = sent + self._parser = SteppingShiftReduceParser(grammar, trace) + + # Set up the main window. + self._top = Tk() + self._top.title('Shift Reduce Parser Application') + + # Animations. animating_lock is a lock to prevent the demo + # from performing new operations while it's animating. + self._animating_lock = 0 + self._animate = IntVar(self._top) + self._animate.set(10) # = medium + + # The user can hide the grammar. + self._show_grammar = IntVar(self._top) + self._show_grammar.set(1) + + # Initialize fonts. + self._init_fonts(self._top) + + # Set up key bindings. + self._init_bindings() + + # Create the basic frames. + self._init_menubar(self._top) + self._init_buttons(self._top) + self._init_feedback(self._top) + self._init_grammar(self._top) + self._init_canvas(self._top) + + # A popup menu for reducing. + self._reduce_menu = Menu(self._canvas, tearoff=0) + + # Reset the demo, and set the feedback frame to empty. + self.reset() + self._lastoper1['text'] = '' + + ######################################### + ## Initialization Helpers + ######################################### + + def _init_fonts(self, root): + # See: + self._sysfont = Font(font=Button()["font"]) + root.option_add("*Font", self._sysfont) + + # TWhat's our font size (default=same as sysfont) + self._size = IntVar(root) + self._size.set(self._sysfont.cget('size')) + + self._boldfont = Font(family='helvetica', weight='bold', size=self._size.get()) + self._font = Font(family='helvetica', size=self._size.get()) + + def _init_grammar(self, parent): + # Grammar view. 
+ self._prodframe = listframe = Frame(parent) + self._prodframe.pack(fill='both', side='left', padx=2) + self._prodlist_label = Label( + self._prodframe, font=self._boldfont, text='Available Reductions' + ) + self._prodlist_label.pack() + self._prodlist = Listbox( + self._prodframe, + selectmode='single', + relief='groove', + background='white', + foreground='#909090', + font=self._font, + selectforeground='#004040', + selectbackground='#c0f0c0', + ) + + self._prodlist.pack(side='right', fill='both', expand=1) + + self._productions = list(self._parser.grammar().productions()) + for production in self._productions: + self._prodlist.insert('end', (' %s' % production)) + self._prodlist.config(height=min(len(self._productions), 25)) + + # Add a scrollbar if there are more than 25 productions. + if 1: # len(self._productions) > 25: + listscroll = Scrollbar(self._prodframe, orient='vertical') + self._prodlist.config(yscrollcommand=listscroll.set) + listscroll.config(command=self._prodlist.yview) + listscroll.pack(side='left', fill='y') + + # If they select a production, apply it. + self._prodlist.bind('<>', self._prodlist_select) + + # When they hover over a production, highlight it. + self._hover = -1 + self._prodlist.bind('', self._highlight_hover) + self._prodlist.bind('', self._clear_hover) + + def _init_bindings(self): + # Quit + self._top.bind('', self.destroy) + self._top.bind('', self.destroy) + self._top.bind('', self.destroy) + self._top.bind('', self.destroy) + + # Ops (step, shift, reduce, undo) + self._top.bind('', self.step) + self._top.bind('', self.shift) + self._top.bind('', self.shift) + self._top.bind('', self.shift) + self._top.bind('', self.reduce) + self._top.bind('', self.reduce) + self._top.bind('', self.reduce) + self._top.bind('', self.reset) + self._top.bind('', self.undo) + self._top.bind('', self.undo) + self._top.bind('', self.undo) + self._top.bind('', self.undo) + self._top.bind('', self.undo) + + # Misc + self._top.bind('', self.postscript) + self._top.bind('', self.help) + self._top.bind('', self.help) + self._top.bind('', self.edit_grammar) + self._top.bind('', self.edit_sentence) + + # Animation speed control + self._top.bind('-', lambda e, a=self._animate: a.set(20)) + self._top.bind('=', lambda e, a=self._animate: a.set(10)) + self._top.bind('+', lambda e, a=self._animate: a.set(4)) + + def _init_buttons(self, parent): + # Set up the frames. 
+ self._buttonframe = buttonframe = Frame(parent) + buttonframe.pack(fill='none', side='bottom') + Button( + buttonframe, + text='Step', + background='#90c0d0', + foreground='black', + command=self.step, + ).pack(side='left') + Button( + buttonframe, + text='Shift', + underline=0, + background='#90f090', + foreground='black', + command=self.shift, + ).pack(side='left') + Button( + buttonframe, + text='Reduce', + underline=0, + background='#90f090', + foreground='black', + command=self.reduce, + ).pack(side='left') + Button( + buttonframe, + text='Undo', + underline=0, + background='#f0a0a0', + foreground='black', + command=self.undo, + ).pack(side='left') + + def _init_menubar(self, parent): + menubar = Menu(parent) + + filemenu = Menu(menubar, tearoff=0) + filemenu.add_command( + label='Reset Parser', underline=0, command=self.reset, accelerator='Del' + ) + filemenu.add_command( + label='Print to Postscript', + underline=0, + command=self.postscript, + accelerator='Ctrl-p', + ) + filemenu.add_command( + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-x' + ) + menubar.add_cascade(label='File', underline=0, menu=filemenu) + + editmenu = Menu(menubar, tearoff=0) + editmenu.add_command( + label='Edit Grammar', + underline=5, + command=self.edit_grammar, + accelerator='Ctrl-g', + ) + editmenu.add_command( + label='Edit Text', + underline=5, + command=self.edit_sentence, + accelerator='Ctrl-t', + ) + menubar.add_cascade(label='Edit', underline=0, menu=editmenu) + + rulemenu = Menu(menubar, tearoff=0) + rulemenu.add_command( + label='Step', underline=1, command=self.step, accelerator='Space' + ) + rulemenu.add_separator() + rulemenu.add_command( + label='Shift', underline=0, command=self.shift, accelerator='Ctrl-s' + ) + rulemenu.add_command( + label='Reduce', underline=0, command=self.reduce, accelerator='Ctrl-r' + ) + rulemenu.add_separator() + rulemenu.add_command( + label='Undo', underline=0, command=self.undo, accelerator='Ctrl-u' + ) + menubar.add_cascade(label='Apply', underline=0, menu=rulemenu) + + viewmenu = Menu(menubar, tearoff=0) + viewmenu.add_checkbutton( + label="Show Grammar", + underline=0, + variable=self._show_grammar, + command=self._toggle_grammar, + ) + viewmenu.add_separator() + viewmenu.add_radiobutton( + label='Tiny', + variable=self._size, + underline=0, + value=10, + command=self.resize, + ) + viewmenu.add_radiobutton( + label='Small', + variable=self._size, + underline=0, + value=12, + command=self.resize, + ) + viewmenu.add_radiobutton( + label='Medium', + variable=self._size, + underline=0, + value=14, + command=self.resize, + ) + viewmenu.add_radiobutton( + label='Large', + variable=self._size, + underline=0, + value=18, + command=self.resize, + ) + viewmenu.add_radiobutton( + label='Huge', + variable=self._size, + underline=0, + value=24, + command=self.resize, + ) + menubar.add_cascade(label='View', underline=0, menu=viewmenu) + + animatemenu = Menu(menubar, tearoff=0) + animatemenu.add_radiobutton( + label="No Animation", underline=0, variable=self._animate, value=0 + ) + animatemenu.add_radiobutton( + label="Slow Animation", + underline=0, + variable=self._animate, + value=20, + accelerator='-', + ) + animatemenu.add_radiobutton( + label="Normal Animation", + underline=0, + variable=self._animate, + value=10, + accelerator='=', + ) + animatemenu.add_radiobutton( + label="Fast Animation", + underline=0, + variable=self._animate, + value=4, + accelerator='+', + ) + menubar.add_cascade(label="Animate", underline=1, menu=animatemenu) + + 
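The step policy described in this module's docstring (shift only when no reduction applies, and prefer the reduction listed earliest in the grammar) can also be exercised directly on the SteppingShiftReduceParser that this window drives. A brief sketch under those assumptions, with a toy grammar used only for illustration:

    from nltk.grammar import CFG
    from nltk.parse import SteppingShiftReduceParser

    grammar = CFG.fromstring("""
        S -> NP VP
        NP -> 'the' N
        VP -> V NP
        N -> 'dog' | 'park'
        V -> 'saw'
    """)

    parser = SteppingShiftReduceParser(grammar)
    parser.initialize('the dog saw the park'.split())

    while True:
        if parser.reduce():           # apply a reduction if one is available
            continue
        if parser.shift():            # shift only when nothing reduces
            continue
        break                         # neither operation applies: parsing is done

    print(list(parser.parses()))      # one Tree on success, [] on failure
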
helpmenu = Menu(menubar, tearoff=0) + helpmenu.add_command(label='About', underline=0, command=self.about) + helpmenu.add_command( + label='Instructions', underline=0, command=self.help, accelerator='F1' + ) + menubar.add_cascade(label='Help', underline=0, menu=helpmenu) + + parent.config(menu=menubar) + + def _init_feedback(self, parent): + self._feedbackframe = feedbackframe = Frame(parent) + feedbackframe.pack(fill='x', side='bottom', padx=3, pady=3) + self._lastoper_label = Label( + feedbackframe, text='Last Operation:', font=self._font + ) + self._lastoper_label.pack(side='left') + lastoperframe = Frame(feedbackframe, relief='sunken', border=1) + lastoperframe.pack(fill='x', side='right', expand=1, padx=5) + self._lastoper1 = Label( + lastoperframe, foreground='#007070', background='#f0f0f0', font=self._font + ) + self._lastoper2 = Label( + lastoperframe, + anchor='w', + width=30, + foreground='#004040', + background='#f0f0f0', + font=self._font, + ) + self._lastoper1.pack(side='left') + self._lastoper2.pack(side='left', fill='x', expand=1) + + def _init_canvas(self, parent): + self._cframe = CanvasFrame( + parent, + background='white', + width=525, + closeenough=10, + border=2, + relief='sunken', + ) + self._cframe.pack(expand=1, fill='both', side='top', pady=2) + canvas = self._canvas = self._cframe.canvas() + + self._stackwidgets = [] + self._rtextwidgets = [] + self._titlebar = canvas.create_rectangle( + 0, 0, 0, 0, fill='#c0f0f0', outline='black' + ) + self._exprline = canvas.create_line(0, 0, 0, 0, dash='.') + self._stacktop = canvas.create_line(0, 0, 0, 0, fill='#408080') + size = self._size.get() + 4 + self._stacklabel = TextWidget( + canvas, 'Stack', color='#004040', font=self._boldfont + ) + self._rtextlabel = TextWidget( + canvas, 'Remaining Text', color='#004040', font=self._boldfont + ) + self._cframe.add_widget(self._stacklabel) + self._cframe.add_widget(self._rtextlabel) + + ######################################### + ## Main draw procedure + ######################################### + + def _redraw(self): + scrollregion = self._canvas['scrollregion'].split() + (cx1, cy1, cx2, cy2) = [int(c) for c in scrollregion] + + # Delete the old stack & rtext widgets. + for stackwidget in self._stackwidgets: + self._cframe.destroy_widget(stackwidget) + self._stackwidgets = [] + for rtextwidget in self._rtextwidgets: + self._cframe.destroy_widget(rtextwidget) + self._rtextwidgets = [] + + # Position the titlebar & exprline + (x1, y1, x2, y2) = self._stacklabel.bbox() + y = y2 - y1 + 10 + self._canvas.coords(self._titlebar, -5000, 0, 5000, y - 4) + self._canvas.coords(self._exprline, 0, y * 2 - 10, 5000, y * 2 - 10) + + # Position the titlebar labels.. + (x1, y1, x2, y2) = self._stacklabel.bbox() + self._stacklabel.move(5 - x1, 3 - y1) + (x1, y1, x2, y2) = self._rtextlabel.bbox() + self._rtextlabel.move(cx2 - x2 - 5, 3 - y1) + + # Draw the stack. + stackx = 5 + for tok in self._parser.stack(): + if isinstance(tok, Tree): + attribs = { + 'tree_color': '#4080a0', + 'tree_width': 2, + 'node_font': self._boldfont, + 'node_color': '#006060', + 'leaf_color': '#006060', + 'leaf_font': self._font, + } + widget = tree_to_treesegment(self._canvas, tok, **attribs) + widget.label()['color'] = '#000000' + else: + widget = TextWidget(self._canvas, tok, color='#000000', font=self._font) + widget.bind_click(self._popup_reduce) + self._stackwidgets.append(widget) + self._cframe.add_widget(widget, stackx, y) + stackx = widget.bbox()[2] + 10 + + # Draw the remaining text. 
+ rtextwidth = 0 + for tok in self._parser.remaining_text(): + widget = TextWidget(self._canvas, tok, color='#000000', font=self._font) + self._rtextwidgets.append(widget) + self._cframe.add_widget(widget, rtextwidth, y) + rtextwidth = widget.bbox()[2] + 4 + + # Allow enough room to shift the next token (for animations) + if len(self._rtextwidgets) > 0: + stackx += self._rtextwidgets[0].width() + + # Move the remaining text to the correct location (keep it + # right-justified, when possible); and move the remaining text + # label, if necessary. + stackx = max(stackx, self._stacklabel.width() + 25) + rlabelwidth = self._rtextlabel.width() + 10 + if stackx >= cx2 - max(rtextwidth, rlabelwidth): + cx2 = stackx + max(rtextwidth, rlabelwidth) + for rtextwidget in self._rtextwidgets: + rtextwidget.move(4 + cx2 - rtextwidth, 0) + self._rtextlabel.move(cx2 - self._rtextlabel.bbox()[2] - 5, 0) + + midx = (stackx + cx2 - max(rtextwidth, rlabelwidth)) / 2 + self._canvas.coords(self._stacktop, midx, 0, midx, 5000) + (x1, y1, x2, y2) = self._stacklabel.bbox() + + # Set up binding to allow them to shift a token by dragging it. + if len(self._rtextwidgets) > 0: + + def drag_shift(widget, midx=midx, self=self): + if widget.bbox()[0] < midx: + self.shift() + else: + self._redraw() + + self._rtextwidgets[0].bind_drag(drag_shift) + self._rtextwidgets[0].bind_click(self.shift) + + # Draw the stack top. + self._highlight_productions() + + def _draw_stack_top(self, widget): + # hack.. + midx = widget.bbox()[2] + 50 + self._canvas.coords(self._stacktop, midx, 0, midx, 5000) + + def _highlight_productions(self): + # Highlight the productions that can be reduced. + self._prodlist.selection_clear(0, 'end') + for prod in self._parser.reducible_productions(): + index = self._productions.index(prod) + self._prodlist.selection_set(index) + + ######################################### + ## Button Callbacks + ######################################### + + def destroy(self, *e): + if self._top is None: + return + self._top.destroy() + self._top = None + + def reset(self, *e): + self._parser.initialize(self._sent) + self._lastoper1['text'] = 'Reset App' + self._lastoper2['text'] = '' + self._redraw() + + def step(self, *e): + if self.reduce(): + return True + elif self.shift(): + return True + else: + if list(self._parser.parses()): + self._lastoper1['text'] = 'Finished:' + self._lastoper2['text'] = 'Success' + else: + self._lastoper1['text'] = 'Finished:' + self._lastoper2['text'] = 'Failure' + + def shift(self, *e): + if self._animating_lock: + return + if self._parser.shift(): + tok = self._parser.stack()[-1] + self._lastoper1['text'] = 'Shift:' + self._lastoper2['text'] = '%r' % tok + if self._animate.get(): + self._animate_shift() + else: + self._redraw() + return True + return False + + def reduce(self, *e): + if self._animating_lock: + return + production = self._parser.reduce() + if production: + self._lastoper1['text'] = 'Reduce:' + self._lastoper2['text'] = '%s' % production + if self._animate.get(): + self._animate_reduce() + else: + self._redraw() + return production + + def undo(self, *e): + if self._animating_lock: + return + if self._parser.undo(): + self._redraw() + + def postscript(self, *e): + self._cframe.print_to_file() + + def mainloop(self, *args, **kwargs): + """ + Enter the Tkinter mainloop. This function must be called if + this demo is created from a non-interactive program (e.g. + from a secript); otherwise, the demo will close as soon as + the script completes. 
+ """ + if in_idle(): + return + self._top.mainloop(*args, **kwargs) + + ######################################### + ## Menubar callbacks + ######################################### + + def resize(self, size=None): + if size is not None: + self._size.set(size) + size = self._size.get() + self._font.configure(size=-(abs(size))) + self._boldfont.configure(size=-(abs(size))) + self._sysfont.configure(size=-(abs(size))) + + # self._stacklabel['font'] = ('helvetica', -size-4, 'bold') + # self._rtextlabel['font'] = ('helvetica', -size-4, 'bold') + # self._lastoper_label['font'] = ('helvetica', -size) + # self._lastoper1['font'] = ('helvetica', -size) + # self._lastoper2['font'] = ('helvetica', -size) + # self._prodlist['font'] = ('helvetica', -size) + # self._prodlist_label['font'] = ('helvetica', -size-2, 'bold') + self._redraw() + + def help(self, *e): + # The default font's not very legible; try using 'fixed' instead. + try: + ShowText( + self._top, + 'Help: Shift-Reduce Parser Application', + (__doc__ or '').strip(), + width=75, + font='fixed', + ) + except: + ShowText( + self._top, + 'Help: Shift-Reduce Parser Application', + (__doc__ or '').strip(), + width=75, + ) + + def about(self, *e): + ABOUT = "NLTK Shift-Reduce Parser Application\n" + "Written by Edward Loper" + TITLE = 'About: Shift-Reduce Parser Application' + try: + from six.moves.tkinter_messagebox import Message + + Message(message=ABOUT, title=TITLE).show() + except: + ShowText(self._top, TITLE, ABOUT) + + def edit_grammar(self, *e): + CFGEditor(self._top, self._parser.grammar(), self.set_grammar) + + def set_grammar(self, grammar): + self._parser.set_grammar(grammar) + self._productions = list(grammar.productions()) + self._prodlist.delete(0, 'end') + for production in self._productions: + self._prodlist.insert('end', (' %s' % production)) + + def edit_sentence(self, *e): + sentence = " ".join(self._sent) + title = 'Edit Text' + instr = 'Enter a new sentence to parse.' + EntryDialog(self._top, sentence, instr, self.set_sentence, title) + + def set_sentence(self, sent): + self._sent = sent.split() # [XX] use tagged? + self.reset() + + ######################################### + ## Reduce Production Selection + ######################################### + + def _toggle_grammar(self, *e): + if self._show_grammar.get(): + self._prodframe.pack( + fill='both', side='left', padx=2, after=self._feedbackframe + ) + self._lastoper1['text'] = 'Show Grammar' + else: + self._prodframe.pack_forget() + self._lastoper1['text'] = 'Hide Grammar' + self._lastoper2['text'] = '' + + def _prodlist_select(self, event): + selection = self._prodlist.curselection() + if len(selection) != 1: + return + index = int(selection[0]) + production = self._parser.reduce(self._productions[index]) + if production: + self._lastoper1['text'] = 'Reduce:' + self._lastoper2['text'] = '%s' % production + if self._animate.get(): + self._animate_reduce() + else: + self._redraw() + else: + # Reset the production selections. + self._prodlist.selection_clear(0, 'end') + for prod in self._parser.reducible_productions(): + index = self._productions.index(prod) + self._prodlist.selection_set(index) + + def _popup_reduce(self, widget): + # Remove old commands. 
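+        # Note: the popup menu is rebuilt on every click from the parser's
+        # currently reducible productions and posted at the mouse position;
+        # if nothing is reducible, no menu is shown.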
+ productions = self._parser.reducible_productions() + if len(productions) == 0: + return + + self._reduce_menu.delete(0, 'end') + for production in productions: + self._reduce_menu.add_command(label=str(production), command=self.reduce) + self._reduce_menu.post( + self._canvas.winfo_pointerx(), self._canvas.winfo_pointery() + ) + + ######################################### + ## Animations + ######################################### + + def _animate_shift(self): + # What widget are we shifting? + widget = self._rtextwidgets[0] + + # Where are we shifting from & to? + right = widget.bbox()[0] + if len(self._stackwidgets) == 0: + left = 5 + else: + left = self._stackwidgets[-1].bbox()[2] + 10 + + # Start animating. + dt = self._animate.get() + dx = (left - right) * 1.0 / dt + self._animate_shift_frame(dt, widget, dx) + + def _animate_shift_frame(self, frame, widget, dx): + if frame > 0: + self._animating_lock = 1 + widget.move(dx, 0) + self._top.after(10, self._animate_shift_frame, frame - 1, widget, dx) + else: + # but: stacktop?? + + # Shift the widget to the stack. + del self._rtextwidgets[0] + self._stackwidgets.append(widget) + self._animating_lock = 0 + + # Display the available productions. + self._draw_stack_top(widget) + self._highlight_productions() + + def _animate_reduce(self): + # What widgets are we shifting? + numwidgets = len(self._parser.stack()[-1]) # number of children + widgets = self._stackwidgets[-numwidgets:] + + # How far are we moving? + if isinstance(widgets[0], TreeSegmentWidget): + ydist = 15 + widgets[0].label().height() + else: + ydist = 15 + widgets[0].height() + + # Start animating. + dt = self._animate.get() + dy = ydist * 2.0 / dt + self._animate_reduce_frame(dt / 2, widgets, dy) + + def _animate_reduce_frame(self, frame, widgets, dy): + if frame > 0: + self._animating_lock = 1 + for widget in widgets: + widget.move(0, dy) + self._top.after(10, self._animate_reduce_frame, frame - 1, widgets, dy) + else: + del self._stackwidgets[-len(widgets) :] + for widget in widgets: + self._cframe.remove_widget(widget) + tok = self._parser.stack()[-1] + if not isinstance(tok, Tree): + raise ValueError() + label = TextWidget( + self._canvas, str(tok.label()), color='#006060', font=self._boldfont + ) + widget = TreeSegmentWidget(self._canvas, label, widgets, width=2) + (x1, y1, x2, y2) = self._stacklabel.bbox() + y = y2 - y1 + 10 + if not self._stackwidgets: + x = 5 + else: + x = self._stackwidgets[-1].bbox()[2] + 10 + self._cframe.add_widget(widget, x, y) + self._stackwidgets.append(widget) + + # Display the available productions. + self._draw_stack_top(widget) + self._highlight_productions() + + # # Delete the old widgets.. + # del self._stackwidgets[-len(widgets):] + # for widget in widgets: + # self._cframe.destroy_widget(widget) + # + # # Make a new one. 
+ # tok = self._parser.stack()[-1] + # if isinstance(tok, Tree): + # attribs = {'tree_color': '#4080a0', 'tree_width': 2, + # 'node_font': bold, 'node_color': '#006060', + # 'leaf_color': '#006060', 'leaf_font':self._font} + # widget = tree_to_treesegment(self._canvas, tok.type(), + # **attribs) + # widget.node()['color'] = '#000000' + # else: + # widget = TextWidget(self._canvas, tok.type(), + # color='#000000', font=self._font) + # widget.bind_click(self._popup_reduce) + # (x1, y1, x2, y2) = self._stacklabel.bbox() + # y = y2-y1+10 + # if not self._stackwidgets: x = 5 + # else: x = self._stackwidgets[-1].bbox()[2] + 10 + # self._cframe.add_widget(widget, x, y) + # self._stackwidgets.append(widget) + + # self._redraw() + self._animating_lock = 0 + + ######################################### + ## Hovering. + ######################################### + + def _highlight_hover(self, event): + # What production are we hovering over? + index = self._prodlist.nearest(event.y) + if self._hover == index: + return + + # Clear any previous hover highlighting. + self._clear_hover() + + # If the production corresponds to an available reduction, + # highlight the stack. + selection = [int(s) for s in self._prodlist.curselection()] + if index in selection: + rhslen = len(self._productions[index].rhs()) + for stackwidget in self._stackwidgets[-rhslen:]: + if isinstance(stackwidget, TreeSegmentWidget): + stackwidget.label()['color'] = '#00a000' + else: + stackwidget['color'] = '#00a000' + + # Remember what production we're hovering over. + self._hover = index + + def _clear_hover(self, *event): + # Clear any previous hover highlighting. + if self._hover == -1: + return + self._hover = -1 + for stackwidget in self._stackwidgets: + if isinstance(stackwidget, TreeSegmentWidget): + stackwidget.label()['color'] = 'black' + else: + stackwidget['color'] = 'black' + + +def app(): + """ + Create a shift reduce parser app, using a simple grammar and + text. 
+ """ + + from nltk.grammar import Nonterminal, Production, CFG + + nonterminals = 'S VP NP PP P N Name V Det' + (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()] + + productions = ( + # Syntactic Productions + Production(S, [NP, VP]), + Production(NP, [Det, N]), + Production(NP, [NP, PP]), + Production(VP, [VP, PP]), + Production(VP, [V, NP, PP]), + Production(VP, [V, NP]), + Production(PP, [P, NP]), + # Lexical Productions + Production(NP, ['I']), + Production(Det, ['the']), + Production(Det, ['a']), + Production(N, ['man']), + Production(V, ['saw']), + Production(P, ['in']), + Production(P, ['with']), + Production(N, ['park']), + Production(N, ['dog']), + Production(N, ['statue']), + Production(Det, ['my']), + ) + + grammar = CFG(S, productions) + + # tokenize the sentence + sent = 'my dog saw a man in the park with a statue'.split() + + ShiftReduceApp(grammar, sent).mainloop() + + +if __name__ == '__main__': + app() + +__all__ = ['app'] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/wordfreq_app.py b/venv.bak/lib/python3.7/site-packages/nltk/app/wordfreq_app.py new file mode 100644 index 0000000..52c7c66 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/app/wordfreq_app.py @@ -0,0 +1,35 @@ +# Natural Language Toolkit: Wordfreq Application +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Sumukh Ghodke +# URL: +# For license information, see LICENSE.TXT + +from matplotlib import pylab +from nltk.text import Text +from nltk.corpus import gutenberg + + +def plot_word_freq_dist(text): + fd = text.vocab() + + samples = [item for item, _ in fd.most_common(50)] + values = [fd[sample] for sample in samples] + values = [sum(values[: i + 1]) * 100.0 / fd.N() for i in range(len(values))] + pylab.title(text.name) + pylab.xlabel("Samples") + pylab.ylabel("Cumulative Percentage") + pylab.plot(values) + pylab.xticks(range(len(samples)), [str(s) for s in samples], rotation=90) + pylab.show() + + +def app(): + t1 = Text(gutenberg.words('melville-moby_dick.txt')) + plot_word_freq_dist(t1) + + +if __name__ == '__main__': + app() + +__all__ = ['app'] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/app/wordnet_app.py b/venv.bak/lib/python3.7/site-packages/nltk/app/wordnet_app.py new file mode 100644 index 0000000..9854955 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/app/wordnet_app.py @@ -0,0 +1,1009 @@ +# Natural Language Toolkit: WordNet Browser Application +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Jussi Salmela +# Paul Bone +# URL: +# For license information, see LICENSE.TXT + +""" +A WordNet Browser application which launches the default browser +(if it is not already running) and opens a new tab with a connection +to http://localhost:port/ . It also starts an HTTP server on the +specified port and begins serving browser requests. The default +port is 8000. (For command-line help, run "python wordnet -h") +This application requires that the user's web browser supports +Javascript. + +BrowServer is a server for browsing the NLTK Wordnet database It first +launches a browser client to be used for browsing and then starts +serving the requests of that and maybe other clients + +Usage:: + + browserver.py -h + browserver.py [-s] [-p ] + +Options:: + + -h or --help + Display this help message. + + -l or --log-file + Logs messages to the given file, If this option is not specified + messages are silently dropped. + + -p or --port + Run the web server on this TCP port, defaults to 8000. 
+ + -s or --server-mode + Do not start a web browser, and do not allow a user to + shotdown the server through the web interface. +""" +# TODO: throughout this package variable names and docstrings need +# modifying to be compliant with NLTK's coding standards. Tests also +# need to be develop to ensure this continues to work in the face of +# changes to other NLTK packages. +from __future__ import print_function + +# Allow this program to run inside the NLTK source tree. +from sys import path + +import os +import sys +from sys import argv +from collections import defaultdict +import webbrowser +import datetime +import re +import threading +import time +import getopt +import base64 +import pickle +import copy + +from six.moves.urllib.parse import unquote_plus + +from nltk import compat +from nltk.corpus import wordnet as wn +from nltk.corpus.reader.wordnet import Synset, Lemma + +if compat.PY3: + from http.server import HTTPServer, BaseHTTPRequestHandler +else: + from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler + +# now included in local file +# from util import html_header, html_trailer, \ +# get_static_index_page, get_static_page_by_path, \ +# page_from_word, page_from_href + +firstClient = True + +# True if we're not also running a web browser. The value f server_mode +# gets set by demo(). +server_mode = None + +# If set this is a file object for writting log messages. +logfile = None + + +class MyServerHandler(BaseHTTPRequestHandler): + def do_HEAD(self): + self.send_head() + + def do_GET(self): + global firstClient + sp = self.path[1:] + if unquote_plus(sp) == 'SHUTDOWN THE SERVER': + if server_mode: + page = "Server must be killed with SIGTERM." + type = "text/plain" + else: + print('Server shutting down!') + os._exit(0) + + elif sp == '': # First request. + type = 'text/html' + if not server_mode and firstClient: + firstClient = False + page = get_static_index_page(True) + else: + page = get_static_index_page(False) + word = 'green' + + elif sp.endswith('.html'): # Trying to fetch a HTML file TODO: + type = 'text/html' + usp = unquote_plus(sp) + if usp == 'NLTK Wordnet Browser Database Info.html': + word = '* Database Info *' + if os.path.isfile(usp): + with open(usp, 'r') as infile: + page = infile.read() + else: + page = ( + (html_header % word) + '
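+    # Note: binding to the empty host '' listens on all local interfaces;
+    # serve_forever() blocks until the process is terminated (e.g. by SIGTERM)
+    # or a KeyboardInterrupt (Ctrl-C) is raised; the KeyboardInterrupt is
+    # caught below so the browser thread can still be joined and the log
+    # file closed.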

The database info file:' + '

' + + usp + + '' + + '

was not found. Run this:' + + '

python dbinfo_html.py' + + '

to produce it.' + + html_trailer + ) + else: + # Handle files here. + word = sp + page = get_static_page_by_path(usp) + elif sp.startswith("search"): + # This doesn't seem to work with MWEs. + type = 'text/html' + parts = (sp.split("?")[1]).split("&") + word = [ + p.split("=")[1].replace("+", " ") + for p in parts + if p.startswith("nextWord") + ][0] + page, word = page_from_word(word) + elif sp.startswith("lookup_"): + # TODO add a variation of this that takes a non ecoded word or MWE. + type = 'text/html' + sp = sp[len("lookup_") :] + page, word = page_from_href(sp) + elif sp == "start_page": + # if this is the first request we should display help + # information, and possibly set a default word. + type = 'text/html' + page, word = page_from_word("wordnet") + else: + type = 'text/plain' + page = "Could not parse request: '%s'" % sp + + # Send result. + self.send_head(type) + self.wfile.write(page.encode('utf8')) + + def send_head(self, type=None): + self.send_response(200) + self.send_header('Content-type', type) + self.end_headers() + + def log_message(self, format, *args): + global logfile + + if logfile: + logfile.write( + "%s - - [%s] %s\n" + % (self.address_string(), self.log_date_time_string(), format % args) + ) + + +def get_unique_counter_from_url(sp): + """ + Extract the unique counter from the URL if it has one. Otherwise return + null. + """ + pos = sp.rfind('%23') + if pos != -1: + return int(sp[(pos + 3) :]) + else: + return None + + +def wnb(port=8000, runBrowser=True, logfilename=None): + """ + Run NLTK Wordnet Browser Server. + + :param port: The port number for the server to listen on, defaults to + 8000 + :type port: int + + :param runBrowser: True to start a web browser and point it at the web + server. + :type runBrowser: bool + """ + # The webbrowser module is unpredictable, typically it blocks if it uses + # a console web browser, and doesn't block if it uses a GUI webbrowser, + # so we need to force it to have a clear correct behaviour. + # + # Normally the server should run for as long as the user wants. they + # should idealy be able to control this from the UI by closing the + # window or tab. Second best would be clicking a button to say + # 'Shutdown' that first shutsdown the server and closes the window or + # tab, or exits the text-mode browser. Both of these are unfreasable. + # + # The next best alternative is to start the server, have it close when + # it receives SIGTERM (default), and run the browser as well. The user + # may have to shutdown both programs. + # + # Since webbrowser may block, and the webserver will block, we must run + # them in separate threads. + # + global server_mode, logfile + server_mode = not runBrowser + + # Setup logging. + if logfilename: + try: + logfile = open(logfilename, "a", 1) # 1 means 'line buffering' + except IOError as e: + sys.stderr.write("Couldn't open %s for writing: %s", logfilename, e) + sys.exit(1) + else: + logfile = None + + # Compute URL and start web browser + url = 'http://localhost:' + str(port) + + server_ready = None + browser_thread = None + + if runBrowser: + server_ready = threading.Event() + browser_thread = startBrowser(url, server_ready) + + # Start the server. 
+ server = HTTPServer(('', port), MyServerHandler) + if logfile: + logfile.write('NLTK Wordnet browser server running serving: %s\n' % url) + if runBrowser: + server_ready.set() + + try: + server.serve_forever() + except KeyboardInterrupt: + pass + + if runBrowser: + browser_thread.join() + + if logfile: + logfile.close() + + +def startBrowser(url, server_ready): + def run(): + server_ready.wait() + time.sleep(1) # Wait a little bit more, there's still the chance of + # a race condition. + webbrowser.open(url, new=2, autoraise=1) + + t = threading.Thread(target=run) + t.start() + return t + + +##################################################################### +# Utilities +##################################################################### + + +""" +WordNet Browser Utilities. + +This provides a backend to both wxbrowse and browserver.py. +""" + +################################################################################ +# +# Main logic for wordnet browser. +# + +# This is wrapped inside a function since wn is only available if the +# WordNet corpus is installed. +def _pos_tuples(): + return [ + (wn.NOUN, 'N', 'noun'), + (wn.VERB, 'V', 'verb'), + (wn.ADJ, 'J', 'adj'), + (wn.ADV, 'R', 'adv'), + ] + + +def _pos_match(pos_tuple): + """ + This function returns the complete pos tuple for the partial pos + tuple given to it. It attempts to match it against the first + non-null component of the given pos tuple. + """ + if pos_tuple[0] == 's': + pos_tuple = ('a', pos_tuple[1], pos_tuple[2]) + for n, x in enumerate(pos_tuple): + if x is not None: + break + for pt in _pos_tuples(): + if pt[n] == pos_tuple[n]: + return pt + return None + + +HYPONYM = 0 +HYPERNYM = 1 +CLASS_REGIONAL = 2 +PART_HOLONYM = 3 +PART_MERONYM = 4 +ATTRIBUTE = 5 +SUBSTANCE_HOLONYM = 6 +SUBSTANCE_MERONYM = 7 +MEMBER_HOLONYM = 8 +MEMBER_MERONYM = 9 +VERB_GROUP = 10 +INSTANCE_HYPONYM = 12 +INSTANCE_HYPERNYM = 13 +CAUSE = 14 +ALSO_SEE = 15 +SIMILAR = 16 +ENTAILMENT = 17 +ANTONYM = 18 +FRAMES = 19 +PERTAINYM = 20 + +CLASS_CATEGORY = 21 +CLASS_USAGE = 22 +CLASS_REGIONAL = 23 +CLASS_USAGE = 24 +CLASS_CATEGORY = 11 + +DERIVATIONALLY_RELATED_FORM = 25 + +INDIRECT_HYPERNYMS = 26 + + +def lemma_property(word, synset, func): + def flattern(l): + if l == []: + return [] + else: + return l[0] + flattern(l[1:]) + + return flattern([func(l) for l in synset.lemmas if l.name == word]) + + +def rebuild_tree(orig_tree): + node = orig_tree[0] + children = orig_tree[1:] + return (node, [rebuild_tree(t) for t in children]) + + +def get_relations_data(word, synset): + """ + Get synset relations data for a synset. Note that this doesn't + yet support things such as full hyponym vs direct hyponym. 
+ """ + if synset.pos() == wn.NOUN: + return ( + (HYPONYM, 'Hyponyms', synset.hyponyms()), + (INSTANCE_HYPONYM, 'Instance hyponyms', synset.instance_hyponyms()), + (HYPERNYM, 'Direct hypernyms', synset.hypernyms()), + ( + INDIRECT_HYPERNYMS, + 'Indirect hypernyms', + rebuild_tree(synset.tree(lambda x: x.hypernyms()))[1], + ), + # hypernyms', 'Sister terms', + (INSTANCE_HYPERNYM, 'Instance hypernyms', synset.instance_hypernyms()), + # (CLASS_REGIONAL, ['domain term region'], ), + (PART_HOLONYM, 'Part holonyms', synset.part_holonyms()), + (PART_MERONYM, 'Part meronyms', synset.part_meronyms()), + (SUBSTANCE_HOLONYM, 'Substance holonyms', synset.substance_holonyms()), + (SUBSTANCE_MERONYM, 'Substance meronyms', synset.substance_meronyms()), + (MEMBER_HOLONYM, 'Member holonyms', synset.member_holonyms()), + (MEMBER_MERONYM, 'Member meronyms', synset.member_meronyms()), + (ATTRIBUTE, 'Attributes', synset.attributes()), + (ANTONYM, "Antonyms", lemma_property(word, synset, lambda l: l.antonyms())), + ( + DERIVATIONALLY_RELATED_FORM, + "Derivationally related form", + lemma_property( + word, synset, lambda l: l.derivationally_related_forms() + ), + ), + ) + elif synset.pos() == wn.VERB: + return ( + (ANTONYM, 'Antonym', lemma_property(word, synset, lambda l: l.antonyms())), + (HYPONYM, 'Hyponym', synset.hyponyms()), + (HYPERNYM, 'Direct hypernyms', synset.hypernyms()), + ( + INDIRECT_HYPERNYMS, + 'Indirect hypernyms', + rebuild_tree(synset.tree(lambda x: x.hypernyms()))[1], + ), + (ENTAILMENT, 'Entailments', synset.entailments()), + (CAUSE, 'Causes', synset.causes()), + (ALSO_SEE, 'Also see', synset.also_sees()), + (VERB_GROUP, 'Verb Groups', synset.verb_groups()), + ( + DERIVATIONALLY_RELATED_FORM, + "Derivationally related form", + lemma_property( + word, synset, lambda l: l.derivationally_related_forms() + ), + ), + ) + elif synset.pos() == wn.ADJ or synset.pos == wn.ADJ_SAT: + return ( + (ANTONYM, 'Antonym', lemma_property(word, synset, lambda l: l.antonyms())), + (SIMILAR, 'Similar to', synset.similar_tos()), + # Participle of verb - not supported by corpus + ( + PERTAINYM, + 'Pertainyms', + lemma_property(word, synset, lambda l: l.pertainyms()), + ), + (ATTRIBUTE, 'Attributes', synset.attributes()), + (ALSO_SEE, 'Also see', synset.also_sees()), + ) + elif synset.pos() == wn.ADV: + # This is weird. adverbs such as 'quick' and 'fast' don't seem + # to have antonyms returned by the corpus.a + return ( + (ANTONYM, 'Antonym', lemma_property(word, synset, lambda l: l.antonyms())), + ) + # Derived from adjective - not supported by corpus + else: + raise TypeError("Unhandles synset POS type: " + str(synset.pos())) + + +html_header = ''' + + + + + +NLTK Wordnet Browser display of: %s + +''' +html_trailer = ''' + + +''' + +explanation = ''' +

Search Help

+
  • The display below the line is an example of the output the browser +shows you when you enter a search word. The search word was green.
  • +
  • The search result shows for different parts of speech the synsets +i.e. different meanings for the word.
  • +
  • All underlined texts are hypertext links. There are two types of links: +word links and others. Clicking a word link carries out a search for the word +in the Wordnet database.
  • +
  • Clicking a link of the other type opens a display section of data attached +to that link. Clicking that link a second time closes the section again.
  • +
  • Clicking S: opens a section showing the relations for that synset. +
  • +
  • Clicking on a relation name opens a section that displays the associated +synsets.
  • +
  • Type a search word in the Word field and start the search by the +Enter/Return key or click the Search button.
  • +
+
+''' + +# HTML oriented functions + + +def _bold(txt): + return '%s' % txt + + +def _center(txt): + return '
%s
' % txt + + +def _hlev(n, txt): + return '%s' % (n, txt, n) + + +def _italic(txt): + return '%s' % txt + + +def _li(txt): + return '
  • %s
  • ' % txt + + +def pg(word, body): + ''' + Return a HTML page of NLTK Browser format constructed from the + word and body + + :param word: The word that the body corresponds to + :type word: str + :param body: The HTML body corresponding to the word + :type body: str + :return: a HTML page for the word-body combination + :rtype: str + ''' + return (html_header % word) + body + html_trailer + + +def _ul(txt): + return '
      ' + txt + '
    ' + + +def _abbc(txt): + """ + abbc = asterisks, breaks, bold, center + """ + return _center(_bold('
    ' * 10 + '*' * 10 + ' ' + txt + ' ' + '*' * 10)) + + +full_hyponym_cont_text = _ul(_li(_italic('(has full hyponym continuation)'))) + '\n' + + +def _get_synset(synset_key): + """ + The synset key is the unique name of the synset, this can be + retrived via synset.name() + """ + return wn.synset(synset_key) + + +def _collect_one_synset(word, synset, synset_relations): + ''' + Returns the HTML string for one synset or word + + :param word: the current word + :type word: str + :param synset: a synset + :type synset: synset + :param synset_relations: information about which synset relations + to display. + :type synset_relations: dict(synset_key, set(relation_id)) + :return: The HTML string built for this synset + :rtype: str + ''' + if isinstance(synset, tuple): # It's a word + raise NotImplementedError("word not supported by _collect_one_synset") + + typ = 'S' + pos_tuple = _pos_match((synset.pos(), None, None)) + assert pos_tuple is not None, "pos_tuple is null: synset.pos(): %s" % synset.pos() + descr = pos_tuple[2] + ref = copy.deepcopy(Reference(word, synset_relations)) + ref.toggle_synset(synset) + synset_label = typ + ";" + if synset.name() in synset_relations: + synset_label = _bold(synset_label) + s = '
  • %s (%s) ' % (make_lookup_link(ref, synset_label), descr) + + def format_lemma(w): + w = w.replace('_', ' ') + if w.lower() == word: + return _bold(w) + else: + ref = Reference(w) + return make_lookup_link(ref, w) + + s += ', '.join(format_lemma(l.name()) for l in synset.lemmas()) + + gl = " (%s) %s " % ( + synset.definition(), + "; ".join("\"%s\"" % e for e in synset.examples()), + ) + return s + gl + _synset_relations(word, synset, synset_relations) + '
  • \n' + + +def _collect_all_synsets(word, pos, synset_relations=dict()): + """ + Return a HTML unordered list of synsets for the given word and + part of speech. + """ + return '
      %s\n
    \n' % ''.join( + ( + _collect_one_synset(word, synset, synset_relations) + for synset in wn.synsets(word, pos) + ) + ) + + +def _synset_relations(word, synset, synset_relations): + ''' + Builds the HTML string for the relations of a synset + + :param word: The current word + :type word: str + :param synset: The synset for which we're building the relations. + :type synset: Synset + :param synset_relations: synset keys and relation types for which to display relations. + :type synset_relations: dict(synset_key, set(relation_type)) + :return: The HTML for a synset's relations + :rtype: str + ''' + + if not synset.name() in synset_relations: + return "" + ref = Reference(word, synset_relations) + + def relation_html(r): + if isinstance(r, Synset): + return make_lookup_link(Reference(r.lemma_names()[0]), r.lemma_names()[0]) + elif isinstance(r, Lemma): + return relation_html(r.synset()) + elif isinstance(r, tuple): + # It's probably a tuple containing a Synset and a list of + # similar tuples. This forms a tree of synsets. + return "%s\n
      %s
    \n" % ( + relation_html(r[0]), + ''.join('
  • %s
  • \n' % relation_html(sr) for sr in r[1]), + ) + else: + raise TypeError( + "r must be a synset, lemma or list, it was: type(r) = %s, r = %s" + % (type(r), r) + ) + + def make_synset_html(db_name, disp_name, rels): + synset_html = '%s\n' % make_lookup_link( + copy.deepcopy(ref).toggle_synset_relation(synset, db_name).encode(), + disp_name, + ) + + if db_name in ref.synset_relations[synset.name()]: + synset_html += '
      %s
    \n' % ''.join( + "
  • %s
  • \n" % relation_html(r) for r in rels + ) + + return synset_html + + html = ( + '
      ' + + '\n'.join( + ( + "
    • %s
    • " % make_synset_html(*rel_data) + for rel_data in get_relations_data(word, synset) + if rel_data[2] != [] + ) + ) + + '
    ' + ) + + return html + + +class Reference(object): + """ + A reference to a page that may be generated by page_word + """ + + def __init__(self, word, synset_relations=dict()): + """ + Build a reference to a new page. + + word is the word or words (separated by commas) for which to + search for synsets of + + synset_relations is a dictionary of synset keys to sets of + synset relation identifaiers to unfold a list of synset + relations for. + """ + self.word = word + self.synset_relations = synset_relations + + def encode(self): + """ + Encode this reference into a string to be used in a URL. + """ + # This uses a tuple rather than an object since the python + # pickle representation is much smaller and there is no need + # to represent the complete object. + string = pickle.dumps((self.word, self.synset_relations), -1) + return base64.urlsafe_b64encode(string).decode() + + @staticmethod + def decode(string): + """ + Decode a reference encoded with Reference.encode + """ + string = base64.urlsafe_b64decode(string.encode()) + word, synset_relations = pickle.loads(string) + return Reference(word, synset_relations) + + def toggle_synset_relation(self, synset, relation): + """ + Toggle the display of the relations for the given synset and + relation type. + + This function will throw a KeyError if the synset is currently + not being displayed. + """ + if relation in self.synset_relations[synset.name()]: + self.synset_relations[synset.name()].remove(relation) + else: + self.synset_relations[synset.name()].add(relation) + + return self + + def toggle_synset(self, synset): + """ + Toggle displaying of the relation types for the given synset + """ + if synset.name() in self.synset_relations: + del self.synset_relations[synset.name()] + else: + self.synset_relations[synset.name()] = set() + + return self + + +def make_lookup_link(ref, label): + return '%s' % (ref.encode(), label) + + +def page_from_word(word): + """ + Return a HTML page for the given word. + + :type word: str + :param word: The currently active word + :return: A tuple (page,word), where page is the new current HTML page + to be sent to the browser and + word is the new current word + :rtype: A tuple (str,str) + """ + return page_from_reference(Reference(word)) + + +def page_from_href(href): + ''' + Returns a tuple of the HTML page built and the new current word + + :param href: The hypertext reference to be solved + :type href: str + :return: A tuple (page,word), where page is the new current HTML page + to be sent to the browser and + word is the new current word + :rtype: A tuple (str,str) + ''' + return page_from_reference(Reference.decode(href)) + + +def page_from_reference(href): + ''' + Returns a tuple of the HTML page built and the new current word + + :param href: The hypertext reference to be solved + :type href: str + :return: A tuple (page,word), where page is the new current HTML page + to be sent to the browser and + word is the new current word + :rtype: A tuple (str,str) + ''' + word = href.word + pos_forms = defaultdict(list) + words = word.split(',') + words = [w for w in [w.strip().lower().replace(' ', '_') for w in words] if w != ""] + if len(words) == 0: + # No words were found. + return "", "Please specify a word to search for." + + # This looks up multiple words at once. This is probably not + # necessary and may lead to problems. 
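+    # Note: wn.morphy() maps each surface form to a base form (lemma) for the
+    # given part of speech and returns None when no base form is known, so
+    # only forms that WordNet recognises end up in pos_forms.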
+ for w in words: + for pos in [wn.NOUN, wn.VERB, wn.ADJ, wn.ADV]: + form = wn.morphy(w, pos) + if form and form not in pos_forms[pos]: + pos_forms[pos].append(form) + body = '' + for pos, pos_str, name in _pos_tuples(): + if pos in pos_forms: + body += _hlev(3, name) + '\n' + for w in pos_forms[pos]: + # Not all words of exc files are in the database, skip + # to the next word if a KeyError is raised. + try: + body += _collect_all_synsets(w, pos, href.synset_relations) + except KeyError: + pass + if not body: + body = "The word or words '%s' where not found in the dictonary." % word + return body, word + + +##################################################################### +# Static pages +##################################################################### + + +def get_static_page_by_path(path): + """ + Return a static HTML page from the path given. + """ + if path == "index_2.html": + return get_static_index_page(False) + elif path == "index.html": + return get_static_index_page(True) + elif path == "NLTK Wordnet Browser Database Info.html": + return "Display of Wordnet Database Statistics is not supported" + elif path == "upper_2.html": + return get_static_upper_page(False) + elif path == "upper.html": + return get_static_upper_page(True) + elif path == "web_help.html": + return get_static_web_help_page() + elif path == "wx_help.html": + return get_static_wx_help_page() + else: + return "Internal error: Path for static page '%s' is unknown" % path + + +def get_static_web_help_page(): + """ + Return the static web help page. + """ + return """ + + + + + + NLTK Wordnet Browser display of: * Help * + + +

    NLTK Wordnet Browser Help

    +

    The NLTK Wordnet Browser is a tool to use in browsing the Wordnet database. It tries to behave like the Wordnet project's web browser but the difference is that the NLTK Wordnet Browser uses a local Wordnet database. +

    You are using the Javascript client part of the NLTK Wordnet BrowServer. We assume that your browser has tabbed browsing enabled.

    +

    For background information on Wordnet, see the Wordnet project home page: http://wordnet.princeton.edu/. For more information on the NLTK project, see the project home: +http://nltk.sourceforge.net/. To get an idea of what the Wordnet version used by this browser includes choose Show Database Info from the View submenu.

    +

    Word search

    +

    The word to be searched is typed into the New Word field and the search started with Enter or by clicking the Search button. There is no uppercase/lowercase distinction: the search word is transformed to lowercase before the search.

    +

    In addition, the word does not have to be in base form. The browser tries to find the possible base form(s) by making certain morphological substitutions. Typing fLIeS as an obscure example gives one this. Click the previous link to see what this kind of search looks like and then come back to this page by using the Alt+LeftArrow key combination.

    +

    The result of a search is a display of one or more +synsets for every part of speech in which a form of the +search word was found to occur. A synset is a set of words +having the same sense or meaning. Each word in a synset that is +underlined is a hyperlink which can be clicked to trigger an +automatic search for that word.

    +

    Every synset has a hyperlink S: at the start of its +display line. Clicking that symbol shows you the name of every +relation that this synset is part of. Every relation name is a hyperlink that opens up a display for that relation. Clicking it another time closes the display again. Clicking another relation name on a line that has an opened relation closes the open relation and opens the clicked relation.

    +

    It is also possible to give two or more words or collocations to be searched at the same time, separating them with a comma, like this cheer up,clear up, for example. Click the previous link to see what this kind of search looks like and then come back to this page by using the Alt+LeftArrow key combination. As you can see, the search result includes the synsets found in the same order as the forms were given in the search field.

    +

    +There are also word level (lexical) relations recorded in the Wordnet database. Opening this kind of relation displays lines with a hyperlink W: at their beginning. Clicking this link shows more info on the word in question.

    +

    The Buttons

    +

    The Search and Help buttons need no more explanation.

    +

    The Show Database Info button shows a collection of Wordnet database statistics.

    +

    The Shutdown the Server button is shown only for the first client of the BrowServer program, i.e. for the client that is automatically launched when the BrowServer is started, but not for succeeding clients, in order to protect the server from accidental shutdowns. +

    + +""" + + +def get_static_welcome_message(): + """ + Get the static welcome page. + """ + return """ +

    Search Help

    +
    • The display below the line is an example of the output the browser +shows you when you enter a search word. The search word was green.
    • +
    • The search result shows for different parts of speech the synsets +i.e. different meanings for the word.
    • +
    • All underlined texts are hypertext links. There are two types of links: +word links and others. Clicking a word link carries out a search for the word +in the Wordnet database.
    • +
    • Clicking a link of the other type opens a display section of data attached +to that link. Clicking that link a second time closes the section again.
    • +
    • Clicking S: opens a section showing the relations for that synset.
    • +
    • Clicking on a relation name opens a section that displays the associated +synsets.
    • +
    • Type a search word in the Next Word field and start the search by the +Enter/Return key or click the Search button.
    • +
    +""" + + +def get_static_index_page(with_shutdown): + """ + Get the static index page. + """ + template = """ + + + + + NLTK Wordnet Browser + + + + + + + +""" + if with_shutdown: + upper_link = "upper.html" + else: + upper_link = "upper_2.html" + + return template % upper_link + + +def get_static_upper_page(with_shutdown): + """ + Return the upper frame page, + + If with_shutdown is True then a 'shutdown' button is also provided + to shutdown the server. + """ + template = """ + + + + + + Untitled Document + + +
    + Current Word:  + Next Word:  + +
    + Help + %s + + + +""" + if with_shutdown: + shutdown_link = "Shutdown" + else: + shutdown_link = "" + + return template % shutdown_link + + +def usage(): + """ + Display the command line help message. + """ + print(__doc__) + + +def app(): + # Parse and interpret options. + (opts, _) = getopt.getopt( + argv[1:], "l:p:sh", ["logfile=", "port=", "server-mode", "help"] + ) + port = 8000 + server_mode = False + help_mode = False + logfilename = None + for (opt, value) in opts: + if (opt == "-l") or (opt == "--logfile"): + logfilename = str(value) + elif (opt == "-p") or (opt == "--port"): + port = int(value) + elif (opt == "-s") or (opt == "--server-mode"): + server_mode = True + elif (opt == "-h") or (opt == "--help"): + help_mode = True + + if help_mode: + usage() + else: + wnb(port, not server_mode, logfilename) + + +if __name__ == '__main__': + app() + +__all__ = ['app'] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/book.py b/venv.bak/lib/python3.7/site-packages/nltk/book.py new file mode 100644 index 0000000..e130ecd --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/book.py @@ -0,0 +1,214 @@ +# Natural Language Toolkit: Some texts for exploration in chapter 1 of the book +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function + +from nltk.corpus import ( + gutenberg, + genesis, + inaugural, + nps_chat, + webtext, + treebank, + wordnet, +) +from nltk.text import Text +from nltk.probability import FreqDist +from nltk.util import bigrams + +print("*** Introductory Examples for the NLTK Book ***") +print("Loading text1, ..., text9 and sent1, ..., sent9") +print("Type the name of the text or sentence to view it.") +print("Type: 'texts()' or 'sents()' to list the materials.") + +text1 = Text(gutenberg.words('melville-moby_dick.txt')) +print("text1:", text1.name) + +text2 = Text(gutenberg.words('austen-sense.txt')) +print("text2:", text2.name) + +text3 = Text(genesis.words('english-kjv.txt'), name="The Book of Genesis") +print("text3:", text3.name) + +text4 = Text(inaugural.words(), name="Inaugural Address Corpus") +print("text4:", text4.name) + +text5 = Text(nps_chat.words(), name="Chat Corpus") +print("text5:", text5.name) + +text6 = Text(webtext.words('grail.txt'), name="Monty Python and the Holy Grail") +print("text6:", text6.name) + +text7 = Text(treebank.words(), name="Wall Street Journal") +print("text7:", text7.name) + +text8 = Text(webtext.words('singles.txt'), name="Personals Corpus") +print("text8:", text8.name) + +text9 = Text(gutenberg.words('chesterton-thursday.txt')) +print("text9:", text9.name) + + +def texts(): + print("text1:", text1.name) + print("text2:", text2.name) + print("text3:", text3.name) + print("text4:", text4.name) + print("text5:", text5.name) + print("text6:", text6.name) + print("text7:", text7.name) + print("text8:", text8.name) + print("text9:", text9.name) + + +sent1 = ["Call", "me", "Ishmael", "."] +sent2 = [ + "The", + "family", + "of", + "Dashwood", + "had", + "long", + "been", + "settled", + "in", + "Sussex", + ".", +] +sent3 = [ + "In", + "the", + "beginning", + "God", + "created", + "the", + "heaven", + "and", + "the", + "earth", + ".", +] +sent4 = [ + "Fellow", + "-", + "Citizens", + "of", + "the", + "Senate", + "and", + "of", + "the", + "House", + "of", + "Representatives", + ":", +] +sent5 = [ + "I", + "have", + "a", + "problem", + "with", + "people", + "PMing", + "me", + "to", + "lol", + "JOIN", +] +sent6 = [ + 'SCENE', + 
'1', + ':', + '[', + 'wind', + ']', + '[', + 'clop', + 'clop', + 'clop', + ']', + 'KING', + 'ARTHUR', + ':', + 'Whoa', + 'there', + '!', +] +sent7 = [ + "Pierre", + "Vinken", + ",", + "61", + "years", + "old", + ",", + "will", + "join", + "the", + "board", + "as", + "a", + "nonexecutive", + "director", + "Nov.", + "29", + ".", +] +sent8 = [ + '25', + 'SEXY', + 'MALE', + ',', + 'seeks', + 'attrac', + 'older', + 'single', + 'lady', + ',', + 'for', + 'discreet', + 'encounters', + '.', +] +sent9 = [ + "THE", + "suburb", + "of", + "Saffron", + "Park", + "lay", + "on", + "the", + "sunset", + "side", + "of", + "London", + ",", + "as", + "red", + "and", + "ragged", + "as", + "a", + "cloud", + "of", + "sunset", + ".", +] + + +def sents(): + print("sent1:", " ".join(sent1)) + print("sent2:", " ".join(sent2)) + print("sent3:", " ".join(sent3)) + print("sent4:", " ".join(sent4)) + print("sent5:", " ".join(sent5)) + print("sent6:", " ".join(sent6)) + print("sent7:", " ".join(sent7)) + print("sent8:", " ".join(sent8)) + print("sent9:", " ".join(sent9)) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/ccg/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/ccg/__init__.py new file mode 100644 index 0000000..40515aa --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/ccg/__init__.py @@ -0,0 +1,34 @@ +# Natural Language Toolkit: Combinatory Categorial Grammar +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Graeme Gange +# URL: +# For license information, see LICENSE.TXT + +""" +Combinatory Categorial Grammar. + +For more information see nltk/doc/contrib/ccg/ccg.pdf +""" + +from nltk.ccg.combinator import ( + UndirectedBinaryCombinator, + DirectedBinaryCombinator, + ForwardCombinator, + BackwardCombinator, + UndirectedFunctionApplication, + ForwardApplication, + BackwardApplication, + UndirectedComposition, + ForwardComposition, + BackwardComposition, + BackwardBx, + UndirectedSubstitution, + ForwardSubstitution, + BackwardSx, + UndirectedTypeRaise, + ForwardT, + BackwardT, +) +from nltk.ccg.chart import CCGEdge, CCGLeafEdge, CCGChartParser, CCGChart +from nltk.ccg.lexicon import CCGLexicon diff --git a/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..44d72bd Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/api.cpython-37.pyc new file mode 100644 index 0000000..00e025d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/api.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/chart.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/chart.cpython-37.pyc new file mode 100644 index 0000000..c9dbb0e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/chart.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/combinator.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/combinator.cpython-37.pyc new file mode 100644 index 0000000..ebfe456 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/combinator.cpython-37.pyc differ diff --git 
a/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/lexicon.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/lexicon.cpython-37.pyc new file mode 100644 index 0000000..f44721c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/lexicon.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/logic.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/logic.cpython-37.pyc new file mode 100644 index 0000000..6a6347c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/ccg/__pycache__/logic.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/ccg/api.py b/venv.bak/lib/python3.7/site-packages/nltk/ccg/api.py new file mode 100644 index 0000000..7173ea0 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/ccg/api.py @@ -0,0 +1,366 @@ +# Natural Language Toolkit: CCG Categories +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Graeme Gange +# URL: +# For license information, see LICENSE.TXT +from __future__ import unicode_literals +from functools import total_ordering + +from abc import ABCMeta, abstractmethod +from six import add_metaclass + +from nltk.internals import raise_unorderable_types +from nltk.compat import python_2_unicode_compatible, unicode_repr + + +@add_metaclass(ABCMeta) +@total_ordering +class AbstractCCGCategory(object): + ''' + Interface for categories in combinatory grammars. + ''' + + @abstractmethod + def is_primitive(self): + """ + Returns true if the category is primitive. + """ + + @abstractmethod + def is_function(self): + """ + Returns true if the category is a function application. + """ + + @abstractmethod + def is_var(self): + """ + Returns true if the category is a variable. + """ + + @abstractmethod + def substitute(self, substitutions): + """ + Takes a set of (var, category) substitutions, and replaces every + occurrence of the variable with the corresponding category. + """ + + @abstractmethod + def can_unify(self, other): + """ + Determines whether two categories can be unified. + - Returns None if they cannot be unified + - Returns a list of necessary substitutions if they can. + """ + + # Utility functions: comparison, strings and hashing. + @abstractmethod + def __str__(self): + pass + + def __eq__(self, other): + return ( + self.__class__ is other.__class__ + and self._comparison_key == other._comparison_key + ) + + def __ne__(self, other): + return not self == other + + def __lt__(self, other): + if not isinstance(other, AbstractCCGCategory): + raise_unorderable_types("<", self, other) + if self.__class__ is other.__class__: + return self._comparison_key < other._comparison_key + else: + return self.__class__.__name__ < other.__class__.__name__ + + def __hash__(self): + try: + return self._hash + except AttributeError: + self._hash = hash(self._comparison_key) + return self._hash + + +@python_2_unicode_compatible +class CCGVar(AbstractCCGCategory): + ''' + Class representing a variable CCG category. + Used for conjunctions (and possibly type-raising, if implemented as a + unary rule). 
+ ''' + + _maxID = 0 + + def __init__(self, prim_only=False): + """Initialize a variable (selects a new identifier) + + :param prim_only: a boolean that determines whether the variable is + restricted to primitives + :type prim_only: bool + """ + self._id = self.new_id() + self._prim_only = prim_only + self._comparison_key = self._id + + @classmethod + def new_id(cls): + """ + A class method allowing generation of unique variable identifiers. + """ + cls._maxID = cls._maxID + 1 + return cls._maxID - 1 + + @classmethod + def reset_id(cls): + cls._maxID = 0 + + def is_primitive(self): + return False + + def is_function(self): + return False + + def is_var(self): + return True + + def substitute(self, substitutions): + """If there is a substitution corresponding to this variable, + return the substituted category. + """ + for (var, cat) in substitutions: + if var == self: + return cat + return self + + def can_unify(self, other): + """ If the variable can be replaced with other + a substitution is returned. + """ + if other.is_primitive() or not self._prim_only: + return [(self, other)] + return None + + def id(self): + return self._id + + def __str__(self): + return "_var" + str(self._id) + + +@total_ordering +@python_2_unicode_compatible +class Direction(object): + ''' + Class representing the direction of a function application. + Also contains maintains information as to which combinators + may be used with the category. + ''' + + def __init__(self, dir, restrictions): + self._dir = dir + self._restrs = restrictions + self._comparison_key = (dir, tuple(restrictions)) + + # Testing the application direction + def is_forward(self): + return self._dir == '/' + + def is_backward(self): + return self._dir == '\\' + + def dir(self): + return self._dir + + def restrs(self): + """A list of restrictions on the combinators. + '.' denotes that permuting operations are disallowed + ',' denotes that function composition is disallowed + '_' denotes that the direction has variable restrictions. + (This is redundant in the current implementation of type-raising) + """ + return self._restrs + + def is_variable(self): + return self._restrs == '_' + + # Unification and substitution of variable directions. + # Used only if type-raising is implemented as a unary rule, as it + # must inherit restrictions from the argument category. + def can_unify(self, other): + if other.is_variable(): + return [('_', self.restrs())] + elif self.is_variable(): + return [('_', other.restrs())] + else: + if self.restrs() == other.restrs(): + return [] + return None + + def substitute(self, subs): + if not self.is_variable(): + return self + + for (var, restrs) in subs: + if var == '_': + return Direction(self._dir, restrs) + return self + + # Testing permitted combinators + def can_compose(self): + return ',' not in self._restrs + + def can_cross(self): + return '.' 
not in self._restrs + + def __eq__(self, other): + return ( + self.__class__ is other.__class__ + and self._comparison_key == other._comparison_key + ) + + def __ne__(self, other): + return not self == other + + def __lt__(self, other): + if not isinstance(other, Direction): + raise_unorderable_types("<", self, other) + if self.__class__ is other.__class__: + return self._comparison_key < other._comparison_key + else: + return self.__class__.__name__ < other.__class__.__name__ + + def __hash__(self): + try: + return self._hash + except AttributeError: + self._hash = hash(self._comparison_key) + return self._hash + + def __str__(self): + r_str = "" + for r in self._restrs: + r_str = r_str + "%s" % r + return "%s%s" % (self._dir, r_str) + + # The negation operator reverses the direction of the application + def __neg__(self): + if self._dir == '/': + return Direction('\\', self._restrs) + else: + return Direction('/', self._restrs) + + +@python_2_unicode_compatible +class PrimitiveCategory(AbstractCCGCategory): + ''' + Class representing primitive categories. + Takes a string representation of the category, and a + list of strings specifying the morphological subcategories. + ''' + + def __init__(self, categ, restrictions=[]): + self._categ = categ + self._restrs = restrictions + self._comparison_key = (categ, tuple(restrictions)) + + def is_primitive(self): + return True + + def is_function(self): + return False + + def is_var(self): + return False + + def restrs(self): + return self._restrs + + def categ(self): + return self._categ + + # Substitution does nothing to a primitive category + def substitute(self, subs): + return self + + # A primitive can be unified with a class of the same + # base category, given that the other category shares all + # of its subclasses, or with a variable. + def can_unify(self, other): + if not other.is_primitive(): + return None + if other.is_var(): + return [(other, self)] + if other.categ() == self.categ(): + for restr in self._restrs: + if restr not in other.restrs(): + return None + return [] + return None + + def __str__(self): + if self._restrs == []: + return "%s" % self._categ + restrictions = "[%s]" % ",".join(unicode_repr(r) for r in self._restrs) + return "%s%s" % (self._categ, restrictions) + + +@python_2_unicode_compatible +class FunctionalCategory(AbstractCCGCategory): + ''' + Class that represents a function application category. + Consists of argument and result categories, together with + an application direction. + ''' + + def __init__(self, res, arg, dir): + self._res = res + self._arg = arg + self._dir = dir + self._comparison_key = (arg, dir, res) + + def is_primitive(self): + return False + + def is_function(self): + return True + + def is_var(self): + return False + + # Substitution returns the category consisting of the + # substitution applied to each of its constituents. + def substitute(self, subs): + sub_res = self._res.substitute(subs) + sub_dir = self._dir.substitute(subs) + sub_arg = self._arg.substitute(subs) + return FunctionalCategory(sub_res, sub_arg, self._dir) + + # A function can unify with another function, so long as its + # constituents can unify, or with an unrestricted variable. 
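A minimal usage sketch of the category classes in the api.py hunk above (PrimitiveCategory, Direction, FunctionalCategory), assuming the nltk package is importable; the concrete categories S and NP and the restriction markers are invented for illustration:

    from nltk.ccg.api import PrimitiveCategory, FunctionalCategory, Direction

    S, NP = PrimitiveCategory('S'), PrimitiveCategory('NP')
    backward = Direction('\\', [])    # backward application, no restrictions
    forward = Direction('/', ['.'])   # '.' blocks the permuting (crossed) combinators

    iv = FunctionalCategory(S, NP, backward)   # S\NP
    tv = FunctionalCategory(iv, NP, forward)   # (S\NP)/.NP

    print(tv)                   # ((S\NP)/.NP)
    print(tv.is_function())     # True
    print(forward.can_cross())  # False, because of the '.' restriction
    print(NP.can_unify(NP))     # [] means unifiable with no substitutions needed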
+ def can_unify(self, other): + if other.is_var(): + return [(other, self)] + if other.is_function(): + sa = self._res.can_unify(other.res()) + sd = self._dir.can_unify(other.dir()) + if sa is not None and sd is not None: + sb = self._arg.substitute(sa).can_unify(other.arg().substitute(sa)) + if sb is not None: + return sa + sb + return None + + # Constituent accessors + def arg(self): + return self._arg + + def res(self): + return self._res + + def dir(self): + return self._dir + + def __str__(self): + return "(%s%s%s)" % (self._res, self._dir, self._arg) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/ccg/chart.py b/venv.bak/lib/python3.7/site-packages/nltk/ccg/chart.py new file mode 100644 index 0000000..bd410c7 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/ccg/chart.py @@ -0,0 +1,485 @@ +# Natural Language Toolkit: Combinatory Categorial Grammar +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Graeme Gange +# URL: +# For license information, see LICENSE.TXT + +""" +The lexicon is constructed by calling +``lexicon.fromstring()``. + +In order to construct a parser, you also need a rule set. +The standard English rules are provided in chart as +``chart.DefaultRuleSet``. + +The parser can then be constructed by calling, for example: +``parser = chart.CCGChartParser(, )`` + +Parsing is then performed by running +``parser.parse(.split())``. + +While this returns a list of trees, the default representation +of the produced trees is not very enlightening, particularly +given that it uses the same tree class as the CFG parsers. +It is probably better to call: +``chart.printCCGDerivation()`` +which should print a nice representation of the derivation. + +This entire process is shown far more clearly in the demonstration: +python chart.py +""" +from __future__ import print_function, division, unicode_literals + +import itertools + +from six import string_types + +from nltk.parse import ParserI +from nltk.parse.chart import AbstractChartRule, EdgeI, Chart +from nltk.tree import Tree + +from nltk.ccg.lexicon import fromstring, Token +from nltk.ccg.combinator import ( + ForwardT, + BackwardT, + ForwardApplication, + BackwardApplication, + ForwardComposition, + BackwardComposition, + ForwardSubstitution, + BackwardBx, + BackwardSx, +) +from nltk.compat import python_2_unicode_compatible +from nltk.ccg.combinator import * +from nltk.ccg.logic import * +from nltk.sem.logic import * + +# Based on the EdgeI class from NLTK. +# A number of the properties of the EdgeI interface don't +# transfer well to CCGs, however. +class CCGEdge(EdgeI): + def __init__(self, span, categ, rule): + self._span = span + self._categ = categ + self._rule = rule + self._comparison_key = (span, categ, rule) + + # Accessors + def lhs(self): + return self._categ + + def span(self): + return self._span + + def start(self): + return self._span[0] + + def end(self): + return self._span[1] + + def length(self): + return self._span[1] - self.span[0] + + def rhs(self): + return () + + def dot(self): + return 0 + + def is_complete(self): + return True + + def is_incomplete(self): + return False + + def nextsym(self): + return None + + def categ(self): + return self._categ + + def rule(self): + return self._rule + + +class CCGLeafEdge(EdgeI): + ''' + Class representing leaf edges in a CCG derivation. 
+ ''' + + def __init__(self, pos, token, leaf): + self._pos = pos + self._token = token + self._leaf = leaf + self._comparison_key = (pos, token.categ(), leaf) + + # Accessors + def lhs(self): + return self._token.categ() + + def span(self): + return (self._pos, self._pos + 1) + + def start(self): + return self._pos + + def end(self): + return self._pos + 1 + + def length(self): + return 1 + + def rhs(self): + return self._leaf + + def dot(self): + return 0 + + def is_complete(self): + return True + + def is_incomplete(self): + return False + + def nextsym(self): + return None + + def token(self): + return self._token + + def categ(self): + return self._token.categ() + + def leaf(self): + return self._leaf + + +@python_2_unicode_compatible +class BinaryCombinatorRule(AbstractChartRule): + ''' + Class implementing application of a binary combinator to a chart. + Takes the directed combinator to apply. + ''' + + NUMEDGES = 2 + + def __init__(self, combinator): + self._combinator = combinator + + # Apply a combinator + def apply(self, chart, grammar, left_edge, right_edge): + # The left & right edges must be touching. + if not (left_edge.end() == right_edge.start()): + return + + # Check if the two edges are permitted to combine. + # If so, generate the corresponding edge. + if self._combinator.can_combine(left_edge.categ(), right_edge.categ()): + for res in self._combinator.combine(left_edge.categ(), right_edge.categ()): + new_edge = CCGEdge( + span=(left_edge.start(), right_edge.end()), + categ=res, + rule=self._combinator, + ) + if chart.insert(new_edge, (left_edge, right_edge)): + yield new_edge + + # The representation of the combinator (for printing derivations) + def __str__(self): + return "%s" % self._combinator + + +# Type-raising must be handled slightly differently to the other rules, as the +# resulting rules only span a single edge, rather than both edges. +@python_2_unicode_compatible +class ForwardTypeRaiseRule(AbstractChartRule): + ''' + Class for applying forward type raising + ''' + + NUMEDGES = 2 + + def __init__(self): + self._combinator = ForwardT + + def apply(self, chart, grammar, left_edge, right_edge): + if not (left_edge.end() == right_edge.start()): + return + + for res in self._combinator.combine(left_edge.categ(), right_edge.categ()): + new_edge = CCGEdge(span=left_edge.span(), categ=res, rule=self._combinator) + if chart.insert(new_edge, (left_edge,)): + yield new_edge + + def __str__(self): + return "%s" % self._combinator + + +@python_2_unicode_compatible +class BackwardTypeRaiseRule(AbstractChartRule): + ''' + Class for applying backward type raising. + ''' + + NUMEDGES = 2 + + def __init__(self): + self._combinator = BackwardT + + def apply(self, chart, grammar, left_edge, right_edge): + if not (left_edge.end() == right_edge.start()): + return + + for res in self._combinator.combine(left_edge.categ(), right_edge.categ()): + new_edge = CCGEdge(span=right_edge.span(), categ=res, rule=self._combinator) + if chart.insert(new_edge, (right_edge,)): + yield new_edge + + def __str__(self): + return "%s" % self._combinator + + +# Common sets of combinators used for English derivations. 
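The module docstring at the top of this chart.py hunk outlines the intended workflow: build a lexicon with lexicon.fromstring(), pick a rule set such as chart.DefaultRuleSet (assembled just below), construct a CCGChartParser, and pretty-print derivations with printCCGDerivation(). A compact sketch of that workflow, in the spirit of the demo() at the bottom of the same file; the toy lexicon and sentence are invented, and nltk is assumed importable:

    from nltk.ccg import chart, lexicon

    toy_lex = lexicon.fromstring('''
        :- S, NP, N, VP        # primitive categories, S is the target
        Det :: NP/N
        TV :: VP/NP
        Modal :: (S\\NP)/VP    # backslashes must be escaped
        I => NP
        will => Modal
        cook => TV
        the => Det
        bacon => N
        ''')

    parser = chart.CCGChartParser(toy_lex, chart.DefaultRuleSet)
    for parse in parser.parse("I will cook the bacon".split()):
        chart.printCCGDerivation(parse)
        break    # one derivation is enough for illustration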
+ApplicationRuleSet = [ + BinaryCombinatorRule(ForwardApplication), + BinaryCombinatorRule(BackwardApplication), +] +CompositionRuleSet = [ + BinaryCombinatorRule(ForwardComposition), + BinaryCombinatorRule(BackwardComposition), + BinaryCombinatorRule(BackwardBx), +] +SubstitutionRuleSet = [ + BinaryCombinatorRule(ForwardSubstitution), + BinaryCombinatorRule(BackwardSx), +] +TypeRaiseRuleSet = [ForwardTypeRaiseRule(), BackwardTypeRaiseRule()] + +# The standard English rule set. +DefaultRuleSet = ( + ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet + TypeRaiseRuleSet +) + + +class CCGChartParser(ParserI): + ''' + Chart parser for CCGs. + Based largely on the ChartParser class from NLTK. + ''' + + def __init__(self, lexicon, rules, trace=0): + self._lexicon = lexicon + self._rules = rules + self._trace = trace + + def lexicon(self): + return self._lexicon + + # Implements the CYK algorithm + def parse(self, tokens): + tokens = list(tokens) + chart = CCGChart(list(tokens)) + lex = self._lexicon + + # Initialize leaf edges. + for index in range(chart.num_leaves()): + for token in lex.categories(chart.leaf(index)): + new_edge = CCGLeafEdge(index, token, chart.leaf(index)) + chart.insert(new_edge, ()) + + # Select a span for the new edges + for span in range(2, chart.num_leaves() + 1): + for start in range(0, chart.num_leaves() - span + 1): + # Try all possible pairs of edges that could generate + # an edge for that span + for part in range(1, span): + lstart = start + mid = start + part + rend = start + span + + for left in chart.select(span=(lstart, mid)): + for right in chart.select(span=(mid, rend)): + # Generate all possible combinations of the two edges + for rule in self._rules: + edges_added_by_rule = 0 + for newedge in rule.apply(chart, lex, left, right): + edges_added_by_rule += 1 + + # Output the resulting parses + return chart.parses(lex.start()) + + +class CCGChart(Chart): + def __init__(self, tokens): + Chart.__init__(self, tokens) + + # Constructs the trees for a given parse. 
Unfortnunately, the parse trees need to be + # constructed slightly differently to those in the default Chart class, so it has to + # be reimplemented + def _trees(self, edge, complete, memo, tree_class): + assert complete, "CCGChart cannot build incomplete trees" + + if edge in memo: + return memo[edge] + + if isinstance(edge, CCGLeafEdge): + word = tree_class(edge.token(), [self._tokens[edge.start()]]) + leaf = tree_class((edge.token(), "Leaf"), [word]) + memo[edge] = [leaf] + return [leaf] + + memo[edge] = [] + trees = [] + + for cpl in self.child_pointer_lists(edge): + child_choices = [self._trees(cp, complete, memo, tree_class) for cp in cpl] + for children in itertools.product(*child_choices): + lhs = ( + Token( + self._tokens[edge.start() : edge.end()], + edge.lhs(), + compute_semantics(children, edge), + ), + str(edge.rule()), + ) + trees.append(tree_class(lhs, children)) + + memo[edge] = trees + return trees + + +def compute_semantics(children, edge): + if children[0].label()[0].semantics() is None: + return None + + if len(children) == 2: + if isinstance(edge.rule(), BackwardCombinator): + children = [children[1], children[0]] + + combinator = edge.rule()._combinator + function = children[0].label()[0].semantics() + argument = children[1].label()[0].semantics() + + if isinstance(combinator, UndirectedFunctionApplication): + return compute_function_semantics(function, argument) + elif isinstance(combinator, UndirectedComposition): + return compute_composition_semantics(function, argument) + elif isinstance(combinator, UndirectedSubstitution): + return compute_substitution_semantics(function, argument) + else: + raise AssertionError('Unsupported combinator \'' + combinator + '\'') + else: + return compute_type_raised_semantics(children[0].label()[0].semantics()) + + +# -------- +# Displaying derivations +# -------- +def printCCGDerivation(tree): + # Get the leaves and initial categories + leafcats = tree.pos() + leafstr = '' + catstr = '' + + # Construct a string with both the leaf word and corresponding + # category aligned. + for (leaf, cat) in leafcats: + str_cat = "%s" % cat + nextlen = 2 + max(len(leaf), len(str_cat)) + lcatlen = (nextlen - len(str_cat)) // 2 + rcatlen = lcatlen + (nextlen - len(str_cat)) % 2 + catstr += ' ' * lcatlen + str_cat + ' ' * rcatlen + lleaflen = (nextlen - len(leaf)) // 2 + rleaflen = lleaflen + (nextlen - len(leaf)) % 2 + leafstr += ' ' * lleaflen + leaf + ' ' * rleaflen + print(leafstr.rstrip()) + print(catstr.rstrip()) + + # Display the derivation steps + printCCGTree(0, tree) + + +# Prints the sequence of derivation steps. +def printCCGTree(lwidth, tree): + rwidth = lwidth + + # Is a leaf (word). + # Increment the span by the space occupied by the leaf. + if not isinstance(tree, Tree): + return 2 + lwidth + len(tree) + + # Find the width of the current derivation step + for child in tree: + rwidth = max(rwidth, printCCGTree(rwidth, child)) + + # Is a leaf node. + # Don't print anything, but account for the space occupied. + if not isinstance(tree.label(), tuple): + return max( + rwidth, 2 + lwidth + len("%s" % tree.label()), 2 + lwidth + len(tree[0]) + ) + + (token, op) = tree.label() + + if op == 'Leaf': + return rwidth + + # Pad to the left with spaces, followed by a sequence of '-' + # and the derivation rule. + print(lwidth * ' ' + (rwidth - lwidth) * '-' + "%s" % op) + # Print the resulting category on a new line. 
+ str_res = "%s" % (token.categ()) + if token.semantics() is not None: + str_res += " {" + str(token.semantics()) + "}" + respadlen = (rwidth - lwidth - len(str_res)) // 2 + lwidth + print(respadlen * ' ' + str_res) + return rwidth + + +### Demonstration code + +# Construct the lexicon +lex = fromstring( + ''' + :- S, NP, N, VP # Primitive categories, S is the target primitive + + Det :: NP/N # Family of words + Pro :: NP + TV :: VP/NP + Modal :: (S\\NP)/VP # Backslashes need to be escaped + + I => Pro # Word -> Category mapping + you => Pro + + the => Det + + # Variables have the special keyword 'var' + # '.' prevents permutation + # ',' prevents composition + and => var\\.,var/.,var + + which => (N\\N)/(S/NP) + + will => Modal # Categories can be either explicit, or families. + might => Modal + + cook => TV + eat => TV + + mushrooms => N + parsnips => N + bacon => N + ''' +) + + +def demo(): + parser = CCGChartParser(lex, DefaultRuleSet) + for parse in parser.parse("I might cook and eat the bacon".split()): + printCCGDerivation(parse) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/ccg/combinator.py b/venv.bak/lib/python3.7/site-packages/nltk/ccg/combinator.py new file mode 100644 index 0000000..56f15ed --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/ccg/combinator.py @@ -0,0 +1,352 @@ +# Natural Language Toolkit: Combinatory Categorial Grammar +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Graeme Gange +# URL: +# For license information, see LICENSE.TXT +""" +CCG Combinators +""" + +from __future__ import unicode_literals +from abc import ABCMeta, abstractmethod +from six import add_metaclass + +from nltk.compat import python_2_unicode_compatible +from nltk.ccg.api import FunctionalCategory + + +@add_metaclass(ABCMeta) +class UndirectedBinaryCombinator(object): + """ + Abstract class for representing a binary combinator. + Merely defines functions for checking if the function and argument + are able to be combined, and what the resulting category is. + + Note that as no assumptions are made as to direction, the unrestricted + combinators can perform all backward, forward and crossed variations + of the combinators; these restrictions must be added in the rule + class. + """ + + @abstractmethod + def can_combine(self, function, argument): + pass + + @abstractmethod + def combine(self, function, argument): + pass + + +@add_metaclass(ABCMeta) +class DirectedBinaryCombinator(object): + """ + Wrapper for the undirected binary combinator. + It takes left and right categories, and decides which is to be + the function, and which the argument. + It then decides whether or not they can be combined. + """ + + @abstractmethod + def can_combine(self, left, right): + pass + + @abstractmethod + def combine(self, left, right): + pass + + +@python_2_unicode_compatible +class ForwardCombinator(DirectedBinaryCombinator): + """ + Class representing combinators where the primary functor is on the left. + + Takes an undirected combinator, and a predicate which adds constraints + restricting the cases in which it may apply. 
+ """ + + def __init__(self, combinator, predicate, suffix=''): + self._combinator = combinator + self._predicate = predicate + self._suffix = suffix + + def can_combine(self, left, right): + return self._combinator.can_combine(left, right) and self._predicate( + left, right + ) + + def combine(self, left, right): + for cat in self._combinator.combine(left, right): + yield cat + + def __str__(self): + return ">%s%s" % (self._combinator, self._suffix) + + +@python_2_unicode_compatible +class BackwardCombinator(DirectedBinaryCombinator): + """ + The backward equivalent of the ForwardCombinator class. + """ + + def __init__(self, combinator, predicate, suffix=''): + self._combinator = combinator + self._predicate = predicate + self._suffix = suffix + + def can_combine(self, left, right): + return self._combinator.can_combine(right, left) and self._predicate( + left, right + ) + + def combine(self, left, right): + for cat in self._combinator.combine(right, left): + yield cat + + def __str__(self): + return "<%s%s" % (self._combinator, self._suffix) + + +@python_2_unicode_compatible +class UndirectedFunctionApplication(UndirectedBinaryCombinator): + """ + Class representing function application. + Implements rules of the form: + X/Y Y -> X (>) + And the corresponding backwards application rule + """ + + def can_combine(self, function, argument): + if not function.is_function(): + return False + + return not function.arg().can_unify(argument) is None + + def combine(self, function, argument): + if not function.is_function(): + return + + subs = function.arg().can_unify(argument) + if subs is None: + return + + yield function.res().substitute(subs) + + def __str__(self): + return '' + + +# Predicates for function application. + +# Ensures the left functor takes an argument on the right +def forwardOnly(left, right): + return left.dir().is_forward() + + +# Ensures the right functor takes an argument on the left +def backwardOnly(left, right): + return right.dir().is_backward() + + +# Application combinator instances +ForwardApplication = ForwardCombinator(UndirectedFunctionApplication(), forwardOnly) +BackwardApplication = BackwardCombinator(UndirectedFunctionApplication(), backwardOnly) + + +@python_2_unicode_compatible +class UndirectedComposition(UndirectedBinaryCombinator): + """ + Functional composition (harmonic) combinator. + Implements rules of the form + X/Y Y/Z -> X/Z (B>) + And the corresponding backwards and crossed variations. + """ + + def can_combine(self, function, argument): + # Can only combine two functions, and both functions must + # allow composition. + if not (function.is_function() and argument.is_function()): + return False + if function.dir().can_compose() and argument.dir().can_compose(): + return not function.arg().can_unify(argument.res()) is None + return False + + def combine(self, function, argument): + if not (function.is_function() and argument.is_function()): + return + if function.dir().can_compose() and argument.dir().can_compose(): + subs = function.arg().can_unify(argument.res()) + if subs is not None: + yield FunctionalCategory( + function.res().substitute(subs), + argument.arg().substitute(subs), + argument.dir(), + ) + + def __str__(self): + return 'B' + + +# Predicates for restricting application of straight composition. 
+def bothForward(left, right): + return left.dir().is_forward() and right.dir().is_forward() + + +def bothBackward(left, right): + return left.dir().is_backward() and right.dir().is_backward() + + +# Predicates for crossed composition +def crossedDirs(left, right): + return left.dir().is_forward() and right.dir().is_backward() + + +def backwardBxConstraint(left, right): + # The functors must be crossed inwards + if not crossedDirs(left, right): + return False + # Permuting combinators must be allowed + if not left.dir().can_cross() and right.dir().can_cross(): + return False + # The resulting argument category is restricted to be primitive + return left.arg().is_primitive() + + +# Straight composition combinators +ForwardComposition = ForwardCombinator(UndirectedComposition(), forwardOnly) +BackwardComposition = BackwardCombinator(UndirectedComposition(), backwardOnly) + +# Backward crossed composition +BackwardBx = BackwardCombinator( + UndirectedComposition(), backwardBxConstraint, suffix='x' +) + + +@python_2_unicode_compatible +class UndirectedSubstitution(UndirectedBinaryCombinator): + """ + Substitution (permutation) combinator. + Implements rules of the form + Y/Z (X\Y)/Z -> X/Z ( N\N +def innermostFunction(categ): + while categ.res().is_function(): + categ = categ.res() + return categ + + +@python_2_unicode_compatible +class UndirectedTypeRaise(UndirectedBinaryCombinator): + """ + Undirected combinator for type raising. + """ + + def can_combine(self, function, arg): + # The argument must be a function. + # The restriction that arg.res() must be a function + # merely reduces redundant type-raising; if arg.res() is + # primitive, we have: + # X Y\X =>((>) Y + # which is equivalent to + # X Y\X =>(<) Y + if not (arg.is_function() and arg.res().is_function()): + return False + + arg = innermostFunction(arg) + + # left, arg_categ are undefined! + subs = left.can_unify(arg_categ.arg()) + if subs is not None: + return True + return False + + def combine(self, function, arg): + if not ( + function.is_primitive() and arg.is_function() and arg.res().is_function() + ): + return + + # Type-raising matches only the innermost application. + arg = innermostFunction(arg) + + subs = function.can_unify(arg.arg()) + if subs is not None: + xcat = arg.res().substitute(subs) + yield FunctionalCategory( + xcat, FunctionalCategory(xcat, function, arg.dir()), -(arg.dir()) + ) + + def __str__(self): + return 'T' + + +# Predicates for type-raising +# The direction of the innermost category must be towards +# the primary functor. +# The restriction that the variable must be primitive is not +# common to all versions of CCGs; some authors have other restrictions. 
+def forwardTConstraint(left, right): + arg = innermostFunction(right) + return arg.dir().is_backward() and arg.res().is_primitive() + + +def backwardTConstraint(left, right): + arg = innermostFunction(left) + return arg.dir().is_forward() and arg.res().is_primitive() + + +# Instances of type-raising combinators +ForwardT = ForwardCombinator(UndirectedTypeRaise(), forwardTConstraint) +BackwardT = BackwardCombinator(UndirectedTypeRaise(), backwardTConstraint) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/ccg/lexicon.py b/venv.bak/lib/python3.7/site-packages/nltk/ccg/lexicon.py new file mode 100644 index 0000000..d8e2bf3 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/ccg/lexicon.py @@ -0,0 +1,344 @@ +# Natural Language Toolkit: Combinatory Categorial Grammar +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Graeme Gange +# URL: +# For license information, see LICENSE.TXT +""" +CCG Lexicons +""" + +from __future__ import unicode_literals + +import re +from collections import defaultdict + +from nltk.ccg.api import PrimitiveCategory, Direction, CCGVar, FunctionalCategory +from nltk.compat import python_2_unicode_compatible +from nltk.internals import deprecated + +from nltk.sem.logic import Expression + +# ------------ +# Regular expressions used for parsing components of the lexicon +# ------------ + +# Parses a primitive category and subscripts +PRIM_RE = re.compile(r'''([A-Za-z]+)(\[[A-Za-z,]+\])?''') + +# Separates the next primitive category from the remainder of the +# string +NEXTPRIM_RE = re.compile(r'''([A-Za-z]+(?:\[[A-Za-z,]+\])?)(.*)''') + +# Separates the next application operator from the remainder +APP_RE = re.compile(r'''([\\/])([.,]?)([.,]?)(.*)''') + +# Parses the definition of the right-hand side (rhs) of either a word or a family +LEX_RE = re.compile(r'''([\S_]+)\s*(::|[-=]+>)\s*(.+)''', re.UNICODE) + +# Parses the right hand side that contains category and maybe semantic predicate +RHS_RE = re.compile(r'''([^{}]*[^ {}])\s*(\{[^}]+\})?''', re.UNICODE) + +# Parses the semantic predicate +SEMANTICS_RE = re.compile(r'''\{([^}]+)\}''', re.UNICODE) + +# Strips comments from a line +COMMENTS_RE = re.compile('''([^#]*)(?:#.*)?''') + + +class Token(object): + """ + Class representing a token. + + token => category {semantics} + e.g. eat => S\\var[pl]/var {\\x y.eat(x,y)} + + * `token` (string) + * `categ` (string) + * `semantics` (Expression) + """ + + def __init__(self, token, categ, semantics=None): + self._token = token + self._categ = categ + self._semantics = semantics + + def categ(self): + return self._categ + + def semantics(self): + return self._semantics + + def __str__(self): + semantics_str = "" + if self._semantics is not None: + semantics_str = " {" + str(self._semantics) + "}" + return "" + str(self._categ) + semantics_str + + def __cmp__(self, other): + if not isinstance(other, Token): + return -1 + return cmp((self._categ, self._semantics), other.categ(), other.semantics()) + + +@python_2_unicode_compatible +class CCGLexicon(object): + """ + Class representing a lexicon for CCG grammars. 
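A small sketch of the lexicon API described by the Token and CCGLexicon docstrings, using the fromstring() helper defined further down in this lexicon.py hunk; the entries are invented for illustration and nltk is assumed importable:

    from nltk.ccg import lexicon

    toy_lex = lexicon.fromstring('''
        :- S, NP, N
        Det :: NP/N
        the => Det
        dog => N
        barks => S\\NP
        ''')

    print(toy_lex.start())                  # S
    for token in toy_lex.categories('barks'):
        print(token.categ())                # (S\NP)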
+ + * `primitives`: The list of primitive categories for the lexicon + * `families`: Families of categories + * `entries`: A mapping of words to possible categories + """ + + def __init__(self, start, primitives, families, entries): + self._start = PrimitiveCategory(start) + self._primitives = primitives + self._families = families + self._entries = entries + + def categories(self, word): + """ + Returns all the possible categories for a word + """ + return self._entries[word] + + def start(self): + """ + Return the target category for the parser + """ + return self._start + + def __str__(self): + """ + String representation of the lexicon. Used for debugging. + """ + string = "" + first = True + for ident in sorted(self._entries): + if not first: + string = string + "\n" + string = string + ident + " => " + + first = True + for cat in self._entries[ident]: + if not first: + string = string + " | " + else: + first = False + string = string + "%s" % cat + return string + + +# ----------- +# Parsing lexicons +# ----------- + + +def matchBrackets(string): + """ + Separate the contents matching the first set of brackets from the rest of + the input. + """ + rest = string[1:] + inside = "(" + + while rest != "" and not rest.startswith(')'): + if rest.startswith('('): + (part, rest) = matchBrackets(rest) + inside = inside + part + else: + inside = inside + rest[0] + rest = rest[1:] + if rest.startswith(')'): + return (inside + ')', rest[1:]) + raise AssertionError('Unmatched bracket in string \'' + string + '\'') + + +def nextCategory(string): + """ + Separate the string for the next portion of the category from the rest + of the string + """ + if string.startswith('('): + return matchBrackets(string) + return NEXTPRIM_RE.match(string).groups() + + +def parseApplication(app): + """ + Parse an application operator + """ + return Direction(app[0], app[1:]) + + +def parseSubscripts(subscr): + """ + Parse the subscripts for a primitive category + """ + if subscr: + return subscr[1:-1].split(',') + return [] + + +def parsePrimitiveCategory(chunks, primitives, families, var): + """ + Parse a primitive category + + If the primitive is the special category 'var', replace it with the + correct `CCGVar`. + """ + if chunks[0] == "var": + if chunks[1] is None: + if var is None: + var = CCGVar() + return (var, var) + + catstr = chunks[0] + if catstr in families: + (cat, cvar) = families[catstr] + if var is None: + var = cvar + else: + cat = cat.substitute([(cvar, var)]) + return (cat, var) + + if catstr in primitives: + subscrs = parseSubscripts(chunks[1]) + return (PrimitiveCategory(catstr, subscrs), var) + raise AssertionError( + 'String \'' + catstr + '\' is neither a family nor primitive category.' 
+ ) + + +def augParseCategory(line, primitives, families, var=None): + """ + Parse a string representing a category, and returns a tuple with + (possibly) the CCG variable for the category + """ + (cat_string, rest) = nextCategory(line) + + if cat_string.startswith('('): + (res, var) = augParseCategory(cat_string[1:-1], primitives, families, var) + + else: + # print rePrim.match(str).groups() + (res, var) = parsePrimitiveCategory( + PRIM_RE.match(cat_string).groups(), primitives, families, var + ) + + while rest != "": + app = APP_RE.match(rest).groups() + direction = parseApplication(app[0:3]) + rest = app[3] + + (cat_string, rest) = nextCategory(rest) + if cat_string.startswith('('): + (arg, var) = augParseCategory(cat_string[1:-1], primitives, families, var) + else: + (arg, var) = parsePrimitiveCategory( + PRIM_RE.match(cat_string).groups(), primitives, families, var + ) + res = FunctionalCategory(res, arg, direction) + + return (res, var) + + +def fromstring(lex_str, include_semantics=False): + """ + Convert string representation into a lexicon for CCGs. + """ + CCGVar.reset_id() + primitives = [] + families = {} + entries = defaultdict(list) + for line in lex_str.splitlines(): + # Strip comments and leading/trailing whitespace. + line = COMMENTS_RE.match(line).groups()[0].strip() + if line == "": + continue + + if line.startswith(':-'): + # A line of primitive categories. + # The first one is the target category + # ie, :- S, N, NP, VP + primitives = primitives + [ + prim.strip() for prim in line[2:].strip().split(',') + ] + else: + # Either a family definition, or a word definition + (ident, sep, rhs) = LEX_RE.match(line).groups() + (catstr, semantics_str) = RHS_RE.match(rhs).groups() + (cat, var) = augParseCategory(catstr, primitives, families) + + if sep == '::': + # Family definition + # ie, Det :: NP/N + families[ident] = (cat, var) + else: + semantics = None + if include_semantics is True: + if semantics_str is None: + raise AssertionError( + line + + " must contain semantics because include_semantics is set to True" + ) + else: + semantics = Expression.fromstring( + SEMANTICS_RE.match(semantics_str).groups()[0] + ) + # Word definition + # ie, which => (N\N)/(S/NP) + entries[ident].append(Token(ident, cat, semantics)) + return CCGLexicon(primitives[0], primitives, families, entries) + + +@deprecated('Use fromstring() instead.') +def parseLexicon(lex_str): + return fromstring(lex_str) + + +openccg_tinytiny = fromstring( + """ + # Rather minimal lexicon based on the openccg `tinytiny' grammar. + # Only incorporates a subset of the morphological subcategories, however. 
+ :- S,NP,N # Primitive categories + Det :: NP/N # Determiners + Pro :: NP + IntransVsg :: S\\NP[sg] # Tensed intransitive verbs (singular) + IntransVpl :: S\\NP[pl] # Plural + TransVsg :: S\\NP[sg]/NP # Tensed transitive verbs (singular) + TransVpl :: S\\NP[pl]/NP # Plural + + the => NP[sg]/N[sg] + the => NP[pl]/N[pl] + + I => Pro + me => Pro + we => Pro + us => Pro + + book => N[sg] + books => N[pl] + + peach => N[sg] + peaches => N[pl] + + policeman => N[sg] + policemen => N[pl] + + boy => N[sg] + boys => N[pl] + + sleep => IntransVsg + sleep => IntransVpl + + eat => IntransVpl + eat => TransVpl + eats => IntransVsg + eats => TransVsg + + see => TransVpl + sees => TransVsg + """ +) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/ccg/logic.py b/venv.bak/lib/python3.7/site-packages/nltk/ccg/logic.py new file mode 100644 index 0000000..b89bea9 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/ccg/logic.py @@ -0,0 +1,60 @@ +# Natural Language Toolkit: Combinatory Categorial Grammar +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Tanin Na Nakorn (@tanin) +# URL: +# For license information, see LICENSE.TXT +""" +Helper functions for CCG semantics computation +""" + +from nltk.sem.logic import * + + +def compute_type_raised_semantics(semantics): + core = semantics + parent = None + while isinstance(core, LambdaExpression): + parent = core + core = core.term + + var = Variable("F") + while var in core.free(): + var = unique_variable(pattern=var) + core = ApplicationExpression(FunctionVariableExpression(var), core) + + if parent is not None: + parent.term = core + else: + semantics = core + + return LambdaExpression(var, semantics) + + +def compute_function_semantics(function, argument): + return ApplicationExpression(function, argument).simplify() + + +def compute_composition_semantics(function, argument): + assert isinstance(argument, LambdaExpression), ( + "`" + str(argument) + "` must be a lambda expression" + ) + return LambdaExpression( + argument.variable, ApplicationExpression(function, argument.term).simplify() + ) + + +def compute_substitution_semantics(function, argument): + assert isinstance(function, LambdaExpression) and isinstance( + function.term, LambdaExpression + ), ("`" + str(function) + "` must be a lambda expression with 2 arguments") + assert isinstance(argument, LambdaExpression), ( + "`" + str(argument) + "` must be a lambda expression" + ) + + new_argument = ApplicationExpression( + argument, VariableExpression(function.variable) + ).simplify() + new_term = ApplicationExpression(function.term, new_argument).simplify() + + return LambdaExpression(function.variable, new_term) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chat/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/chat/__init__.py new file mode 100644 index 0000000..cd0ad40 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/chat/__init__.py @@ -0,0 +1,52 @@ +# Natural Language Toolkit: Chatbots +# +# Copyright (C) 2001-2019 NLTK Project +# Authors: Steven Bird +# URL: +# For license information, see LICENSE.TXT + +# Based on an Eliza implementation by Joe Strout , +# Jeff Epler and Jez Higgins . + +""" +A class for simple chatbots. These perform simple pattern matching on sentences +typed by users, and respond with automatically generated sentences. + +These chatbots may not work using the windows command line or the +windows IDLE GUI. 
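Looking back at the nltk/ccg/logic.py hunk above, the semantics helpers are thin wrappers over nltk.sem.logic: compute_function_semantics() beta-reduces an application, and compute_composition_semantics() rebuilds a lambda around the composed terms. A brief sketch, with invented lambda expressions and nltk assumed importable:

    from nltk.sem.logic import Expression
    from nltk.ccg.logic import (
        compute_function_semantics,
        compute_composition_semantics,
    )

    eat = Expression.fromstring(r'\x.eat(I, x)')
    bacon = Expression.fromstring('bacon')
    print(compute_function_semantics(eat, bacon))   # eat(I,bacon)

    f = Expression.fromstring(r'\x.cook(x)')
    g = Expression.fromstring(r'\y.slice(y)')
    print(compute_composition_semantics(f, g))      # \y.cook(slice(y))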
+""" +from __future__ import print_function + +from nltk.chat.util import Chat +from nltk.chat.eliza import eliza_chat +from nltk.chat.iesha import iesha_chat +from nltk.chat.rude import rude_chat +from nltk.chat.suntsu import suntsu_chat +from nltk.chat.zen import zen_chat + +bots = [ + (eliza_chat, 'Eliza (psycho-babble)'), + (iesha_chat, 'Iesha (teen anime junky)'), + (rude_chat, 'Rude (abusive bot)'), + (suntsu_chat, 'Suntsu (Chinese sayings)'), + (zen_chat, 'Zen (gems of wisdom)'), +] + + +def chatbots(): + import sys + + print('Which chatbot would you like to talk to?') + botcount = len(bots) + for i in range(botcount): + print(' %d: %s' % (i + 1, bots[i][1])) + while True: + print('\nEnter a number in the range 1-%d: ' % botcount, end=' ') + choice = sys.stdin.readline().strip() + if choice.isdigit() and (int(choice) - 1) in range(botcount): + break + else: + print(' Error: bad chatbot number') + + chatbot = bots[int(choice) - 1][0] + chatbot() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..48e9e65 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/eliza.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/eliza.cpython-37.pyc new file mode 100644 index 0000000..e692ebb Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/eliza.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/iesha.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/iesha.cpython-37.pyc new file mode 100644 index 0000000..a77e062 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/iesha.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/rude.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/rude.cpython-37.pyc new file mode 100644 index 0000000..e5766ad Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/rude.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/suntsu.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/suntsu.cpython-37.pyc new file mode 100644 index 0000000..54dc298 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/suntsu.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000..5a296d7 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/util.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/zen.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/zen.cpython-37.pyc new file mode 100644 index 0000000..b59368b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/chat/__pycache__/zen.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chat/eliza.py b/venv.bak/lib/python3.7/site-packages/nltk/chat/eliza.py new file mode 100644 index 0000000..ef23b80 --- /dev/null +++ 
b/venv.bak/lib/python3.7/site-packages/nltk/chat/eliza.py @@ -0,0 +1,338 @@ +# Natural Language Toolkit: Eliza +# +# Copyright (C) 2001-2019 NLTK Project +# Authors: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT + +# Based on an Eliza implementation by Joe Strout , +# Jeff Epler and Jez Higgins . + +# a translation table used to convert things you say into things the +# computer says back, e.g. "I am" --> "you are" + +from __future__ import print_function +from nltk.chat.util import Chat, reflections + +# a table of response pairs, where each pair consists of a +# regular expression, and a list of possible responses, +# with group-macros labelled as %1, %2. + +pairs = ( + ( + r'I need (.*)', + ( + "Why do you need %1?", + "Would it really help you to get %1?", + "Are you sure you need %1?", + ), + ), + ( + r'Why don\'t you (.*)', + ( + "Do you really think I don't %1?", + "Perhaps eventually I will %1.", + "Do you really want me to %1?", + ), + ), + ( + r'Why can\'t I (.*)', + ( + "Do you think you should be able to %1?", + "If you could %1, what would you do?", + "I don't know -- why can't you %1?", + "Have you really tried?", + ), + ), + ( + r'I can\'t (.*)', + ( + "How do you know you can't %1?", + "Perhaps you could %1 if you tried.", + "What would it take for you to %1?", + ), + ), + ( + r'I am (.*)', + ( + "Did you come to me because you are %1?", + "How long have you been %1?", + "How do you feel about being %1?", + ), + ), + ( + r'I\'m (.*)', + ( + "How does being %1 make you feel?", + "Do you enjoy being %1?", + "Why do you tell me you're %1?", + "Why do you think you're %1?", + ), + ), + ( + r'Are you (.*)', + ( + "Why does it matter whether I am %1?", + "Would you prefer it if I were not %1?", + "Perhaps you believe I am %1.", + "I may be %1 -- what do you think?", + ), + ), + ( + r'What (.*)', + ( + "Why do you ask?", + "How would an answer to that help you?", + "What do you think?", + ), + ), + ( + r'How (.*)', + ( + "How do you suppose?", + "Perhaps you can answer your own question.", + "What is it you're really asking?", + ), + ), + ( + r'Because (.*)', + ( + "Is that the real reason?", + "What other reasons come to mind?", + "Does that reason apply to anything else?", + "If %1, what else must be true?", + ), + ), + ( + r'(.*) sorry (.*)', + ( + "There are many times when no apology is needed.", + "What feelings do you have when you apologize?", + ), + ), + ( + r'Hello(.*)', + ( + "Hello... I'm glad you could drop by today.", + "Hi there... 
how are you today?", + "Hello, how are you feeling today?", + ), + ), + ( + r'I think (.*)', + ("Do you doubt %1?", "Do you really think so?", "But you're not sure %1?"), + ), + ( + r'(.*) friend (.*)', + ( + "Tell me more about your friends.", + "When you think of a friend, what comes to mind?", + "Why don't you tell me about a childhood friend?", + ), + ), + (r'Yes', ("You seem quite sure.", "OK, but can you elaborate a bit?")), + ( + r'(.*) computer(.*)', + ( + "Are you really talking about me?", + "Does it seem strange to talk to a computer?", + "How do computers make you feel?", + "Do you feel threatened by computers?", + ), + ), + ( + r'Is it (.*)', + ( + "Do you think it is %1?", + "Perhaps it's %1 -- what do you think?", + "If it were %1, what would you do?", + "It could well be that %1.", + ), + ), + ( + r'It is (.*)', + ( + "You seem very certain.", + "If I told you that it probably isn't %1, what would you feel?", + ), + ), + ( + r'Can you (.*)', + ( + "What makes you think I can't %1?", + "If I could %1, then what?", + "Why do you ask if I can %1?", + ), + ), + ( + r'Can I (.*)', + ( + "Perhaps you don't want to %1.", + "Do you want to be able to %1?", + "If you could %1, would you?", + ), + ), + ( + r'You are (.*)', + ( + "Why do you think I am %1?", + "Does it please you to think that I'm %1?", + "Perhaps you would like me to be %1.", + "Perhaps you're really talking about yourself?", + ), + ), + ( + r'You\'re (.*)', + ( + "Why do you say I am %1?", + "Why do you think I am %1?", + "Are we talking about you, or me?", + ), + ), + ( + r'I don\'t (.*)', + ("Don't you really %1?", "Why don't you %1?", "Do you want to %1?"), + ), + ( + r'I feel (.*)', + ( + "Good, tell me more about these feelings.", + "Do you often feel %1?", + "When do you usually feel %1?", + "When you feel %1, what do you do?", + ), + ), + ( + r'I have (.*)', + ( + "Why do you tell me that you've %1?", + "Have you really %1?", + "Now that you have %1, what will you do next?", + ), + ), + ( + r'I would (.*)', + ( + "Could you explain why you would %1?", + "Why would you %1?", + "Who else knows that you would %1?", + ), + ), + ( + r'Is there (.*)', + ( + "Do you think there is %1?", + "It's likely that there is %1.", + "Would you like there to be %1?", + ), + ), + ( + r'My (.*)', + ( + "I see, your %1.", + "Why do you say that your %1?", + "When your %1, how do you feel?", + ), + ), + ( + r'You (.*)', + ( + "We should be discussing you, not me.", + "Why do you say that about me?", + "Why do you care whether I %1?", + ), + ), + (r'Why (.*)', ("Why don't you tell me the reason why %1?", "Why do you think %1?")), + ( + r'I want (.*)', + ( + "What would it mean to you if you got %1?", + "Why do you want %1?", + "What would you do if you got %1?", + "If you got %1, then what would you do?", + ), + ), + ( + r'(.*) mother(.*)', + ( + "Tell me more about your mother.", + "What was your relationship with your mother like?", + "How do you feel about your mother?", + "How does this relate to your feelings today?", + "Good family relations are important.", + ), + ), + ( + r'(.*) father(.*)', + ( + "Tell me more about your father.", + "How did your father make you feel?", + "How do you feel about your father?", + "Does your relationship with your father relate to your feelings today?", + "Do you have trouble showing affection with your family?", + ), + ), + ( + r'(.*) child(.*)', + ( + "Did you have close friends as a child?", + "What is your favorite childhood memory?", + "Do you remember any dreams or nightmares from 
childhood?", + "Did the other children sometimes tease you?", + "How do you think your childhood experiences relate to your feelings today?", + ), + ), + ( + r'(.*)\?', + ( + "Why do you ask that?", + "Please consider whether you can answer your own question.", + "Perhaps the answer lies within yourself?", + "Why don't you tell me?", + ), + ), + ( + r'quit', + ( + "Thank you for talking with me.", + "Good-bye.", + "Thank you, that will be $150. Have a good day!", + ), + ), + ( + r'(.*)', + ( + "Please tell me more.", + "Let's change focus a bit... Tell me about your family.", + "Can you elaborate on that?", + "Why do you say that %1?", + "I see.", + "Very interesting.", + "%1.", + "I see. And what does that tell you?", + "How does that make you feel?", + "How do you feel when you say that?", + ), + ), +) + +eliza_chatbot = Chat(pairs, reflections) + + +def eliza_chat(): + print("Therapist\n---------") + print("Talk to the program by typing in plain English, using normal upper-") + print('and lower-case letters and punctuation. Enter "quit" when done.') + print('=' * 72) + print("Hello. How are you feeling today?") + + eliza_chatbot.converse() + + +def demo(): + eliza_chat() + + +if __name__ == "__main__": + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chat/iesha.py b/venv.bak/lib/python3.7/site-packages/nltk/chat/iesha.py new file mode 100644 index 0000000..4a7a615 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/chat/iesha.py @@ -0,0 +1,161 @@ +# Natural Language Toolkit: Teen Chatbot +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Selina Dennis +# URL: +# For license information, see LICENSE.TXT + +""" +This chatbot is a tongue-in-cheek take on the average teen +anime junky that frequents YahooMessenger or MSNM. +All spelling mistakes and flawed grammar are intentional. +""" +from __future__ import print_function + +from nltk.chat.util import Chat + +reflections = { + "am": "r", + "was": "were", + "i": "u", + "i'd": "u'd", + "i've": "u'v", + "ive": "u'v", + "i'll": "u'll", + "my": "ur", + "are": "am", + "you're": "im", + "you've": "ive", + "you'll": "i'll", + "your": "my", + "yours": "mine", + "you": "me", + "u": "me", + "ur": "my", + "urs": "mine", + "me": "u", +} + +# Note: %1/2/etc are used without spaces prior as the chat bot seems +# to add a superfluous space when matching. + +pairs = ( + ( + r'I\'m (.*)', + ( + "ur%1?? that's so cool! kekekekeke ^_^ tell me more!", + "ur%1? neat!! kekeke >_<", + ), + ), + ( + r'(.*) don\'t you (.*)', + ( + "u think I can%2??! really?? kekeke \<_\<", + "what do u mean%2??!", + "i could if i wanted, don't you think!! kekeke", + ), + ), + (r'ye[as] [iI] (.*)', ("u%1? cool!! how?", "how come u%1??", "u%1? so do i!!")), + ( + r'do (you|u) (.*)\??', + ("do i%2? only on tuesdays! kekeke *_*", "i dunno! do u%2??"), + ), + ( + r'(.*)\?', + ( + "man u ask lots of questions!", + "booooring! how old r u??", + "boooooring!! ur not very fun", + ), + ), + ( + r'(cos|because) (.*)', + ("hee! i don't believe u! >_<", "nuh-uh! >_<", "ooooh i agree!"), + ), + ( + r'why can\'t [iI] (.*)', + ( + "i dunno! y u askin me for!", + "try harder, silly! hee! ^_^", + "i dunno! but when i can't%1 i jump up and down!", + ), + ), + ( + r'I can\'t (.*)', + ( + "u can't what??! >_<", + "that's ok! i can't%1 either! kekekekeke ^_^", + "try harder, silly! hee! ^&^", + ), + ), + ( + r'(.*) (like|love|watch) anime', + ( + "omg i love anime!! do u like sailor moon??! ^&^", + "anime yay! anime rocks sooooo much!", + "oooh anime! 
i love anime more than anything!", + "anime is the bestest evar! evangelion is the best!", + "hee anime is the best! do you have ur fav??", + ), + ), + ( + r'I (like|love|watch|play) (.*)', + ("yay! %2 rocks!", "yay! %2 is neat!", "cool! do u like other stuff?? ^_^"), + ), + ( + r'anime sucks|(.*) (hate|detest) anime', + ( + "ur a liar! i'm not gonna talk to u nemore if u h8 anime *;*", + "no way! anime is the best ever!", + "nuh-uh, anime is the best!", + ), + ), + ( + r'(are|r) (you|u) (.*)', + ("am i%1??! how come u ask that!", "maybe! y shud i tell u?? kekeke >_>"), + ), + ( + r'what (.*)', + ("hee u think im gonna tell u? .v.", "booooooooring! ask me somethin else!"), + ), + (r'how (.*)', ("not tellin!! kekekekekeke ^_^",)), + (r'(hi|hello|hey) (.*)', ("hi!!! how r u!!",)), + ( + r'quit', + ( + "mom says i have to go eat dinner now :,( bye!!", + "awww u have to go?? see u next time!!", + "how to see u again soon! ^_^", + ), + ), + ( + r'(.*)', + ( + "ur funny! kekeke", + "boooooring! talk about something else! tell me wat u like!", + "do u like anime??", + "do u watch anime? i like sailor moon! ^_^", + "i wish i was a kitty!! kekekeke ^_^", + ), + ), +) + +iesha_chatbot = Chat(pairs, reflections) + + +def iesha_chat(): + print("Iesha the TeenBoT\n---------") + print("Talk to the program by typing in plain English, using normal upper-") + print('and lower-case letters and punctuation. Enter "quit" when done.') + print('=' * 72) + print("hi!! i'm iesha! who r u??!") + + iesha_chatbot.converse() + + +def demo(): + iesha_chat() + + +if __name__ == "__main__": + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chat/rude.py b/venv.bak/lib/python3.7/site-packages/nltk/chat/rude.py new file mode 100644 index 0000000..c9c9de8 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/chat/rude.py @@ -0,0 +1,126 @@ +# Natural Language Toolkit: Rude Chatbot +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Peter Spiller +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function + +from nltk.chat.util import Chat, reflections + +pairs = ( + ( + r'We (.*)', + ( + "What do you mean, 'we'?", + "Don't include me in that!", + "I wouldn't be so sure about that.", + ), + ), + ( + r'You should (.*)', + ("Don't tell me what to do, buddy.", "Really? I should, should I?"), + ), + ( + r'You\'re(.*)', + ( + "More like YOU'RE %1!", + "Hah! Look who's talking.", + "Come over here and tell me I'm %1.", + ), + ), + ( + r'You are(.*)', + ( + "More like YOU'RE %1!", + "Hah! Look who's talking.", + "Come over here and tell me I'm %1.", + ), + ), + ( + r'I can\'t(.*)', + ( + "You do sound like the type who can't %1.", + "Hear that splashing sound? That's my heart bleeding for you.", + "Tell somebody who might actually care.", + ), + ), + ( + r'I think (.*)', + ( + "I wouldn't think too hard if I were you.", + "You actually think? I'd never have guessed...", + ), + ), + ( + r'I (.*)', + ( + "I'm getting a bit tired of hearing about you.", + "How about we talk about me instead?", + "Me, me, me... Frankly, I don't care.", + ), + ), + ( + r'How (.*)', + ( + "How do you think?", + "Take a wild guess.", + "I'm not even going to dignify that with an answer.", + ), + ), + (r'What (.*)', ("Do I look like an encyclopedia?", "Figure it out yourself.")), + ( + r'Why (.*)', + ( + "Why not?", + "That's so obvious I thought even you'd have already figured it out.", + ), + ), + ( + r'(.*)shut up(.*)', + ( + "Make me.", + "Getting angry at a feeble NLP assignment? 
Somebody's losing it.", + "Say that again, I dare you.", + ), + ), + ( + r'Shut up(.*)', + ( + "Make me.", + "Getting angry at a feeble NLP assignment? Somebody's losing it.", + "Say that again, I dare you.", + ), + ), + ( + r'Hello(.*)', + ("Oh good, somebody else to talk to. Joy.", "'Hello'? How original..."), + ), + ( + r'(.*)', + ( + "I'm getting bored here. Become more interesting.", + "Either become more thrilling or get lost, buddy.", + "Change the subject before I die of fatal boredom.", + ), + ), +) + +rude_chatbot = Chat(pairs, reflections) + + +def rude_chat(): + print("Talk to the program by typing in plain English, using normal upper-") + print('and lower-case letters and punctuation. Enter "quit" when done.') + print('=' * 72) + print("I suppose I should say hello.") + + rude_chatbot.converse() + + +def demo(): + rude_chat() + + +if __name__ == "__main__": + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chat/suntsu.py b/venv.bak/lib/python3.7/site-packages/nltk/chat/suntsu.py new file mode 100644 index 0000000..9f6dc34 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/chat/suntsu.py @@ -0,0 +1,141 @@ +# Natural Language Toolkit: Sun Tsu-Bot +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Sam Huston 2007 +# URL: +# For license information, see LICENSE.TXT + +""" +Tsu bot responds to all queries with a Sun Tsu sayings + +Quoted from Sun Tsu's The Art of War +Translated by LIONEL GILES, M.A. 1910 +Hosted by the Gutenberg Project +http://www.gutenberg.org/ +""" +from __future__ import print_function + +from nltk.chat.util import Chat, reflections + +pairs = ( + (r'quit', ("Good-bye.", "Plan well", "May victory be your future")), + ( + r'[^\?]*\?', + ( + "Please consider whether you can answer your own question.", + "Ask me no questions!", + ), + ), + ( + r'[0-9]+(.*)', + ( + "It is the rule in war, if our forces are ten to the enemy's one, to surround him; if five to one, to attack him; if twice as numerous, to divide our army into two.", + "There are five essentials for victory", + ), + ), + ( + r'[A-Ca-c](.*)', + ( + "The art of war is of vital importance to the State.", + "All warfare is based on deception.", + "If your opponent is secure at all points, be prepared for him. 
If he is in superior strength, evade him.", + "If the campaign is protracted, the resources of the State will not be equal to the strain.", + "Attack him where he is unprepared, appear where you are not expected.", + "There is no instance of a country having benefited from prolonged warfare.", + ), + ), + ( + r'[D-Fd-f](.*)', + ( + "The skillful soldier does not raise a second levy, neither are his supply-wagons loaded more than twice.", + "Bring war material with you from home, but forage on the enemy.", + "In war, then, let your great object be victory, not lengthy campaigns.", + "To fight and conquer in all your battles is not supreme excellence; supreme excellence consists in breaking the enemy's resistance without fighting.", + ), + ), + ( + r'[G-Ig-i](.*)', + ( + "Heaven signifies night and day, cold and heat, times and seasons.", + "It is the rule in war, if our forces are ten to the enemy's one, to surround him; if five to one, to attack him; if twice as numerous, to divide our army into two.", + "The good fighters of old first put themselves beyond the possibility of defeat, and then waited for an opportunity of defeating the enemy.", + "One may know how to conquer without being able to do it.", + ), + ), + ( + r'[J-Lj-l](.*)', + ( + "There are three ways in which a ruler can bring misfortune upon his army.", + "By commanding the army to advance or to retreat, being ignorant of the fact that it cannot obey. This is called hobbling the army.", + "By attempting to govern an army in the same way as he administers a kingdom, being ignorant of the conditions which obtain in an army. This causes restlessness in the soldier's minds.", + "By employing the officers of his army without discrimination, through ignorance of the military principle of adaptation to circumstances. This shakes the confidence of the soldiers.", + "There are five essentials for victory", + "He will win who knows when to fight and when not to fight.", + "He will win who knows how to handle both superior and inferior forces.", + "He will win whose army is animated by the same spirit throughout all its ranks.", + "He will win who, prepared himself, waits to take the enemy unprepared.", + "He will win who has military capacity and is not interfered with by the sovereign.", + ), + ), + ( + r'[M-Om-o](.*)', + ( + "If you know the enemy and know yourself, you need not fear the result of a hundred battles.", + "If you know yourself but not the enemy, for every victory gained you will also suffer a defeat.", + "If you know neither the enemy nor yourself, you will succumb in every battle.", + "The control of a large force is the same principle as the control of a few men: it is merely a question of dividing up their numbers.", + ), + ), + ( + r'[P-Rp-r](.*)', + ( + "Security against defeat implies defensive tactics; ability to defeat the enemy means taking the offensive.", + "Standing on the defensive indicates insufficient strength; attacking, a superabundance of strength.", + "He wins his battles by making no mistakes. 
Making no mistakes is what establishes the certainty of victory, for it means conquering an enemy that is already defeated.", + "A victorious army opposed to a routed one, is as a pound's weight placed in the scale against a single grain.", + "The onrush of a conquering force is like the bursting of pent-up waters into a chasm a thousand fathoms deep.", + ), + ), + ( + r'[S-Us-u](.*)', + ( + "What the ancients called a clever fighter is one who not only wins, but excels in winning with ease.", + "Hence his victories bring him neither reputation for wisdom nor credit for courage.", + "Hence the skillful fighter puts himself into a position which makes defeat impossible, and does not miss the moment for defeating the enemy.", + "In war the victorious strategist only seeks battle after the victory has been won, whereas he who is destined to defeat first fights and afterwards looks for victory.", + "There are not more than five musical notes, yet the combinations of these five give rise to more melodies than can ever be heard.", + "Appear at points which the enemy must hasten to defend; march swiftly to places where you are not expected.", + ), + ), + ( + r'[V-Zv-z](.*)', + ( + "It is a matter of life and death, a road either to safety or to ruin.", + "Hold out baits to entice the enemy. Feign disorder, and crush him.", + "All men can see the tactics whereby I conquer, but what none can see is the strategy out of which victory is evolved.", + "Do not repeat the tactics which have gained you one victory, but let your methods be regulated by the infinite variety of circumstances.", + "So in war, the way is to avoid what is strong and to strike at what is weak.", + "Just as water retains no constant shape, so in warfare there are no constant conditions.", + ), + ), + (r'(.*)', ("Your statement insults me.", "")), +) + +suntsu_chatbot = Chat(pairs, reflections) + + +def suntsu_chat(): + print("Talk to the program by typing in plain English, using normal upper-") + print('and lower-case letters and punctuation. Enter "quit" when done.') + print('=' * 72) + print("You seek enlightenment?") + + suntsu_chatbot.converse() + + +def demo(): + suntsu_chat() + + +if __name__ == "__main__": + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chat/util.py b/venv.bak/lib/python3.7/site-packages/nltk/chat/util.py new file mode 100644 index 0000000..f2dd361 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/chat/util.py @@ -0,0 +1,128 @@ +# Natural Language Toolkit: Chatbot Utilities +# +# Copyright (C) 2001-2019 NLTK Project +# Authors: Steven Bird +# URL: +# For license information, see LICENSE.TXT + +# Based on an Eliza implementation by Joe Strout , +# Jeff Epler and Jez Higgins . +from __future__ import print_function + +import re +import random + +from six.moves import input + + +reflections = { + "i am": "you are", + "i was": "you were", + "i": "you", + "i'm": "you are", + "i'd": "you would", + "i've": "you have", + "i'll": "you will", + "my": "your", + "you are": "I am", + "you were": "I was", + "you've": "I have", + "you'll": "I will", + "your": "my", + "yours": "mine", + "you": "me", + "me": "you", +} + + +class Chat(object): + def __init__(self, pairs, reflections={}): + """ + Initialize the chatbot. Pairs is a list of patterns and responses. Each + pattern is a regular expression matching the user's statement or question, + e.g. r'I like (.*)'. For each such pattern a list of possible responses + is given, e.g. ['Why do you like %1', 'Did you ever dislike %1']. 
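+        For instance, a minimal sketch (a single response per pattern keeps the
+        random choice deterministic):
+
+            >>> chat = Chat([(r'I like (.*)', ['Why do you like %1?'])],
+            ...             {'my': 'your'})
+            >>> chat.respond('I like my dog')
+            'Why do you like your dog?'
+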
Material + which is matched by parenthesized sections of the patterns (e.g. .*) is mapped to + the numbered positions in the responses, e.g. %1. + + :type pairs: list of tuple + :param pairs: The patterns and responses + :type reflections: dict + :param reflections: A mapping between first and second person expressions + :rtype: None + """ + + self._pairs = [(re.compile(x, re.IGNORECASE), y) for (x, y) in pairs] + self._reflections = reflections + self._regex = self._compile_reflections() + + def _compile_reflections(self): + sorted_refl = sorted(self._reflections.keys(), key=len, reverse=True) + return re.compile( + r"\b({0})\b".format("|".join(map(re.escape, sorted_refl))), re.IGNORECASE + ) + + def _substitute(self, str): + """ + Substitute words in the string, according to the specified reflections, + e.g. "I'm" -> "you are" + + :type str: str + :param str: The string to be mapped + :rtype: str + """ + + return self._regex.sub( + lambda mo: self._reflections[mo.string[mo.start() : mo.end()]], str.lower() + ) + + def _wildcards(self, response, match): + pos = response.find('%') + while pos >= 0: + num = int(response[pos + 1 : pos + 2]) + response = ( + response[:pos] + + self._substitute(match.group(num)) + + response[pos + 2 :] + ) + pos = response.find('%') + return response + + def respond(self, str): + """ + Generate a response to the user input. + + :type str: str + :param str: The string to be mapped + :rtype: str + """ + + # check each pattern + for (pattern, response) in self._pairs: + match = pattern.match(str) + + # did the pattern match? + if match: + resp = random.choice(response) # pick a random response + resp = self._wildcards(resp, match) # process wildcards + + # fix munged punctuation at the end + if resp[-2:] == '?.': + resp = resp[:-2] + '.' + if resp[-2:] == '??': + resp = resp[:-2] + '?' + return resp + + # Hold a conversation with a chatbot + def converse(self, quit="quit"): + user_input = "" + while user_input != quit: + user_input = quit + try: + user_input = input(">") + except EOFError: + print(user_input) + if user_input: + while user_input[-1] in "!.": + user_input = user_input[:-1] + print(self.respond(user_input)) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chat/zen.py b/venv.bak/lib/python3.7/site-packages/nltk/chat/zen.py new file mode 100644 index 0000000..d46a9f9 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/chat/zen.py @@ -0,0 +1,330 @@ +# Natural Language Toolkit: Zen Chatbot +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Amy Holland +# URL: +# For license information, see LICENSE.TXT + +""" +Zen Chatbot talks in gems of Zen wisdom. + +This is a sample conversation with Zen Chatbot: +ZC: Welcome, my child. +me: Good afternoon. +ZC: Ask the question you have come to ask. +me: How can I achieve enlightenment? +ZC: How do you suppose? +me: Through meditation. +ZC: Form is emptiness, and emptiness form. +me: How can I empty my mind of worldly troubles? +ZC: Will an answer to that really help in your search for enlightenment? +me: Yes. +ZC: It is better to be right than to be certain. +me: I seek truth and wisdom. +ZC: The search for truth is a long journey. +me: Are you sure? +ZC: Maybe sure, maybe not sure. + + +The chatbot structure is based on that of chat.eliza. Thus, it uses +a translation table to convert from question to response +i.e. "I am" --> "you are" + +Of course, since Zen Chatbot does not understand the meaning of any words, +responses are very limited. 
Zen Chatbot will usually answer very vaguely, or +respond to a question by asking a different question, in much the same way +as Eliza. +""" +from __future__ import print_function + +from nltk.chat.util import Chat, reflections + +# responses are matched top to bottom, so non-specific matches occur later +# for each match, a list of possible responses is provided +responses = ( + # Zen Chatbot opens with the line "Welcome, my child." The usual + # response will be a greeting problem: 'good' matches "good morning", + # "good day" etc, but also "good grief!" and other sentences starting + # with the word 'good' that may not be a greeting + ( + r'(hello(.*))|(good [a-zA-Z]+)', + ( + "The path to enlightenment is often difficult to see.", + "Greetings. I sense your mind is troubled. Tell me of your troubles.", + "Ask the question you have come to ask.", + "Hello. Do you seek englightenment?", + ), + ), + # "I need" and "I want" can be followed by a thing (eg 'help') + # or an action (eg 'to see you') + # + # This is a problem with this style of response - + # person: "I need you" + # chatbot: "me can be achieved by hard work and dedication of the mind" + # i.e. 'you' is not really a thing that can be mapped this way, so this + # interpretation only makes sense for some inputs + # + ( + r'i need (.*)', + ( + "%1 can be achieved by hard work and dedication of the mind.", + "%1 is not a need, but a desire of the mind. Clear your mind of such concerns.", + "Focus your mind on%1, and you will find what you need.", + ), + ), + ( + r'i want (.*)', + ( + "Desires of the heart will distract you from the path to enlightenment.", + "Will%1 help you attain enlightenment?", + "Is%1 a desire of the mind, or of the heart?", + ), + ), + # why questions are separated into three types: + # "why..I" e.g. "why am I here?" "Why do I like cake?" + # "why..you" e.g. "why are you here?" "Why won't you tell me?" + # "why..." e.g. "Why is the sky blue?" + # problems: + # person: "Why can't you tell me?" + # chatbot: "Are you sure I tell you?" + # - this style works for positives (e.g. "why do you like cake?") + # but does not work for negatives (e.g. "why don't you like cake?") + (r'why (.*) i (.*)\?', ("You%1%2?", "Perhaps you only think you%1%2")), + (r'why (.*) you(.*)\?', ("Why%1 you%2?", "%2 I%1", "Are you sure I%2?")), + (r'why (.*)\?', ("I cannot tell you why%1.", "Why do you think %1?")), + # e.g. "are you listening?", "are you a duck" + ( + r'are you (.*)\?', + ("Maybe%1, maybe not%1.", "Whether I am%1 or not is God's business."), + ), + # e.g. "am I a duck?", "am I going to die?" + ( + r'am i (.*)\?', + ("Perhaps%1, perhaps not%1.", "Whether you are%1 or not is not for me to say."), + ), + # what questions, e.g. "what time is it?" + # problems: + # person: "What do you want?" + # chatbot: "Seek truth, not what do me want." + (r'what (.*)\?', ("Seek truth, not what%1.", "What%1 should not concern you.")), + # how questions, e.g. "how do you do?" + ( + r'how (.*)\?', + ( + "How do you suppose?", + "Will an answer to that really help in your search for enlightenment?", + "Ask yourself not how, but why.", + ), + ), + # can questions, e.g. "can you run?", "can you come over here please?" + ( + r'can you (.*)\?', + ( + "I probably can, but I may not.", + "Maybe I can%1, and maybe I cannot.", + "I can do all, and I can do nothing.", + ), + ), + # can questions, e.g. "can I have some cake?", "can I know truth?" 
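+    # (patterns are matched top to bottom, so these "can i" questions are only
+    #  reached when the "can you" pattern above has not already matched)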
+ ( + r'can i (.*)\?', + ( + "You can%1 if you believe you can%1, and have a pure spirit.", + "Seek truth and you will know if you can%1.", + ), + ), + # e.g. "It is raining" - implies the speaker is certain of a fact + ( + r'it is (.*)', + ( + "How can you be certain that%1, when you do not even know yourself?", + "Whether it is%1 or not does not change the way the world is.", + ), + ), + # e.g. "is there a doctor in the house?" + ( + r'is there (.*)\?', + ("There is%1 if you believe there is.", "It is possible that there is%1."), + ), + # e.g. "is it possible?", "is this true?" + (r'is(.*)\?', ("%1 is not relevant.", "Does this matter?")), + # non-specific question + ( + r'(.*)\?', + ( + "Do you think %1?", + "You seek the truth. Does the truth seek you?", + "If you intentionally pursue the answers to your questions, the answers become hard to see.", + "The answer to your question cannot be told. It must be experienced.", + ), + ), + # expression of hate of form "I hate you" or "Kelly hates cheese" + ( + r'(.*) (hate[s]?)|(dislike[s]?)|(don\'t like)(.*)', + ( + "Perhaps it is not about hating %2, but about hate from within.", + "Weeds only grow when we dislike them", + "Hate is a very strong emotion.", + ), + ), + # statement containing the word 'truth' + ( + r'(.*) truth(.*)', + ( + "Seek truth, and truth will seek you.", + "Remember, it is not the spoon which bends - only yourself.", + "The search for truth is a long journey.", + ), + ), + # desire to do an action + # e.g. "I want to go shopping" + ( + r'i want to (.*)', + ("You may %1 if your heart truly desires to.", "You may have to %1."), + ), + # desire for an object + # e.g. "I want a pony" + ( + r'i want (.*)', + ( + "Does your heart truly desire %1?", + "Is this a desire of the heart, or of the mind?", + ), + ), + # e.g. "I can't wait" or "I can't do this" + ( + r'i can\'t (.*)', + ( + "What we can and can't do is a limitation of the mind.", + "There are limitations of the body, and limitations of the mind.", + "Have you tried to%1 with a clear mind?", + ), + ), + # "I think.." indicates uncertainty. e.g. "I think so." + # problem: exceptions... + # e.g. "I think, therefore I am" + ( + r'i think (.*)', + ( + "Uncertainty in an uncertain world.", + "Indeed, how can we be certain of anything in such uncertain times.", + "Are you not, in fact, certain that%1?", + ), + ), + # "I feel...emotions/sick/light-headed..." + ( + r'i feel (.*)', + ( + "Your body and your emotions are both symptoms of your mind." + "What do you believe is the root of such feelings?", + "Feeling%1 can be a sign of your state-of-mind.", + ), + ), + # exclaimation mark indicating emotion + # e.g. "Wow!" or "No!" + ( + r'(.*)!', + ( + "I sense that you are feeling emotional today.", + "You need to calm your emotions.", + ), + ), + # because [statement] + # e.g. "because I said so" + ( + r'because (.*)', + ( + "Does knowning the reasons behind things help you to understand" + " the things themselves?", + "If%1, what else must be true?", + ), + ), + # yes or no - raise an issue of certainty/correctness + ( + r'(yes)|(no)', + ( + "Is there certainty in an uncertain world?", + "It is better to be right than to be certain.", + ), + ), + # sentence containing word 'love' + ( + r'(.*)love(.*)', + ( + "Think of the trees: they let the birds perch and fly with no intention to call them when they come, and no longing for their return when they fly away. 
Let your heart be like the trees.", + "Free love!", + ), + ), + # sentence containing word 'understand' - r + ( + r'(.*)understand(.*)', + ( + "If you understand, things are just as they are;" + " if you do not understand, things are just as they are.", + "Imagination is more important than knowledge.", + ), + ), + # 'I', 'me', 'my' - person is talking about themself. + # this breaks down when words contain these - eg 'Thyme', 'Irish' + ( + r'(.*)(me )|( me)|(my)|(mine)|(i)(.*)', + ( + "'I', 'me', 'my'... these are selfish expressions.", + "Have you ever considered that you might be a selfish person?", + "Try to consider others, not just yourself.", + "Think not just of yourself, but of others.", + ), + ), + # 'you' starting a sentence + # e.g. "you stink!" + ( + r'you (.*)', + ("My path is not of conern to you.", "I am but one, and you but one more."), + ), + # say goodbye with some extra Zen wisdom. + ( + r'exit', + ( + "Farewell. The obstacle is the path.", + "Farewell. Life is a journey, not a destination.", + "Good bye. We are cups, constantly and quietly being filled." + "\nThe trick is knowning how to tip ourselves over and let the beautiful stuff out.", + ), + ), + # fall through case - + # when stumped, respond with generic zen wisdom + # + ( + r'(.*)', + ( + "When you're enlightened, every word is wisdom.", + "Random talk is useless.", + "The reverse side also has a reverse side.", + "Form is emptiness, and emptiness is form.", + "I pour out a cup of water. Is the cup empty?", + ), + ), +) + +zen_chatbot = Chat(responses, reflections) + + +def zen_chat(): + print('*' * 75) + print("Zen Chatbot!".center(75)) + print('*' * 75) + print('"Look beyond mere words and letters - look into your mind"'.center(75)) + print("* Talk your way to truth with Zen Chatbot.") + print("* Type 'quit' when you have had enough.") + print('*' * 75) + print("Welcome, my child.") + + zen_chatbot.converse() + + +def demo(): + zen_chat() + + +if __name__ == "__main__": + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chunk/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/chunk/__init__.py new file mode 100644 index 0000000..f4b107c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/chunk/__init__.py @@ -0,0 +1,199 @@ +# Natural Language Toolkit: Chunkers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT +# + +""" +Classes and interfaces for identifying non-overlapping linguistic +groups (such as base noun phrases) in unrestricted text. This task is +called "chunk parsing" or "chunking", and the identified groups are +called "chunks". The chunked text is represented using a shallow +tree called a "chunk structure." A chunk structure is a tree +containing tokens and chunks, where each chunk is a subtree containing +only tokens. For example, the chunk structure for base noun phrase +chunks in the sentence "I saw the big dog on the hill" is:: + + (SENTENCE: + (NP: ) + + (NP: ) + + (NP: )) + +To convert a chunk structure back to a list of tokens, simply use the +chunk structure's ``leaves()`` method. + +This module defines ``ChunkParserI``, a standard interface for +chunking texts; and ``RegexpChunkParser``, a regular-expression based +implementation of that interface. It also defines ``ChunkScore``, a +utility class for scoring chunk parsers. 
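+
+A minimal sketch of the typical workflow, assuming only the public
+``RegexpParser`` class re-exported below::
+
+    >>> from nltk.chunk import RegexpParser
+    >>> cp = RegexpParser("NP: {<DT>?<JJ>*<NN>}")
+    >>> print(cp.parse([("the", "DT"), ("little", "JJ"), ("cat", "NN")]))
+    (S (NP the/DT little/JJ cat/NN))
+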
+ +RegexpChunkParser +================= + +``RegexpChunkParser`` is an implementation of the chunk parser interface +that uses regular-expressions over tags to chunk a text. Its +``parse()`` method first constructs a ``ChunkString``, which encodes a +particular chunking of the input text. Initially, nothing is +chunked. ``parse.RegexpChunkParser`` then applies a sequence of +``RegexpChunkRule`` rules to the ``ChunkString``, each of which modifies +the chunking that it encodes. Finally, the ``ChunkString`` is +transformed back into a chunk structure, which is returned. + +``RegexpChunkParser`` can only be used to chunk a single kind of phrase. +For example, you can use an ``RegexpChunkParser`` to chunk the noun +phrases in a text, or the verb phrases in a text; but you can not +use it to simultaneously chunk both noun phrases and verb phrases in +the same text. (This is a limitation of ``RegexpChunkParser``, not of +chunk parsers in general.) + +RegexpChunkRules +---------------- + +A ``RegexpChunkRule`` is a transformational rule that updates the +chunking of a text by modifying its ``ChunkString``. Each +``RegexpChunkRule`` defines the ``apply()`` method, which modifies +the chunking encoded by a ``ChunkString``. The +``RegexpChunkRule`` class itself can be used to implement any +transformational rule based on regular expressions. There are +also a number of subclasses, which can be used to implement +simpler types of rules: + + - ``ChunkRule`` chunks anything that matches a given regular + expression. + - ``ChinkRule`` chinks anything that matches a given regular + expression. + - ``UnChunkRule`` will un-chunk any chunk that matches a given + regular expression. + - ``MergeRule`` can be used to merge two contiguous chunks. + - ``SplitRule`` can be used to split a single chunk into two + smaller chunks. + - ``ExpandLeftRule`` will expand a chunk to incorporate new + unchunked material on the left. + - ``ExpandRightRule`` will expand a chunk to incorporate new + unchunked material on the right. + +Tag Patterns +~~~~~~~~~~~~ + +A ``RegexpChunkRule`` uses a modified version of regular +expression patterns, called "tag patterns". Tag patterns are +used to match sequences of tags. Examples of tag patterns are:: + + r'(
<DT>|<JJ>|<NN>)+' + r'<NN>+' + r'<NN.*>' + +The differences between regular expression patterns and tag +patterns are: + + - In tag patterns, ``'<'`` and ``'>'`` act as parentheses; so + ``'<NN>+'`` matches one or more repetitions of ``'<NN>'``, not + ``'<NN'`` followed by one or more repetitions of ``'>'``. + - Whitespace in tag patterns is ignored. So + ``'
    | '`` is equivalant to ``'
    |'`` + - In tag patterns, ``'.'`` is equivalant to ``'[^{}<>]'``; so + ``''`` matches any single tag starting with ``'NN'``. + +The function ``tag_pattern2re_pattern`` can be used to transform +a tag pattern to an equivalent regular expression pattern. + +Efficiency +---------- + +Preliminary tests indicate that ``RegexpChunkParser`` can chunk at a +rate of about 300 tokens/second, with a moderately complex rule set. + +There may be problems if ``RegexpChunkParser`` is used with more than +5,000 tokens at a time. In particular, evaluation of some regular +expressions may cause the Python regular expression engine to +exceed its maximum recursion depth. We have attempted to minimize +these problems, but it is impossible to avoid them completely. We +therefore recommend that you apply the chunk parser to a single +sentence at a time. + +Emacs Tip +--------- + +If you evaluate the following elisp expression in emacs, it will +colorize a ``ChunkString`` when you use an interactive python shell +with emacs or xemacs ("C-c !"):: + + (let () + (defconst comint-mode-font-lock-keywords + '(("<[^>]+>" 0 'font-lock-reference-face) + ("[{}]" 0 'font-lock-function-name-face))) + (add-hook 'comint-mode-hook (lambda () (turn-on-font-lock)))) + +You can evaluate this code by copying it to a temporary buffer, +placing the cursor after the last close parenthesis, and typing +"``C-x C-e``". You should evaluate it before running the interactive +session. The change will last until you close emacs. + +Unresolved Issues +----------------- + +If we use the ``re`` module for regular expressions, Python's +regular expression engine generates "maximum recursion depth +exceeded" errors when processing very large texts, even for +regular expressions that should not require any recursion. We +therefore use the ``pre`` module instead. But note that ``pre`` +does not include Unicode support, so this module will not work +with unicode strings. Note also that ``pre`` regular expressions +are not quite as advanced as ``re`` ones (e.g., no leftward +zero-length assertions). + +:type CHUNK_TAG_PATTERN: regexp +:var CHUNK_TAG_PATTERN: A regular expression to test whether a tag + pattern is valid. +""" + +from nltk.data import load + +from nltk.chunk.api import ChunkParserI +from nltk.chunk.util import ( + ChunkScore, + accuracy, + tagstr2tree, + conllstr2tree, + conlltags2tree, + tree2conlltags, + tree2conllstr, + tree2conlltags, + ieerstr2tree, +) +from nltk.chunk.regexp import RegexpChunkParser, RegexpParser + +# Standard treebank POS tagger +_BINARY_NE_CHUNKER = 'chunkers/maxent_ne_chunker/english_ace_binary.pickle' +_MULTICLASS_NE_CHUNKER = 'chunkers/maxent_ne_chunker/english_ace_multiclass.pickle' + + +def ne_chunk(tagged_tokens, binary=False): + """ + Use NLTK's currently recommended named entity chunker to + chunk the given list of tagged tokens. + """ + if binary: + chunker_pickle = _BINARY_NE_CHUNKER + else: + chunker_pickle = _MULTICLASS_NE_CHUNKER + chunker = load(chunker_pickle) + return chunker.parse(tagged_tokens) + + +def ne_chunk_sents(tagged_sentences, binary=False): + """ + Use NLTK's currently recommended named entity chunker to chunk the + given list of tagged sentences, each consisting of a list of tagged tokens. 
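+
+    A rough usage sketch (both helpers need the pretrained chunker pickles
+    referenced above, e.g. via ``nltk.download('maxent_ne_chunker')``, plus a
+    tokenizer and POS tagger)::
+
+        from nltk import pos_tag, word_tokenize
+        from nltk.chunk import ne_chunk_sents
+
+        tagged = [pos_tag(word_tokenize("Mary works for Acme in Boston."))]
+        for tree in ne_chunk_sents(tagged):
+            print(tree)
+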
+ """ + if binary: + chunker_pickle = _BINARY_NE_CHUNKER + else: + chunker_pickle = _MULTICLASS_NE_CHUNKER + chunker = load(chunker_pickle) + return chunker.parse_sents(tagged_sentences) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chunk/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/chunk/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..e9198cf Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/chunk/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chunk/__pycache__/api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/chunk/__pycache__/api.cpython-37.pyc new file mode 100644 index 0000000..56ddba7 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/chunk/__pycache__/api.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chunk/__pycache__/named_entity.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/chunk/__pycache__/named_entity.cpython-37.pyc new file mode 100644 index 0000000..b45397e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/chunk/__pycache__/named_entity.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chunk/__pycache__/regexp.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/chunk/__pycache__/regexp.cpython-37.pyc new file mode 100644 index 0000000..1ba7768 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/chunk/__pycache__/regexp.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chunk/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/chunk/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000..f503788 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/chunk/__pycache__/util.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chunk/api.py b/venv.bak/lib/python3.7/site-packages/nltk/chunk/api.py new file mode 100644 index 0000000..1454825 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/chunk/api.py @@ -0,0 +1,52 @@ +# Natural Language Toolkit: Chunk parsing API +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird (minor additions) +# URL: +# For license information, see LICENSE.TXT + +##////////////////////////////////////////////////////// +## Chunk Parser Interface +##////////////////////////////////////////////////////// + +from nltk.parse import ParserI + +from nltk.chunk.util import ChunkScore + + +class ChunkParserI(ParserI): + """ + A processing interface for identifying non-overlapping groups in + unrestricted text. Typically, chunk parsers are used to find base + syntactic constituents, such as base noun phrases. Unlike + ``ParserI``, ``ChunkParserI`` guarantees that the ``parse()`` method + will always generate a parse. + """ + + def parse(self, tokens): + """ + Return the best chunk structure for the given tokens + and return a tree. + + :param tokens: The list of (word, tag) tokens to be chunked. + :type tokens: list(tuple) + :rtype: Tree + """ + raise NotImplementedError() + + def evaluate(self, gold): + """ + Score the accuracy of the chunker against the gold standard. + Remove the chunking the gold standard text, rechunk it using + the chunker, and return a ``ChunkScore`` object + reflecting the performance of this chunk peraser. + + :type gold: list(Tree) + :param gold: The list of chunked sentences to score the chunker on. 
+ :rtype: ChunkScore + """ + chunkscore = ChunkScore() + for correct in gold: + chunkscore.score(correct, self.parse(correct.leaves())) + return chunkscore diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chunk/named_entity.py b/venv.bak/lib/python3.7/site-packages/nltk/chunk/named_entity.py new file mode 100644 index 0000000..07d3067 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/chunk/named_entity.py @@ -0,0 +1,354 @@ +# Natural Language Toolkit: Chunk parsing API +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +Named entity chunker +""" +from __future__ import print_function +from __future__ import unicode_literals + +import os, re, pickle +from xml.etree import ElementTree as ET + +from nltk.tag import ClassifierBasedTagger, pos_tag + +try: + from nltk.classify import MaxentClassifier +except ImportError: + pass + +from nltk.tree import Tree +from nltk.tokenize import word_tokenize +from nltk.data import find + +from nltk.chunk.api import ChunkParserI +from nltk.chunk.util import ChunkScore + + +class NEChunkParserTagger(ClassifierBasedTagger): + """ + The IOB tagger used by the chunk parser. + """ + + def __init__(self, train): + ClassifierBasedTagger.__init__( + self, train=train, classifier_builder=self._classifier_builder + ) + + def _classifier_builder(self, train): + return MaxentClassifier.train( + train, algorithm='megam', gaussian_prior_sigma=1, trace=2 + ) + + def _english_wordlist(self): + try: + wl = self._en_wordlist + except AttributeError: + from nltk.corpus import words + + self._en_wordlist = set(words.words('en-basic')) + wl = self._en_wordlist + return wl + + def _feature_detector(self, tokens, index, history): + word = tokens[index][0] + pos = simplify_pos(tokens[index][1]) + if index == 0: + prevword = prevprevword = None + prevpos = prevprevpos = None + prevshape = prevtag = prevprevtag = None + elif index == 1: + prevword = tokens[index - 1][0].lower() + prevprevword = None + prevpos = simplify_pos(tokens[index - 1][1]) + prevprevpos = None + prevtag = history[index - 1][0] + prevshape = prevprevtag = None + else: + prevword = tokens[index - 1][0].lower() + prevprevword = tokens[index - 2][0].lower() + prevpos = simplify_pos(tokens[index - 1][1]) + prevprevpos = simplify_pos(tokens[index - 2][1]) + prevtag = history[index - 1] + prevprevtag = history[index - 2] + prevshape = shape(prevword) + if index == len(tokens) - 1: + nextword = nextnextword = None + nextpos = nextnextpos = None + elif index == len(tokens) - 2: + nextword = tokens[index + 1][0].lower() + nextpos = tokens[index + 1][1].lower() + nextnextword = None + nextnextpos = None + else: + nextword = tokens[index + 1][0].lower() + nextpos = tokens[index + 1][1].lower() + nextnextword = tokens[index + 2][0].lower() + nextnextpos = tokens[index + 2][1].lower() + + # 89.6 + features = { + 'bias': True, + 'shape': shape(word), + 'wordlen': len(word), + 'prefix3': word[:3].lower(), + 'suffix3': word[-3:].lower(), + 'pos': pos, + 'word': word, + 'en-wordlist': (word in self._english_wordlist()), + 'prevtag': prevtag, + 'prevpos': prevpos, + 'nextpos': nextpos, + 'prevword': prevword, + 'nextword': nextword, + 'word+nextpos': '{0}+{1}'.format(word.lower(), nextpos), + 'pos+prevtag': '{0}+{1}'.format(pos, prevtag), + 'shape+prevtag': '{0}+{1}'.format(prevshape, prevtag), + } + + return features + + +class NEChunkParser(ChunkParserI): + """ + Expected input: list of pos-tagged words + """ + + def __init__(self, 
train): + self._train(train) + + def parse(self, tokens): + """ + Each token should be a pos-tagged word + """ + tagged = self._tagger.tag(tokens) + tree = self._tagged_to_parse(tagged) + return tree + + def _train(self, corpus): + # Convert to tagged sequence + corpus = [self._parse_to_tagged(s) for s in corpus] + + self._tagger = NEChunkParserTagger(train=corpus) + + def _tagged_to_parse(self, tagged_tokens): + """ + Convert a list of tagged tokens to a chunk-parse tree. + """ + sent = Tree('S', []) + + for (tok, tag) in tagged_tokens: + if tag == 'O': + sent.append(tok) + elif tag.startswith('B-'): + sent.append(Tree(tag[2:], [tok])) + elif tag.startswith('I-'): + if sent and isinstance(sent[-1], Tree) and sent[-1].label() == tag[2:]: + sent[-1].append(tok) + else: + sent.append(Tree(tag[2:], [tok])) + return sent + + @staticmethod + def _parse_to_tagged(sent): + """ + Convert a chunk-parse tree to a list of tagged tokens. + """ + toks = [] + for child in sent: + if isinstance(child, Tree): + if len(child) == 0: + print("Warning -- empty chunk in sentence") + continue + toks.append((child[0], 'B-{0}'.format(child.label()))) + for tok in child[1:]: + toks.append((tok, 'I-{0}'.format(child.label()))) + else: + toks.append((child, 'O')) + return toks + + +def shape(word): + if re.match('[0-9]+(\.[0-9]*)?|[0-9]*\.[0-9]+$', word, re.UNICODE): + return 'number' + elif re.match('\W+$', word, re.UNICODE): + return 'punct' + elif re.match('\w+$', word, re.UNICODE): + if word.istitle(): + return 'upcase' + elif word.islower(): + return 'downcase' + else: + return 'mixedcase' + else: + return 'other' + + +def simplify_pos(s): + if s.startswith('V'): + return "V" + else: + return s.split('-')[0] + + +def postag_tree(tree): + # Part-of-speech tagging. + words = tree.leaves() + tag_iter = (pos for (word, pos) in pos_tag(words)) + newtree = Tree('S', []) + for child in tree: + if isinstance(child, Tree): + newtree.append(Tree(child.label(), [])) + for subchild in child: + newtree[-1].append((subchild, next(tag_iter))) + else: + newtree.append((child, next(tag_iter))) + return newtree + + +def load_ace_data(roots, fmt='binary', skip_bnews=True): + for root in roots: + for root, dirs, files in os.walk(root): + if root.endswith('bnews') and skip_bnews: + continue + for f in files: + if f.endswith('.sgm'): + for sent in load_ace_file(os.path.join(root, f), fmt): + yield sent + + +def load_ace_file(textfile, fmt): + print(' - {0}'.format(os.path.split(textfile)[1])) + annfile = textfile + '.tmx.rdc.xml' + + # Read the xml file, and get a list of entities + entities = [] + with open(annfile, 'r') as infile: + xml = ET.parse(infile).getroot() + for entity in xml.findall('document/entity'): + typ = entity.find('entity_type').text + for mention in entity.findall('entity_mention'): + if mention.get('TYPE') != 'NAME': + continue # only NEs + s = int(mention.find('head/charseq/start').text) + e = int(mention.find('head/charseq/end').text) + 1 + entities.append((s, e, typ)) + + # Read the text file, and mark the entities. 
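+    # (the (start, end, typ) offsets collected above are character indices into
+    #  the tag-stripped text, which is why the markup is removed below before
+    #  the entity spans are sliced out)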
+ with open(textfile, 'r') as infile: + text = infile.read() + + # Strip XML tags, since they don't count towards the indices + text = re.sub('<(?!/?TEXT)[^>]+>', '', text) + + # Blank out anything before/after + def subfunc(m): + return ' ' * (m.end() - m.start() - 6) + + text = re.sub('[\s\S]*', subfunc, text) + text = re.sub('[\s\S]*', '', text) + + # Simplify quotes + text = re.sub("``", ' "', text) + text = re.sub("''", '" ', text) + + entity_types = set(typ for (s, e, typ) in entities) + + # Binary distinction (NE or not NE) + if fmt == 'binary': + i = 0 + toks = Tree('S', []) + for (s, e, typ) in sorted(entities): + if s < i: + s = i # Overlapping! Deal with this better? + if e <= s: + continue + toks.extend(word_tokenize(text[i:s])) + toks.append(Tree('NE', text[s:e].split())) + i = e + toks.extend(word_tokenize(text[i:])) + yield toks + + # Multiclass distinction (NE type) + elif fmt == 'multiclass': + i = 0 + toks = Tree('S', []) + for (s, e, typ) in sorted(entities): + if s < i: + s = i # Overlapping! Deal with this better? + if e <= s: + continue + toks.extend(word_tokenize(text[i:s])) + toks.append(Tree(typ, text[s:e].split())) + i = e + toks.extend(word_tokenize(text[i:])) + yield toks + + else: + raise ValueError('bad fmt value') + + +# This probably belongs in a more general-purpose location (as does +# the parse_to_tagged function). +def cmp_chunks(correct, guessed): + correct = NEChunkParser._parse_to_tagged(correct) + guessed = NEChunkParser._parse_to_tagged(guessed) + ellipsis = False + for (w, ct), (w, gt) in zip(correct, guessed): + if ct == gt == 'O': + if not ellipsis: + print(" {:15} {:15} {2}".format(ct, gt, w)) + print(' {:15} {:15} {2}'.format('...', '...', '...')) + ellipsis = True + else: + ellipsis = False + print(" {:15} {:15} {2}".format(ct, gt, w)) + + +def build_model(fmt='binary'): + print('Loading training data...') + train_paths = [ + find('corpora/ace_data/ace.dev'), + find('corpora/ace_data/ace.heldout'), + find('corpora/ace_data/bbn.dev'), + find('corpora/ace_data/muc.dev'), + ] + train_trees = load_ace_data(train_paths, fmt) + train_data = [postag_tree(t) for t in train_trees] + print('Training...') + cp = NEChunkParser(train_data) + del train_data + + print('Loading eval data...') + eval_paths = [find('corpora/ace_data/ace.eval')] + eval_trees = load_ace_data(eval_paths, fmt) + eval_data = [postag_tree(t) for t in eval_trees] + + print('Evaluating...') + chunkscore = ChunkScore() + for i, correct in enumerate(eval_data): + guess = cp.parse(correct.leaves()) + chunkscore.score(correct, guess) + if i < 3: + cmp_chunks(correct, guess) + print(chunkscore) + + outfilename = '/tmp/ne_chunker_{0}.pickle'.format(fmt) + print('Saving chunker to {0}...'.format(outfilename)) + + with open(outfilename, 'wb') as outfile: + pickle.dump(cp, outfile, -1) + + return cp + + +if __name__ == '__main__': + # Make sure that the pickled object has the right class name: + from nltk.chunk.named_entity import build_model + + build_model('binary') + build_model('multiclass') diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chunk/regexp.py b/venv.bak/lib/python3.7/site-packages/nltk/chunk/regexp.py new file mode 100644 index 0000000..fe4ab5b --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/chunk/regexp.py @@ -0,0 +1,1488 @@ +# Natural Language Toolkit: Regular Expression Chunkers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird (minor additions) +# URL: +# For license information, see LICENSE.TXT +from __future__ import 
print_function, unicode_literals +from __future__ import division + +import re + +from six import string_types + +from nltk.tree import Tree +from nltk.chunk.api import ChunkParserI +from nltk.compat import python_2_unicode_compatible, unicode_repr + +##////////////////////////////////////////////////////// +## ChunkString +##////////////////////////////////////////////////////// + + +@python_2_unicode_compatible +class ChunkString(object): + """ + A string-based encoding of a particular chunking of a text. + Internally, the ``ChunkString`` class uses a single string to + encode the chunking of the input text. This string contains a + sequence of angle-bracket delimited tags, with chunking indicated + by braces. An example of this encoding is:: + + {
<DT><JJ><NN>}<VBD><IN>{<DT><NN>
}<.>{<DT><NN>
    }<.> + + ``ChunkString`` are created from tagged texts (i.e., lists of + ``tokens`` whose type is ``TaggedType``). Initially, nothing is + chunked. + + The chunking of a ``ChunkString`` can be modified with the ``xform()`` + method, which uses a regular expression to transform the string + representation. These transformations should only add and remove + braces; they should *not* modify the sequence of angle-bracket + delimited tags. + + :type _str: str + :ivar _str: The internal string representation of the text's + encoding. This string representation contains a sequence of + angle-bracket delimited tags, with chunking indicated by + braces. An example of this encoding is:: + + {
<DT><JJ><NN>}<VBD><IN>{<DT><NN>
}<.>{<DT><NN>
    }<.> + + :type _pieces: list(tagged tokens and chunks) + :ivar _pieces: The tagged tokens and chunks encoded by this ``ChunkString``. + :ivar _debug: The debug level. See the constructor docs. + + :cvar IN_CHUNK_PATTERN: A zero-width regexp pattern string that + will only match positions that are in chunks. + :cvar IN_CHINK_PATTERN: A zero-width regexp pattern string that + will only match positions that are in chinks. + """ + + CHUNK_TAG_CHAR = r'[^\{\}<>]' + CHUNK_TAG = r'(<%s+?>)' % CHUNK_TAG_CHAR + + IN_CHUNK_PATTERN = r'(?=[^\{]*\})' + IN_CHINK_PATTERN = r'(?=[^\}]*(\{|$))' + + # These are used by _verify + _CHUNK = r'(\{%s+?\})+?' % CHUNK_TAG + _CHINK = r'(%s+?)+?' % CHUNK_TAG + _VALID = re.compile(r'^(\{?%s\}?)*?$' % CHUNK_TAG) + _BRACKETS = re.compile('[^\{\}]+') + _BALANCED_BRACKETS = re.compile(r'(\{\})*$') + + def __init__(self, chunk_struct, debug_level=1): + """ + Construct a new ``ChunkString`` that encodes the chunking of + the text ``tagged_tokens``. + + :type chunk_struct: Tree + :param chunk_struct: The chunk structure to be further chunked. + :type debug_level: int + :param debug_level: The level of debugging which should be + applied to transformations on the ``ChunkString``. The + valid levels are: + - 0: no checks + - 1: full check on to_chunkstruct + - 2: full check on to_chunkstruct and cursory check after + each transformation. + - 3: full check on to_chunkstruct and full check after + each transformation. + We recommend you use at least level 1. You should + probably use level 3 if you use any non-standard + subclasses of ``RegexpChunkRule``. + """ + self._root_label = chunk_struct.label() + self._pieces = chunk_struct[:] + tags = [self._tag(tok) for tok in self._pieces] + self._str = '<' + '><'.join(tags) + '>' + self._debug = debug_level + + def _tag(self, tok): + if isinstance(tok, tuple): + return tok[1] + elif isinstance(tok, Tree): + return tok.label() + else: + raise ValueError('chunk structures must contain tagged ' 'tokens or trees') + + def _verify(self, s, verify_tags): + """ + Check to make sure that ``s`` still corresponds to some chunked + version of ``_pieces``. + + :type verify_tags: bool + :param verify_tags: Whether the individual tags should be + checked. If this is false, ``_verify`` will check to make + sure that ``_str`` encodes a chunked version of *some* + list of tokens. If this is true, then ``_verify`` will + check to make sure that the tags in ``_str`` match those in + ``_pieces``. + + :raise ValueError: if the internal string representation of + this ``ChunkString`` is invalid or not consistent with _pieces. + """ + # Check overall form + if not ChunkString._VALID.match(s): + raise ValueError( + 'Transformation generated invalid ' 'chunkstring:\n %s' % s + ) + + # Check that parens are balanced. If the string is long, we + # have to do this in pieces, to avoid a maximum recursion + # depth limit for regular expressions. 
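+        # (e.g. '{<DT><NN>}<VBD>{<NN>}' reduces to '{}{}' here, which the
+        #  balanced-brackets pattern accepts, while a nested '{<DT>{<NN>}}'
+        #  would reduce to '{{}}' and be rejected)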
+ brackets = ChunkString._BRACKETS.sub('', s) + for i in range(1 + len(brackets) // 5000): + substr = brackets[i * 5000 : i * 5000 + 5000] + if not ChunkString._BALANCED_BRACKETS.match(substr): + raise ValueError( + 'Transformation generated invalid ' 'chunkstring:\n %s' % s + ) + + if verify_tags <= 0: + return + + tags1 = (re.split(r'[\{\}<>]+', s))[1:-1] + tags2 = [self._tag(piece) for piece in self._pieces] + if tags1 != tags2: + raise ValueError( + 'Transformation generated invalid ' 'chunkstring: tag changed' + ) + + def to_chunkstruct(self, chunk_label='CHUNK'): + """ + Return the chunk structure encoded by this ``ChunkString``. + + :rtype: Tree + :raise ValueError: If a transformation has generated an + invalid chunkstring. + """ + if self._debug > 0: + self._verify(self._str, 1) + + # Use this alternating list to create the chunkstruct. + pieces = [] + index = 0 + piece_in_chunk = 0 + for piece in re.split('[{}]', self._str): + + # Find the list of tokens contained in this piece. + length = piece.count('<') + subsequence = self._pieces[index : index + length] + + # Add this list of tokens to our pieces. + if piece_in_chunk: + pieces.append(Tree(chunk_label, subsequence)) + else: + pieces += subsequence + + # Update index, piece_in_chunk + index += length + piece_in_chunk = not piece_in_chunk + + return Tree(self._root_label, pieces) + + def xform(self, regexp, repl): + """ + Apply the given transformation to the string encoding of this + ``ChunkString``. In particular, find all occurrences that match + ``regexp``, and replace them using ``repl`` (as done by + ``re.sub``). + + This transformation should only add and remove braces; it + should *not* modify the sequence of angle-bracket delimited + tags. Furthermore, this transformation may not result in + improper bracketing. Note, in particular, that bracketing may + not be nested. + + :type regexp: str or regexp + :param regexp: A regular expression matching the substring + that should be replaced. This will typically include a + named group, which can be used by ``repl``. + :type repl: str + :param repl: An expression specifying what should replace the + matched substring. Typically, this will include a named + replacement group, specified by ``regexp``. + :rtype: None + :raise ValueError: If this transformation generated an + invalid chunkstring. + """ + # Do the actual substitution + s = re.sub(regexp, repl, self._str) + + # The substitution might have generated "empty chunks" + # (substrings of the form "{}"). Remove them, so they don't + # interfere with other transformations. + s = re.sub('\{\}', '', s) + + # Make sure that the transformation was legal. + if self._debug > 1: + self._verify(s, self._debug - 2) + + # Commit the transformation. + self._str = s + + def __repr__(self): + """ + Return a string representation of this ``ChunkString``. + It has the form:: + + }{
    }'> + + :rtype: str + """ + return '' % unicode_repr(self._str) + + def __str__(self): + """ + Return a formatted representation of this ``ChunkString``. + This representation will include extra spaces to ensure that + tags will line up with the representation of other + ``ChunkStrings`` for the same text, regardless of the chunking. + + :rtype: str + """ + # Add spaces to make everything line up. + str = re.sub(r'>(?!\})', r'> ', self._str) + str = re.sub(r'([^\{])<', r'\1 <', str) + if str[0] == '<': + str = ' ' + str + return str + + +##////////////////////////////////////////////////////// +## Chunking Rules +##////////////////////////////////////////////////////// + + +@python_2_unicode_compatible +class RegexpChunkRule(object): + """ + A rule specifying how to modify the chunking in a ``ChunkString``, + using a transformational regular expression. The + ``RegexpChunkRule`` class itself can be used to implement any + transformational rule based on regular expressions. There are + also a number of subclasses, which can be used to implement + simpler types of rules, based on matching regular expressions. + + Each ``RegexpChunkRule`` has a regular expression and a + replacement expression. When a ``RegexpChunkRule`` is "applied" + to a ``ChunkString``, it searches the ``ChunkString`` for any + substring that matches the regular expression, and replaces it + using the replacement expression. This search/replace operation + has the same semantics as ``re.sub``. + + Each ``RegexpChunkRule`` also has a description string, which + gives a short (typically less than 75 characters) description of + the purpose of the rule. + + This transformation defined by this ``RegexpChunkRule`` should + only add and remove braces; it should *not* modify the sequence + of angle-bracket delimited tags. Furthermore, this transformation + may not result in nested or mismatched bracketing. + """ + + def __init__(self, regexp, repl, descr): + """ + Construct a new RegexpChunkRule. + + :type regexp: regexp or str + :param regexp: The regular expression for this ``RegexpChunkRule``. + When this rule is applied to a ``ChunkString``, any + substring that matches ``regexp`` will be replaced using + the replacement string ``repl``. Note that this must be a + normal regular expression, not a tag pattern. + :type repl: str + :param repl: The replacement expression for this ``RegexpChunkRule``. + When this rule is applied to a ``ChunkString``, any substring + that matches ``regexp`` will be replaced using ``repl``. + :type descr: str + :param descr: A short description of the purpose and/or effect + of this rule. + """ + if isinstance(regexp, string_types): + regexp = re.compile(regexp) + self._repl = repl + self._descr = descr + self._regexp = regexp + + def apply(self, chunkstr): + # Keep docstring generic so we can inherit it. + """ + Apply this rule to the given ``ChunkString``. See the + class reference documentation for a description of what it + means to apply a rule. + + :type chunkstr: ChunkString + :param chunkstr: The chunkstring to which this rule is applied. + :rtype: None + :raise ValueError: If this transformation generated an + invalid chunkstring. + """ + chunkstr.xform(self._regexp, self._repl) + + def descr(self): + """ + Return a short description of the purpose and/or effect of + this rule. + + :rtype: str + """ + return self._descr + + def __repr__(self): + """ + Return a string representation of this rule. 
It has the form:: + + }'->''> + + Note that this representation does not include the + description string; that string can be accessed + separately with the ``descr()`` method. + + :rtype: str + """ + return ( + '' + + unicode_repr(self._repl) + + '>' + ) + + @staticmethod + def fromstring(s): + """ + Create a RegexpChunkRule from a string description. + Currently, the following formats are supported:: + + {regexp} # chunk rule + }regexp{ # chink rule + regexp}{regexp # split rule + regexp{}regexp # merge rule + + Where ``regexp`` is a regular expression for the rule. Any + text following the comment marker (``#``) will be used as + the rule's description: + + >>> from nltk.chunk.regexp import RegexpChunkRule + >>> RegexpChunkRule.fromstring('{
    ?+}') + ?+'> + """ + # Split off the comment (but don't split on '\#') + m = re.match(r'(?P(\\.|[^#])*)(?P#.*)?', s) + rule = m.group('rule').strip() + comment = (m.group('comment') or '')[1:].strip() + + # Pattern bodies: chunk, chink, split, merge + try: + if not rule: + raise ValueError('Empty chunk pattern') + if rule[0] == '{' and rule[-1] == '}': + return ChunkRule(rule[1:-1], comment) + elif rule[0] == '}' and rule[-1] == '{': + return ChinkRule(rule[1:-1], comment) + elif '}{' in rule: + left, right = rule.split('}{') + return SplitRule(left, right, comment) + elif '{}' in rule: + left, right = rule.split('{}') + return MergeRule(left, right, comment) + elif re.match('[^{}]*{[^{}]*}[^{}]*', rule): + left, chunk, right = re.split('[{}]', rule) + return ChunkRuleWithContext(left, chunk, right, comment) + else: + raise ValueError('Illegal chunk pattern: %s' % rule) + except (ValueError, re.error): + raise ValueError('Illegal chunk pattern: %s' % rule) + + +@python_2_unicode_compatible +class ChunkRule(RegexpChunkRule): + """ + A rule specifying how to add chunks to a ``ChunkString``, using a + matching tag pattern. When applied to a ``ChunkString``, it will + find any substring that matches this tag pattern and that is not + already part of a chunk, and create a new chunk containing that + substring. + """ + + def __init__(self, tag_pattern, descr): + + """ + Construct a new ``ChunkRule``. + + :type tag_pattern: str + :param tag_pattern: This rule's tag pattern. When + applied to a ``ChunkString``, this rule will + chunk any substring that matches this tag pattern and that + is not already part of a chunk. + :type descr: str + :param descr: A short description of the purpose and/or effect + of this rule. + """ + self._pattern = tag_pattern + regexp = re.compile( + '(?P%s)%s' + % (tag_pattern2re_pattern(tag_pattern), ChunkString.IN_CHINK_PATTERN) + ) + RegexpChunkRule.__init__(self, regexp, '{\g}', descr) + + def __repr__(self): + """ + Return a string representation of this rule. It has the form:: + + '> + + Note that this representation does not include the + description string; that string can be accessed + separately with the ``descr()`` method. + + :rtype: str + """ + return '' + + +@python_2_unicode_compatible +class ChinkRule(RegexpChunkRule): + """ + A rule specifying how to remove chinks to a ``ChunkString``, + using a matching tag pattern. When applied to a + ``ChunkString``, it will find any substring that matches this + tag pattern and that is contained in a chunk, and remove it + from that chunk, thus creating two new chunks. + """ + + def __init__(self, tag_pattern, descr): + """ + Construct a new ``ChinkRule``. + + :type tag_pattern: str + :param tag_pattern: This rule's tag pattern. When + applied to a ``ChunkString``, this rule will + find any substring that matches this tag pattern and that + is contained in a chunk, and remove it from that chunk, + thus creating two new chunks. + :type descr: str + :param descr: A short description of the purpose and/or effect + of this rule. + """ + self._pattern = tag_pattern + regexp = re.compile( + '(?P%s)%s' + % (tag_pattern2re_pattern(tag_pattern), ChunkString.IN_CHUNK_PATTERN) + ) + RegexpChunkRule.__init__(self, regexp, '}\g{', descr) + + def __repr__(self): + """ + Return a string representation of this rule. It has the form:: + + '> + + Note that this representation does not include the + description string; that string can be accessed + separately with the ``descr()`` method. 
+ + :rtype: str + """ + return '' + + +@python_2_unicode_compatible +class UnChunkRule(RegexpChunkRule): + """ + A rule specifying how to remove chunks to a ``ChunkString``, + using a matching tag pattern. When applied to a + ``ChunkString``, it will find any complete chunk that matches this + tag pattern, and un-chunk it. + """ + + def __init__(self, tag_pattern, descr): + """ + Construct a new ``UnChunkRule``. + + :type tag_pattern: str + :param tag_pattern: This rule's tag pattern. When + applied to a ``ChunkString``, this rule will + find any complete chunk that matches this tag pattern, + and un-chunk it. + :type descr: str + :param descr: A short description of the purpose and/or effect + of this rule. + """ + self._pattern = tag_pattern + regexp = re.compile('\{(?P%s)\}' % tag_pattern2re_pattern(tag_pattern)) + RegexpChunkRule.__init__(self, regexp, '\g', descr) + + def __repr__(self): + """ + Return a string representation of this rule. It has the form:: + + '> + + Note that this representation does not include the + description string; that string can be accessed + separately with the ``descr()`` method. + + :rtype: str + """ + return '' + + +@python_2_unicode_compatible +class MergeRule(RegexpChunkRule): + """ + A rule specifying how to merge chunks in a ``ChunkString``, using + two matching tag patterns: a left pattern, and a right pattern. + When applied to a ``ChunkString``, it will find any chunk whose end + matches left pattern, and immediately followed by a chunk whose + beginning matches right pattern. It will then merge those two + chunks into a single chunk. + """ + + def __init__(self, left_tag_pattern, right_tag_pattern, descr): + """ + Construct a new ``MergeRule``. + + :type right_tag_pattern: str + :param right_tag_pattern: This rule's right tag + pattern. When applied to a ``ChunkString``, this + rule will find any chunk whose end matches + ``left_tag_pattern``, and immediately followed by a chunk + whose beginning matches this pattern. It will + then merge those two chunks into a single chunk. + :type left_tag_pattern: str + :param left_tag_pattern: This rule's left tag + pattern. When applied to a ``ChunkString``, this + rule will find any chunk whose end matches + this pattern, and immediately followed by a chunk + whose beginning matches ``right_tag_pattern``. It will + then merge those two chunks into a single chunk. + + :type descr: str + :param descr: A short description of the purpose and/or effect + of this rule. + """ + # Ensure that the individual patterns are coherent. E.g., if + # left='(' and right=')', then this will raise an exception: + re.compile(tag_pattern2re_pattern(left_tag_pattern)) + re.compile(tag_pattern2re_pattern(right_tag_pattern)) + + self._left_tag_pattern = left_tag_pattern + self._right_tag_pattern = right_tag_pattern + regexp = re.compile( + '(?P%s)}{(?=%s)' + % ( + tag_pattern2re_pattern(left_tag_pattern), + tag_pattern2re_pattern(right_tag_pattern), + ) + ) + RegexpChunkRule.__init__(self, regexp, '\g', descr) + + def __repr__(self): + """ + Return a string representation of this rule. It has the form:: + + ', ''> + + Note that this representation does not include the + description string; that string can be accessed + separately with the ``descr()`` method. + + :rtype: str + """ + return ( + '' + ) + + +@python_2_unicode_compatible +class SplitRule(RegexpChunkRule): + """ + A rule specifying how to split chunks in a ``ChunkString``, using + two matching tag patterns: a left pattern, and a right pattern. 
+ When applied to a ``ChunkString``, it will find any chunk that + matches the left pattern followed by the right pattern. It will + then split the chunk into two new chunks, at the point between the + two pattern matches. + """ + + def __init__(self, left_tag_pattern, right_tag_pattern, descr): + """ + Construct a new ``SplitRule``. + + :type right_tag_pattern: str + :param right_tag_pattern: This rule's right tag + pattern. When applied to a ``ChunkString``, this rule will + find any chunk containing a substring that matches + ``left_tag_pattern`` followed by this pattern. It will + then split the chunk into two new chunks at the point + between these two matching patterns. + :type left_tag_pattern: str + :param left_tag_pattern: This rule's left tag + pattern. When applied to a ``ChunkString``, this rule will + find any chunk containing a substring that matches this + pattern followed by ``right_tag_pattern``. It will then + split the chunk into two new chunks at the point between + these two matching patterns. + :type descr: str + :param descr: A short description of the purpose and/or effect + of this rule. + """ + # Ensure that the individual patterns are coherent. E.g., if + # left='(' and right=')', then this will raise an exception: + re.compile(tag_pattern2re_pattern(left_tag_pattern)) + re.compile(tag_pattern2re_pattern(right_tag_pattern)) + + self._left_tag_pattern = left_tag_pattern + self._right_tag_pattern = right_tag_pattern + regexp = re.compile( + '(?P%s)(?=%s)' + % ( + tag_pattern2re_pattern(left_tag_pattern), + tag_pattern2re_pattern(right_tag_pattern), + ) + ) + RegexpChunkRule.__init__(self, regexp, r'\g}{', descr) + + def __repr__(self): + """ + Return a string representation of this rule. It has the form:: + + ', '
    '> + + Note that this representation does not include the + description string; that string can be accessed + separately with the ``descr()`` method. + + :rtype: str + """ + return ( + '' + ) + + +@python_2_unicode_compatible +class ExpandLeftRule(RegexpChunkRule): + """ + A rule specifying how to expand chunks in a ``ChunkString`` to the left, + using two matching tag patterns: a left pattern, and a right pattern. + When applied to a ``ChunkString``, it will find any chunk whose beginning + matches right pattern, and immediately preceded by a chink whose + end matches left pattern. It will then expand the chunk to incorporate + the new material on the left. + """ + + def __init__(self, left_tag_pattern, right_tag_pattern, descr): + """ + Construct a new ``ExpandRightRule``. + + :type right_tag_pattern: str + :param right_tag_pattern: This rule's right tag + pattern. When applied to a ``ChunkString``, this + rule will find any chunk whose beginning matches + ``right_tag_pattern``, and immediately preceded by a chink + whose end matches this pattern. It will + then merge those two chunks into a single chunk. + :type left_tag_pattern: str + :param left_tag_pattern: This rule's left tag + pattern. When applied to a ``ChunkString``, this + rule will find any chunk whose beginning matches + this pattern, and immediately preceded by a chink + whose end matches ``left_tag_pattern``. It will + then expand the chunk to incorporate the new material on the left. + + :type descr: str + :param descr: A short description of the purpose and/or effect + of this rule. + """ + # Ensure that the individual patterns are coherent. E.g., if + # left='(' and right=')', then this will raise an exception: + re.compile(tag_pattern2re_pattern(left_tag_pattern)) + re.compile(tag_pattern2re_pattern(right_tag_pattern)) + + self._left_tag_pattern = left_tag_pattern + self._right_tag_pattern = right_tag_pattern + regexp = re.compile( + '(?P%s)\{(?P%s)' + % ( + tag_pattern2re_pattern(left_tag_pattern), + tag_pattern2re_pattern(right_tag_pattern), + ) + ) + RegexpChunkRule.__init__(self, regexp, '{\g\g', descr) + + def __repr__(self): + """ + Return a string representation of this rule. It has the form:: + + ', ''> + + Note that this representation does not include the + description string; that string can be accessed + separately with the ``descr()`` method. + + :rtype: str + """ + return ( + '' + ) + + +@python_2_unicode_compatible +class ExpandRightRule(RegexpChunkRule): + """ + A rule specifying how to expand chunks in a ``ChunkString`` to the + right, using two matching tag patterns: a left pattern, and a + right pattern. When applied to a ``ChunkString``, it will find any + chunk whose end matches left pattern, and immediately followed by + a chink whose beginning matches right pattern. It will then + expand the chunk to incorporate the new material on the right. + """ + + def __init__(self, left_tag_pattern, right_tag_pattern, descr): + """ + Construct a new ``ExpandRightRule``. + + :type right_tag_pattern: str + :param right_tag_pattern: This rule's right tag + pattern. When applied to a ``ChunkString``, this + rule will find any chunk whose end matches + ``left_tag_pattern``, and immediately followed by a chink + whose beginning matches this pattern. It will + then merge those two chunks into a single chunk. + :type left_tag_pattern: str + :param left_tag_pattern: This rule's left tag + pattern. 
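# Illustrative sketch (not part of the NLTK source): splitting an over-long
# chunk with a SplitRule.  The example data is hypothetical.
from nltk.tree import Tree
from nltk.chunk.regexp import ChunkRule, SplitRule, RegexpChunkParser

tagged = [("the", "DT"), ("cat", "NN"), ("the", "DT"), ("dog", "NN")]
rules = [
    # Deliberately over-chunk the whole sequence into a single NP.
    ChunkRule("<DT><NN><DT><NN>", "chunk everything as one NP"),
    # Split between a noun and a following determiner.
    SplitRule("<NN>", "<DT>", "split at noun/determiner boundaries"),
]
print(RegexpChunkParser(rules, chunk_label="NP").parse(Tree("S", tagged)))
# Expected shape: (S (NP the/DT cat/NN) (NP the/DT dog/NN))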
When applied to a ``ChunkString``, this + rule will find any chunk whose end matches + this pattern, and immediately followed by a chink + whose beginning matches ``right_tag_pattern``. It will + then expand the chunk to incorporate the new material on the right. + + :type descr: str + :param descr: A short description of the purpose and/or effect + of this rule. + """ + # Ensure that the individual patterns are coherent. E.g., if + # left='(' and right=')', then this will raise an exception: + re.compile(tag_pattern2re_pattern(left_tag_pattern)) + re.compile(tag_pattern2re_pattern(right_tag_pattern)) + + self._left_tag_pattern = left_tag_pattern + self._right_tag_pattern = right_tag_pattern + regexp = re.compile( + '(?P%s)\}(?P%s)' + % ( + tag_pattern2re_pattern(left_tag_pattern), + tag_pattern2re_pattern(right_tag_pattern), + ) + ) + RegexpChunkRule.__init__(self, regexp, '\g\g}', descr) + + def __repr__(self): + """ + Return a string representation of this rule. It has the form:: + + ', ''> + + Note that this representation does not include the + description string; that string can be accessed + separately with the ``descr()`` method. + + :rtype: str + """ + return ( + '' + ) + + +@python_2_unicode_compatible +class ChunkRuleWithContext(RegexpChunkRule): + """ + A rule specifying how to add chunks to a ``ChunkString``, using + three matching tag patterns: one for the left context, one for the + chunk, and one for the right context. When applied to a + ``ChunkString``, it will find any substring that matches the chunk + tag pattern, is surrounded by substrings that match the two + context patterns, and is not already part of a chunk; and create a + new chunk containing the substring that matched the chunk tag + pattern. + + Caveat: Both the left and right context are consumed when this + rule matches; therefore, if you need to find overlapping matches, + you will need to apply your rule more than once. + """ + + def __init__( + self, + left_context_tag_pattern, + chunk_tag_pattern, + right_context_tag_pattern, + descr, + ): + """ + Construct a new ``ChunkRuleWithContext``. + + :type left_context_tag_pattern: str + :param left_context_tag_pattern: A tag pattern that must match + the left context of ``chunk_tag_pattern`` for this rule to + apply. + :type chunk_tag_pattern: str + :param chunk_tag_pattern: A tag pattern that must match for this + rule to apply. If the rule does apply, then this pattern + also identifies the substring that will be made into a chunk. + :type right_context_tag_pattern: str + :param right_context_tag_pattern: A tag pattern that must match + the right context of ``chunk_tag_pattern`` for this rule to + apply. + :type descr: str + :param descr: A short description of the purpose and/or effect + of this rule. + """ + # Ensure that the individual patterns are coherent. 
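# Illustrative sketch (not part of the NLTK source): growing an existing chunk
# to the right with ExpandRightRule.  The example data is hypothetical.
from nltk.tree import Tree
from nltk.chunk.regexp import ChunkRule, ExpandRightRule, RegexpChunkParser

tagged = [("the", "DT"), ("cat", "NN"), ("nearby", "RB")]
rules = [
    ChunkRule("<DT><NN>", "chunk determiner + noun"),
    # A chunk ending in <NN> that is immediately followed by a chink beginning
    # with <RB> is expanded so the adverb becomes part of the chunk.
    ExpandRightRule("<NN>", "<RB>", "absorb a trailing adverb"),
]
print(RegexpChunkParser(rules, chunk_label="NP").parse(Tree("S", tagged)))
# Expected shape: (S (NP the/DT cat/NN nearby/RB))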
E.g., if + # left='(' and right=')', then this will raise an exception: + re.compile(tag_pattern2re_pattern(left_context_tag_pattern)) + re.compile(tag_pattern2re_pattern(chunk_tag_pattern)) + re.compile(tag_pattern2re_pattern(right_context_tag_pattern)) + + self._left_context_tag_pattern = left_context_tag_pattern + self._chunk_tag_pattern = chunk_tag_pattern + self._right_context_tag_pattern = right_context_tag_pattern + regexp = re.compile( + '(?P%s)(?P%s)(?P%s)%s' + % ( + tag_pattern2re_pattern(left_context_tag_pattern), + tag_pattern2re_pattern(chunk_tag_pattern), + tag_pattern2re_pattern(right_context_tag_pattern), + ChunkString.IN_CHINK_PATTERN, + ) + ) + replacement = r'\g{\g}\g' + RegexpChunkRule.__init__(self, regexp, replacement, descr) + + def __repr__(self): + """ + Return a string representation of this rule. It has the form:: + + ', '', '
    '> + + Note that this representation does not include the + description string; that string can be accessed + separately with the ``descr()`` method. + + :rtype: str + """ + return '' % ( + self._left_context_tag_pattern, + self._chunk_tag_pattern, + self._right_context_tag_pattern, + ) + + +##////////////////////////////////////////////////////// +## Tag Pattern Format Conversion +##////////////////////////////////////////////////////// + +# this should probably be made more strict than it is -- e.g., it +# currently accepts 'foo'. +CHUNK_TAG_PATTERN = re.compile( + r'^((%s|<%s>)*)$' % ('([^\{\}<>]|\{\d+,?\}|\{\d*,\d+\})+', '[^\{\}<>]+') +) + + +def tag_pattern2re_pattern(tag_pattern): + """ + Convert a tag pattern to a regular expression pattern. A "tag + pattern" is a modified version of a regular expression, designed + for matching sequences of tags. The differences between regular + expression patterns and tag patterns are: + + - In tag patterns, ``'<'`` and ``'>'`` act as parentheses; so + ``'+'`` matches one or more repetitions of ``''``, not + ``''``. + - Whitespace in tag patterns is ignored. So + ``'
    <DT> | <NN>'`` is equivalent to ``'<DT>
    |'`` + - In tag patterns, ``'.'`` is equivalant to ``'[^{}<>]'``; so + ``''`` matches any single tag starting with ``'NN'``. + + In particular, ``tag_pattern2re_pattern`` performs the following + transformations on the given pattern: + + - Replace '.' with '[^<>{}]' + - Remove any whitespace + - Add extra parens around '<' and '>', to make '<' and '>' act + like parentheses. E.g., so that in '+', the '+' has scope + over the entire ''; and so that in '', the '|' has + scope over 'NN' and 'IN', but not '<' or '>'. + - Check to make sure the resulting pattern is valid. + + :type tag_pattern: str + :param tag_pattern: The tag pattern to convert to a regular + expression pattern. + :raise ValueError: If ``tag_pattern`` is not a valid tag pattern. + In particular, ``tag_pattern`` should not include braces; and it + should not contain nested or mismatched angle-brackets. + :rtype: str + :return: A regular expression pattern corresponding to + ``tag_pattern``. + """ + # Clean up the regular expression + tag_pattern = re.sub(r'\s', '', tag_pattern) + tag_pattern = re.sub(r'<', '(<(', tag_pattern) + tag_pattern = re.sub(r'>', ')>)', tag_pattern) + + # Check the regular expression + if not CHUNK_TAG_PATTERN.match(tag_pattern): + raise ValueError('Bad tag pattern: %r' % tag_pattern) + + # Replace "." with CHUNK_TAG_CHAR. + # We have to do this after, since it adds {}[]<>s, which would + # confuse CHUNK_TAG_PATTERN. + # PRE doesn't have lookback assertions, so reverse twice, and do + # the pattern backwards (with lookahead assertions). This can be + # made much cleaner once we can switch back to SRE. + def reverse_str(str): + lst = list(str) + lst.reverse() + return ''.join(lst) + + tc_rev = reverse_str(ChunkString.CHUNK_TAG_CHAR) + reversed = reverse_str(tag_pattern) + reversed = re.sub(r'\.(?!\\(\\\\)*($|[^\\]))', tc_rev, reversed) + tag_pattern = reverse_str(reversed) + + return tag_pattern + + +##////////////////////////////////////////////////////// +## RegexpChunkParser +##////////////////////////////////////////////////////// + + +@python_2_unicode_compatible +class RegexpChunkParser(ChunkParserI): + """ + A regular expression based chunk parser. ``RegexpChunkParser`` uses a + sequence of "rules" to find chunks of a single type within a + text. The chunking of the text is encoded using a ``ChunkString``, + and each rule acts by modifying the chunking in the + ``ChunkString``. The rules are all implemented using regular + expression matching and substitution. + + The ``RegexpChunkRule`` class and its subclasses (``ChunkRule``, + ``ChinkRule``, ``UnChunkRule``, ``MergeRule``, and ``SplitRule``) + define the rules that are used by ``RegexpChunkParser``. Each rule + defines an ``apply()`` method, which modifies the chunking encoded + by a given ``ChunkString``. + + :type _rules: list(RegexpChunkRule) + :ivar _rules: The list of rules that should be applied to a text. + :type _trace: int + :ivar _trace: The default level of tracing. + + """ + + def __init__(self, rules, chunk_label='NP', root_label='S', trace=0): + """ + Construct a new ``RegexpChunkParser``. + + :type rules: list(RegexpChunkRule) + :param rules: The sequence of rules that should be used to + generate the chunking for a tagged text. + :type chunk_label: str + :param chunk_label: The node value that should be used for + chunk subtrees. This is typically a short string + describing the type of information contained by the chunk, + such as ``"NP"`` for base noun phrases. 
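# Illustrative sketch (not part of the NLTK source): what the tag-pattern
# conversion above produces for a typical pattern.  The exact output string is
# an implementation detail and may differ between NLTK versions.
from nltk.chunk.regexp import tag_pattern2re_pattern

print(tag_pattern2re_pattern("<DT>?<JJ.*>*<NN.*>"))
# Roughly: (<(DT)>)?(<(JJ[^\{\}<>]*)>)*(<(NN[^\{\}<>]*)>)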
+ :type root_label: str + :param root_label: The node value that should be used for the + top node of the chunk structure. + :type trace: int + :param trace: The level of tracing that should be used when + parsing a text. ``0`` will generate no tracing output; + ``1`` will generate normal tracing output; and ``2`` or + higher will generate verbose tracing output. + """ + self._rules = rules + self._trace = trace + self._chunk_label = chunk_label + self._root_label = root_label + + def _trace_apply(self, chunkstr, verbose): + """ + Apply each rule of this ``RegexpChunkParser`` to ``chunkstr``, in + turn. Generate trace output between each rule. If ``verbose`` + is true, then generate verbose output. + + :type chunkstr: ChunkString + :param chunkstr: The chunk string to which each rule should be + applied. + :type verbose: bool + :param verbose: Whether output should be verbose. + :rtype: None + """ + print('# Input:') + print(chunkstr) + for rule in self._rules: + rule.apply(chunkstr) + if verbose: + print('#', rule.descr() + ' (' + unicode_repr(rule) + '):') + else: + print('#', rule.descr() + ':') + print(chunkstr) + + def _notrace_apply(self, chunkstr): + """ + Apply each rule of this ``RegexpChunkParser`` to ``chunkstr``, in + turn. + + :param chunkstr: The chunk string to which each rule should be + applied. + :type chunkstr: ChunkString + :rtype: None + """ + + for rule in self._rules: + rule.apply(chunkstr) + + def parse(self, chunk_struct, trace=None): + """ + :type chunk_struct: Tree + :param chunk_struct: the chunk structure to be (further) chunked + :type trace: int + :param trace: The level of tracing that should be used when + parsing a text. ``0`` will generate no tracing output; + ``1`` will generate normal tracing output; and ``2`` or + highter will generate verbose tracing output. This value + overrides the trace level value that was given to the + constructor. + :rtype: Tree + :return: a chunk structure that encodes the chunks in a given + tagged sentence. A chunk is a non-overlapping linguistic + group, such as a noun phrase. The set of chunks + identified in the chunk structure depends on the rules + used to define this ``RegexpChunkParser``. + """ + if len(chunk_struct) == 0: + print('Warning: parsing empty text') + return Tree(self._root_label, []) + + try: + chunk_struct.label() + except AttributeError: + chunk_struct = Tree(self._root_label, chunk_struct) + + # Use the default trace value? + if trace is None: + trace = self._trace + + chunkstr = ChunkString(chunk_struct) + + # Apply the sequence of rules to the chunkstring. + if trace: + verbose = trace > 1 + self._trace_apply(chunkstr, verbose) + else: + self._notrace_apply(chunkstr) + + # Use the chunkstring to create a chunk structure. + return chunkstr.to_chunkstruct(self._chunk_label) + + def rules(self): + """ + :return: the sequence of rules used by ``RegexpChunkParser``. + :rtype: list(RegexpChunkRule) + """ + return self._rules + + def __repr__(self): + """ + :return: a concise string representation of this + ``RegexpChunkParser``. + :rtype: str + """ + return "" % len(self._rules) + + def __str__(self): + """ + :return: a verbose string representation of this ``RegexpChunkParser``. 
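# Illustrative sketch (not part of the NLTK source): a chunk-then-chink rule
# sequence run with tracing enabled, so the intermediate ChunkString is printed
# after each rule.  The sentence and rule descriptions are hypothetical.
from nltk.tree import Tree
from nltk.chunk.regexp import ChunkRule, ChinkRule, RegexpChunkParser

rules = [
    ChunkRule("<.*>+", "chunk every tag"),
    ChinkRule("<VBD|IN>+", "chink verbs and prepositions"),
]
cp = RegexpChunkParser(rules, chunk_label="NP")
tagged = [("the", "DT"), ("cat", "NN"), ("sat", "VBD"),
          ("on", "IN"), ("the", "DT"), ("mat", "NN")]
print(cp.parse(Tree("S", tagged), trace=1))
# Expected shape: (S (NP the/DT cat/NN) sat/VBD on/IN (NP the/DT mat/NN))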
+ :rtype: str + """ + s = "RegexpChunkParser with %d rules:\n" % len(self._rules) + margin = 0 + for rule in self._rules: + margin = max(margin, len(rule.descr())) + if margin < 35: + format = " %" + repr(-(margin + 3)) + "s%s\n" + else: + format = " %s\n %s\n" + for rule in self._rules: + s += format % (rule.descr(), unicode_repr(rule)) + return s[:-1] + + +##////////////////////////////////////////////////////// +## Chunk Grammar +##////////////////////////////////////////////////////// + + +@python_2_unicode_compatible +class RegexpParser(ChunkParserI): + """ + A grammar based chunk parser. ``chunk.RegexpParser`` uses a set of + regular expression patterns to specify the behavior of the parser. + The chunking of the text is encoded using a ``ChunkString``, and + each rule acts by modifying the chunking in the ``ChunkString``. + The rules are all implemented using regular expression matching + and substitution. + + A grammar contains one or more clauses in the following form:: + + NP: + {} # chunk determiners and adjectives + }<[\.VI].*>+{ # chink any tag beginning with V, I, or . + <.*>}{
    # split a chunk at a determiner + {} # merge chunk ending with det/adj + # with one starting with a noun + + The patterns of a clause are executed in order. An earlier + pattern may introduce a chunk boundary that prevents a later + pattern from executing. Sometimes an individual pattern will + match on multiple, overlapping extents of the input. As with + regular expression substitution more generally, the chunker will + identify the first match possible, then continue looking for matches + after this one has ended. + + The clauses of a grammar are also executed in order. A cascaded + chunk parser is one having more than one clause. The maximum depth + of a parse tree created by this chunk parser is the same as the + number of clauses in the grammar. + + When tracing is turned on, the comment portion of a line is displayed + each time the corresponding pattern is applied. + + :type _start: str + :ivar _start: The start symbol of the grammar (the root node of + resulting trees) + :type _stages: int + :ivar _stages: The list of parsing stages corresponding to the grammar + + """ + + def __init__(self, grammar, root_label='S', loop=1, trace=0): + """ + Create a new chunk parser, from the given start state + and set of chunk patterns. + + :param grammar: The grammar, or a list of RegexpChunkParser objects + :type grammar: str or list(RegexpChunkParser) + :param root_label: The top node of the tree being created + :type root_label: str or Nonterminal + :param loop: The number of times to run through the patterns + :type loop: int + :type trace: int + :param trace: The level of tracing that should be used when + parsing a text. ``0`` will generate no tracing output; + ``1`` will generate normal tracing output; and ``2`` or + higher will generate verbose tracing output. + """ + self._trace = trace + self._stages = [] + self._grammar = grammar + self._loop = loop + + if isinstance(grammar, string_types): + self._read_grammar(grammar, root_label, trace) + else: + # Make sur the grammar looks like it has the right type: + type_err = ( + 'Expected string or list of RegexpChunkParsers ' 'for the grammar.' + ) + try: + grammar = list(grammar) + except: + raise TypeError(type_err) + for elt in grammar: + if not isinstance(elt, RegexpChunkParser): + raise TypeError(type_err) + self._stages = grammar + + def _read_grammar(self, grammar, root_label, trace): + """ + Helper function for __init__: read the grammar if it is a + string. + """ + rules = [] + lhs = None + for line in grammar.split('\n'): + line = line.strip() + + # New stage begins if there's an unescaped ':' + m = re.match('(?P(\\.|[^:])*)(:(?P.*))', line) + if m: + # Record the stage that we just completed. + self._add_stage(rules, lhs, root_label, trace) + # Start a new stage. + lhs = m.group('nonterminal').strip() + rules = [] + line = m.group('rule').strip() + + # Skip blank & comment-only lines + if line == '' or line.startswith('#'): + continue + + # Add the rule + rules.append(RegexpChunkRule.fromstring(line)) + + # Record the final stage + self._add_stage(rules, lhs, root_label, trace) + + def _add_stage(self, rules, lhs, root_label, trace): + """ + Helper function for __init__: add a new stage to the parser. + """ + if rules != []: + if not lhs: + raise ValueError('Expected stage marker (eg NP:)') + parser = RegexpChunkParser( + rules, chunk_label=lhs, root_label=root_label, trace=trace + ) + self._stages.append(parser) + + def parse(self, chunk_struct, trace=None): + """ + Apply the chunk parser to this input. 
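# Illustrative sketch (not part of the NLTK source): a small cascaded grammar of
# the kind described in the class docstring above.  The sentence is hypothetical.
import nltk

grammar = r"""
  NP: {<DT>?<JJ>*<NN.*>+}   # noun phrase: optional det, adjectives, nouns
  PP: {<IN><NP>}            # prepositional phrase: preposition + NP
"""
cp = nltk.RegexpParser(grammar)
tagged = [("the", "DT"), ("little", "JJ"), ("cat", "NN"), ("sat", "VBD"),
          ("on", "IN"), ("the", "DT"), ("mat", "NN")]
print(cp.parse(tagged))
# The PP stage sees the NP subtrees produced by the first stage, so the result
# nests an NP inside the PP.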
+ + :type chunk_struct: Tree + :param chunk_struct: the chunk structure to be (further) chunked + (this tree is modified, and is also returned) + :type trace: int + :param trace: The level of tracing that should be used when + parsing a text. ``0`` will generate no tracing output; + ``1`` will generate normal tracing output; and ``2`` or + highter will generate verbose tracing output. This value + overrides the trace level value that was given to the + constructor. + :return: the chunked output. + :rtype: Tree + """ + if trace is None: + trace = self._trace + for i in range(self._loop): + for parser in self._stages: + chunk_struct = parser.parse(chunk_struct, trace=trace) + return chunk_struct + + def __repr__(self): + """ + :return: a concise string representation of this ``chunk.RegexpParser``. + :rtype: str + """ + return "" % len(self._stages) + + def __str__(self): + """ + :return: a verbose string representation of this + ``RegexpParser``. + :rtype: str + """ + s = "chunk.RegexpParser with %d stages:\n" % len(self._stages) + margin = 0 + for parser in self._stages: + s += "%s\n" % parser + return s[:-1] + + +##////////////////////////////////////////////////////// +## Demonstration code +##////////////////////////////////////////////////////// + + +def demo_eval(chunkparser, text): + """ + Demonstration code for evaluating a chunk parser, using a + ``ChunkScore``. This function assumes that ``text`` contains one + sentence per line, and that each sentence has the form expected by + ``tree.chunk``. It runs the given chunk parser on each sentence in + the text, and scores the result. It prints the final score + (precision, recall, and f-measure); and reports the set of chunks + that were missed and the set of chunks that were incorrect. (At + most 10 missing chunks and 10 incorrect chunks are reported). + + :param chunkparser: The chunkparser to be tested + :type chunkparser: ChunkParserI + :param text: The chunked tagged text that should be used for + evaluation. + :type text: str + """ + from nltk import chunk + from nltk.tree import Tree + + # Evaluate our chunk parser. + chunkscore = chunk.ChunkScore() + + for sentence in text.split('\n'): + print(sentence) + sentence = sentence.strip() + if not sentence: + continue + gold = chunk.tagstr2tree(sentence) + tokens = gold.leaves() + test = chunkparser.parse(Tree('S', tokens), trace=1) + chunkscore.score(gold, test) + print() + + print('/' + ('=' * 75) + '\\') + print('Scoring', chunkparser) + print(('-' * 77)) + print('Precision: %5.1f%%' % (chunkscore.precision() * 100), ' ' * 4, end=' ') + print('Recall: %5.1f%%' % (chunkscore.recall() * 100), ' ' * 6, end=' ') + print('F-Measure: %5.1f%%' % (chunkscore.f_measure() * 100)) + + # Missed chunks. + if chunkscore.missed(): + print('Missed:') + missed = chunkscore.missed() + for chunk in missed[:10]: + print(' ', ' '.join(map(str, chunk))) + if len(chunkscore.missed()) > 10: + print(' ...') + + # Incorrect chunks. + if chunkscore.incorrect(): + print('Incorrect:') + incorrect = chunkscore.incorrect() + for chunk in incorrect[:10]: + print(' ', ' '.join(map(str, chunk))) + if len(chunkscore.incorrect()) > 10: + print(' ...') + + print('\\' + ('=' * 75) + '/') + print() + + +def demo(): + """ + A demonstration for the ``RegexpChunkParser`` class. A single text is + parsed with four different chunk parsers, using a variety of rules + and strategies. + """ + + from nltk import chunk, Tree + + text = """\ + [ the/DT little/JJ cat/NN ] sat/VBD on/IN [ the/DT mat/NN ] ./. 
+ [ John/NNP ] saw/VBD [the/DT cats/NNS] [the/DT dog/NN] chased/VBD ./. + [ John/NNP ] thinks/VBZ [ Mary/NN ] saw/VBD [ the/DT cat/NN ] sit/VB on/IN [ the/DT mat/NN ]./. + """ + + print('*' * 75) + print('Evaluation text:') + print(text) + print('*' * 75) + print() + + grammar = r""" + NP: # NP stage + {
    ?*} # chunk determiners, adjectives and nouns + {+} # chunk proper nouns + """ + cp = chunk.RegexpParser(grammar) + demo_eval(cp, text) + + grammar = r""" + NP: + {<.*>} # start by chunking each tag + }<[\.VI].*>+{ # unchunk any verbs, prepositions or periods + {} # merge det/adj with nouns + """ + cp = chunk.RegexpParser(grammar) + demo_eval(cp, text) + + grammar = r""" + NP: {
    ?*} # chunk determiners, adjectives and nouns + VP: {?} # VP = verb words + """ + cp = chunk.RegexpParser(grammar) + demo_eval(cp, text) + + grammar = r""" + NP: {<.*>*} # start by chunking everything + }<[\.VI].*>+{ # chink any verbs, prepositions or periods + <.*>}{
    # separate on determiners + PP: {} # PP = preposition + noun phrase + VP: {*} # VP = verb words + NPs and PPs + """ + cp = chunk.RegexpParser(grammar) + demo_eval(cp, text) + + # Evaluation + + from nltk.corpus import conll2000 + + print() + print("Demonstration of empty grammar:") + + cp = chunk.RegexpParser("") + print(chunk.accuracy(cp, conll2000.chunked_sents('test.txt', chunk_types=('NP',)))) + + print() + print("Demonstration of accuracy evaluation using CoNLL tags:") + + grammar = r""" + NP: + {<.*>} # start by chunking each tag + }<[\.VI].*>+{ # unchunk any verbs, prepositions or periods + {} # merge det/adj with nouns + """ + cp = chunk.RegexpParser(grammar) + print(chunk.accuracy(cp, conll2000.chunked_sents('test.txt')[:5])) + + print() + print("Demonstration of tagged token input") + + grammar = r""" + NP: {<.*>*} # start by chunking everything + }<[\.VI].*>+{ # chink any verbs, prepositions or periods + <.*>}{
    # separate on determiners + PP: {} # PP = preposition + noun phrase + VP: {*} # VP = verb words + NPs and PPs + """ + cp = chunk.RegexpParser(grammar) + print( + cp.parse( + [ + ("the", "DT"), + ("little", "JJ"), + ("cat", "NN"), + ("sat", "VBD"), + ("on", "IN"), + ("the", "DT"), + ("mat", "NN"), + (".", "."), + ] + ) + ) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/chunk/util.py b/venv.bak/lib/python3.7/site-packages/nltk/chunk/util.py new file mode 100644 index 0000000..e29760d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/chunk/util.py @@ -0,0 +1,646 @@ +# Natural Language Toolkit: Chunk format conversions +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird (minor additions) +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals, division + +import re + +from nltk.tree import Tree +from nltk.tag.mapping import map_tag +from nltk.tag.util import str2tuple +from nltk.compat import python_2_unicode_compatible + +##////////////////////////////////////////////////////// +## EVALUATION +##////////////////////////////////////////////////////// + +from nltk.metrics import accuracy as _accuracy + + +def accuracy(chunker, gold): + """ + Score the accuracy of the chunker against the gold standard. + Strip the chunk information from the gold standard and rechunk it using + the chunker, then compute the accuracy score. + + :type chunker: ChunkParserI + :param chunker: The chunker being evaluated. + :type gold: tree + :param gold: The chunk structures to score the chunker on. + :rtype: float + """ + + gold_tags = [] + test_tags = [] + for gold_tree in gold: + test_tree = chunker.parse(gold_tree.flatten()) + gold_tags += tree2conlltags(gold_tree) + test_tags += tree2conlltags(test_tree) + + # print 'GOLD:', gold_tags[:50] + # print 'TEST:', test_tags[:50] + return _accuracy(gold_tags, test_tags) + + +# Patched for increased performance by Yoav Goldberg , 2006-01-13 +# -- statistics are evaluated only on demand, instead of at every sentence evaluation +# +# SB: use nltk.metrics for precision/recall scoring? +# +class ChunkScore(object): + """ + A utility class for scoring chunk parsers. ``ChunkScore`` can + evaluate a chunk parser's output, based on a number of statistics + (precision, recall, f-measure, misssed chunks, incorrect chunks). + It can also combine the scores from the parsing of multiple texts; + this makes it significantly easier to evaluate a chunk parser that + operates one sentence at a time. + + Texts are evaluated with the ``score`` method. The results of + evaluation can be accessed via a number of accessor methods, such + as ``precision`` and ``f_measure``. A typical use of the + ``ChunkScore`` class is:: + + >>> chunkscore = ChunkScore() # doctest: +SKIP + >>> for correct in correct_sentences: # doctest: +SKIP + ... guess = chunkparser.parse(correct.leaves()) # doctest: +SKIP + ... chunkscore.score(correct, guess) # doctest: +SKIP + >>> print('F Measure:', chunkscore.f_measure()) # doctest: +SKIP + F Measure: 0.823 + + :ivar kwargs: Keyword arguments: + + - max_tp_examples: The maximum number actual examples of true + positives to record. This affects the ``correct`` member + function: ``correct`` will not return more than this number + of true positive examples. 
This does *not* affect any of + the numerical metrics (precision, recall, or f-measure) + + - max_fp_examples: The maximum number actual examples of false + positives to record. This affects the ``incorrect`` member + function and the ``guessed`` member function: ``incorrect`` + will not return more than this number of examples, and + ``guessed`` will not return more than this number of true + positive examples. This does *not* affect any of the + numerical metrics (precision, recall, or f-measure) + + - max_fn_examples: The maximum number actual examples of false + negatives to record. This affects the ``missed`` member + function and the ``correct`` member function: ``missed`` + will not return more than this number of examples, and + ``correct`` will not return more than this number of true + negative examples. This does *not* affect any of the + numerical metrics (precision, recall, or f-measure) + + - chunk_label: A regular expression indicating which chunks + should be compared. Defaults to ``'.*'`` (i.e., all chunks). + + :type _tp: list(Token) + :ivar _tp: List of true positives + :type _fp: list(Token) + :ivar _fp: List of false positives + :type _fn: list(Token) + :ivar _fn: List of false negatives + + :type _tp_num: int + :ivar _tp_num: Number of true positives + :type _fp_num: int + :ivar _fp_num: Number of false positives + :type _fn_num: int + :ivar _fn_num: Number of false negatives. + """ + + def __init__(self, **kwargs): + self._correct = set() + self._guessed = set() + self._tp = set() + self._fp = set() + self._fn = set() + self._max_tp = kwargs.get('max_tp_examples', 100) + self._max_fp = kwargs.get('max_fp_examples', 100) + self._max_fn = kwargs.get('max_fn_examples', 100) + self._chunk_label = kwargs.get('chunk_label', '.*') + self._tp_num = 0 + self._fp_num = 0 + self._fn_num = 0 + self._count = 0 + self._tags_correct = 0.0 + self._tags_total = 0.0 + + self._measuresNeedUpdate = False + + def _updateMeasures(self): + if self._measuresNeedUpdate: + self._tp = self._guessed & self._correct + self._fn = self._correct - self._guessed + self._fp = self._guessed - self._correct + self._tp_num = len(self._tp) + self._fp_num = len(self._fp) + self._fn_num = len(self._fn) + self._measuresNeedUpdate = False + + def score(self, correct, guessed): + """ + Given a correctly chunked sentence, score another chunked + version of the same sentence. + + :type correct: chunk structure + :param correct: The known-correct ("gold standard") chunked + sentence. + :type guessed: chunk structure + :param guessed: The chunked sentence to be scored. + """ + self._correct |= _chunksets(correct, self._count, self._chunk_label) + self._guessed |= _chunksets(guessed, self._count, self._chunk_label) + self._count += 1 + self._measuresNeedUpdate = True + # Keep track of per-tag accuracy (if possible) + try: + correct_tags = tree2conlltags(correct) + guessed_tags = tree2conlltags(guessed) + except ValueError: + # This exception case is for nested chunk structures, + # where tree2conlltags will fail with a ValueError: "Tree + # is too deeply nested to be printed in CoNLL format." + correct_tags = guessed_tags = () + self._tags_total += len(correct_tags) + self._tags_correct += sum( + 1 for (t, g) in zip(guessed_tags, correct_tags) if t == g + ) + + def accuracy(self): + """ + Return the overall tag-based accuracy for all text that have + been scored by this ``ChunkScore``, using the IOB (conll2000) + tag encoding. 
+ + :rtype: float + """ + if self._tags_total == 0: + return 1 + return self._tags_correct / self._tags_total + + def precision(self): + """ + Return the overall precision for all texts that have been + scored by this ``ChunkScore``. + + :rtype: float + """ + self._updateMeasures() + div = self._tp_num + self._fp_num + if div == 0: + return 0 + else: + return self._tp_num / div + + def recall(self): + """ + Return the overall recall for all texts that have been + scored by this ``ChunkScore``. + + :rtype: float + """ + self._updateMeasures() + div = self._tp_num + self._fn_num + if div == 0: + return 0 + else: + return self._tp_num / div + + def f_measure(self, alpha=0.5): + """ + Return the overall F measure for all texts that have been + scored by this ``ChunkScore``. + + :param alpha: the relative weighting of precision and recall. + Larger alpha biases the score towards the precision value, + while smaller alpha biases the score towards the recall + value. ``alpha`` should have a value in the range [0,1]. + :type alpha: float + :rtype: float + """ + self._updateMeasures() + p = self.precision() + r = self.recall() + if p == 0 or r == 0: # what if alpha is 0 or 1? + return 0 + return 1 / (alpha / p + (1 - alpha) / r) + + def missed(self): + """ + Return the chunks which were included in the + correct chunk structures, but not in the guessed chunk + structures, listed in input order. + + :rtype: list of chunks + """ + self._updateMeasures() + chunks = list(self._fn) + return [c[1] for c in chunks] # discard position information + + def incorrect(self): + """ + Return the chunks which were included in the guessed chunk structures, + but not in the correct chunk structures, listed in input order. + + :rtype: list of chunks + """ + self._updateMeasures() + chunks = list(self._fp) + return [c[1] for c in chunks] # discard position information + + def correct(self): + """ + Return the chunks which were included in the correct + chunk structures, listed in input order. + + :rtype: list of chunks + """ + chunks = list(self._correct) + return [c[1] for c in chunks] # discard position information + + def guessed(self): + """ + Return the chunks which were included in the guessed + chunk structures, listed in input order. + + :rtype: list of chunks + """ + chunks = list(self._guessed) + return [c[1] for c in chunks] # discard position information + + def __len__(self): + self._updateMeasures() + return self._tp_num + self._fn_num + + def __repr__(self): + """ + Return a concise representation of this ``ChunkScoring``. + + :rtype: str + """ + return '' + + def __str__(self): + """ + Return a verbose representation of this ``ChunkScoring``. + This representation includes the precision, recall, and + f-measure scores. For other information about the score, + use the accessor methods (e.g., ``missed()`` and ``incorrect()``). 
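# Illustrative sketch (not part of the NLTK source): scoring a simple chunker
# against one hand-chunked sentence.  The grammar and sentence are hypothetical.
import nltk
from nltk.chunk.util import ChunkScore

gold = nltk.chunk.tagstr2tree(
    "[ the/DT little/JJ cat/NN ] sat/VBD on/IN [ the/DT mat/NN ]")
guess = nltk.RegexpParser("NP: {<DT><NN>}").parse(gold.flatten())

score = ChunkScore()
score.score(gold, guess)
print(score.precision(), score.recall(), score.f_measure())
print(score.missed())   # gold chunks that the guess failed to find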
+ + :rtype: str + """ + return ( + "ChunkParse score:\n" + + (" IOB Accuracy: {:5.1f}%%\n".format(self.accuracy() * 100)) + + (" Precision: {:5.1f}%%\n".format(self.precision() * 100)) + + (" Recall: {:5.1f}%%\n".format(self.recall() * 100)) + + (" F-Measure: {:5.1f}%%".format(self.f_measure() * 100)) + ) + + +# extract chunks, and assign unique id, the absolute position of +# the first word of the chunk +def _chunksets(t, count, chunk_label): + pos = 0 + chunks = [] + for child in t: + if isinstance(child, Tree): + if re.match(chunk_label, child.label()): + chunks.append(((count, pos), child.freeze())) + pos += len(child.leaves()) + else: + pos += 1 + return set(chunks) + + +def tagstr2tree( + s, chunk_label="NP", root_label="S", sep='/', source_tagset=None, target_tagset=None +): + """ + Divide a string of bracketted tagged text into + chunks and unchunked tokens, and produce a Tree. + Chunks are marked by square brackets (``[...]``). Words are + delimited by whitespace, and each word should have the form + ``text/tag``. Words that do not contain a slash are + assigned a ``tag`` of None. + + :param s: The string to be converted + :type s: str + :param chunk_label: The label to use for chunk nodes + :type chunk_label: str + :param root_label: The label to use for the root of the tree + :type root_label: str + :rtype: Tree + """ + + WORD_OR_BRACKET = re.compile(r'\[|\]|[^\[\]\s]+') + + stack = [Tree(root_label, [])] + for match in WORD_OR_BRACKET.finditer(s): + text = match.group() + if text[0] == '[': + if len(stack) != 1: + raise ValueError('Unexpected [ at char {:d}'.format(match.start())) + chunk = Tree(chunk_label, []) + stack[-1].append(chunk) + stack.append(chunk) + elif text[0] == ']': + if len(stack) != 2: + raise ValueError('Unexpected ] at char {:d}'.format(match.start())) + stack.pop() + else: + if sep is None: + stack[-1].append(text) + else: + word, tag = str2tuple(text, sep) + if source_tagset and target_tagset: + tag = map_tag(source_tagset, target_tagset, tag) + stack[-1].append((word, tag)) + + if len(stack) != 1: + raise ValueError('Expected ] at char {:d}'.format(len(s))) + return stack[0] + + +### CONLL + +_LINE_RE = re.compile('(\S+)\s+(\S+)\s+([IOB])-?(\S+)?') + + +def conllstr2tree(s, chunk_types=('NP', 'PP', 'VP'), root_label="S"): + """ + Return a chunk structure for a single sentence + encoded in the given CONLL 2000 style string. + This function converts a CoNLL IOB string into a tree. + It uses the specified chunk types + (defaults to NP, PP and VP), and creates a tree rooted at a node + labeled S (by default). + + :param s: The CoNLL string to be converted. + :type s: str + :param chunk_types: The chunk types to be converted. + :type chunk_types: tuple + :param root_label: The node label to use for the root. + :type root_label: str + :rtype: Tree + """ + + stack = [Tree(root_label, [])] + + for lineno, line in enumerate(s.split('\n')): + if not line.strip(): + continue + + # Decode the line. + match = _LINE_RE.match(line) + if match is None: + raise ValueError('Error on line {:d}'.format(lineno)) + (word, tag, state, chunk_type) = match.groups() + + # If it's a chunk type we don't care about, treat it as O. + if chunk_types is not None and chunk_type not in chunk_types: + state = 'O' + + # For "Begin"/"Outside", finish any completed chunks - + # also do so for "Inside" which don't match the previous token. 
+ mismatch_I = state == 'I' and chunk_type != stack[-1].label() + if state in 'BO' or mismatch_I: + if len(stack) == 2: + stack.pop() + + # For "Begin", start a new chunk. + if state == 'B' or mismatch_I: + chunk = Tree(chunk_type, []) + stack[-1].append(chunk) + stack.append(chunk) + + # Add the new word token. + stack[-1].append((word, tag)) + + return stack[0] + + +def tree2conlltags(t): + """ + Return a list of 3-tuples containing ``(word, tag, IOB-tag)``. + Convert a tree to the CoNLL IOB tag format. + + :param t: The tree to be converted. + :type t: Tree + :rtype: list(tuple) + """ + + tags = [] + for child in t: + try: + category = child.label() + prefix = "B-" + for contents in child: + if isinstance(contents, Tree): + raise ValueError( + "Tree is too deeply nested to be printed in CoNLL format" + ) + tags.append((contents[0], contents[1], prefix + category)) + prefix = "I-" + except AttributeError: + tags.append((child[0], child[1], "O")) + return tags + + +def conlltags2tree( + sentence, chunk_types=('NP', 'PP', 'VP'), root_label='S', strict=False +): + """ + Convert the CoNLL IOB format to a tree. + """ + tree = Tree(root_label, []) + for (word, postag, chunktag) in sentence: + if chunktag is None: + if strict: + raise ValueError("Bad conll tag sequence") + else: + # Treat as O + tree.append((word, postag)) + elif chunktag.startswith('B-'): + tree.append(Tree(chunktag[2:], [(word, postag)])) + elif chunktag.startswith('I-'): + if ( + len(tree) == 0 + or not isinstance(tree[-1], Tree) + or tree[-1].label() != chunktag[2:] + ): + if strict: + raise ValueError("Bad conll tag sequence") + else: + # Treat as B-* + tree.append(Tree(chunktag[2:], [(word, postag)])) + else: + tree[-1].append((word, postag)) + elif chunktag == 'O': + tree.append((word, postag)) + else: + raise ValueError("Bad conll tag {0!r}".format(chunktag)) + return tree + + +def tree2conllstr(t): + """ + Return a multiline string where each line contains a word, tag and IOB tag. + Convert a tree to the CoNLL IOB string format + + :param t: The tree to be converted. + :type t: Tree + :rtype: str + """ + lines = [" ".join(token) for token in tree2conlltags(t)] + return '\n'.join(lines) + + +### IEER + +_IEER_DOC_RE = re.compile( + r'\s*' + r'(\s*(?P.+?)\s*\s*)?' + r'(\s*(?P.+?)\s*\s*)?' + r'(\s*(?P.+?)\s*\s*)?' + r'\s*' + r'(\s*(?P.+?)\s*\s*)?' + r'(?P.*?)\s*' + r'\s*\s*', + re.DOTALL, +) + +_IEER_TYPE_RE = re.compile(']*?type="(?P\w+)"') + + +def _ieer_read_text(s, root_label): + stack = [Tree(root_label, [])] + # s will be None if there is no headline in the text + # return the empty list in place of a Tree + if s is None: + return [] + for piece_m in re.finditer('<[^>]+>|[^\s<]+', s): + piece = piece_m.group() + try: + if piece.startswith('.... + m = _IEER_DOC_RE.match(s) + if m: + return { + 'text': _ieer_read_text(m.group('text'), root_label), + 'docno': m.group('docno'), + 'doctype': m.group('doctype'), + 'date_time': m.group('date_time'), + #'headline': m.group('headline') + # we want to capture NEs in the headline too! + 'headline': _ieer_read_text(m.group('headline'), root_label), + } + else: + return _ieer_read_text(s, root_label) + + +def demo(): + + s = "[ Pierre/NNP Vinken/NNP ] ,/, [ 61/CD years/NNS ] old/JJ ,/, will/MD join/VB [ the/DT board/NN ] ./." 
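# Illustrative sketch (not part of the NLTK source): round-tripping a chunk tree
# through the CoNLL IOB helpers defined above.  The sentence is hypothetical.
import nltk
from nltk.chunk.util import tree2conlltags, conlltags2tree, tree2conllstr

tree = nltk.chunk.tagstr2tree("[ the/DT cat/NN ] sat/VBD")
iob = tree2conlltags(tree)
print(iob)                  # [('the', 'DT', 'B-NP'), ('cat', 'NN', 'I-NP'), ('sat', 'VBD', 'O')]
print(tree2conllstr(tree))  # one "word tag IOB-tag" triple per line
print(conlltags2tree(iob) == tree)   # the conversion is reversible for flat chunk trees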
+ import nltk + + t = nltk.chunk.tagstr2tree(s, chunk_label='NP') + t.pprint() + print() + + s = """ +These DT B-NP +research NN I-NP +protocols NNS I-NP +offer VBP B-VP +to TO B-PP +the DT B-NP +patient NN I-NP +not RB O +only RB O +the DT B-NP +very RB I-NP +best JJS I-NP +therapy NN I-NP +which WDT B-NP +we PRP B-NP +have VBP B-VP +established VBN I-VP +today NN B-NP +but CC B-NP +also RB I-NP +the DT B-NP +hope NN I-NP +of IN B-PP +something NN B-NP +still RB B-ADJP +better JJR I-ADJP +. . O +""" + + conll_tree = conllstr2tree(s, chunk_types=('NP', 'PP')) + conll_tree.pprint() + + # Demonstrate CoNLL output + print("CoNLL output:") + print(nltk.chunk.tree2conllstr(conll_tree)) + print() + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/classify/__init__.py new file mode 100644 index 0000000..551c82c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/classify/__init__.py @@ -0,0 +1,101 @@ +# Natural Language Toolkit: Classifiers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +Classes and interfaces for labeling tokens with category labels (or +"class labels"). Typically, labels are represented with strings +(such as ``'health'`` or ``'sports'``). Classifiers can be used to +perform a wide range of classification tasks. For example, +classifiers can be used... + +- to classify documents by topic +- to classify ambiguous words by which word sense is intended +- to classify acoustic signals by which phoneme they represent +- to classify sentences by their author + +Features +======== +In order to decide which category label is appropriate for a given +token, classifiers examine one or more 'features' of the token. These +"features" are typically chosen by hand, and indicate which aspects +of the token are relevant to the classification decision. For +example, a document classifier might use a separate feature for each +word, recording how often that word occurred in the document. + +Featuresets +=========== +The features describing a token are encoded using a "featureset", +which is a dictionary that maps from "feature names" to "feature +values". Feature names are unique strings that indicate what aspect +of the token is encoded by the feature. Examples include +``'prevword'``, for a feature whose value is the previous word; and +``'contains-word(library)'`` for a feature that is true when a document +contains the word ``'library'``. Feature values are typically +booleans, numbers, or strings, depending on which feature they +describe. + +Featuresets are typically constructed using a "feature detector" +(also known as a "feature extractor"). A feature detector is a +function that takes a token (and sometimes information about its +context) as its input, and returns a featureset describing that token. +For example, the following feature detector converts a document +(stored as a list of words) to a featureset describing the set of +words included in the document: + + >>> # Define a feature detector function. + >>> def document_features(document): + ... return dict([('contains-word(%s)' % w, True) for w in document]) + +Feature detectors are typically applied to each token before it is fed +to the classifier: + + >>> # Classify each Gutenberg document. + >>> from nltk.corpus import gutenberg + >>> for fileid in gutenberg.fileids(): # doctest: +SKIP + ... 
doc = gutenberg.words(fileid) # doctest: +SKIP + ... print fileid, classifier.classify(document_features(doc)) # doctest: +SKIP + +The parameters that a feature detector expects will vary, depending on +the task and the needs of the feature detector. For example, a +feature detector for word sense disambiguation (WSD) might take as its +input a sentence, and the index of a word that should be classified, +and return a featureset for that word. The following feature detector +for WSD includes features describing the left and right contexts of +the target word: + + >>> def wsd_features(sentence, index): + ... featureset = {} + ... for i in range(max(0, index-3), index): + ... featureset['left-context(%s)' % sentence[i]] = True + ... for i in range(index, max(index+3, len(sentence))): + ... featureset['right-context(%s)' % sentence[i]] = True + ... return featureset + +Training Classifiers +==================== +Most classifiers are built by training them on a list of hand-labeled +examples, known as the "training set". Training sets are represented +as lists of ``(featuredict, label)`` tuples. +""" + +from nltk.classify.api import ClassifierI, MultiClassifierI +from nltk.classify.megam import config_megam, call_megam +from nltk.classify.weka import WekaClassifier, config_weka +from nltk.classify.naivebayes import NaiveBayesClassifier +from nltk.classify.positivenaivebayes import PositiveNaiveBayesClassifier +from nltk.classify.decisiontree import DecisionTreeClassifier +from nltk.classify.rte_classify import rte_classifier, rte_features, RTEFeatureExtractor +from nltk.classify.util import accuracy, apply_features, log_likelihood +from nltk.classify.scikitlearn import SklearnClassifier +from nltk.classify.maxent import ( + MaxentClassifier, + BinaryMaxentFeatureEncoding, + TypedMaxentFeatureEncoding, + ConditionalExponentialClassifier, +) +from nltk.classify.senna import Senna +from nltk.classify.textcat import TextCat diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..91b95b3 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/api.cpython-37.pyc new file mode 100644 index 0000000..e5379ab Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/api.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/decisiontree.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/decisiontree.cpython-37.pyc new file mode 100644 index 0000000..fc801f2 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/decisiontree.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/maxent.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/maxent.cpython-37.pyc new file mode 100644 index 0000000..7240968 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/maxent.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/megam.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/megam.cpython-37.pyc new 
file mode 100644 index 0000000..6c0ef3a Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/megam.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/naivebayes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/naivebayes.cpython-37.pyc new file mode 100644 index 0000000..c36a2d5 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/naivebayes.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/positivenaivebayes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/positivenaivebayes.cpython-37.pyc new file mode 100644 index 0000000..480ba1b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/positivenaivebayes.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/rte_classify.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/rte_classify.cpython-37.pyc new file mode 100644 index 0000000..27d63ff Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/rte_classify.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/scikitlearn.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/scikitlearn.cpython-37.pyc new file mode 100644 index 0000000..6a01153 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/scikitlearn.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/senna.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/senna.cpython-37.pyc new file mode 100644 index 0000000..9802f16 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/senna.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/svm.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/svm.cpython-37.pyc new file mode 100644 index 0000000..7f8ff1c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/svm.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/tadm.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/tadm.cpython-37.pyc new file mode 100644 index 0000000..5ac3eae Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/tadm.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/textcat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/textcat.cpython-37.pyc new file mode 100644 index 0000000..0e46bb5 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/textcat.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000..e65b682 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/util.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/weka.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/weka.cpython-37.pyc new 
file mode 100644 index 0000000..c6516fc Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/classify/__pycache__/weka.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/api.py b/venv.bak/lib/python3.7/site-packages/nltk/classify/api.py new file mode 100644 index 0000000..91987c1 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/classify/api.py @@ -0,0 +1,195 @@ +# Natural Language Toolkit: Classifier Interface +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird (minor additions) +# URL: +# For license information, see LICENSE.TXT + +""" +Interfaces for labeling tokens with category labels (or "class labels"). + +``ClassifierI`` is a standard interface for "single-category +classification", in which the set of categories is known, the number +of categories is finite, and each text belongs to exactly one +category. + +``MultiClassifierI`` is a standard interface for "multi-category +classification", which is like single-category classification except +that each text belongs to zero or more categories. +""" +from nltk.internals import overridden + +##////////////////////////////////////////////////////// +# { Classification Interfaces +##////////////////////////////////////////////////////// + + +class ClassifierI(object): + """ + A processing interface for labeling tokens with a single category + label (or "class"). Labels are typically strs or + ints, but can be any immutable type. The set of labels + that the classifier chooses from must be fixed and finite. + + Subclasses must define: + - ``labels()`` + - either ``classify()`` or ``classify_many()`` (or both) + + Subclasses may define: + - either ``prob_classify()`` or ``prob_classify_many()`` (or both) + """ + + def labels(self): + """ + :return: the list of category labels used by this classifier. + :rtype: list of (immutable) + """ + raise NotImplementedError() + + def classify(self, featureset): + """ + :return: the most appropriate label for the given featureset. + :rtype: label + """ + if overridden(self.classify_many): + return self.classify_many([featureset])[0] + else: + raise NotImplementedError() + + def prob_classify(self, featureset): + """ + :return: a probability distribution over labels for the given + featureset. + :rtype: ProbDistI + """ + if overridden(self.prob_classify_many): + return self.prob_classify_many([featureset])[0] + else: + raise NotImplementedError() + + def classify_many(self, featuresets): + """ + Apply ``self.classify()`` to each element of ``featuresets``. I.e.: + + return [self.classify(fs) for fs in featuresets] + + :rtype: list(label) + """ + return [self.classify(fs) for fs in featuresets] + + def prob_classify_many(self, featuresets): + """ + Apply ``self.prob_classify()`` to each element of ``featuresets``. I.e.: + + return [self.prob_classify(fs) for fs in featuresets] + + :rtype: list(ProbDistI) + """ + return [self.prob_classify(fs) for fs in featuresets] + + +class MultiClassifierI(object): + """ + A processing interface for labeling tokens with zero or more + category labels (or "labels"). Labels are typically strs + or ints, but can be any immutable type. The set of labels + that the multi-classifier chooses from must be fixed and finite. 
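# Illustrative sketch (not part of the NLTK source): a minimal ClassifierI
# implementation.  Defining labels() and classify() is enough for the inherited
# classify_many() to work; the class name and toy data are hypothetical.
from collections import Counter
from nltk.classify.api import ClassifierI

class MajorityClassifier(ClassifierI):
    """Always predicts the most frequent label seen during training."""

    def __init__(self, labeled_featuresets):
        counts = Counter(label for (_, label) in labeled_featuresets)
        self._majority = counts.most_common(1)[0][0]
        self._labels = sorted(counts)

    def labels(self):
        return self._labels

    def classify(self, featureset):
        return self._majority

clf = MajorityClassifier([({"f": 1}, "pos"), ({"f": 0}, "neg"), ({"f": 2}, "pos")])
print(clf.classify({"f": 3}))               # -> 'pos'
print(clf.classify_many([{"f": 0}, {}]))    # -> ['pos', 'pos']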
+ + Subclasses must define: + - ``labels()`` + - either ``classify()`` or ``classify_many()`` (or both) + + Subclasses may define: + - either ``prob_classify()`` or ``prob_classify_many()`` (or both) + """ + + def labels(self): + """ + :return: the list of category labels used by this classifier. + :rtype: list of (immutable) + """ + raise NotImplementedError() + + def classify(self, featureset): + """ + :return: the most appropriate set of labels for the given featureset. + :rtype: set(label) + """ + if overridden(self.classify_many): + return self.classify_many([featureset])[0] + else: + raise NotImplementedError() + + def prob_classify(self, featureset): + """ + :return: a probability distribution over sets of labels for the + given featureset. + :rtype: ProbDistI + """ + if overridden(self.prob_classify_many): + return self.prob_classify_many([featureset])[0] + else: + raise NotImplementedError() + + def classify_many(self, featuresets): + """ + Apply ``self.classify()`` to each element of ``featuresets``. I.e.: + + return [self.classify(fs) for fs in featuresets] + + :rtype: list(set(label)) + """ + return [self.classify(fs) for fs in featuresets] + + def prob_classify_many(self, featuresets): + """ + Apply ``self.prob_classify()`` to each element of ``featuresets``. I.e.: + + return [self.prob_classify(fs) for fs in featuresets] + + :rtype: list(ProbDistI) + """ + return [self.prob_classify(fs) for fs in featuresets] + + +# # [XX] IN PROGRESS: +# class SequenceClassifierI(object): +# """ +# A processing interface for labeling sequences of tokens with a +# single category label (or "class"). Labels are typically +# strs or ints, but can be any immutable type. The set +# of labels that the classifier chooses from must be fixed and +# finite. +# """ +# def labels(self): +# """ +# :return: the list of category labels used by this classifier. +# :rtype: list of (immutable) +# """ +# raise NotImplementedError() + +# def prob_classify(self, featureset): +# """ +# Return a probability distribution over labels for the given +# featureset. + +# If ``featureset`` is a list of featuresets, then return a +# corresponding list containing the probability distribution +# over labels for each of the given featuresets, where the +# *i*\ th element of this list is the most appropriate label for +# the *i*\ th element of ``featuresets``. +# """ +# raise NotImplementedError() + +# def classify(self, featureset): +# """ +# Return the most appropriate label for the given featureset. + +# If ``featureset`` is a list of featuresets, then return a +# corresponding list containing the most appropriate label for +# each of the given featuresets, where the *i*\ th element of +# this list is the most appropriate label for the *i*\ th element +# of ``featuresets``. +# """ +# raise NotImplementedError() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/decisiontree.py b/venv.bak/lib/python3.7/site-packages/nltk/classify/decisiontree.py new file mode 100644 index 0000000..0739cf4 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/classify/decisiontree.py @@ -0,0 +1,350 @@ +# Natural Language Toolkit: Decision Tree Classifiers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +A classifier model that decides which label to assign to a token on +the basis of a tree structure, where branches correspond to conditions +on feature values, and leaves correspond to label assignments. 
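# Illustrative sketch (not part of the NLTK source): training the
# DecisionTreeClassifier defined just below on a few toy featuresets and
# inspecting the resulting tree.  Feature names, values and labels are made up.
from nltk.classify import DecisionTreeClassifier

train = [({"outlook": "sunny", "windy": False}, "play"),
         ({"outlook": "sunny", "windy": True}, "play"),
         ({"outlook": "rainy", "windy": False}, "stay"),
         ({"outlook": "rainy", "windy": True}, "stay")]

tree = DecisionTreeClassifier.train(train, entropy_cutoff=0, support_cutoff=0)
print(tree.classify({"outlook": "sunny", "windy": True}))   # -> 'play' on this toy data
print(tree.pretty_format())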
+""" +from __future__ import print_function, unicode_literals, division + +from collections import defaultdict + +from nltk.probability import FreqDist, MLEProbDist, entropy +from nltk.classify.api import ClassifierI +from nltk.compat import python_2_unicode_compatible + + +@python_2_unicode_compatible +class DecisionTreeClassifier(ClassifierI): + def __init__(self, label, feature_name=None, decisions=None, default=None): + """ + :param label: The most likely label for tokens that reach + this node in the decision tree. If this decision tree + has no children, then this label will be assigned to + any token that reaches this decision tree. + :param feature_name: The name of the feature that this + decision tree selects for. + :param decisions: A dictionary mapping from feature values + for the feature identified by ``feature_name`` to + child decision trees. + :param default: The child that will be used if the value of + feature ``feature_name`` does not match any of the keys in + ``decisions``. This is used when constructing binary + decision trees. + """ + self._label = label + self._fname = feature_name + self._decisions = decisions + self._default = default + + def labels(self): + labels = [self._label] + if self._decisions is not None: + for dt in self._decisions.values(): + labels.extend(dt.labels()) + if self._default is not None: + labels.extend(self._default.labels()) + return list(set(labels)) + + def classify(self, featureset): + # Decision leaf: + if self._fname is None: + return self._label + + # Decision tree: + fval = featureset.get(self._fname) + if fval in self._decisions: + return self._decisions[fval].classify(featureset) + elif self._default is not None: + return self._default.classify(featureset) + else: + return self._label + + def error(self, labeled_featuresets): + errors = 0 + for featureset, label in labeled_featuresets: + if self.classify(featureset) != label: + errors += 1 + return errors / len(labeled_featuresets) + + def pretty_format(self, width=70, prefix='', depth=4): + """ + Return a string containing a pretty-printed version of this + decision tree. Each line in this string corresponds to a + single decision tree node or leaf, and indentation is used to + display the structure of the decision tree. + """ + # [xx] display default!! + if self._fname is None: + n = width - len(prefix) - 15 + return '{0}{1} {2}\n'.format(prefix, '.' * n, self._label) + s = '' + for i, (fval, result) in enumerate(sorted(self._decisions.items())): + hdr = '{0}{1}={2}? '.format(prefix, self._fname, fval) + n = width - 15 - len(hdr) + s += '{0}{1} {2}\n'.format(hdr, '.' * (n), result._label) + if result._fname is not None and depth > 1: + s += result.pretty_format(width, prefix + ' ', depth - 1) + if self._default is not None: + n = width - len(prefix) - 21 + s += '{0}else: {1} {2}\n'.format(prefix, '.' * n, self._default._label) + if self._default._fname is not None and depth > 1: + s += self._default.pretty_format(width, prefix + ' ', depth - 1) + return s + + def pseudocode(self, prefix='', depth=4): + """ + Return a string representation of this decision tree that + expresses the decisions it makes as a nested set of pseudocode + if statements. 
+ """ + if self._fname is None: + return "{0}return {1!r}\n".format(prefix, self._label) + s = '' + for (fval, result) in sorted(self._decisions.items()): + s += '{0}if {1} == {2!r}: '.format(prefix, self._fname, fval) + if result._fname is not None and depth > 1: + s += '\n' + result.pseudocode(prefix + ' ', depth - 1) + else: + s += 'return {0!r}\n'.format(result._label) + if self._default is not None: + if len(self._decisions) == 1: + s += '{0}if {1} != {2!r}: '.format( + prefix, self._fname, list(self._decisions.keys())[0] + ) + else: + s += '{0}else: '.format(prefix) + if self._default._fname is not None and depth > 1: + s += '\n' + self._default.pseudocode(prefix + ' ', depth - 1) + else: + s += 'return {0!r}\n'.format(self._default._label) + return s + + def __str__(self): + return self.pretty_format() + + @staticmethod + def train( + labeled_featuresets, + entropy_cutoff=0.05, + depth_cutoff=100, + support_cutoff=10, + binary=False, + feature_values=None, + verbose=False, + ): + """ + :param binary: If true, then treat all feature/value pairs as + individual binary features, rather than using a single n-way + branch for each feature. + """ + # Collect a list of all feature names. + feature_names = set() + for featureset, label in labeled_featuresets: + for fname in featureset: + feature_names.add(fname) + + # Collect a list of the values each feature can take. + if feature_values is None and binary: + feature_values = defaultdict(set) + for featureset, label in labeled_featuresets: + for fname, fval in featureset.items(): + feature_values[fname].add(fval) + + # Start with a stump. + if not binary: + tree = DecisionTreeClassifier.best_stump( + feature_names, labeled_featuresets, verbose + ) + else: + tree = DecisionTreeClassifier.best_binary_stump( + feature_names, labeled_featuresets, feature_values, verbose + ) + + # Refine the stump. + tree.refine( + labeled_featuresets, + entropy_cutoff, + depth_cutoff - 1, + support_cutoff, + binary, + feature_values, + verbose, + ) + + # Return it + return tree + + @staticmethod + def leaf(labeled_featuresets): + label = FreqDist(label for (featureset, label) in labeled_featuresets).max() + return DecisionTreeClassifier(label) + + @staticmethod + def stump(feature_name, labeled_featuresets): + label = FreqDist(label for (featureset, label) in labeled_featuresets).max() + + # Find the best label for each value. 
+ freqs = defaultdict(FreqDist) # freq(label|value) + for featureset, label in labeled_featuresets: + feature_value = featureset.get(feature_name) + freqs[feature_value][label] += 1 + + decisions = dict( + (val, DecisionTreeClassifier(freqs[val].max())) for val in freqs + ) + return DecisionTreeClassifier(label, feature_name, decisions) + + def refine( + self, + labeled_featuresets, + entropy_cutoff, + depth_cutoff, + support_cutoff, + binary=False, + feature_values=None, + verbose=False, + ): + if len(labeled_featuresets) <= support_cutoff: + return + if self._fname is None: + return + if depth_cutoff <= 0: + return + for fval in self._decisions: + fval_featuresets = [ + (featureset, label) + for (featureset, label) in labeled_featuresets + if featureset.get(self._fname) == fval + ] + + label_freqs = FreqDist(label for (featureset, label) in fval_featuresets) + if entropy(MLEProbDist(label_freqs)) > entropy_cutoff: + self._decisions[fval] = DecisionTreeClassifier.train( + fval_featuresets, + entropy_cutoff, + depth_cutoff, + support_cutoff, + binary, + feature_values, + verbose, + ) + if self._default is not None: + default_featuresets = [ + (featureset, label) + for (featureset, label) in labeled_featuresets + if featureset.get(self._fname) not in self._decisions + ] + label_freqs = FreqDist(label for (featureset, label) in default_featuresets) + if entropy(MLEProbDist(label_freqs)) > entropy_cutoff: + self._default = DecisionTreeClassifier.train( + default_featuresets, + entropy_cutoff, + depth_cutoff, + support_cutoff, + binary, + feature_values, + verbose, + ) + + @staticmethod + def best_stump(feature_names, labeled_featuresets, verbose=False): + best_stump = DecisionTreeClassifier.leaf(labeled_featuresets) + best_error = best_stump.error(labeled_featuresets) + for fname in feature_names: + stump = DecisionTreeClassifier.stump(fname, labeled_featuresets) + stump_error = stump.error(labeled_featuresets) + if stump_error < best_error: + best_error = stump_error + best_stump = stump + if verbose: + print( + ( + 'best stump for {:6d} toks uses {:20} err={:6.4f}'.format( + len(labeled_featuresets), best_stump._fname, best_error + ) + ) + ) + return best_stump + + @staticmethod + def binary_stump(feature_name, feature_value, labeled_featuresets): + label = FreqDist(label for (featureset, label) in labeled_featuresets).max() + + # Find the best label for each value. + pos_fdist = FreqDist() + neg_fdist = FreqDist() + for featureset, label in labeled_featuresets: + if featureset.get(feature_name) == feature_value: + pos_fdist[label] += 1 + else: + neg_fdist[label] += 1 + + decisions = {} + default = label + # But hopefully we have observations! 
+ if pos_fdist.N() > 0: + decisions = {feature_value: DecisionTreeClassifier(pos_fdist.max())} + if neg_fdist.N() > 0: + default = DecisionTreeClassifier(neg_fdist.max()) + + return DecisionTreeClassifier(label, feature_name, decisions, default) + + @staticmethod + def best_binary_stump( + feature_names, labeled_featuresets, feature_values, verbose=False + ): + best_stump = DecisionTreeClassifier.leaf(labeled_featuresets) + best_error = best_stump.error(labeled_featuresets) + for fname in feature_names: + for fval in feature_values[fname]: + stump = DecisionTreeClassifier.binary_stump( + fname, fval, labeled_featuresets + ) + stump_error = stump.error(labeled_featuresets) + if stump_error < best_error: + best_error = stump_error + best_stump = stump + if verbose: + if best_stump._decisions: + descr = '{0}={1}'.format( + best_stump._fname, list(best_stump._decisions.keys())[0] + ) + else: + descr = '(default)' + print( + ( + 'best stump for {:6d} toks uses {:20} err={:6.4f}'.format( + len(labeled_featuresets), descr, best_error + ) + ) + ) + return best_stump + + +##////////////////////////////////////////////////////// +## Demo +##////////////////////////////////////////////////////// + + +def f(x): + return DecisionTreeClassifier.train(x, binary=True, verbose=True) + + +def demo(): + from nltk.classify.util import names_demo, binary_names_demo_features + + classifier = names_demo( + f, binary_names_demo_features # DecisionTreeClassifier.train, + ) + print(classifier.pp(depth=7)) + print(classifier.pseudocode(depth=7)) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/maxent.py b/venv.bak/lib/python3.7/site-packages/nltk/classify/maxent.py new file mode 100644 index 0000000..e74b676 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/classify/maxent.py @@ -0,0 +1,1580 @@ +# Natural Language Toolkit: Maximum Entropy Classifiers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Dmitry Chichkov (TypedMaxentFeatureEncoding) +# URL: +# For license information, see LICENSE.TXT + +""" +A classifier model based on maximum entropy modeling framework. This +framework considers all of the probability distributions that are +empirically consistent with the training data; and chooses the +distribution with the highest entropy. A probability distribution is +"empirically consistent" with a set of training data if its estimated +frequency with which a class and a feature vector value co-occur is +equal to the actual frequency in the data. + +Terminology: 'feature' +====================== +The term *feature* is usually used to refer to some property of an +unlabeled token. For example, when performing word sense +disambiguation, we might define a ``'prevword'`` feature whose value is +the word preceding the target word. However, in the context of +maxent modeling, the term *feature* is typically used to refer to a +property of a "labeled" token. In order to prevent confusion, we +will introduce two distinct terms to disambiguate these two different +concepts: + + - An "input-feature" is a property of an unlabeled token. + - A "joint-feature" is a property of a labeled token. + +In the rest of the ``nltk.classify`` module, the term "features" is +used to refer to what we will call "input-features" in this module. + +In literature that describes and discusses maximum entropy models, +input-features are typically called "contexts", and joint-features +are simply referred to as "features". 
+ +Converting Input-Features to Joint-Features +------------------------------------------- +In maximum entropy models, joint-features are required to have numeric +values. Typically, each input-feature ``input_feat`` is mapped to a +set of joint-features of the form: + +| joint_feat(token, label) = { 1 if input_feat(token) == feat_val +| { and label == some_label +| { +| { 0 otherwise + +For all values of ``feat_val`` and ``some_label``. This mapping is +performed by classes that implement the ``MaxentFeatureEncodingI`` +interface. +""" +from __future__ import print_function, unicode_literals + +try: + import numpy +except ImportError: + pass + +import tempfile +import os +from collections import defaultdict + +from six import integer_types + +from nltk import compat +from nltk.data import gzip_open_unicode +from nltk.util import OrderedDict +from nltk.probability import DictionaryProbDist + +from nltk.classify.api import ClassifierI +from nltk.classify.util import CutoffChecker, accuracy, log_likelihood +from nltk.classify.megam import call_megam, write_megam_file, parse_megam_weights +from nltk.classify.tadm import call_tadm, write_tadm_file, parse_tadm_weights + +__docformat__ = 'epytext en' + +###################################################################### +# { Classifier Model +###################################################################### + + +@compat.python_2_unicode_compatible +class MaxentClassifier(ClassifierI): + """ + A maximum entropy classifier (also known as a "conditional + exponential classifier"). This classifier is parameterized by a + set of "weights", which are used to combine the joint-features + that are generated from a featureset by an "encoding". In + particular, the encoding maps each ``(featureset, label)`` pair to + a vector. The probability of each label is then computed using + the following equation:: + + dotprod(weights, encode(fs,label)) + prob(fs|label) = --------------------------------------------------- + sum(dotprod(weights, encode(fs,l)) for l in labels) + + Where ``dotprod`` is the dot product:: + + dotprod(a,b) = sum(x*y for (x,y) in zip(a,b)) + """ + + def __init__(self, encoding, weights, logarithmic=True): + """ + Construct a new maxent classifier model. Typically, new + classifier models are created using the ``train()`` method. + + :type encoding: MaxentFeatureEncodingI + :param encoding: An encoding that is used to convert the + featuresets that are given to the ``classify`` method into + joint-feature vectors, which are used by the maxent + classifier model. + + :type weights: list of float + :param weights: The feature weight vector for this classifier. + + :type logarithmic: bool + :param logarithmic: If false, then use non-logarithmic weights. + """ + self._encoding = encoding + self._weights = weights + self._logarithmic = logarithmic + # self._logarithmic = False + assert encoding.length() == len(weights) + + def labels(self): + return self._encoding.labels() + + def set_weights(self, new_weights): + """ + Set the feature weight vector for this classifier. + :param new_weights: The new feature weight vector. + :type new_weights: list of float + """ + self._weights = new_weights + assert self._encoding.length() == len(new_weights) + + def weights(self): + """ + :return: The feature weight vector for this classifier. 
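[Editor's note] The module docstring above describes how each input-feature/label pair is mapped to a sparse joint-feature vector. The snippet below is an illustrative sketch, not part of the diffed sources; the toy featuresets and labels are invented. It uses the ``BinaryMaxentFeatureEncoding`` class defined later in this file and assumes NLTK is importable.

    from nltk.classify.maxent import BinaryMaxentFeatureEncoding

    # Two toy training tokens: (input-featureset, label) pairs.
    train_toks = [({'prevword': 'the'}, 'NOUN'),
                  ({'prevword': 'to'}, 'VERB')]

    encoding = BinaryMaxentFeatureEncoding.train(train_toks)

    # encode() returns the sparse joint-feature vector as (index, value) pairs;
    # a binary joint-feature fires only for an attested (fname, fval, label) triple.
    print(encoding.encode({'prevword': 'the'}, 'NOUN'))   # e.g. [(0, 1)]
    print(encoding.encode({'prevword': 'the'}, 'VERB'))   # unattested triple: []
    print(encoding.length(), sorted(encoding.labels()))   # 2 ['NOUN', 'VERB']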
+ :rtype: list of float + """ + return self._weights + + def classify(self, featureset): + return self.prob_classify(featureset).max() + + def prob_classify(self, featureset): + prob_dict = {} + for label in self._encoding.labels(): + feature_vector = self._encoding.encode(featureset, label) + + if self._logarithmic: + total = 0.0 + for (f_id, f_val) in feature_vector: + total += self._weights[f_id] * f_val + prob_dict[label] = total + + else: + prod = 1.0 + for (f_id, f_val) in feature_vector: + prod *= self._weights[f_id] ** f_val + prob_dict[label] = prod + + # Normalize the dictionary to give a probability distribution + return DictionaryProbDist(prob_dict, log=self._logarithmic, normalize=True) + + def explain(self, featureset, columns=4): + """ + Print a table showing the effect of each of the features in + the given feature set, and how they combine to determine the + probabilities of each label for that featureset. + """ + descr_width = 50 + TEMPLATE = ' %-' + str(descr_width - 2) + 's%s%8.3f' + + pdist = self.prob_classify(featureset) + labels = sorted(pdist.samples(), key=pdist.prob, reverse=True) + labels = labels[:columns] + print( + ' Feature'.ljust(descr_width) + + ''.join('%8s' % (("%s" % l)[:7]) for l in labels) + ) + print(' ' + '-' * (descr_width - 2 + 8 * len(labels))) + sums = defaultdict(int) + for i, label in enumerate(labels): + feature_vector = self._encoding.encode(featureset, label) + feature_vector.sort( + key=lambda fid__: abs(self._weights[fid__[0]]), reverse=True + ) + for (f_id, f_val) in feature_vector: + if self._logarithmic: + score = self._weights[f_id] * f_val + else: + score = self._weights[f_id] ** f_val + descr = self._encoding.describe(f_id) + descr = descr.split(' and label is ')[0] # hack + descr += ' (%s)' % f_val # hack + if len(descr) > 47: + descr = descr[:44] + '...' + print(TEMPLATE % (descr, i * 8 * ' ', score)) + sums[label] += score + print(' ' + '-' * (descr_width - 1 + 8 * len(labels))) + print( + ' TOTAL:'.ljust(descr_width) + ''.join('%8.3f' % sums[l] for l in labels) + ) + print( + ' PROBS:'.ljust(descr_width) + + ''.join('%8.3f' % pdist.prob(l) for l in labels) + ) + + def most_informative_features(self, n=10): + """ + Generates the ranked list of informative features from most to least. + """ + if hasattr(self, '_most_informative_features'): + return self._most_informative_features[:n] + else: + self._most_informative_features = sorted( + list(range(len(self._weights))), + key=lambda fid: abs(self._weights[fid]), + reverse=True, + ) + return self._most_informative_features[:n] + + def show_most_informative_features(self, n=10, show='all'): + """ + :param show: all, neg, or pos (for negative-only or positive-only) + :type show: str + :param n: The no. of top features + :type n: int + """ + # Use None the full list of ranked features. + fids = self.most_informative_features(None) + if show == 'pos': + fids = [fid for fid in fids if self._weights[fid] > 0] + elif show == 'neg': + fids = [fid for fid in fids if self._weights[fid] < 0] + for fid in fids[:n]: + print('%8.3f %s' % (self._weights[fid], self._encoding.describe(fid))) + + def __repr__(self): + return '' % ( + len(self._encoding.labels()), + self._encoding.length(), + ) + + #: A list of the algorithm names that are accepted for the + #: ``train()`` method's ``algorithm`` parameter. 
+ ALGORITHMS = ['GIS', 'IIS', 'MEGAM', 'TADM'] + + @classmethod + def train( + cls, + train_toks, + algorithm=None, + trace=3, + encoding=None, + labels=None, + gaussian_prior_sigma=0, + **cutoffs + ): + """ + Train a new maxent classifier based on the given corpus of + training samples. This classifier will have its weights + chosen to maximize entropy while remaining empirically + consistent with the training corpus. + + :rtype: MaxentClassifier + :return: The new maxent classifier + + :type train_toks: list + :param train_toks: Training data, represented as a list of + pairs, the first member of which is a featureset, + and the second of which is a classification label. + + :type algorithm: str + :param algorithm: A case-insensitive string, specifying which + algorithm should be used to train the classifier. The + following algorithms are currently available. + + - Iterative Scaling Methods: Generalized Iterative Scaling (``'GIS'``), + Improved Iterative Scaling (``'IIS'``) + - External Libraries (requiring megam): + LM-BFGS algorithm, with training performed by Megam (``'megam'``) + + The default algorithm is ``'IIS'``. + + :type trace: int + :param trace: The level of diagnostic tracing output to produce. + Higher values produce more verbose output. + :type encoding: MaxentFeatureEncodingI + :param encoding: A feature encoding, used to convert featuresets + into feature vectors. If none is specified, then a + ``BinaryMaxentFeatureEncoding`` will be built based on the + features that are attested in the training corpus. + :type labels: list(str) + :param labels: The set of possible labels. If none is given, then + the set of all labels attested in the training data will be + used instead. + :param gaussian_prior_sigma: The sigma value for a gaussian + prior on model weights. Currently, this is supported by + ``megam``. For other algorithms, its value is ignored. + :param cutoffs: Arguments specifying various conditions under + which the training should be halted. (Some of the cutoff + conditions are not supported by some algorithms.) + + - ``max_iter=v``: Terminate after ``v`` iterations. + - ``min_ll=v``: Terminate after the negative average + log-likelihood drops under ``v``. + - ``min_lldelta=v``: Terminate if a single iteration improves + log likelihood by less than ``v``. + """ + if algorithm is None: + algorithm = 'iis' + for key in cutoffs: + if key not in ( + 'max_iter', + 'min_ll', + 'min_lldelta', + 'max_acc', + 'min_accdelta', + 'count_cutoff', + 'norm', + 'explicit', + 'bernoulli', + ): + raise TypeError('Unexpected keyword arg %r' % key) + algorithm = algorithm.lower() + if algorithm == 'iis': + return train_maxent_classifier_with_iis( + train_toks, trace, encoding, labels, **cutoffs + ) + elif algorithm == 'gis': + return train_maxent_classifier_with_gis( + train_toks, trace, encoding, labels, **cutoffs + ) + elif algorithm == 'megam': + return train_maxent_classifier_with_megam( + train_toks, trace, encoding, labels, gaussian_prior_sigma, **cutoffs + ) + elif algorithm == 'tadm': + kwargs = cutoffs + kwargs['trace'] = trace + kwargs['encoding'] = encoding + kwargs['labels'] = labels + kwargs['gaussian_prior_sigma'] = gaussian_prior_sigma + return TadmMaxentClassifier.train(train_toks, **kwargs) + else: + raise ValueError('Unknown algorithm %s' % algorithm) + + +#: Alias for MaxentClassifier. 
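[Editor's note] A short usage sketch of the ``train()`` entry point documented above. It is not part of the diffed sources; the toy corpus is invented, and it assumes ``numpy`` is installed since the built-in IIS/GIS trainers rely on it.

    from nltk.classify.maxent import MaxentClassifier

    train_toks = [({'contains_free': True}, 'spam'),
                  ({'contains_free': False}, 'ham'),
                  ({'contains_free': True}, 'spam'),
                  ({'contains_free': False}, 'ham')]

    # 'iis' is the default algorithm; max_iter is one of the supported cutoffs.
    classifier = MaxentClassifier.train(train_toks, algorithm='iis',
                                        trace=0, max_iter=10)
    print(classifier.classify({'contains_free': True}))                  # expected: 'spam'
    print(classifier.prob_classify({'contains_free': True}).prob('spam'))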
+ConditionalExponentialClassifier = MaxentClassifier + + +###################################################################### +# { Feature Encodings +###################################################################### + + +class MaxentFeatureEncodingI(object): + """ + A mapping that converts a set of input-feature values to a vector + of joint-feature values, given a label. This conversion is + necessary to translate featuresets into a format that can be used + by maximum entropy models. + + The set of joint-features used by a given encoding is fixed, and + each index in the generated joint-feature vectors corresponds to a + single joint-feature. The length of the generated joint-feature + vectors is therefore constant (for a given encoding). + + Because the joint-feature vectors generated by + ``MaxentFeatureEncodingI`` are typically very sparse, they are + represented as a list of ``(index, value)`` tuples, specifying the + value of each non-zero joint-feature. + + Feature encodings are generally created using the ``train()`` + method, which generates an appropriate encoding based on the + input-feature values and labels that are present in a given + corpus. + """ + + def encode(self, featureset, label): + """ + Given a (featureset, label) pair, return the corresponding + vector of joint-feature values. This vector is represented as + a list of ``(index, value)`` tuples, specifying the value of + each non-zero joint-feature. + + :type featureset: dict + :rtype: list(tuple(int, int)) + """ + raise NotImplementedError() + + def length(self): + """ + :return: The size of the fixed-length joint-feature vectors + that are generated by this encoding. + :rtype: int + """ + raise NotImplementedError() + + def labels(self): + """ + :return: A list of the \"known labels\" -- i.e., all labels + ``l`` such that ``self.encode(fs,l)`` can be a nonzero + joint-feature vector for some value of ``fs``. + :rtype: list + """ + raise NotImplementedError() + + def describe(self, fid): + """ + :return: A string describing the value of the joint-feature + whose index in the generated feature vectors is ``fid``. + :rtype: str + """ + raise NotImplementedError() + + def train(cls, train_toks): + """ + Construct and return new feature encoding, based on a given + training corpus ``train_toks``. + + :type train_toks: list(tuple(dict, str)) + :param train_toks: Training data, represented as a list of + pairs, the first member of which is a feature dictionary, + and the second of which is a classification label. + """ + raise NotImplementedError() + + +class FunctionBackedMaxentFeatureEncoding(MaxentFeatureEncodingI): + """ + A feature encoding that calls a user-supplied function to map a + given featureset/label pair to a sparse joint-feature vector. + """ + + def __init__(self, func, length, labels): + """ + Construct a new feature encoding based on the given function. + + :type func: (callable) + :param func: A function that takes two arguments, a featureset + and a label, and returns the sparse joint feature vector + that encodes them:: + + func(featureset, label) -> feature_vector + + This sparse joint feature vector (``feature_vector``) is a + list of ``(index,value)`` tuples. + + :type length: int + :param length: The size of the fixed-length joint-feature + vectors that are generated by this encoding. 
+ + :type labels: list + :param labels: A list of the \"known labels\" for this + encoding -- i.e., all labels ``l`` such that + ``self.encode(fs,l)`` can be a nonzero joint-feature vector + for some value of ``fs``. + """ + self._length = length + self._func = func + self._labels = labels + + def encode(self, featureset, label): + return self._func(featureset, label) + + def length(self): + return self._length + + def labels(self): + return self._labels + + def describe(self, fid): + return 'no description available' + + +class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI): + """ + A feature encoding that generates vectors containing a binary + joint-features of the form: + + | joint_feat(fs, l) = { 1 if (fs[fname] == fval) and (l == label) + | { + | { 0 otherwise + + Where ``fname`` is the name of an input-feature, ``fval`` is a value + for that input-feature, and ``label`` is a label. + + Typically, these features are constructed based on a training + corpus, using the ``train()`` method. This method will create one + feature for each combination of ``fname``, ``fval``, and ``label`` + that occurs at least once in the training corpus. + + The ``unseen_features`` parameter can be used to add "unseen-value + features", which are used whenever an input feature has a value + that was not encountered in the training corpus. These features + have the form: + + | joint_feat(fs, l) = { 1 if is_unseen(fname, fs[fname]) + | { and l == label + | { + | { 0 otherwise + + Where ``is_unseen(fname, fval)`` is true if the encoding does not + contain any joint features that are true when ``fs[fname]==fval``. + + The ``alwayson_features`` parameter can be used to add "always-on + features", which have the form:: + + | joint_feat(fs, l) = { 1 if (l == label) + | { + | { 0 otherwise + + These always-on features allow the maxent model to directly model + the prior probabilities of each label. + """ + + def __init__(self, labels, mapping, unseen_features=False, alwayson_features=False): + """ + :param labels: A list of the \"known labels\" for this encoding. + + :param mapping: A dictionary mapping from ``(fname,fval,label)`` + tuples to corresponding joint-feature indexes. These + indexes must be the set of integers from 0...len(mapping). + If ``mapping[fname,fval,label]=id``, then + ``self.encode(..., fname:fval, ..., label)[id]`` is 1; + otherwise, it is 0. + + :param unseen_features: If true, then include unseen value + features in the generated joint-feature vectors. + + :param alwayson_features: If true, then include always-on + features in the generated joint-feature vectors. 
+ """ + if set(mapping.values()) != set(range(len(mapping))): + raise ValueError( + 'Mapping values must be exactly the ' + 'set of integers from 0...len(mapping)' + ) + + self._labels = list(labels) + """A list of attested labels.""" + + self._mapping = mapping + """dict mapping from (fname,fval,label) -> fid""" + + self._length = len(mapping) + """The length of generated joint feature vectors.""" + + self._alwayson = None + """dict mapping from label -> fid""" + + self._unseen = None + """dict mapping from fname -> fid""" + + if alwayson_features: + self._alwayson = dict( + (label, i + self._length) for (i, label) in enumerate(labels) + ) + self._length += len(self._alwayson) + + if unseen_features: + fnames = set(fname for (fname, fval, label) in mapping) + self._unseen = dict( + (fname, i + self._length) for (i, fname) in enumerate(fnames) + ) + self._length += len(fnames) + + def encode(self, featureset, label): + # Inherit docs. + encoding = [] + + # Convert input-features to joint-features: + for fname, fval in featureset.items(): + # Known feature name & value: + if (fname, fval, label) in self._mapping: + encoding.append((self._mapping[fname, fval, label], 1)) + + # Otherwise, we might want to fire an "unseen-value feature". + elif self._unseen: + # Have we seen this fname/fval combination with any label? + for label2 in self._labels: + if (fname, fval, label2) in self._mapping: + break # we've seen this fname/fval combo + # We haven't -- fire the unseen-value feature + else: + if fname in self._unseen: + encoding.append((self._unseen[fname], 1)) + + # Add always-on features: + if self._alwayson and label in self._alwayson: + encoding.append((self._alwayson[label], 1)) + + return encoding + + def describe(self, f_id): + # Inherit docs. + if not isinstance(f_id, integer_types): + raise TypeError('describe() expected an int') + try: + self._inv_mapping + except AttributeError: + self._inv_mapping = [-1] * len(self._mapping) + for (info, i) in self._mapping.items(): + self._inv_mapping[i] = info + + if f_id < len(self._mapping): + (fname, fval, label) = self._inv_mapping[f_id] + return '%s==%r and label is %r' % (fname, fval, label) + elif self._alwayson and f_id in self._alwayson.values(): + for (label, f_id2) in self._alwayson.items(): + if f_id == f_id2: + return 'label is %r' % label + elif self._unseen and f_id in self._unseen.values(): + for (fname, f_id2) in self._unseen.items(): + if f_id == f_id2: + return '%s is unseen' % fname + else: + raise ValueError('Bad feature id') + + def labels(self): + # Inherit docs. + return self._labels + + def length(self): + # Inherit docs. + return self._length + + @classmethod + def train(cls, train_toks, count_cutoff=0, labels=None, **options): + """ + Construct and return new feature encoding, based on a given + training corpus ``train_toks``. See the class description + ``BinaryMaxentFeatureEncoding`` for a description of the + joint-features that will be included in this encoding. + + :type train_toks: list(tuple(dict, str)) + :param train_toks: Training data, represented as a list of + pairs, the first member of which is a feature dictionary, + and the second of which is a classification label. + + :type count_cutoff: int + :param count_cutoff: A cutoff value that is used to discard + rare joint-features. If a joint-feature's value is 1 + fewer than ``count_cutoff`` times in the training corpus, + then that joint-feature is not included in the generated + encoding. 
+ + :type labels: list + :param labels: A list of labels that should be used by the + classifier. If not specified, then the set of labels + attested in ``train_toks`` will be used. + + :param options: Extra parameters for the constructor, such as + ``unseen_features`` and ``alwayson_features``. + """ + mapping = {} # maps (fname, fval, label) -> fid + seen_labels = set() # The set of labels we've encountered + count = defaultdict(int) # maps (fname, fval) -> count + + for (tok, label) in train_toks: + if labels and label not in labels: + raise ValueError('Unexpected label %s' % label) + seen_labels.add(label) + + # Record each of the features. + for (fname, fval) in tok.items(): + + # If a count cutoff is given, then only add a joint + # feature once the corresponding (fname, fval, label) + # tuple exceeds that cutoff. + count[fname, fval] += 1 + if count[fname, fval] >= count_cutoff: + if (fname, fval, label) not in mapping: + mapping[fname, fval, label] = len(mapping) + + if labels is None: + labels = seen_labels + return cls(labels, mapping, **options) + + +class GISEncoding(BinaryMaxentFeatureEncoding): + """ + A binary feature encoding which adds one new joint-feature to the + joint-features defined by ``BinaryMaxentFeatureEncoding``: a + correction feature, whose value is chosen to ensure that the + sparse vector always sums to a constant non-negative number. This + new feature is used to ensure two preconditions for the GIS + training algorithm: + + - At least one feature vector index must be nonzero for every + token. + - The feature vector must sum to a constant non-negative number + for every token. + """ + + def __init__( + self, labels, mapping, unseen_features=False, alwayson_features=False, C=None + ): + """ + :param C: The correction constant. The value of the correction + feature is based on this value. In particular, its value is + ``C - sum([v for (f,v) in encoding])``. + :seealso: ``BinaryMaxentFeatureEncoding.__init__`` + """ + BinaryMaxentFeatureEncoding.__init__( + self, labels, mapping, unseen_features, alwayson_features + ) + if C is None: + C = len(set(fname for (fname, fval, label) in mapping)) + 1 + self._C = C + + @property + def C(self): + """The non-negative constant that all encoded feature vectors + will sum to.""" + return self._C + + def encode(self, featureset, label): + # Get the basic encoding. + encoding = BinaryMaxentFeatureEncoding.encode(self, featureset, label) + base_length = BinaryMaxentFeatureEncoding.length(self) + + # Add a correction feature. 
+ total = sum(v for (f, v) in encoding) + if total >= self._C: + raise ValueError('Correction feature is not high enough!') + encoding.append((base_length, self._C - total)) + + # Return the result + return encoding + + def length(self): + return BinaryMaxentFeatureEncoding.length(self) + 1 + + def describe(self, f_id): + if f_id == BinaryMaxentFeatureEncoding.length(self): + return 'Correction feature (%s)' % self._C + else: + return BinaryMaxentFeatureEncoding.describe(self, f_id) + + +class TadmEventMaxentFeatureEncoding(BinaryMaxentFeatureEncoding): + def __init__(self, labels, mapping, unseen_features=False, alwayson_features=False): + self._mapping = OrderedDict(mapping) + self._label_mapping = OrderedDict() + BinaryMaxentFeatureEncoding.__init__( + self, labels, self._mapping, unseen_features, alwayson_features + ) + + def encode(self, featureset, label): + encoding = [] + for feature, value in featureset.items(): + if (feature, label) not in self._mapping: + self._mapping[(feature, label)] = len(self._mapping) + if value not in self._label_mapping: + if not isinstance(value, int): + self._label_mapping[value] = len(self._label_mapping) + else: + self._label_mapping[value] = value + encoding.append( + (self._mapping[(feature, label)], self._label_mapping[value]) + ) + return encoding + + def labels(self): + return self._labels + + def describe(self, fid): + for (feature, label) in self._mapping: + if self._mapping[(feature, label)] == fid: + return (feature, label) + + def length(self): + return len(self._mapping) + + @classmethod + def train(cls, train_toks, count_cutoff=0, labels=None, **options): + mapping = OrderedDict() + if not labels: + labels = [] + + # This gets read twice, so compute the values in case it's lazy. + train_toks = list(train_toks) + + for (featureset, label) in train_toks: + if label not in labels: + labels.append(label) + + for (featureset, label) in train_toks: + for label in labels: + for feature in featureset: + if (feature, label) not in mapping: + mapping[(feature, label)] = len(mapping) + + return cls(labels, mapping, **options) + + +class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI): + """ + A feature encoding that generates vectors containing integer, + float and binary joint-features of the form: + + Binary (for string and boolean features): + + | joint_feat(fs, l) = { 1 if (fs[fname] == fval) and (l == label) + | { + | { 0 otherwise + + Value (for integer and float features): + + | joint_feat(fs, l) = { fval if (fs[fname] == type(fval)) + | { and (l == label) + | { + | { not encoded otherwise + + Where ``fname`` is the name of an input-feature, ``fval`` is a value + for that input-feature, and ``label`` is a label. + + Typically, these features are constructed based on a training + corpus, using the ``train()`` method. + + For string and boolean features [type(fval) not in (int, float)] + this method will create one feature for each combination of + ``fname``, ``fval``, and ``label`` that occurs at least once in the + training corpus. + + For integer and float features [type(fval) in (int, float)] this + method will create one feature for each combination of ``fname`` + and ``label`` that occurs at least once in the training corpus. + + For binary features the ``unseen_features`` parameter can be used + to add "unseen-value features", which are used whenever an input + feature has a value that was not encountered in the training + corpus. 
These features have the form: + + | joint_feat(fs, l) = { 1 if is_unseen(fname, fs[fname]) + | { and l == label + | { + | { 0 otherwise + + Where ``is_unseen(fname, fval)`` is true if the encoding does not + contain any joint features that are true when ``fs[fname]==fval``. + + The ``alwayson_features`` parameter can be used to add "always-on + features", which have the form: + + | joint_feat(fs, l) = { 1 if (l == label) + | { + | { 0 otherwise + + These always-on features allow the maxent model to directly model + the prior probabilities of each label. + """ + + def __init__(self, labels, mapping, unseen_features=False, alwayson_features=False): + """ + :param labels: A list of the \"known labels\" for this encoding. + + :param mapping: A dictionary mapping from ``(fname,fval,label)`` + tuples to corresponding joint-feature indexes. These + indexes must be the set of integers from 0...len(mapping). + If ``mapping[fname,fval,label]=id``, then + ``self.encode({..., fname:fval, ...``, label)[id]} is 1; + otherwise, it is 0. + + :param unseen_features: If true, then include unseen value + features in the generated joint-feature vectors. + + :param alwayson_features: If true, then include always-on + features in the generated joint-feature vectors. + """ + if set(mapping.values()) != set(range(len(mapping))): + raise ValueError( + 'Mapping values must be exactly the ' + 'set of integers from 0...len(mapping)' + ) + + self._labels = list(labels) + """A list of attested labels.""" + + self._mapping = mapping + """dict mapping from (fname,fval,label) -> fid""" + + self._length = len(mapping) + """The length of generated joint feature vectors.""" + + self._alwayson = None + """dict mapping from label -> fid""" + + self._unseen = None + """dict mapping from fname -> fid""" + + if alwayson_features: + self._alwayson = dict( + (label, i + self._length) for (i, label) in enumerate(labels) + ) + self._length += len(self._alwayson) + + if unseen_features: + fnames = set(fname for (fname, fval, label) in mapping) + self._unseen = dict( + (fname, i + self._length) for (i, fname) in enumerate(fnames) + ) + self._length += len(fnames) + + def encode(self, featureset, label): + # Inherit docs. + encoding = [] + + # Convert input-features to joint-features: + for fname, fval in featureset.items(): + if isinstance(fval, (integer_types, float)): + # Known feature name & value: + if (fname, type(fval), label) in self._mapping: + encoding.append((self._mapping[fname, type(fval), label], fval)) + else: + # Known feature name & value: + if (fname, fval, label) in self._mapping: + encoding.append((self._mapping[fname, fval, label], 1)) + + # Otherwise, we might want to fire an "unseen-value feature". + elif self._unseen: + # Have we seen this fname/fval combination with any label? + for label2 in self._labels: + if (fname, fval, label2) in self._mapping: + break # we've seen this fname/fval combo + # We haven't -- fire the unseen-value feature + else: + if fname in self._unseen: + encoding.append((self._unseen[fname], 1)) + + # Add always-on features: + if self._alwayson and label in self._alwayson: + encoding.append((self._alwayson[label], 1)) + + return encoding + + def describe(self, f_id): + # Inherit docs. 
+ if not isinstance(f_id, integer_types): + raise TypeError('describe() expected an int') + try: + self._inv_mapping + except AttributeError: + self._inv_mapping = [-1] * len(self._mapping) + for (info, i) in self._mapping.items(): + self._inv_mapping[i] = info + + if f_id < len(self._mapping): + (fname, fval, label) = self._inv_mapping[f_id] + return '%s==%r and label is %r' % (fname, fval, label) + elif self._alwayson and f_id in self._alwayson.values(): + for (label, f_id2) in self._alwayson.items(): + if f_id == f_id2: + return 'label is %r' % label + elif self._unseen and f_id in self._unseen.values(): + for (fname, f_id2) in self._unseen.items(): + if f_id == f_id2: + return '%s is unseen' % fname + else: + raise ValueError('Bad feature id') + + def labels(self): + # Inherit docs. + return self._labels + + def length(self): + # Inherit docs. + return self._length + + @classmethod + def train(cls, train_toks, count_cutoff=0, labels=None, **options): + """ + Construct and return new feature encoding, based on a given + training corpus ``train_toks``. See the class description + ``TypedMaxentFeatureEncoding`` for a description of the + joint-features that will be included in this encoding. + + Note: recognized feature values types are (int, float), over + types are interpreted as regular binary features. + + :type train_toks: list(tuple(dict, str)) + :param train_toks: Training data, represented as a list of + pairs, the first member of which is a feature dictionary, + and the second of which is a classification label. + + :type count_cutoff: int + :param count_cutoff: A cutoff value that is used to discard + rare joint-features. If a joint-feature's value is 1 + fewer than ``count_cutoff`` times in the training corpus, + then that joint-feature is not included in the generated + encoding. + + :type labels: list + :param labels: A list of labels that should be used by the + classifier. If not specified, then the set of labels + attested in ``train_toks`` will be used. + + :param options: Extra parameters for the constructor, such as + ``unseen_features`` and ``alwayson_features``. + """ + mapping = {} # maps (fname, fval, label) -> fid + seen_labels = set() # The set of labels we've encountered + count = defaultdict(int) # maps (fname, fval) -> count + + for (tok, label) in train_toks: + if labels and label not in labels: + raise ValueError('Unexpected label %s' % label) + seen_labels.add(label) + + # Record each of the features. + for (fname, fval) in tok.items(): + if type(fval) in (int, float): + fval = type(fval) + # If a count cutoff is given, then only add a joint + # feature once the corresponding (fname, fval, label) + # tuple exceeds that cutoff. + count[fname, fval] += 1 + if count[fname, fval] >= count_cutoff: + if (fname, fval, label) not in mapping: + mapping[fname, fval, label] = len(mapping) + + if labels is None: + labels = seen_labels + return cls(labels, mapping, **options) + + +###################################################################### +# { Classifier Trainer: Generalized Iterative Scaling +###################################################################### + + +def train_maxent_classifier_with_gis( + train_toks, trace=3, encoding=None, labels=None, **cutoffs +): + """ + Train a new ``ConditionalExponentialClassifier``, using the given + training samples, using the Generalized Iterative Scaling + algorithm. 
This ``ConditionalExponentialClassifier`` will encode + the model that maximizes entropy from all the models that are + empirically consistent with ``train_toks``. + + :see: ``train_maxent_classifier()`` for parameter descriptions. + """ + cutoffs.setdefault('max_iter', 100) + cutoffchecker = CutoffChecker(cutoffs) + + # Construct an encoding from the training data. + if encoding is None: + encoding = GISEncoding.train(train_toks, labels=labels) + + if not hasattr(encoding, 'C'): + raise TypeError( + 'The GIS algorithm requires an encoding that ' + 'defines C (e.g., GISEncoding).' + ) + + # Cinv is the inverse of the sum of each joint feature vector. + # This controls the learning rate: higher Cinv (or lower C) gives + # faster learning. + Cinv = 1.0 / encoding.C + + # Count how many times each feature occurs in the training data. + empirical_fcount = calculate_empirical_fcount(train_toks, encoding) + + # Check for any features that are not attested in train_toks. + unattested = set(numpy.nonzero(empirical_fcount == 0)[0]) + + # Build the classifier. Start with weight=0 for each attested + # feature, and weight=-infinity for each unattested feature. + weights = numpy.zeros(len(empirical_fcount), 'd') + for fid in unattested: + weights[fid] = numpy.NINF + classifier = ConditionalExponentialClassifier(encoding, weights) + + # Take the log of the empirical fcount. + log_empirical_fcount = numpy.log2(empirical_fcount) + del empirical_fcount + + if trace > 0: + print(' ==> Training (%d iterations)' % cutoffs['max_iter']) + if trace > 2: + print() + print(' Iteration Log Likelihood Accuracy') + print(' ---------------------------------------') + + # Train the classifier. + try: + while True: + if trace > 2: + ll = cutoffchecker.ll or log_likelihood(classifier, train_toks) + acc = cutoffchecker.acc or accuracy(classifier, train_toks) + iternum = cutoffchecker.iter + print(' %9d %14.5f %9.3f' % (iternum, ll, acc)) + + # Use the model to estimate the number of times each + # feature should occur in the training data. + estimated_fcount = calculate_estimated_fcount( + classifier, train_toks, encoding + ) + + # Take the log of estimated fcount (avoid taking log(0).) + for fid in unattested: + estimated_fcount[fid] += 1 + log_estimated_fcount = numpy.log2(estimated_fcount) + del estimated_fcount + + # Update the classifier weights + weights = classifier.weights() + weights += (log_empirical_fcount - log_estimated_fcount) * Cinv + classifier.set_weights(weights) + + # Check the log-likelihood & accuracy cutoffs. + if cutoffchecker.check(classifier, train_toks): + break + + except KeyboardInterrupt: + print(' Training stopped: keyboard interrupt') + except: + raise + + if trace > 2: + ll = log_likelihood(classifier, train_toks) + acc = accuracy(classifier, train_toks) + print(' Final %14.5f %9.3f' % (ll, acc)) + + # Return the classifier. 
+ return classifier + + +def calculate_empirical_fcount(train_toks, encoding): + fcount = numpy.zeros(encoding.length(), 'd') + + for tok, label in train_toks: + for (index, val) in encoding.encode(tok, label): + fcount[index] += val + + return fcount + + +def calculate_estimated_fcount(classifier, train_toks, encoding): + fcount = numpy.zeros(encoding.length(), 'd') + + for tok, label in train_toks: + pdist = classifier.prob_classify(tok) + for label in pdist.samples(): + prob = pdist.prob(label) + for (fid, fval) in encoding.encode(tok, label): + fcount[fid] += prob * fval + + return fcount + + +###################################################################### +# { Classifier Trainer: Improved Iterative Scaling +###################################################################### + + +def train_maxent_classifier_with_iis( + train_toks, trace=3, encoding=None, labels=None, **cutoffs +): + """ + Train a new ``ConditionalExponentialClassifier``, using the given + training samples, using the Improved Iterative Scaling algorithm. + This ``ConditionalExponentialClassifier`` will encode the model + that maximizes entropy from all the models that are empirically + consistent with ``train_toks``. + + :see: ``train_maxent_classifier()`` for parameter descriptions. + """ + cutoffs.setdefault('max_iter', 100) + cutoffchecker = CutoffChecker(cutoffs) + + # Construct an encoding from the training data. + if encoding is None: + encoding = BinaryMaxentFeatureEncoding.train(train_toks, labels=labels) + + # Count how many times each feature occurs in the training data. + empirical_ffreq = calculate_empirical_fcount(train_toks, encoding) / len(train_toks) + + # Find the nf map, and related variables nfarray and nfident. + # nf is the sum of the features for a given labeled text. + # nfmap compresses this sparse set of values to a dense list. + # nfarray performs the reverse operation. nfident is + # nfarray multiplied by an identity matrix. + nfmap = calculate_nfmap(train_toks, encoding) + nfarray = numpy.array(sorted(nfmap, key=nfmap.__getitem__), 'd') + nftranspose = numpy.reshape(nfarray, (len(nfarray), 1)) + + # Check for any features that are not attested in train_toks. + unattested = set(numpy.nonzero(empirical_ffreq == 0)[0]) + + # Build the classifier. Start with weight=0 for each attested + # feature, and weight=-infinity for each unattested feature. + weights = numpy.zeros(len(empirical_ffreq), 'd') + for fid in unattested: + weights[fid] = numpy.NINF + classifier = ConditionalExponentialClassifier(encoding, weights) + + if trace > 0: + print(' ==> Training (%d iterations)' % cutoffs['max_iter']) + if trace > 2: + print() + print(' Iteration Log Likelihood Accuracy') + print(' ---------------------------------------') + + # Train the classifier. + try: + while True: + if trace > 2: + ll = cutoffchecker.ll or log_likelihood(classifier, train_toks) + acc = cutoffchecker.acc or accuracy(classifier, train_toks) + iternum = cutoffchecker.iter + print(' %9d %14.5f %9.3f' % (iternum, ll, acc)) + + # Calculate the deltas for this iteration, using Newton's method. + deltas = calculate_deltas( + train_toks, + classifier, + unattested, + empirical_ffreq, + nfmap, + nfarray, + nftranspose, + encoding, + ) + + # Use the deltas to update our weights. + weights = classifier.weights() + weights += deltas + classifier.set_weights(weights) + + # Check the log-likelihood & accuracy cutoffs. 
+ if cutoffchecker.check(classifier, train_toks): + break + + except KeyboardInterrupt: + print(' Training stopped: keyboard interrupt') + except: + raise + + if trace > 2: + ll = log_likelihood(classifier, train_toks) + acc = accuracy(classifier, train_toks) + print(' Final %14.5f %9.3f' % (ll, acc)) + + # Return the classifier. + return classifier + + +def calculate_nfmap(train_toks, encoding): + """ + Construct a map that can be used to compress ``nf`` (which is + typically sparse). + + *nf(feature_vector)* is the sum of the feature values for + *feature_vector*. + + This represents the number of features that are active for a + given labeled text. This method finds all values of *nf(t)* + that are attested for at least one token in the given list of + training tokens; and constructs a dictionary mapping these + attested values to a continuous range *0...N*. For example, + if the only values of *nf()* that were attested were 3, 5, and + 7, then ``_nfmap`` might return the dictionary ``{3:0, 5:1, 7:2}``. + + :return: A map that can be used to compress ``nf`` to a dense + vector. + :rtype: dict(int -> int) + """ + # Map from nf to indices. This allows us to use smaller arrays. + nfset = set() + for tok, _ in train_toks: + for label in encoding.labels(): + nfset.add(sum(val for (id, val) in encoding.encode(tok, label))) + return dict((nf, i) for (i, nf) in enumerate(nfset)) + + +def calculate_deltas( + train_toks, + classifier, + unattested, + ffreq_empirical, + nfmap, + nfarray, + nftranspose, + encoding, +): + """ + Calculate the update values for the classifier weights for + this iteration of IIS. These update weights are the value of + ``delta`` that solves the equation:: + + ffreq_empirical[i] + = + SUM[fs,l] (classifier.prob_classify(fs).prob(l) * + feature_vector(fs,l)[i] * + exp(delta[i] * nf(feature_vector(fs,l)))) + + Where: + - *(fs,l)* is a (featureset, label) tuple from ``train_toks`` + - *feature_vector(fs,l)* = ``encoding.encode(fs,l)`` + - *nf(vector)* = ``sum([val for (id,val) in vector])`` + + This method uses Newton's method to solve this equation for + *delta[i]*. In particular, it starts with a guess of + ``delta[i]`` = 1; and iteratively updates ``delta`` with: + + | delta[i] -= (ffreq_empirical[i] - sum1[i])/(-sum2[i]) + + until convergence, where *sum1* and *sum2* are defined as: + + | sum1[i](delta) = SUM[fs,l] f[i](fs,l,delta) + | sum2[i](delta) = SUM[fs,l] (f[i](fs,l,delta).nf(feature_vector(fs,l))) + | f[i](fs,l,delta) = (classifier.prob_classify(fs).prob(l) . + | feature_vector(fs,l)[i] . + | exp(delta[i] . nf(feature_vector(fs,l)))) + + Note that *sum1* and *sum2* depend on ``delta``; so they need + to be re-computed each iteration. + + The variables ``nfmap``, ``nfarray``, and ``nftranspose`` are + used to generate a dense encoding for *nf(ltext)*. This + allows ``_deltas`` to calculate *sum1* and *sum2* using + matrices, which yields a significant performance improvement. + + :param train_toks: The set of training tokens. + :type train_toks: list(tuple(dict, str)) + :param classifier: The current classifier. + :type classifier: ClassifierI + :param ffreq_empirical: An array containing the empirical + frequency for each feature. The *i*\ th element of this + array is the empirical frequency for feature *i*. + :type ffreq_empirical: sequence of float + :param unattested: An array that is 1 for features that are + not attested in the training data; and 0 for features that + are attested. In other words, ``unattested[i]==0`` iff + ``ffreq_empirical[i]==0``. 
+ :type unattested: sequence of int + :param nfmap: A map that can be used to compress ``nf`` to a dense + vector. + :type nfmap: dict(int -> int) + :param nfarray: An array that can be used to uncompress ``nf`` + from a dense vector. + :type nfarray: array(float) + :param nftranspose: The transpose of ``nfarray`` + :type nftranspose: array(float) + """ + # These parameters control when we decide that we've + # converged. It probably should be possible to set these + # manually, via keyword arguments to train. + NEWTON_CONVERGE = 1e-12 + MAX_NEWTON = 300 + + deltas = numpy.ones(encoding.length(), 'd') + + # Precompute the A matrix: + # A[nf][id] = sum ( p(fs) * p(label|fs) * f(fs,label) ) + # over all label,fs s.t. num_features[label,fs]=nf + A = numpy.zeros((len(nfmap), encoding.length()), 'd') + + for tok, label in train_toks: + dist = classifier.prob_classify(tok) + + for label in encoding.labels(): + # Generate the feature vector + feature_vector = encoding.encode(tok, label) + # Find the number of active features + nf = sum(val for (id, val) in feature_vector) + # Update the A matrix + for (id, val) in feature_vector: + A[nfmap[nf], id] += dist.prob(label) * val + A /= len(train_toks) + + # Iteratively solve for delta. Use the following variables: + # - nf_delta[x][y] = nfarray[x] * delta[y] + # - exp_nf_delta[x][y] = exp(nf[x] * delta[y]) + # - nf_exp_nf_delta[x][y] = nf[x] * exp(nf[x] * delta[y]) + # - sum1[i][nf] = sum p(fs)p(label|fs)f[i](label,fs) + # exp(delta[i]nf) + # - sum2[i][nf] = sum p(fs)p(label|fs)f[i](label,fs) + # nf exp(delta[i]nf) + for rangenum in range(MAX_NEWTON): + nf_delta = numpy.outer(nfarray, deltas) + exp_nf_delta = 2 ** nf_delta + nf_exp_nf_delta = nftranspose * exp_nf_delta + sum1 = numpy.sum(exp_nf_delta * A, axis=0) + sum2 = numpy.sum(nf_exp_nf_delta * A, axis=0) + + # Avoid division by zero. + for fid in unattested: + sum2[fid] += 1 + + # Update the deltas. + deltas -= (ffreq_empirical - sum1) / -sum2 + + # We can stop once we converge. + n_error = numpy.sum(abs((ffreq_empirical - sum1))) / numpy.sum(abs(deltas)) + if n_error < NEWTON_CONVERGE: + return deltas + + return deltas + + +###################################################################### +# { Classifier Trainer: megam +###################################################################### + +# [xx] possible extension: add support for using implicit file format; +# this would need to put requirements on what encoding is used. But +# we may need this for other maxent classifier trainers that require +# implicit formats anyway. +def train_maxent_classifier_with_megam( + train_toks, trace=3, encoding=None, labels=None, gaussian_prior_sigma=0, **kwargs +): + """ + Train a new ``ConditionalExponentialClassifier``, using the given + training samples, using the external ``megam`` library. This + ``ConditionalExponentialClassifier`` will encode the model that + maximizes entropy from all the models that are empirically + consistent with ``train_toks``. + + :see: ``train_maxent_classifier()`` for parameter descriptions. + :see: ``nltk.classify.megam`` + """ + + explicit = True + bernoulli = True + if 'explicit' in kwargs: + explicit = kwargs['explicit'] + if 'bernoulli' in kwargs: + bernoulli = kwargs['bernoulli'] + + # Construct an encoding from the training data. + if encoding is None: + # Count cutoff can also be controlled by megam with the -minfc + # option. Not sure where the best place for it is. 
+ count_cutoff = kwargs.get('count_cutoff', 0) + encoding = BinaryMaxentFeatureEncoding.train( + train_toks, count_cutoff, labels=labels, alwayson_features=True + ) + elif labels is not None: + raise ValueError('Specify encoding or labels, not both') + + # Write a training file for megam. + try: + fd, trainfile_name = tempfile.mkstemp(prefix='nltk-') + with open(trainfile_name, 'w') as trainfile: + write_megam_file( + train_toks, encoding, trainfile, explicit=explicit, bernoulli=bernoulli + ) + os.close(fd) + except (OSError, IOError, ValueError) as e: + raise ValueError('Error while creating megam training file: %s' % e) + + # Run megam on the training file. + options = [] + options += ['-nobias', '-repeat', '10'] + if explicit: + options += ['-explicit'] + if not bernoulli: + options += ['-fvals'] + if gaussian_prior_sigma: + # Lambda is just the precision of the Gaussian prior, i.e. it's the + # inverse variance, so the parameter conversion is 1.0/sigma**2. + # See http://www.umiacs.umd.edu/~hal/docs/daume04cg-bfgs.pdf. + inv_variance = 1.0 / gaussian_prior_sigma ** 2 + else: + inv_variance = 0 + options += ['-lambda', '%.2f' % inv_variance, '-tune'] + if trace < 3: + options += ['-quiet'] + if 'max_iter' in kwargs: + options += ['-maxi', '%s' % kwargs['max_iter']] + if 'll_delta' in kwargs: + # [xx] this is actually a perplexity delta, not a log + # likelihood delta + options += ['-dpp', '%s' % abs(kwargs['ll_delta'])] + if hasattr(encoding, 'cost'): + options += ['-multilabel'] # each possible la + options += ['multiclass', trainfile_name] + stdout = call_megam(options) + # print './megam_i686.opt ', ' '.join(options) + # Delete the training file + try: + os.remove(trainfile_name) + except (OSError, IOError) as e: + print('Warning: unable to delete %s: %s' % (trainfile_name, e)) + + # Parse the generated weight vector. + weights = parse_megam_weights(stdout, encoding.length(), explicit) + + # Convert from base-e to base-2 weights. + weights *= numpy.log2(numpy.e) + + # Build the classifier + return MaxentClassifier(encoding, weights) + + +###################################################################### +# { Classifier Trainer: tadm +###################################################################### + + +class TadmMaxentClassifier(MaxentClassifier): + @classmethod + def train(cls, train_toks, **kwargs): + algorithm = kwargs.get('algorithm', 'tao_lmvm') + trace = kwargs.get('trace', 3) + encoding = kwargs.get('encoding', None) + labels = kwargs.get('labels', None) + sigma = kwargs.get('gaussian_prior_sigma', 0) + count_cutoff = kwargs.get('count_cutoff', 0) + max_iter = kwargs.get('max_iter') + ll_delta = kwargs.get('min_lldelta') + + # Construct an encoding from the training data. 
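+        # TADM reads an "events" file: write_tadm_file() emits, for every
+        # training featureset, one candidate line per known label and marks
+        # which of those candidates carries the gold label, hence the
+        # event-style encoding below.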
+ if not encoding: + encoding = TadmEventMaxentFeatureEncoding.train( + train_toks, count_cutoff, labels=labels + ) + + trainfile_fd, trainfile_name = tempfile.mkstemp( + prefix='nltk-tadm-events-', suffix='.gz' + ) + weightfile_fd, weightfile_name = tempfile.mkstemp(prefix='nltk-tadm-weights-') + + trainfile = gzip_open_unicode(trainfile_name, 'w') + write_tadm_file(train_toks, encoding, trainfile) + trainfile.close() + + options = [] + options.extend(['-monitor']) + options.extend(['-method', algorithm]) + if sigma: + options.extend(['-l2', '%.6f' % sigma ** 2]) + if max_iter: + options.extend(['-max_it', '%d' % max_iter]) + if ll_delta: + options.extend(['-fatol', '%.6f' % abs(ll_delta)]) + options.extend(['-events_in', trainfile_name]) + options.extend(['-params_out', weightfile_name]) + if trace < 3: + options.extend(['2>&1']) + else: + options.extend(['-summary']) + + call_tadm(options) + + with open(weightfile_name, 'r') as weightfile: + weights = parse_tadm_weights(weightfile) + + os.remove(trainfile_name) + os.remove(weightfile_name) + + # Convert from base-e to base-2 weights. + weights *= numpy.log2(numpy.e) + + # Build the classifier + return cls(encoding, weights) + + +###################################################################### +# { Demo +###################################################################### +def demo(): + from nltk.classify.util import names_demo + + classifier = names_demo(MaxentClassifier.train) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/megam.py b/venv.bak/lib/python3.7/site-packages/nltk/classify/megam.py new file mode 100644 index 0000000..f86d8aa --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/classify/megam.py @@ -0,0 +1,189 @@ +# Natural Language Toolkit: Interface to Megam Classifier +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +A set of functions used to interface with the external megam_ maxent +optimization package. Before megam can be used, you should tell NLTK where it +can find the megam binary, using the ``config_megam()`` function. Typical +usage: + + >>> from nltk.classify import megam + >>> megam.config_megam() # pass path to megam if not found in PATH # doctest: +SKIP + [Found megam: ...] + +Use with MaxentClassifier. Example below, see MaxentClassifier documentation +for details. + + nltk.classify.MaxentClassifier.train(corpus, 'megam') + +.. _megam: http://www.umiacs.umd.edu/~hal/megam/index.html +""" +from __future__ import print_function + +import subprocess + +from six import string_types + +from nltk import compat +from nltk.internals import find_binary + +try: + import numpy +except ImportError: + numpy = None + +###################################################################### +# { Configuration +###################################################################### + +_megam_bin = None + + +def config_megam(bin=None): + """ + Configure NLTK's interface to the ``megam`` maxent optimization + package. + + :param bin: The full path to the ``megam`` binary. If not specified, + then nltk will search the system for a ``megam`` binary; and if + one is not found, it will raise a ``LookupError`` exception. 
+ :type bin: str + """ + global _megam_bin + _megam_bin = find_binary( + 'megam', + bin, + env_vars=['MEGAM'], + binary_names=['megam.opt', 'megam', 'megam_686', 'megam_i686.opt'], + url='http://www.umiacs.umd.edu/~hal/megam/index.html', + ) + + +###################################################################### +# { Megam Interface Functions +###################################################################### + + +def write_megam_file(train_toks, encoding, stream, bernoulli=True, explicit=True): + """ + Generate an input file for ``megam`` based on the given corpus of + classified tokens. + + :type train_toks: list(tuple(dict, str)) + :param train_toks: Training data, represented as a list of + pairs, the first member of which is a feature dictionary, + and the second of which is a classification label. + + :type encoding: MaxentFeatureEncodingI + :param encoding: A feature encoding, used to convert featuresets + into feature vectors. May optionally implement a cost() method + in order to assign different costs to different class predictions. + + :type stream: stream + :param stream: The stream to which the megam input file should be + written. + + :param bernoulli: If true, then use the 'bernoulli' format. I.e., + all joint features have binary values, and are listed iff they + are true. Otherwise, list feature values explicitly. If + ``bernoulli=False``, then you must call ``megam`` with the + ``-fvals`` option. + + :param explicit: If true, then use the 'explicit' format. I.e., + list the features that would fire for any of the possible + labels, for each token. If ``explicit=True``, then you must + call ``megam`` with the ``-explicit`` option. + """ + # Look up the set of labels. + labels = encoding.labels() + labelnum = dict((label, i) for (i, label) in enumerate(labels)) + + # Write the file, which contains one line per instance. + for featureset, label in train_toks: + # First, the instance number (or, in the weighted multiclass case, the cost of each label). + if hasattr(encoding, 'cost'): + stream.write( + ':'.join(str(encoding.cost(featureset, label, l)) for l in labels) + ) + else: + stream.write('%d' % labelnum[label]) + + # For implicit file formats, just list the features that fire + # for this instance's actual label. + if not explicit: + _write_megam_features(encoding.encode(featureset, label), stream, bernoulli) + + # For explicit formats, list the features that would fire for + # any of the possible labels. + else: + for l in labels: + stream.write(' #') + _write_megam_features(encoding.encode(featureset, l), stream, bernoulli) + + # End of the instance. + stream.write('\n') + + +def parse_megam_weights(s, features_count, explicit=True): + """ + Given the stdout output generated by ``megam`` when training a + model, return a ``numpy`` array containing the corresponding weight + vector. This function does not currently handle bias features. + """ + if numpy is None: + raise ValueError('This function requires that numpy be installed') + assert explicit, 'non-explicit not supported yet' + lines = s.strip().split('\n') + weights = numpy.zeros(features_count, 'd') + for line in lines: + if line.strip(): + fid, weight = line.split() + weights[int(fid)] = float(weight) + return weights + + +def _write_megam_features(vector, stream, bernoulli): + if not vector: + raise ValueError( + 'MEGAM classifier requires the use of an ' 'always-on feature.' 
+ ) + for (fid, fval) in vector: + if bernoulli: + if fval == 1: + stream.write(' %s' % fid) + elif fval != 0: + raise ValueError( + 'If bernoulli=True, then all' 'features must be binary.' + ) + else: + stream.write(' %s %s' % (fid, fval)) + + +def call_megam(args): + """ + Call the ``megam`` binary with the given arguments. + """ + if isinstance(args, string_types): + raise TypeError('args should be a list of strings') + if _megam_bin is None: + config_megam() + + # Call megam via a subprocess + cmd = [_megam_bin] + args + p = subprocess.Popen(cmd, stdout=subprocess.PIPE) + (stdout, stderr) = p.communicate() + + # Check the return code. + if p.returncode != 0: + print() + print(stderr) + raise OSError('megam command failed!') + + if isinstance(stdout, string_types): + return stdout + else: + return stdout.decode('utf-8') diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/naivebayes.py b/venv.bak/lib/python3.7/site-packages/nltk/classify/naivebayes.py new file mode 100644 index 0000000..8859439 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/classify/naivebayes.py @@ -0,0 +1,256 @@ +# Natural Language Toolkit: Naive Bayes Classifiers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +A classifier based on the Naive Bayes algorithm. In order to find the +probability for a label, this algorithm first uses the Bayes rule to +express P(label|features) in terms of P(label) and P(features|label): + +| P(label) * P(features|label) +| P(label|features) = ------------------------------ +| P(features) + +The algorithm then makes the 'naive' assumption that all features are +independent, given the label: + +| P(label) * P(f1|label) * ... * P(fn|label) +| P(label|features) = -------------------------------------------- +| P(features) + +Rather than computing P(features) explicitly, the algorithm just +calculates the numerator for each label, and normalizes them so they +sum to one: + +| P(label) * P(f1|label) * ... * P(fn|label) +| P(label|features) = -------------------------------------------- +| SUM[l]( P(l) * P(f1|l) * ... * P(fn|l) ) +""" +from __future__ import print_function, unicode_literals + +from collections import defaultdict + +from nltk.probability import FreqDist, DictionaryProbDist, ELEProbDist, sum_logs +from nltk.classify.api import ClassifierI + +##////////////////////////////////////////////////////// +## Naive Bayes Classifier +##////////////////////////////////////////////////////// + + +class NaiveBayesClassifier(ClassifierI): + """ + A Naive Bayes classifier. Naive Bayes classifiers are + paramaterized by two probability distributions: + + - P(label) gives the probability that an input will receive each + label, given no information about the input's features. + + - P(fname=fval|label) gives the probability that a given feature + (fname) will receive a given value (fval), given that the + label (label). + + If the classifier encounters an input with a feature that has + never been seen with any label, then rather than assigning a + probability of 0 to all labels, it will ignore that feature. + + The feature value 'None' is reserved for unseen feature values; + you generally should not use 'None' as a feature value for one of + your own features. + """ + + def __init__(self, label_probdist, feature_probdist): + """ + :param label_probdist: P(label), the probability distribution + over labels. It is expressed as a ``ProbDistI`` whose + samples are labels. 
I.e., P(label) = + ``label_probdist.prob(label)``. + + :param feature_probdist: P(fname=fval|label), the probability + distribution for feature values, given labels. It is + expressed as a dictionary whose keys are ``(label, fname)`` + pairs and whose values are ``ProbDistI`` objects over feature + values. I.e., P(fname=fval|label) = + ``feature_probdist[label,fname].prob(fval)``. If a given + ``(label,fname)`` is not a key in ``feature_probdist``, then + it is assumed that the corresponding P(fname=fval|label) + is 0 for all values of ``fval``. + """ + self._label_probdist = label_probdist + self._feature_probdist = feature_probdist + self._labels = list(label_probdist.samples()) + + def labels(self): + return self._labels + + def classify(self, featureset): + return self.prob_classify(featureset).max() + + def prob_classify(self, featureset): + # Discard any feature names that we've never seen before. + # Otherwise, we'll just assign a probability of 0 to + # everything. + featureset = featureset.copy() + for fname in list(featureset.keys()): + for label in self._labels: + if (label, fname) in self._feature_probdist: + break + else: + # print 'Ignoring unseen feature %s' % fname + del featureset[fname] + + # Find the log probabilty of each label, given the features. + # Start with the log probability of the label itself. + logprob = {} + for label in self._labels: + logprob[label] = self._label_probdist.logprob(label) + + # Then add in the log probability of features given labels. + for label in self._labels: + for (fname, fval) in featureset.items(): + if (label, fname) in self._feature_probdist: + feature_probs = self._feature_probdist[label, fname] + logprob[label] += feature_probs.logprob(fval) + else: + # nb: This case will never come up if the + # classifier was created by + # NaiveBayesClassifier.train(). + logprob[label] += sum_logs([]) # = -INF. + + return DictionaryProbDist(logprob, normalize=True, log=True) + + def show_most_informative_features(self, n=10): + # Determine the most relevant features, and display them. + cpdist = self._feature_probdist + print('Most Informative Features') + + for (fname, fval) in self.most_informative_features(n): + + def labelprob(l): + return cpdist[l, fname].prob(fval) + + labels = sorted( + [l for l in self._labels if fval in cpdist[l, fname].samples()], + key=labelprob, + ) + if len(labels) == 1: + continue + l0 = labels[0] + l1 = labels[-1] + if cpdist[l0, fname].prob(fval) == 0: + ratio = 'INF' + else: + ratio = '%8.1f' % ( + cpdist[l1, fname].prob(fval) / cpdist[l0, fname].prob(fval) + ) + print( + ( + '%24s = %-14r %6s : %-6s = %s : 1.0' + % (fname, fval, ("%s" % l1)[:6], ("%s" % l0)[:6], ratio) + ) + ) + + def most_informative_features(self, n=100): + """ + Return a list of the 'most informative' features used by this + classifier. For the purpose of this function, the + informativeness of a feature ``(fname,fval)`` is equal to the + highest value of P(fname=fval|label), for any label, divided by + the lowest value of P(fname=fval|label), for any label: + + | max[ P(fname=fval|label1) / P(fname=fval|label2) ] + """ + if hasattr(self, '_most_informative_features'): + return self._most_informative_features[:n] + else: + # The set of (fname, fval) pairs used by this classifier. + features = set() + # The max & min probability associated w/ each (fname, fval) + # pair. Maps (fname,fval) -> float. 
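+            # The informativeness of (fname, fval) is the ratio of its smallest
+            # to largest P(fname=fval|label) across labels; sorting by that
+            # ratio (ascending) puts the most label-discriminating features
+            # first. Features whose minimum probability is 0 are discarded
+            # below to avoid degenerate zero ratios.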
+ maxprob = defaultdict(lambda: 0.0) + minprob = defaultdict(lambda: 1.0) + + for (label, fname), probdist in self._feature_probdist.items(): + for fval in probdist.samples(): + feature = (fname, fval) + features.add(feature) + p = probdist.prob(fval) + maxprob[feature] = max(p, maxprob[feature]) + minprob[feature] = min(p, minprob[feature]) + if minprob[feature] == 0: + features.discard(feature) + + # Convert features to a list, & sort it by how informative + # features are. + self._most_informative_features = sorted( + features, key=lambda feature_: minprob[feature_] / maxprob[feature_] + ) + return self._most_informative_features[:n] + + @classmethod + def train(cls, labeled_featuresets, estimator=ELEProbDist): + """ + :param labeled_featuresets: A list of classified featuresets, + i.e., a list of tuples ``(featureset, label)``. + """ + label_freqdist = FreqDist() + feature_freqdist = defaultdict(FreqDist) + feature_values = defaultdict(set) + fnames = set() + + # Count up how many times each feature value occurred, given + # the label and featurename. + for featureset, label in labeled_featuresets: + label_freqdist[label] += 1 + for fname, fval in featureset.items(): + # Increment freq(fval|label, fname) + feature_freqdist[label, fname][fval] += 1 + # Record that fname can take the value fval. + feature_values[fname].add(fval) + # Keep a list of all feature names. + fnames.add(fname) + + # If a feature didn't have a value given for an instance, then + # we assume that it gets the implicit value 'None.' This loop + # counts up the number of 'missing' feature values for each + # (label,fname) pair, and increments the count of the fval + # 'None' by that amount. + for label in label_freqdist: + num_samples = label_freqdist[label] + for fname in fnames: + count = feature_freqdist[label, fname].N() + # Only add a None key when necessary, i.e. if there are + # any samples with feature 'fname' missing. + if num_samples - count > 0: + feature_freqdist[label, fname][None] += num_samples - count + feature_values[fname].add(None) + + # Create the P(label) distribution + label_probdist = estimator(label_freqdist) + + # Create the P(fval|label, fname) distribution + feature_probdist = {} + for ((label, fname), freqdist) in feature_freqdist.items(): + probdist = estimator(freqdist, bins=len(feature_values[fname])) + feature_probdist[label, fname] = probdist + + return cls(label_probdist, feature_probdist) + + +##////////////////////////////////////////////////////// +## Demo +##////////////////////////////////////////////////////// + + +def demo(): + from nltk.classify.util import names_demo + + classifier = names_demo(NaiveBayesClassifier.train) + classifier.show_most_informative_features() + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/positivenaivebayes.py b/venv.bak/lib/python3.7/site-packages/nltk/classify/positivenaivebayes.py new file mode 100644 index 0000000..58621f1 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/classify/positivenaivebayes.py @@ -0,0 +1,181 @@ +# Natural Language Toolkit: Positive Naive Bayes Classifier +# +# Copyright (C) 2012 NLTK Project +# Author: Alessandro Presta +# URL: +# For license information, see LICENSE.TXT + +""" +A variant of the Naive Bayes Classifier that performs binary classification with +partially-labeled training sets. In other words, assume we want to build a classifier +that assigns each example to one of two complementary classes (e.g., male names and +female names). 
+If we have a training set with labeled examples for both classes, we can use a +standard Naive Bayes Classifier. However, consider the case when we only have labeled +examples for one of the classes, and other, unlabeled, examples. +Then, assuming a prior distribution on the two labels, we can use the unlabeled set +to estimate the frequencies of the various features. + +Let the two possible labels be 1 and 0, and let's say we only have examples labeled 1 +and unlabeled examples. We are also given an estimate of P(1). + +We compute P(feature|1) exactly as in the standard case. + +To compute P(feature|0), we first estimate P(feature) from the unlabeled set (we are +assuming that the unlabeled examples are drawn according to the given prior distribution) +and then express the conditional probability as: + +| P(feature) - P(feature|1) * P(1) +| P(feature|0) = ---------------------------------- +| P(0) + +Example: + + >>> from nltk.classify import PositiveNaiveBayesClassifier + +Some sentences about sports: + + >>> sports_sentences = [ 'The team dominated the game', + ... 'They lost the ball', + ... 'The game was intense', + ... 'The goalkeeper catched the ball', + ... 'The other team controlled the ball' ] + +Mixed topics, including sports: + + >>> various_sentences = [ 'The President did not comment', + ... 'I lost the keys', + ... 'The team won the game', + ... 'Sara has two kids', + ... 'The ball went off the court', + ... 'They had the ball for the whole game', + ... 'The show is over' ] + +The features of a sentence are simply the words it contains: + + >>> def features(sentence): + ... words = sentence.lower().split() + ... return dict(('contains(%s)' % w, True) for w in words) + +We use the sports sentences as positive examples, the mixed ones ad unlabeled examples: + + >>> positive_featuresets = map(features, sports_sentences) + >>> unlabeled_featuresets = map(features, various_sentences) + >>> classifier = PositiveNaiveBayesClassifier.train(positive_featuresets, + ... unlabeled_featuresets) + +Is the following sentence about sports? + + >>> classifier.classify(features('The cat is on the table')) + False + +What about this one? + + >>> classifier.classify(features('My team lost the game')) + True +""" + +from collections import defaultdict + +from nltk.probability import FreqDist, DictionaryProbDist, ELEProbDist + +from nltk.classify.naivebayes import NaiveBayesClassifier + +##////////////////////////////////////////////////////// +## Positive Naive Bayes Classifier +##////////////////////////////////////////////////////// + + +class PositiveNaiveBayesClassifier(NaiveBayesClassifier): + @staticmethod + def train( + positive_featuresets, + unlabeled_featuresets, + positive_prob_prior=0.5, + estimator=ELEProbDist, + ): + """ + :param positive_featuresets: An iterable of featuresets that are known as positive + examples (i.e., their label is ``True``). + + :param unlabeled_featuresets: An iterable of featuresets whose label is unknown. + + :param positive_prob_prior: A prior estimate of the probability of the label + ``True`` (default 0.5). + """ + positive_feature_freqdist = defaultdict(FreqDist) + unlabeled_feature_freqdist = defaultdict(FreqDist) + feature_values = defaultdict(set) + fnames = set() + + # Count up how many times each feature value occurred in positive examples. 
+ num_positive_examples = 0 + for featureset in positive_featuresets: + for fname, fval in featureset.items(): + positive_feature_freqdist[fname][fval] += 1 + feature_values[fname].add(fval) + fnames.add(fname) + num_positive_examples += 1 + + # Count up how many times each feature value occurred in unlabeled examples. + num_unlabeled_examples = 0 + for featureset in unlabeled_featuresets: + for fname, fval in featureset.items(): + unlabeled_feature_freqdist[fname][fval] += 1 + feature_values[fname].add(fval) + fnames.add(fname) + num_unlabeled_examples += 1 + + # If a feature didn't have a value given for an instance, then we assume that + # it gets the implicit value 'None'. + for fname in fnames: + count = positive_feature_freqdist[fname].N() + positive_feature_freqdist[fname][None] += num_positive_examples - count + feature_values[fname].add(None) + + for fname in fnames: + count = unlabeled_feature_freqdist[fname].N() + unlabeled_feature_freqdist[fname][None] += num_unlabeled_examples - count + feature_values[fname].add(None) + + negative_prob_prior = 1.0 - positive_prob_prior + + # Create the P(label) distribution. + label_probdist = DictionaryProbDist( + {True: positive_prob_prior, False: negative_prob_prior} + ) + + # Create the P(fval|label, fname) distribution. + feature_probdist = {} + for fname, freqdist in positive_feature_freqdist.items(): + probdist = estimator(freqdist, bins=len(feature_values[fname])) + feature_probdist[True, fname] = probdist + + for fname, freqdist in unlabeled_feature_freqdist.items(): + global_probdist = estimator(freqdist, bins=len(feature_values[fname])) + negative_feature_probs = {} + for fval in feature_values[fname]: + prob = ( + global_probdist.prob(fval) + - positive_prob_prior * feature_probdist[True, fname].prob(fval) + ) / negative_prob_prior + # TODO: We need to add some kind of smoothing here, instead of + # setting negative probabilities to zero and normalizing. + negative_feature_probs[fval] = max(prob, 0.0) + feature_probdist[False, fname] = DictionaryProbDist( + negative_feature_probs, normalize=True + ) + + return PositiveNaiveBayesClassifier(label_probdist, feature_probdist) + + +##////////////////////////////////////////////////////// +## Demo +##////////////////////////////////////////////////////// + + +def demo(): + from nltk.classify.util import partial_names_demo + + classifier = partial_names_demo(PositiveNaiveBayesClassifier.train) + classifier.show_most_informative_features() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/rte_classify.py b/venv.bak/lib/python3.7/site-packages/nltk/classify/rte_classify.py new file mode 100644 index 0000000..19e1332 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/classify/rte_classify.py @@ -0,0 +1,180 @@ +# Natural Language Toolkit: RTE Classifier +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ewan Klein +# URL: +# For license information, see LICENSE.TXT + +""" +Simple classifier for RTE corpus. + +It calculates the overlap in words and named entities between text and +hypothesis, and also whether there are words / named entities in the +hypothesis which fail to occur in the text, since this is an indicator that +the hypothesis is more informative than (i.e not entailed by) the text. 
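+For a text/hypothesis pair, ``rte_features()`` below produces a featureset of
+the following shape (the counts here are made up purely for illustration):
+
+    {'alwayson': True,
+     'word_overlap': 3, 'word_hyp_extra': 1,
+     'ne_overlap': 1, 'ne_hyp_extra': 0,
+     'neg_txt': 0, 'neg_hyp': 0}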
+ +TO DO: better Named Entity classification +TO DO: add lemmatization +""" +from __future__ import print_function + +from nltk.tokenize import RegexpTokenizer +from nltk.classify.util import accuracy, check_megam_config +from nltk.classify.maxent import MaxentClassifier + + +class RTEFeatureExtractor(object): + """ + This builds a bag of words for both the text and the hypothesis after + throwing away some stopwords, then calculates overlap and difference. + """ + + def __init__(self, rtepair, stop=True, use_lemmatize=False): + """ + :param rtepair: a ``RTEPair`` from which features should be extracted + :param stop: if ``True``, stopwords are thrown away. + :type stop: bool + """ + self.stop = stop + self.stopwords = set( + [ + 'a', + 'the', + 'it', + 'they', + 'of', + 'in', + 'to', + 'is', + 'have', + 'are', + 'were', + 'and', + 'very', + '.', + ',', + ] + ) + + self.negwords = set(['no', 'not', 'never', 'failed', 'rejected', 'denied']) + # Try to tokenize so that abbreviations, monetary amounts, email + # addresses, URLs are single tokens. + tokenizer = RegexpTokenizer('[\w.@:/]+|\w+|\$[\d.]+') + + # Get the set of word types for text and hypothesis + self.text_tokens = tokenizer.tokenize(rtepair.text) + self.hyp_tokens = tokenizer.tokenize(rtepair.hyp) + self.text_words = set(self.text_tokens) + self.hyp_words = set(self.hyp_tokens) + + if use_lemmatize: + self.text_words = set(self._lemmatize(token) for token in self.text_tokens) + self.hyp_words = set(self._lemmatize(token) for token in self.hyp_tokens) + + if self.stop: + self.text_words = self.text_words - self.stopwords + self.hyp_words = self.hyp_words - self.stopwords + + self._overlap = self.hyp_words & self.text_words + self._hyp_extra = self.hyp_words - self.text_words + self._txt_extra = self.text_words - self.hyp_words + + def overlap(self, toktype, debug=False): + """ + Compute the overlap between text and hypothesis. + + :param toktype: distinguish Named Entities from ordinary words + :type toktype: 'ne' or 'word' + """ + ne_overlap = set(token for token in self._overlap if self._ne(token)) + if toktype == 'ne': + if debug: + print("ne overlap", ne_overlap) + return ne_overlap + elif toktype == 'word': + if debug: + print("word overlap", self._overlap - ne_overlap) + return self._overlap - ne_overlap + else: + raise ValueError("Type not recognized:'%s'" % toktype) + + def hyp_extra(self, toktype, debug=True): + """ + Compute the extraneous material in the hypothesis. + + :param toktype: distinguish Named Entities from ordinary words + :type toktype: 'ne' or 'word' + """ + ne_extra = set(token for token in self._hyp_extra if self._ne(token)) + if toktype == 'ne': + return ne_extra + elif toktype == 'word': + return self._hyp_extra - ne_extra + else: + raise ValueError("Type not recognized: '%s'" % toktype) + + @staticmethod + def _ne(token): + """ + This just assumes that words in all caps or titles are + named entities. + + :type token: str + """ + if token.istitle() or token.isupper(): + return True + return False + + @staticmethod + def _lemmatize(word): + """ + Use morphy from WordNet to find the base form of verbs. 
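+        Note: this refers to ``nltk.corpus.wordnet`` through the top-level
+        ``nltk`` name, which this module does not import itself; it assumes
+        ``import nltk`` has happened elsewhere.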
+ """ + lemma = nltk.corpus.wordnet.morphy(word, pos=nltk.corpus.wordnet.VERB) + if lemma is not None: + return lemma + return word + + +def rte_features(rtepair): + extractor = RTEFeatureExtractor(rtepair) + features = {} + features['alwayson'] = True + features['word_overlap'] = len(extractor.overlap('word')) + features['word_hyp_extra'] = len(extractor.hyp_extra('word')) + features['ne_overlap'] = len(extractor.overlap('ne')) + features['ne_hyp_extra'] = len(extractor.hyp_extra('ne')) + features['neg_txt'] = len(extractor.negwords & extractor.text_words) + features['neg_hyp'] = len(extractor.negwords & extractor.hyp_words) + return features + + +def rte_featurize(rte_pairs): + return [(rte_features(pair), pair.value) for pair in rte_pairs] + + +def rte_classifier(algorithm): + from nltk.corpus import rte as rte_corpus + + train_set = rte_corpus.pairs(['rte1_dev.xml', 'rte2_dev.xml', 'rte3_dev.xml']) + test_set = rte_corpus.pairs(['rte1_test.xml', 'rte2_test.xml', 'rte3_test.xml']) + featurized_train_set = rte_featurize(train_set) + featurized_test_set = rte_featurize(test_set) + # Train the classifier + print('Training classifier...') + if algorithm in ['megam', 'BFGS']: # MEGAM based algorithms. + # Ensure that MEGAM is configured first. + check_megam_config() + clf = lambda x: MaxentClassifier.train(featurized_train_set, algorithm) + elif algorithm in ['GIS', 'IIS']: # Use default GIS/IIS MaxEnt algorithm + clf = MaxentClassifier.train(featurized_train_set, algorithm) + else: + err_msg = str( + "RTEClassifier only supports these algorithms:\n " + "'megam', 'BFGS', 'GIS', 'IIS'.\n" + ) + raise Exception(err_msg) + print('Testing classifier...') + acc = accuracy(clf, featurized_test_set) + print('Accuracy: %6.4f' % acc) + return clf diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/scikitlearn.py b/venv.bak/lib/python3.7/site-packages/nltk/classify/scikitlearn.py new file mode 100644 index 0000000..c00dcdc --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/classify/scikitlearn.py @@ -0,0 +1,157 @@ +# Natural Language Toolkit: Interface to scikit-learn classifiers +# +# Author: Lars Buitinck +# URL: +# For license information, see LICENSE.TXT +""" +scikit-learn (http://scikit-learn.org) is a machine learning library for +Python. It supports many classification algorithms, including SVMs, +Naive Bayes, logistic regression (MaxEnt) and decision trees. + +This package implements a wrapper around scikit-learn classifiers. To use this +wrapper, construct a scikit-learn estimator object, then use that to construct +a SklearnClassifier. E.g., to wrap a linear SVM with default settings: + +>>> from sklearn.svm import LinearSVC +>>> from nltk.classify.scikitlearn import SklearnClassifier +>>> classif = SklearnClassifier(LinearSVC()) + +A scikit-learn classifier may include preprocessing steps when it's wrapped +in a Pipeline object. The following constructs and wraps a Naive Bayes text +classifier with tf-idf weighting and chi-square feature selection to get the +best 1000 features: + +>>> from sklearn.feature_extraction.text import TfidfTransformer +>>> from sklearn.feature_selection import SelectKBest, chi2 +>>> from sklearn.naive_bayes import MultinomialNB +>>> from sklearn.pipeline import Pipeline +>>> pipeline = Pipeline([('tfidf', TfidfTransformer()), +... ('chi2', SelectKBest(chi2, k=1000)), +... 
('nb', MultinomialNB())]) +>>> classif = SklearnClassifier(pipeline) +""" +from __future__ import print_function, unicode_literals + +from six.moves import zip + +from nltk.classify.api import ClassifierI +from nltk.probability import DictionaryProbDist +from nltk import compat + +try: + from sklearn.feature_extraction import DictVectorizer + from sklearn.preprocessing import LabelEncoder +except ImportError: + pass + +__all__ = ['SklearnClassifier'] + + +@compat.python_2_unicode_compatible +class SklearnClassifier(ClassifierI): + """Wrapper for scikit-learn classifiers.""" + + def __init__(self, estimator, dtype=float, sparse=True): + """ + :param estimator: scikit-learn classifier object. + + :param dtype: data type used when building feature array. + scikit-learn estimators work exclusively on numeric data. The + default value should be fine for almost all situations. + + :param sparse: Whether to use sparse matrices internally. + The estimator must support these; not all scikit-learn classifiers + do (see their respective documentation and look for "sparse + matrix"). The default value is True, since most NLP problems + involve sparse feature sets. Setting this to False may take a + great amount of memory. + :type sparse: boolean. + """ + self._clf = estimator + self._encoder = LabelEncoder() + self._vectorizer = DictVectorizer(dtype=dtype, sparse=sparse) + + def __repr__(self): + return "" % self._clf + + def classify_many(self, featuresets): + """Classify a batch of samples. + + :param featuresets: An iterable over featuresets, each a dict mapping + strings to either numbers, booleans or strings. + :return: The predicted class label for each input sample. + :rtype: list + """ + X = self._vectorizer.transform(featuresets) + classes = self._encoder.classes_ + return [classes[i] for i in self._clf.predict(X)] + + def prob_classify_many(self, featuresets): + """Compute per-class probabilities for a batch of samples. + + :param featuresets: An iterable over featuresets, each a dict mapping + strings to either numbers, booleans or strings. + :rtype: list of ``ProbDistI`` + """ + X = self._vectorizer.transform(featuresets) + y_proba_list = self._clf.predict_proba(X) + return [self._make_probdist(y_proba) for y_proba in y_proba_list] + + def labels(self): + """The class labels used by this classifier. + + :rtype: list + """ + return list(self._encoder.classes_) + + def train(self, labeled_featuresets): + """ + Train (fit) the scikit-learn estimator. + + :param labeled_featuresets: A list of ``(featureset, label)`` + where each ``featureset`` is a dict mapping strings to either + numbers, booleans or strings. + """ + + X, y = list(zip(*labeled_featuresets)) + X = self._vectorizer.fit_transform(X) + y = self._encoder.fit_transform(y) + self._clf.fit(X, y) + + return self + + def _make_probdist(self, y_proba): + classes = self._encoder.classes_ + return DictionaryProbDist(dict((classes[i], p) for i, p in enumerate(y_proba))) + + +# skip doctests if scikit-learn is not installed +def setup_module(module): + from nose import SkipTest + + try: + import sklearn + except ImportError: + raise SkipTest("scikit-learn is not installed") + + +if __name__ == "__main__": + from nltk.classify.util import names_demo, names_demo_features + from sklearn.linear_model import LogisticRegression + from sklearn.naive_bayes import BernoulliNB + + # Bernoulli Naive Bayes is designed for binary classification. We set the + # binarize option to False since we know we're passing boolean features. 
+ print("scikit-learn Naive Bayes:") + names_demo( + SklearnClassifier(BernoulliNB(binarize=False)).train, + features=names_demo_features, + ) + + # The C parameter on logistic regression (MaxEnt) controls regularization. + # The higher it's set, the less regularized the classifier is. + print("\n\nscikit-learn logistic regression:") + names_demo( + SklearnClassifier(LogisticRegression(C=1000)).train, + features=names_demo_features, + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/senna.py b/venv.bak/lib/python3.7/site-packages/nltk/classify/senna.py new file mode 100644 index 0000000..0ccd29f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/classify/senna.py @@ -0,0 +1,195 @@ +# encoding: utf-8 +# Natural Language Toolkit: Senna Interface +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Rami Al-Rfou' +# URL: +# For license information, see LICENSE.TXT + +""" +A general interface to the SENNA pipeline that supports any of the +operations specified in SUPPORTED_OPERATIONS. + +Applying multiple operations at once has the speed advantage. For example, +Senna will automatically determine POS tags if you are extracting named +entities. Applying both of the operations will cost only the time of +extracting the named entities. + +The SENNA pipeline has a fixed maximum size of the sentences that it can read. +By default it is 1024 token/sentence. If you have larger sentences, changing +the MAX_SENTENCE_SIZE value in SENNA_main.c should be considered and your +system specific binary should be rebuilt. Otherwise this could introduce +misalignment errors. + +The input is: +- path to the directory that contains SENNA executables. If the path is incorrect, + Senna will automatically search for executable file specified in SENNA environment variable +- List of the operations needed to be performed. 
+- (optionally) the encoding of the input data (default:utf-8) + +Note: Unit tests for this module can be found in test/unit/test_senna.py + + >>> from __future__ import unicode_literals + >>> from nltk.classify import Senna + >>> pipeline = Senna('/usr/share/senna-v3.0', ['pos', 'chk', 'ner']) + >>> sent = 'Dusseldorf is an international business center'.split() + >>> [(token['word'], token['chk'], token['ner'], token['pos']) for token in pipeline.tag(sent)] # doctest: +SKIP + [('Dusseldorf', 'B-NP', 'B-LOC', 'NNP'), ('is', 'B-VP', 'O', 'VBZ'), ('an', 'B-NP', 'O', 'DT'), + ('international', 'I-NP', 'O', 'JJ'), ('business', 'I-NP', 'O', 'NN'), ('center', 'I-NP', 'O', 'NN')] +""" + + +from __future__ import unicode_literals +from os import path, sep, environ +from subprocess import Popen, PIPE +from platform import architecture, system + +from six import text_type + +from nltk.tag.api import TaggerI +from nltk.compat import python_2_unicode_compatible + +_senna_url = 'http://ml.nec-labs.com/senna/' + + +@python_2_unicode_compatible +class Senna(TaggerI): + + SUPPORTED_OPERATIONS = ['pos', 'chk', 'ner'] + + def __init__(self, senna_path, operations, encoding='utf-8'): + self._encoding = encoding + self._path = path.normpath(senna_path) + sep + + # Verifies the existence of the executable on the self._path first + # senna_binary_file_1 = self.executable(self._path) + exe_file_1 = self.executable(self._path) + if not path.isfile(exe_file_1): + # Check for the system environment + if 'SENNA' in environ: + # self._path = path.join(environ['SENNA'],'') + self._path = path.normpath(environ['SENNA']) + sep + exe_file_2 = self.executable(self._path) + if not path.isfile(exe_file_2): + raise OSError( + "Senna executable expected at %s or %s but not found" + % (exe_file_1, exe_file_2) + ) + + self.operations = operations + + def executable(self, base_path): + """ + The function that determines the system specific binary that should be + used in the pipeline. In case, the system is not known the default senna binary will + be used. + """ + os_name = system() + if os_name == 'Linux': + bits = architecture()[0] + if bits == '64bit': + return path.join(base_path, 'senna-linux64') + return path.join(base_path, 'senna-linux32') + if os_name == 'Windows': + return path.join(base_path, 'senna-win32.exe') + if os_name == 'Darwin': + return path.join(base_path, 'senna-osx') + return path.join(base_path, 'senna') + + def _map(self): + """ + A method that calculates the order of the columns that SENNA pipeline + will output the tags into. This depends on the operations being ordered. + """ + _map = {} + i = 1 + for operation in Senna.SUPPORTED_OPERATIONS: + if operation in self.operations: + _map[operation] = i + i += 1 + return _map + + def tag(self, tokens): + """ + Applies the specified operation(s) on a list of tokens. + """ + return self.tag_sents([tokens])[0] + + def tag_sents(self, sentences): + """ + Applies the tag method over a list of sentences. This method will return a + list of dictionaries. Every dictionary will contain a word with its + calculated annotations/tags. 
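+        Each dictionary contains a 'word' key plus one key per requested
+        operation (e.g. 'pos', 'chk', 'ner'), as assembled in the loop over
+        the SENNA output below.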
+ """ + encoding = self._encoding + + if not path.isfile(self.executable(self._path)): + raise OSError( + "Senna executable expected at %s but not found" + % self.executable(self._path) + ) + + # Build the senna command to run the tagger + _senna_cmd = [ + self.executable(self._path), + '-path', + self._path, + '-usrtokens', + '-iobtags', + ] + _senna_cmd.extend(['-' + op for op in self.operations]) + + # Serialize the actual sentences to a temporary string + _input = '\n'.join((' '.join(x) for x in sentences)) + '\n' + if isinstance(_input, text_type) and encoding: + _input = _input.encode(encoding) + + # Run the tagger and get the output + p = Popen(_senna_cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE) + (stdout, stderr) = p.communicate(input=_input) + senna_output = stdout + + # Check the return code. + if p.returncode != 0: + raise RuntimeError('Senna command failed! Details: %s' % stderr) + + if encoding: + senna_output = stdout.decode(encoding) + + # Output the tagged sentences + map_ = self._map() + tagged_sentences = [[]] + sentence_index = 0 + token_index = 0 + for tagged_word in senna_output.strip().split("\n"): + if not tagged_word: + tagged_sentences.append([]) + sentence_index += 1 + token_index = 0 + continue + tags = tagged_word.split('\t') + result = {} + for tag in map_: + result[tag] = tags[map_[tag]].strip() + try: + result['word'] = sentences[sentence_index][token_index] + except IndexError: + raise IndexError( + "Misalignment error occurred at sentence number %d. Possible reason" + " is that the sentence size exceeded the maximum size. Check the " + "documentation of Senna class for more information." + % sentence_index + ) + tagged_sentences[-1].append(result) + token_index += 1 + return tagged_sentences + + +# skip doctests if Senna is not installed +def setup_module(module): + from nose import SkipTest + + try: + tagger = Senna('/usr/share/senna-v3.0', ['pos', 'chk', 'ner']) + except OSError: + raise SkipTest("Senna executable not found") diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/svm.py b/venv.bak/lib/python3.7/site-packages/nltk/classify/svm.py new file mode 100644 index 0000000..b6e0b3a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/classify/svm.py @@ -0,0 +1,17 @@ +# Natural Language Toolkit: SVM-based classifier +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Leon Derczynski +# +# URL: +# For license information, see LICENSE.TXT +""" +nltk.classify.svm was deprecated. For classification based +on support vector machines SVMs use nltk.classify.scikitlearn +(or `scikit-learn `_ directly). 
+""" + + +class SvmClassifier(object): + def __init__(self, *args, **kwargs): + raise NotImplementedError(__doc__) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/tadm.py b/venv.bak/lib/python3.7/site-packages/nltk/classify/tadm.py new file mode 100644 index 0000000..a2f8daf --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/classify/tadm.py @@ -0,0 +1,124 @@ +# Natural Language Toolkit: Interface to TADM Classifier +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Joseph Frazee +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals + +import sys +import subprocess + +from six import string_types + +from nltk.internals import find_binary + +try: + import numpy +except ImportError: + pass + +_tadm_bin = None + + +def config_tadm(bin=None): + global _tadm_bin + _tadm_bin = find_binary( + 'tadm', bin, env_vars=['TADM'], binary_names=['tadm'], url='http://tadm.sf.net' + ) + + +def write_tadm_file(train_toks, encoding, stream): + """ + Generate an input file for ``tadm`` based on the given corpus of + classified tokens. + + :type train_toks: list(tuple(dict, str)) + :param train_toks: Training data, represented as a list of + pairs, the first member of which is a feature dictionary, + and the second of which is a classification label. + :type encoding: TadmEventMaxentFeatureEncoding + :param encoding: A feature encoding, used to convert featuresets + into feature vectors. + :type stream: stream + :param stream: The stream to which the ``tadm`` input file should be + written. + """ + # See the following for a file format description: + # + # http://sf.net/forum/forum.php?thread_id=1391502&forum_id=473054 + # http://sf.net/forum/forum.php?thread_id=1675097&forum_id=473054 + labels = encoding.labels() + for featureset, label in train_toks: + length_line = '%d\n' % len(labels) + stream.write(length_line) + for known_label in labels: + v = encoding.encode(featureset, known_label) + line = '%d %d %s\n' % ( + int(label == known_label), + len(v), + ' '.join('%d %d' % u for u in v), + ) + stream.write(line) + + +def parse_tadm_weights(paramfile): + """ + Given the stdout output generated by ``tadm`` when training a + model, return a ``numpy`` array containing the corresponding weight + vector. + """ + weights = [] + for line in paramfile: + weights.append(float(line.strip())) + return numpy.array(weights, 'd') + + +def call_tadm(args): + """ + Call the ``tadm`` binary with the given arguments. + """ + if isinstance(args, string_types): + raise TypeError('args should be a list of strings') + if _tadm_bin is None: + config_tadm() + + # Call tadm via a subprocess + cmd = [_tadm_bin] + args + p = subprocess.Popen(cmd, stdout=sys.stdout) + (stdout, stderr) = p.communicate() + + # Check the return code. 
+ if p.returncode != 0: + print() + print(stderr) + raise OSError('tadm command failed!') + + +def names_demo(): + from nltk.classify.util import names_demo + from nltk.classify.maxent import TadmMaxentClassifier + + classifier = names_demo(TadmMaxentClassifier.train) + + +def encoding_demo(): + import sys + from nltk.classify.maxent import TadmEventMaxentFeatureEncoding + + tokens = [ + ({'f0': 1, 'f1': 1, 'f3': 1}, 'A'), + ({'f0': 1, 'f2': 1, 'f4': 1}, 'B'), + ({'f0': 2, 'f2': 1, 'f3': 1, 'f4': 1}, 'A'), + ] + encoding = TadmEventMaxentFeatureEncoding.train(tokens) + write_tadm_file(tokens, encoding, sys.stdout) + print() + for i in range(encoding.length()): + print('%s --> %d' % (encoding.describe(i), i)) + print() + + +if __name__ == '__main__': + encoding_demo() + names_demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/textcat.py b/venv.bak/lib/python3.7/site-packages/nltk/classify/textcat.py new file mode 100644 index 0000000..8d35605 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/classify/textcat.py @@ -0,0 +1,208 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Language ID module using TextCat algorithm +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Avital Pekker +# +# URL: +# For license information, see LICENSE.TXT + +""" +A module for language identification using the TextCat algorithm. +An implementation of the text categorization algorithm +presented in Cavnar, W. B. and J. M. Trenkle, +"N-Gram-Based Text Categorization". + +The algorithm takes advantage of Zipf's law and uses +n-gram frequencies to profile languages and text-yet to +be identified-then compares using a distance measure. + +Language n-grams are provided by the "An Crubadan" +project. A corpus reader was created separately to read +those files. + +For details regarding the algorithm, see: +http://www.let.rug.nl/~vannoord/TextCat/textcat.pdf + +For details about An Crubadan, see: +http://borel.slu.edu/crubadan/index.html +""" + +# Ensure that literal strings default to unicode rather than str. +from __future__ import print_function, unicode_literals + +from nltk.compat import PY3 +from nltk.util import trigrams + +if PY3: + from sys import maxsize +else: + from sys import maxint + +# Note: this is NOT "re" you're likely used to. The regex module +# is an alternative to the standard re module that supports +# Unicode codepoint properties with the \p{} syntax. +# You may have to "pip install regx" +try: + import regex as re +except ImportError: + re = None +###################################################################### +## Language identification using TextCat +###################################################################### + + +class TextCat(object): + + _corpus = None + fingerprints = {} + _START_CHAR = "<" + _END_CHAR = ">" + + last_distances = {} + + def __init__(self): + if not re: + raise EnvironmentError( + "classify.textcat requires the regex module that " + "supports unicode. Try '$ pip install regex' and " + "see https://pypi.python.org/pypi/regex for " + "further details." 
+ ) + + from nltk.corpus import crubadan + + self._corpus = crubadan + # Load all language ngrams into cache + for lang in self._corpus.langs(): + self._corpus.lang_freq(lang) + + def remove_punctuation(self, text): + ''' Get rid of punctuation except apostrophes ''' + return re.sub(r"[^\P{P}\']+", "", text) + + def profile(self, text): + ''' Create FreqDist of trigrams within text ''' + from nltk import word_tokenize, FreqDist + + clean_text = self.remove_punctuation(text) + tokens = word_tokenize(clean_text) + + fingerprint = FreqDist() + for t in tokens: + token_trigram_tuples = trigrams(self._START_CHAR + t + self._END_CHAR) + token_trigrams = [''.join(tri) for tri in token_trigram_tuples] + + for cur_trigram in token_trigrams: + if cur_trigram in fingerprint: + fingerprint[cur_trigram] += 1 + else: + fingerprint[cur_trigram] = 1 + + return fingerprint + + def calc_dist(self, lang, trigram, text_profile): + ''' Calculate the "out-of-place" measure between the + text and language profile for a single trigram ''' + + lang_fd = self._corpus.lang_freq(lang) + dist = 0 + + if trigram in lang_fd: + idx_lang_profile = list(lang_fd.keys()).index(trigram) + idx_text = list(text_profile.keys()).index(trigram) + + # print(idx_lang_profile, ", ", idx_text) + dist = abs(idx_lang_profile - idx_text) + else: + # Arbitrary but should be larger than + # any possible trigram file length + # in terms of total lines + if PY3: + dist = maxsize + else: + dist = maxint + + return dist + + def lang_dists(self, text): + ''' Calculate the "out-of-place" measure between + the text and all languages ''' + + distances = {} + profile = self.profile(text) + # For all the languages + for lang in self._corpus._all_lang_freq.keys(): + # Calculate distance metric for every trigram in + # input text to be identified + lang_dist = 0 + for trigram in profile: + lang_dist += self.calc_dist(lang, trigram, profile) + + distances[lang] = lang_dist + + return distances + + def guess_language(self, text): + ''' Find the language with the min distance + to the text and return its ISO 639-3 code ''' + self.last_distances = self.lang_dists(text) + + return min(self.last_distances, key=self.last_distances.get) + #################################################') + + +def demo(): + from nltk.corpus import udhr + + langs = [ + 'Kurdish-UTF8', + 'Abkhaz-UTF8', + 'Farsi_Persian-UTF8', + 'Hindi-UTF8', + 'Hawaiian-UTF8', + 'Russian-UTF8', + 'Vietnamese-UTF8', + 'Serbian_Srpski-UTF8', + 'Esperanto-UTF8', + ] + + friendly = { + 'kmr': 'Northern Kurdish', + 'abk': 'Abkhazian', + 'pes': 'Iranian Persian', + 'hin': 'Hindi', + 'haw': 'Hawaiian', + 'rus': 'Russian', + 'vie': 'Vietnamese', + 'srp': 'Serbian', + 'epo': 'Esperanto', + } + + tc = TextCat() + + for cur_lang in langs: + # Get raw data from UDHR corpus + raw_sentences = udhr.sents(cur_lang) + rows = len(raw_sentences) - 1 + cols = list(map(len, raw_sentences)) + + sample = '' + + # Generate a sample text of the language + for i in range(0, rows): + cur_sent = '' + for j in range(0, cols[i]): + cur_sent += ' ' + raw_sentences[i][j] + + sample += cur_sent + + # Try to detect what it is + print('Language snippet: ' + sample[0:140] + '...') + guess = tc.guess_language(sample) + print('Language detection: %s (%s)' % (guess, friendly[guess])) + print('#' * 140) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/util.py b/venv.bak/lib/python3.7/site-packages/nltk/classify/util.py new file mode 100644 index 0000000..a0a15a6 --- /dev/null 
+++ b/venv.bak/lib/python3.7/site-packages/nltk/classify/util.py @@ -0,0 +1,344 @@ +# Natural Language Toolkit: Classifier Utility Functions +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird (minor additions) +# URL: +# For license information, see LICENSE.TXT + +""" +Utility functions and classes for classifiers. +""" +from __future__ import print_function, division + +import math + +# from nltk.util import Deprecated +import nltk.classify.util # for accuracy & log_likelihood +from nltk.util import LazyMap + +###################################################################### +# { Helper Functions +###################################################################### + +# alternative name possibility: 'map_featurefunc()'? +# alternative name possibility: 'detect_features()'? +# alternative name possibility: 'map_featuredetect()'? +# or.. just have users use LazyMap directly? +def apply_features(feature_func, toks, labeled=None): + """ + Use the ``LazyMap`` class to construct a lazy list-like + object that is analogous to ``map(feature_func, toks)``. In + particular, if ``labeled=False``, then the returned list-like + object's values are equal to:: + + [feature_func(tok) for tok in toks] + + If ``labeled=True``, then the returned list-like object's values + are equal to:: + + [(feature_func(tok), label) for (tok, label) in toks] + + The primary purpose of this function is to avoid the memory + overhead involved in storing all the featuresets for every token + in a corpus. Instead, these featuresets are constructed lazily, + as-needed. The reduction in memory overhead can be especially + significant when the underlying list of tokens is itself lazy (as + is the case with many corpus readers). + + :param feature_func: The function that will be applied to each + token. It should return a featureset -- i.e., a dict + mapping feature names to feature values. + :param toks: The list of tokens to which ``feature_func`` should be + applied. If ``labeled=True``, then the list elements will be + passed directly to ``feature_func()``. If ``labeled=False``, + then the list elements should be tuples ``(tok,label)``, and + ``tok`` will be passed to ``feature_func()``. + :param labeled: If true, then ``toks`` contains labeled tokens -- + i.e., tuples of the form ``(tok, label)``. (Default: + auto-detect based on types.) + """ + if labeled is None: + labeled = toks and isinstance(toks[0], (tuple, list)) + if labeled: + + def lazy_func(labeled_token): + return (feature_func(labeled_token[0]), labeled_token[1]) + + return LazyMap(lazy_func, toks) + else: + return LazyMap(feature_func, toks) + + +def attested_labels(tokens): + """ + :return: A list of all labels that are attested in the given list + of tokens. + :rtype: list of (immutable) + :param tokens: The list of classified tokens from which to extract + labels. A classified token has the form ``(token, label)``. 
+ :type tokens: list + """ + return tuple(set(label for (tok, label) in tokens)) + + +def log_likelihood(classifier, gold): + results = classifier.prob_classify_many([fs for (fs, l) in gold]) + ll = [pdist.prob(l) for ((fs, l), pdist) in zip(gold, results)] + return math.log(sum(ll) / len(ll)) + + +def accuracy(classifier, gold): + results = classifier.classify_many([fs for (fs, l) in gold]) + correct = [l == r for ((fs, l), r) in zip(gold, results)] + if correct: + return sum(correct) / len(correct) + else: + return 0 + + +class CutoffChecker(object): + """ + A helper class that implements cutoff checks based on number of + iterations and log likelihood. + + Accuracy cutoffs are also implemented, but they're almost never + a good idea to use. + """ + + def __init__(self, cutoffs): + self.cutoffs = cutoffs.copy() + if 'min_ll' in cutoffs: + cutoffs['min_ll'] = -abs(cutoffs['min_ll']) + if 'min_lldelta' in cutoffs: + cutoffs['min_lldelta'] = abs(cutoffs['min_lldelta']) + self.ll = None + self.acc = None + self.iter = 1 + + def check(self, classifier, train_toks): + cutoffs = self.cutoffs + self.iter += 1 + if 'max_iter' in cutoffs and self.iter >= cutoffs['max_iter']: + return True # iteration cutoff. + + new_ll = nltk.classify.util.log_likelihood(classifier, train_toks) + if math.isnan(new_ll): + return True + + if 'min_ll' in cutoffs or 'min_lldelta' in cutoffs: + if 'min_ll' in cutoffs and new_ll >= cutoffs['min_ll']: + return True # log likelihood cutoff + if ( + 'min_lldelta' in cutoffs + and self.ll + and ((new_ll - self.ll) <= abs(cutoffs['min_lldelta'])) + ): + return True # log likelihood delta cutoff + self.ll = new_ll + + if 'max_acc' in cutoffs or 'min_accdelta' in cutoffs: + new_acc = nltk.classify.util.log_likelihood(classifier, train_toks) + if 'max_acc' in cutoffs and new_acc >= cutoffs['max_acc']: + return True # log likelihood cutoff + if ( + 'min_accdelta' in cutoffs + and self.acc + and ((new_acc - self.acc) <= abs(cutoffs['min_accdelta'])) + ): + return True # log likelihood delta cutoff + self.acc = new_acc + + return False # no cutoff reached. + + +###################################################################### +# { Demos +###################################################################### + + +def names_demo_features(name): + features = {} + features['alwayson'] = True + features['startswith'] = name[0].lower() + features['endswith'] = name[-1].lower() + for letter in 'abcdefghijklmnopqrstuvwxyz': + features['count(%s)' % letter] = name.lower().count(letter) + features['has(%s)' % letter] = letter in name.lower() + return features + + +def binary_names_demo_features(name): + features = {} + features['alwayson'] = True + features['startswith(vowel)'] = name[0].lower() in 'aeiouy' + features['endswith(vowel)'] = name[-1].lower() in 'aeiouy' + for letter in 'abcdefghijklmnopqrstuvwxyz': + features['count(%s)' % letter] = name.lower().count(letter) + features['has(%s)' % letter] = letter in name.lower() + features['startswith(%s)' % letter] = letter == name[0].lower() + features['endswith(%s)' % letter] = letter == name[-1].lower() + return features + + +def names_demo(trainer, features=names_demo_features): + from nltk.corpus import names + import random + + # Construct a list of classified names, using the names corpus. + namelist = [(name, 'male') for name in names.words('male.txt')] + [ + (name, 'female') for name in names.words('female.txt') + ] + + # Randomly split the names into a test & train set. 
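+    # A fixed seed keeps the demo reproducible; the first 5000 shuffled names
+    # are used for training and the next 500 for testing.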
+ random.seed(123456) + random.shuffle(namelist) + train = namelist[:5000] + test = namelist[5000:5500] + + # Train up a classifier. + print('Training classifier...') + classifier = trainer([(features(n), g) for (n, g) in train]) + + # Run the classifier on the test data. + print('Testing classifier...') + acc = accuracy(classifier, [(features(n), g) for (n, g) in test]) + print('Accuracy: %6.4f' % acc) + + # For classifiers that can find probabilities, show the log + # likelihood and some sample probability distributions. + try: + test_featuresets = [features(n) for (n, g) in test] + pdists = classifier.prob_classify_many(test_featuresets) + ll = [pdist.logprob(gold) for ((name, gold), pdist) in zip(test, pdists)] + print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test))) + print() + print('Unseen Names P(Male) P(Female)\n' + '-' * 40) + for ((name, gender), pdist) in list(zip(test, pdists))[:5]: + if gender == 'male': + fmt = ' %-15s *%6.4f %6.4f' + else: + fmt = ' %-15s %6.4f *%6.4f' + print(fmt % (name, pdist.prob('male'), pdist.prob('female'))) + except NotImplementedError: + pass + + # Return the classifier + return classifier + + +def partial_names_demo(trainer, features=names_demo_features): + from nltk.corpus import names + import random + + male_names = names.words('male.txt') + female_names = names.words('female.txt') + + random.seed(654321) + random.shuffle(male_names) + random.shuffle(female_names) + + # Create a list of male names to be used as positive-labeled examples for training + positive = map(features, male_names[:2000]) + + # Create a list of male and female names to be used as unlabeled examples + unlabeled = map(features, male_names[2000:2500] + female_names[:500]) + + # Create a test set with correctly-labeled male and female names + test = [(name, True) for name in male_names[2500:2750]] + [ + (name, False) for name in female_names[500:750] + ] + + random.shuffle(test) + + # Train up a classifier. + print('Training classifier...') + classifier = trainer(positive, unlabeled) + + # Run the classifier on the test data. + print('Testing classifier...') + acc = accuracy(classifier, [(features(n), m) for (n, m) in test]) + print('Accuracy: %6.4f' % acc) + + # For classifiers that can find probabilities, show the log + # likelihood and some sample probability distributions. + try: + test_featuresets = [features(n) for (n, m) in test] + pdists = classifier.prob_classify_many(test_featuresets) + ll = [pdist.logprob(gold) for ((name, gold), pdist) in zip(test, pdists)] + print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test))) + print() + print('Unseen Names P(Male) P(Female)\n' + '-' * 40) + for ((name, is_male), pdist) in zip(test, pdists)[:5]: + if is_male == True: + fmt = ' %-15s *%6.4f %6.4f' + else: + fmt = ' %-15s %6.4f *%6.4f' + print(fmt % (name, pdist.prob(True), pdist.prob(False))) + except NotImplementedError: + pass + + # Return the classifier + return classifier + + +_inst_cache = {} + + +def wsd_demo(trainer, word, features, n=1000): + from nltk.corpus import senseval + import random + + # Get the instances. + print('Reading data...') + global _inst_cache + if word not in _inst_cache: + _inst_cache[word] = [(i, i.senses[0]) for i in senseval.instances(word)] + instances = _inst_cache[word][:] + if n > len(instances): + n = len(instances) + senses = list(set(l for (i, l) in instances)) + print(' Senses: ' + ' '.join(senses)) + + # Randomly split the names into a test & train set. 
+ print('Splitting into test & train...') + random.seed(123456) + random.shuffle(instances) + train = instances[: int(0.8 * n)] + test = instances[int(0.8 * n) : n] + + # Train up a classifier. + print('Training classifier...') + classifier = trainer([(features(i), l) for (i, l) in train]) + + # Run the classifier on the test data. + print('Testing classifier...') + acc = accuracy(classifier, [(features(i), l) for (i, l) in test]) + print('Accuracy: %6.4f' % acc) + + # For classifiers that can find probabilities, show the log + # likelihood and some sample probability distributions. + try: + test_featuresets = [features(i) for (i, n) in test] + pdists = classifier.prob_classify_many(test_featuresets) + ll = [pdist.logprob(gold) for ((name, gold), pdist) in zip(test, pdists)] + print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test))) + except NotImplementedError: + pass + + # Return the classifier + return classifier + + +def check_megam_config(): + """ + Checks whether the MEGAM binary is configured. + """ + try: + _megam_bin + except NameError: + err_msg = str( + "Please configure your megam binary first, e.g.\n" + ">>> nltk.config_megam('/usr/bin/local/megam')" + ) + raise NameError(err_msg) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/classify/weka.py b/venv.bak/lib/python3.7/site-packages/nltk/classify/weka.py new file mode 100644 index 0000000..fbd4302 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/classify/weka.py @@ -0,0 +1,382 @@ +# Natural Language Toolkit: Interface to Weka Classsifiers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +Classifiers that make use of the external 'Weka' package. +""" +from __future__ import print_function +import time +import tempfile +import os +import subprocess +import re +import zipfile +from sys import stdin + +from six import integer_types, string_types + +from nltk.probability import DictionaryProbDist +from nltk.internals import java, config_java + +from nltk.classify.api import ClassifierI + +_weka_classpath = None +_weka_search = [ + '.', + '/usr/share/weka', + '/usr/local/share/weka', + '/usr/lib/weka', + '/usr/local/lib/weka', +] + + +def config_weka(classpath=None): + global _weka_classpath + + # Make sure java's configured first. + config_java() + + if classpath is not None: + _weka_classpath = classpath + + if _weka_classpath is None: + searchpath = _weka_search + if 'WEKAHOME' in os.environ: + searchpath.insert(0, os.environ['WEKAHOME']) + + for path in searchpath: + if os.path.exists(os.path.join(path, 'weka.jar')): + _weka_classpath = os.path.join(path, 'weka.jar') + version = _check_weka_version(_weka_classpath) + if version: + print( + ('[Found Weka: %s (version %s)]' % (_weka_classpath, version)) + ) + else: + print('[Found Weka: %s]' % _weka_classpath) + _check_weka_version(_weka_classpath) + + if _weka_classpath is None: + raise LookupError( + 'Unable to find weka.jar! Use config_weka() ' + 'or set the WEKAHOME environment variable. 
' + 'For more information about Weka, please see ' + 'http://www.cs.waikato.ac.nz/ml/weka/' + ) + + +def _check_weka_version(jar): + try: + zf = zipfile.ZipFile(jar) + except (SystemExit, KeyboardInterrupt): + raise + except: + return None + try: + try: + return zf.read('weka/core/version.txt') + except KeyError: + return None + finally: + zf.close() + + +class WekaClassifier(ClassifierI): + def __init__(self, formatter, model_filename): + self._formatter = formatter + self._model = model_filename + + def prob_classify_many(self, featuresets): + return self._classify_many(featuresets, ['-p', '0', '-distribution']) + + def classify_many(self, featuresets): + return self._classify_many(featuresets, ['-p', '0']) + + def _classify_many(self, featuresets, options): + # Make sure we can find java & weka. + config_weka() + + temp_dir = tempfile.mkdtemp() + try: + # Write the test data file. + test_filename = os.path.join(temp_dir, 'test.arff') + self._formatter.write(test_filename, featuresets) + + # Call weka to classify the data. + cmd = [ + 'weka.classifiers.bayes.NaiveBayes', + '-l', + self._model, + '-T', + test_filename, + ] + options + (stdout, stderr) = java( + cmd, + classpath=_weka_classpath, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + # Check if something went wrong: + if stderr and not stdout: + if 'Illegal options: -distribution' in stderr: + raise ValueError( + 'The installed version of weka does ' + 'not support probability distribution ' + 'output.' + ) + else: + raise ValueError('Weka failed to generate output:\n%s' % stderr) + + # Parse weka's output. + return self.parse_weka_output(stdout.decode(stdin.encoding).split('\n')) + + finally: + for f in os.listdir(temp_dir): + os.remove(os.path.join(temp_dir, f)) + os.rmdir(temp_dir) + + def parse_weka_distribution(self, s): + probs = [float(v) for v in re.split('[*,]+', s) if v.strip()] + probs = dict(zip(self._formatter.labels(), probs)) + return DictionaryProbDist(probs) + + def parse_weka_output(self, lines): + # Strip unwanted text from stdout + for i, line in enumerate(lines): + if line.strip().startswith("inst#"): + lines = lines[i:] + break + + if lines[0].split() == ['inst#', 'actual', 'predicted', 'error', 'prediction']: + return [line.split()[2].split(':')[1] for line in lines[1:] if line.strip()] + elif lines[0].split() == [ + 'inst#', + 'actual', + 'predicted', + 'error', + 'distribution', + ]: + return [ + self.parse_weka_distribution(line.split()[-1]) + for line in lines[1:] + if line.strip() + ] + + # is this safe:? 
+ elif re.match(r'^0 \w+ [01]\.[0-9]* \?\s*$', lines[0]): + return [line.split()[1] for line in lines if line.strip()] + + else: + for line in lines[:10]: + print(line) + raise ValueError( + 'Unhandled output format -- your version ' + 'of weka may not be supported.\n' + ' Header: %s' % lines[0] + ) + + # [xx] full list of classifiers (some may be abstract?): + # ADTree, AODE, BayesNet, ComplementNaiveBayes, ConjunctiveRule, + # DecisionStump, DecisionTable, HyperPipes, IB1, IBk, Id3, J48, + # JRip, KStar, LBR, LeastMedSq, LinearRegression, LMT, Logistic, + # LogisticBase, M5Base, MultilayerPerceptron, + # MultipleClassifiersCombiner, NaiveBayes, NaiveBayesMultinomial, + # NaiveBayesSimple, NBTree, NNge, OneR, PaceRegression, PART, + # PreConstructedLinearModel, Prism, RandomForest, + # RandomizableClassifier, RandomTree, RBFNetwork, REPTree, Ridor, + # RuleNode, SimpleLinearRegression, SimpleLogistic, + # SingleClassifierEnhancer, SMO, SMOreg, UserClassifier, VFI, + # VotedPerceptron, Winnow, ZeroR + + _CLASSIFIER_CLASS = { + 'naivebayes': 'weka.classifiers.bayes.NaiveBayes', + 'C4.5': 'weka.classifiers.trees.J48', + 'log_regression': 'weka.classifiers.functions.Logistic', + 'svm': 'weka.classifiers.functions.SMO', + 'kstar': 'weka.classifiers.lazy.KStar', + 'ripper': 'weka.classifiers.rules.JRip', + } + + @classmethod + def train( + cls, + model_filename, + featuresets, + classifier='naivebayes', + options=[], + quiet=True, + ): + # Make sure we can find java & weka. + config_weka() + + # Build an ARFF formatter. + formatter = ARFF_Formatter.from_train(featuresets) + + temp_dir = tempfile.mkdtemp() + try: + # Write the training data file. + train_filename = os.path.join(temp_dir, 'train.arff') + formatter.write(train_filename, featuresets) + + if classifier in cls._CLASSIFIER_CLASS: + javaclass = cls._CLASSIFIER_CLASS[classifier] + elif classifier in cls._CLASSIFIER_CLASS.values(): + javaclass = classifier + else: + raise ValueError('Unknown classifier %s' % classifier) + + # Train the weka model. + cmd = [javaclass, '-d', model_filename, '-t', train_filename] + cmd += list(options) + if quiet: + stdout = subprocess.PIPE + else: + stdout = None + java(cmd, classpath=_weka_classpath, stdout=stdout) + + # Return the new classifier. + return WekaClassifier(formatter, model_filename) + + finally: + for f in os.listdir(temp_dir): + os.remove(os.path.join(temp_dir, f)) + os.rmdir(temp_dir) + + +class ARFF_Formatter: + """ + Converts featuresets and labeled featuresets to ARFF-formatted + strings, appropriate for input into Weka. + + Features and classes can be specified manually in the constructor, or may + be determined from data using ``from_train``. + """ + + def __init__(self, labels, features): + """ + :param labels: A list of all class labels that can be generated. + :param features: A list of feature specifications, where + each feature specification is a tuple (fname, ftype); + and ftype is an ARFF type string such as NUMERIC or + STRING. 
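A small editorial sketch (with made-up tokens) of the usual workflow, where the formatter is built from training data via ``from_train`` and rendered with ``format``::

    from nltk.classify.weka import ARFF_Formatter

    tokens = [({'length': 5, 'vowel': True}, 'yes'),
              ({'length': 2, 'vowel': False}, 'no')]

    # infers the label set and the ARFF type of each feature
    formatter = ARFF_Formatter.from_train(tokens)
    print(formatter.format(tokens))   # header section followed by @DATA rows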
+ """ + self._labels = labels + self._features = features + + def format(self, tokens): + """Returns a string representation of ARFF output for the given data.""" + return self.header_section() + self.data_section(tokens) + + def labels(self): + """Returns the list of classes.""" + return list(self._labels) + + def write(self, outfile, tokens): + """Writes ARFF data to a file for the given data.""" + if not hasattr(outfile, 'write'): + outfile = open(outfile, 'w') + outfile.write(self.format(tokens)) + outfile.close() + + @staticmethod + def from_train(tokens): + """ + Constructs an ARFF_Formatter instance with class labels and feature + types determined from the given data. Handles boolean, numeric and + string (note: not nominal) types. + """ + # Find the set of all attested labels. + labels = set(label for (tok, label) in tokens) + + # Determine the types of all features. + features = {} + for tok, label in tokens: + for (fname, fval) in tok.items(): + if issubclass(type(fval), bool): + ftype = '{True, False}' + elif issubclass(type(fval), (integer_types, float, bool)): + ftype = 'NUMERIC' + elif issubclass(type(fval), string_types): + ftype = 'STRING' + elif fval is None: + continue # can't tell the type. + else: + raise ValueError('Unsupported value type %r' % ftype) + + if features.get(fname, ftype) != ftype: + raise ValueError('Inconsistent type for %s' % fname) + features[fname] = ftype + features = sorted(features.items()) + + return ARFF_Formatter(labels, features) + + def header_section(self): + """Returns an ARFF header as a string.""" + # Header comment. + s = ( + '% Weka ARFF file\n' + + '% Generated automatically by NLTK\n' + + '%% %s\n\n' % time.ctime() + ) + + # Relation name + s += '@RELATION rel\n\n' + + # Input attribute specifications + for fname, ftype in self._features: + s += '@ATTRIBUTE %-30r %s\n' % (fname, ftype) + + # Label attribute specification + s += '@ATTRIBUTE %-30r {%s}\n' % ('-label-', ','.join(self._labels)) + + return s + + def data_section(self, tokens, labeled=None): + """ + Returns the ARFF data section for the given data. + + :param tokens: a list of featuresets (dicts) or labelled featuresets + which are tuples (featureset, label). + :param labeled: Indicates whether the given tokens are labeled + or not. If None, then the tokens will be assumed to be + labeled if the first token's value is a tuple or list. + """ + # Check if the tokens are labeled or unlabeled. If unlabeled, + # then use 'None' + if labeled is None: + labeled = tokens and isinstance(tokens[0], (tuple, list)) + if not labeled: + tokens = [(tok, None) for tok in tokens] + + # Data section + s = '\n@DATA\n' + for (tok, label) in tokens: + for fname, ftype in self._features: + s += '%s,' % self._fmt_arff_val(tok.get(fname)) + s += '%s\n' % self._fmt_arff_val(label) + + return s + + def _fmt_arff_val(self, fval): + if fval is None: + return '?' 
+ elif isinstance(fval, (bool, integer_types)): + return '%s' % fval + elif isinstance(fval, float): + return '%r' % fval + else: + return '%r' % fval + + +if __name__ == '__main__': + from nltk.classify.util import names_demo, binary_names_demo_features + + def make_classifier(featuresets): + return WekaClassifier.train('/tmp/name.model', featuresets, 'C4.5') + + classifier = names_demo(make_classifier, binary_names_demo_features) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/cluster/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/cluster/__init__.py new file mode 100644 index 0000000..c7fc100 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/cluster/__init__.py @@ -0,0 +1,90 @@ +# Natural Language Toolkit: Clusterers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Trevor Cohn +# URL: +# For license information, see LICENSE.TXT + +""" +This module contains a number of basic clustering algorithms. Clustering +describes the task of discovering groups of similar items with a large +collection. It is also describe as unsupervised machine learning, as the data +from which it learns is unannotated with class information, as is the case for +supervised learning. Annotated data is difficult and expensive to obtain in +the quantities required for the majority of supervised learning algorithms. +This problem, the knowledge acquisition bottleneck, is common to most natural +language processing tasks, thus fueling the need for quality unsupervised +approaches. + +This module contains a k-means clusterer, E-M clusterer and a group average +agglomerative clusterer (GAAC). All these clusterers involve finding good +cluster groupings for a set of vectors in multi-dimensional space. + +The K-means clusterer starts with k arbitrary chosen means then allocates each +vector to the cluster with the closest mean. It then recalculates the means of +each cluster as the centroid of the vectors in the cluster. This process +repeats until the cluster memberships stabilise. This is a hill-climbing +algorithm which may converge to a local maximum. Hence the clustering is +often repeated with random initial means and the most commonly occurring +output means are chosen. + +The GAAC clusterer starts with each of the *N* vectors as singleton clusters. +It then iteratively merges pairs of clusters which have the closest centroids. +This continues until there is only one cluster. The order of merges gives rise +to a dendrogram - a tree with the earlier merges lower than later merges. The +membership of a given number of clusters *c*, *1 <= c <= N*, can be found by +cutting the dendrogram at depth *c*. + +The Gaussian EM clusterer models the vectors as being produced by a mixture +of k Gaussian sources. The parameters of these sources (prior probability, +mean and covariance matrix) are then found to maximise the likelihood of the +given data. This is done with the expectation maximisation algorithm. It +starts with k arbitrarily chosen means, priors and covariance matrices. It +then calculates the membership probabilities for each vector in each of the +clusters - this is the 'E' step. The cluster parameters are then updated in +the 'M' step using the maximum likelihood estimate from the cluster membership +probabilities. This process continues until the likelihood of the data does +not significantly increase. + +They all extend the ClusterI interface which defines common operations +available with each clusterer. These operations include. 
+ - cluster: clusters a sequence of vectors + - classify: assign a vector to a cluster + - classification_probdist: give the probability distribution over cluster memberships + +The current existing classifiers also extend cluster.VectorSpace, an +abstract class which allows for singular value decomposition (SVD) and vector +normalisation. SVD is used to reduce the dimensionality of the vector space in +such a manner as to preserve as much of the variation as possible, by +reparameterising the axes in order of variability and discarding all bar the +first d dimensions. Normalisation ensures that vectors fall in the unit +hypersphere. + +Usage example (see also demo()):: + from nltk import cluster + from nltk.cluster import euclidean_distance + from numpy import array + + vectors = [array(f) for f in [[3, 3], [1, 2], [4, 2], [4, 0]]] + + # initialise the clusterer (will also assign the vectors to clusters) + clusterer = cluster.KMeansClusterer(2, euclidean_distance) + clusterer.cluster(vectors, True) + + # classify a new vector + print(clusterer.classify(array([3, 3]))) + +Note that the vectors must use numpy array-like +objects. nltk_contrib.unimelb.tacohn.SparseArrays may be used for +efficiency when required. +""" + +from nltk.cluster.util import ( + VectorSpaceClusterer, + Dendrogram, + euclidean_distance, + cosine_distance, +) +from nltk.cluster.kmeans import KMeansClusterer +from nltk.cluster.gaac import GAAClusterer +from nltk.cluster.em import EMClusterer diff --git a/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..71cd3cd Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/api.cpython-37.pyc new file mode 100644 index 0000000..534300f Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/api.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/em.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/em.cpython-37.pyc new file mode 100644 index 0000000..febda1e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/em.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/gaac.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/gaac.cpython-37.pyc new file mode 100644 index 0000000..b4cf21c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/gaac.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/kmeans.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/kmeans.cpython-37.pyc new file mode 100644 index 0000000..d97c3b0 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/kmeans.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000..e4f8010 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/cluster/__pycache__/util.cpython-37.pyc differ diff --git 
a/venv.bak/lib/python3.7/site-packages/nltk/cluster/api.py b/venv.bak/lib/python3.7/site-packages/nltk/cluster/api.py new file mode 100644 index 0000000..3f22f7f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/cluster/api.py @@ -0,0 +1,76 @@ +# Natural Language Toolkit: Clusterer Interfaces +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Trevor Cohn +# Porting: Steven Bird +# URL: +# For license information, see LICENSE.TXT + +from abc import ABCMeta, abstractmethod +from six import add_metaclass + +from nltk.probability import DictionaryProbDist + + +@add_metaclass(ABCMeta) +class ClusterI(object): + """ + Interface covering basic clustering functionality. + """ + + @abstractmethod + def cluster(self, vectors, assign_clusters=False): + """ + Assigns the vectors to clusters, learning the clustering parameters + from the data. Returns a cluster identifier for each vector. + """ + + @abstractmethod + def classify(self, token): + """ + Classifies the token into a cluster, setting the token's CLUSTER + parameter to that cluster identifier. + """ + + def likelihood(self, vector, label): + """ + Returns the likelihood (a float) of the token having the + corresponding cluster. + """ + if self.classify(vector) == label: + return 1.0 + else: + return 0.0 + + def classification_probdist(self, vector): + """ + Classifies the token into a cluster, returning + a probability distribution over the cluster identifiers. + """ + likelihoods = {} + sum = 0.0 + for cluster in self.cluster_names(): + likelihoods[cluster] = self.likelihood(vector, cluster) + sum += likelihoods[cluster] + for cluster in self.cluster_names(): + likelihoods[cluster] /= sum + return DictionaryProbDist(likelihoods) + + @abstractmethod + def num_clusters(self): + """ + Returns the number of clusters. + """ + + def cluster_names(self): + """ + Returns the names of the clusters. + :rtype: list + """ + return list(range(self.num_clusters())) + + def cluster_name(self, index): + """ + Returns the names of the cluster at index. + """ + return index diff --git a/venv.bak/lib/python3.7/site-packages/nltk/cluster/em.py b/venv.bak/lib/python3.7/site-packages/nltk/cluster/em.py new file mode 100644 index 0000000..51dcf1f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/cluster/em.py @@ -0,0 +1,257 @@ +# Natural Language Toolkit: Expectation Maximization Clusterer +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Trevor Cohn +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals + +try: + import numpy +except ImportError: + pass + +from nltk.compat import python_2_unicode_compatible +from nltk.cluster.util import VectorSpaceClusterer + + +@python_2_unicode_compatible +class EMClusterer(VectorSpaceClusterer): + """ + The Gaussian EM clusterer models the vectors as being produced by + a mixture of k Gaussian sources. The parameters of these sources + (prior probability, mean and covariance matrix) are then found to + maximise the likelihood of the given data. This is done with the + expectation maximisation algorithm. It starts with k arbitrarily + chosen means, priors and covariance matrices. It then calculates + the membership probabilities for each vector in each of the + clusters; this is the 'E' step. The cluster parameters are then + updated in the 'M' step using the maximum likelihood estimate from + the cluster membership probabilities. This process continues until + the likelihood of the data does not significantly increase. 
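Usage sketch (editorial; the vectors and initial means mirror the values used in ``demo()`` below)::

    import numpy
    from nltk.cluster import EMClusterer

    vectors = [numpy.array(f) for f in [[0.5, 0.5], [1.5, 0.5], [1, 3]]]
    clusterer = EMClusterer([[4, 2], [4, 2.01]], bias=0.1)
    clusters = clusterer.cluster(vectors, assign_clusters=True)
    print(clusters)                                  # e.g. [0, 0, 1]
    print(clusterer.classify(numpy.array([2, 2])))   # cluster index for a new vector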
+ """ + + def __init__( + self, + initial_means, + priors=None, + covariance_matrices=None, + conv_threshold=1e-6, + bias=0.1, + normalise=False, + svd_dimensions=None, + ): + """ + Creates an EM clusterer with the given starting parameters, + convergence threshold and vector mangling parameters. + + :param initial_means: the means of the gaussian cluster centers + :type initial_means: [seq of] numpy array or seq of SparseArray + :param priors: the prior probability for each cluster + :type priors: numpy array or seq of float + :param covariance_matrices: the covariance matrix for each cluster + :type covariance_matrices: [seq of] numpy array + :param conv_threshold: maximum change in likelihood before deemed + convergent + :type conv_threshold: int or float + :param bias: variance bias used to ensure non-singular covariance + matrices + :type bias: float + :param normalise: should vectors be normalised to length 1 + :type normalise: boolean + :param svd_dimensions: number of dimensions to use in reducing vector + dimensionsionality with SVD + :type svd_dimensions: int + """ + VectorSpaceClusterer.__init__(self, normalise, svd_dimensions) + self._means = numpy.array(initial_means, numpy.float64) + self._num_clusters = len(initial_means) + self._conv_threshold = conv_threshold + self._covariance_matrices = covariance_matrices + self._priors = priors + self._bias = bias + + def num_clusters(self): + return self._num_clusters + + def cluster_vectorspace(self, vectors, trace=False): + assert len(vectors) > 0 + + # set the parameters to initial values + dimensions = len(vectors[0]) + means = self._means + priors = self._priors + if not priors: + priors = self._priors = ( + numpy.ones(self._num_clusters, numpy.float64) / self._num_clusters + ) + covariances = self._covariance_matrices + if not covariances: + covariances = self._covariance_matrices = [ + numpy.identity(dimensions, numpy.float64) + for i in range(self._num_clusters) + ] + + # do the E and M steps until the likelihood plateaus + lastl = self._loglikelihood(vectors, priors, means, covariances) + converged = False + + while not converged: + if trace: + print('iteration; loglikelihood', lastl) + # E-step, calculate hidden variables, h[i,j] + h = numpy.zeros((len(vectors), self._num_clusters), numpy.float64) + for i in range(len(vectors)): + for j in range(self._num_clusters): + h[i, j] = priors[j] * self._gaussian( + means[j], covariances[j], vectors[i] + ) + h[i, :] /= sum(h[i, :]) + + # M-step, update parameters - cvm, p, mean + for j in range(self._num_clusters): + covariance_before = covariances[j] + new_covariance = numpy.zeros((dimensions, dimensions), numpy.float64) + new_mean = numpy.zeros(dimensions, numpy.float64) + sum_hj = 0.0 + for i in range(len(vectors)): + delta = vectors[i] - means[j] + new_covariance += h[i, j] * numpy.multiply.outer(delta, delta) + sum_hj += h[i, j] + new_mean += h[i, j] * vectors[i] + covariances[j] = new_covariance / sum_hj + means[j] = new_mean / sum_hj + priors[j] = sum_hj / len(vectors) + + # bias term to stop covariance matrix being singular + covariances[j] += self._bias * numpy.identity(dimensions, numpy.float64) + + # calculate likelihood - FIXME: may be broken + l = self._loglikelihood(vectors, priors, means, covariances) + + # check for convergence + if abs(lastl - l) < self._conv_threshold: + converged = True + lastl = l + + def classify_vectorspace(self, vector): + best = None + for j in range(self._num_clusters): + p = self._priors[j] * self._gaussian( + self._means[j], 
self._covariance_matrices[j], vector + ) + if not best or p > best[0]: + best = (p, j) + return best[1] + + def likelihood_vectorspace(self, vector, cluster): + cid = self.cluster_names().index(cluster) + return self._priors[cluster] * self._gaussian( + self._means[cluster], self._covariance_matrices[cluster], vector + ) + + def _gaussian(self, mean, cvm, x): + m = len(mean) + assert cvm.shape == (m, m), 'bad sized covariance matrix, %s' % str(cvm.shape) + try: + det = numpy.linalg.det(cvm) + inv = numpy.linalg.inv(cvm) + a = det ** -0.5 * (2 * numpy.pi) ** (-m / 2.0) + dx = x - mean + print(dx, inv) + b = -0.5 * numpy.dot(numpy.dot(dx, inv), dx) + return a * numpy.exp(b) + except OverflowError: + # happens when the exponent is negative infinity - i.e. b = 0 + # i.e. the inverse of cvm is huge (cvm is almost zero) + return 0 + + def _loglikelihood(self, vectors, priors, means, covariances): + llh = 0.0 + for vector in vectors: + p = 0 + for j in range(len(priors)): + p += priors[j] * self._gaussian(means[j], covariances[j], vector) + llh += numpy.log(p) + return llh + + def __repr__(self): + return '' % list(self._means) + + +def demo(): + """ + Non-interactive demonstration of the clusterers with simple 2-D data. + """ + + from nltk import cluster + + # example from figure 14.10, page 519, Manning and Schutze + + vectors = [numpy.array(f) for f in [[0.5, 0.5], [1.5, 0.5], [1, 3]]] + means = [[4, 2], [4, 2.01]] + + clusterer = cluster.EMClusterer(means, bias=0.1) + clusters = clusterer.cluster(vectors, True, trace=True) + + print('Clustered:', vectors) + print('As: ', clusters) + print() + + for c in range(2): + print('Cluster:', c) + print('Prior: ', clusterer._priors[c]) + print('Mean: ', clusterer._means[c]) + print('Covar: ', clusterer._covariance_matrices[c]) + print() + + # classify a new vector + vector = numpy.array([2, 2]) + print('classify(%s):' % vector, end=' ') + print(clusterer.classify(vector)) + + # show the classification probabilities + vector = numpy.array([2, 2]) + print('classification_probdist(%s):' % vector) + pdist = clusterer.classification_probdist(vector) + for sample in pdist.samples(): + print('%s => %.0f%%' % (sample, pdist.prob(sample) * 100)) + + +# +# The following demo code is broken. +# +# # use a set of tokens with 2D indices +# vectors = [numpy.array(f) for f in [[3, 3], [1, 2], [4, 2], [4, 0], [2, 3], [3, 1]]] + +# # test the EM clusterer with means given by k-means (2) and +# # dimensionality reduction +# clusterer = cluster.KMeans(2, euclidean_distance, svd_dimensions=1) +# print 'Clusterer:', clusterer +# clusters = clusterer.cluster(vectors) +# means = clusterer.means() +# print 'Means:', clusterer.means() +# print + +# clusterer = cluster.EMClusterer(means, svd_dimensions=1) +# clusters = clusterer.cluster(vectors, True) +# print 'Clusterer:', clusterer +# print 'Clustered:', str(vectors)[:60], '...' +# print 'As:', str(clusters)[:60], '...' 
+# print + +# # classify a new vector +# vector = numpy.array([3, 3]) +# print 'classify(%s):' % vector, +# print clusterer.classify(vector) +# print + +# # show the classification probabilities +# vector = numpy.array([2.2, 2]) +# print 'classification_probdist(%s)' % vector +# pdist = clusterer.classification_probdist(vector) +# for sample in pdist: +# print '%s => %.0f%%' % (sample, pdist.prob(sample) *100) + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/cluster/gaac.py b/venv.bak/lib/python3.7/site-packages/nltk/cluster/gaac.py new file mode 100644 index 0000000..06eb30e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/cluster/gaac.py @@ -0,0 +1,173 @@ +# Natural Language Toolkit: Group Average Agglomerative Clusterer +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Trevor Cohn +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals, division + +try: + import numpy +except ImportError: + pass + +from nltk.cluster.util import VectorSpaceClusterer, Dendrogram, cosine_distance +from nltk.compat import python_2_unicode_compatible + + +@python_2_unicode_compatible +class GAAClusterer(VectorSpaceClusterer): + """ + The Group Average Agglomerative starts with each of the N vectors as singleton + clusters. It then iteratively merges pairs of clusters which have the + closest centroids. This continues until there is only one cluster. The + order of merges gives rise to a dendrogram: a tree with the earlier merges + lower than later merges. The membership of a given number of clusters c, 1 + <= c <= N, can be found by cutting the dendrogram at depth c. + + This clusterer uses the cosine similarity metric only, which allows for + efficient speed-up in the clustering process. 
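Usage sketch (editorial, in the same style as the package-level example)::

    import numpy
    from nltk.cluster import GAAClusterer

    vectors = [numpy.array(f) for f in [[3, 3], [1, 2], [4, 2], [4, 0]]]
    clusterer = GAAClusterer(num_clusters=2)
    clusters = clusterer.cluster(vectors, assign_clusters=True)
    print(clusters)                # a cluster index for each input vector
    clusterer.dendrogram().show()  # ASCII rendering of the merge order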
+ """ + + def __init__(self, num_clusters=1, normalise=True, svd_dimensions=None): + VectorSpaceClusterer.__init__(self, normalise, svd_dimensions) + self._num_clusters = num_clusters + self._dendrogram = None + self._groups_values = None + + def cluster(self, vectors, assign_clusters=False, trace=False): + # stores the merge order + self._dendrogram = Dendrogram( + [numpy.array(vector, numpy.float64) for vector in vectors] + ) + return VectorSpaceClusterer.cluster(self, vectors, assign_clusters, trace) + + def cluster_vectorspace(self, vectors, trace=False): + # variables describing the initial situation + N = len(vectors) + cluster_len = [1] * N + cluster_count = N + index_map = numpy.arange(N) + + # construct the similarity matrix + dims = (N, N) + dist = numpy.ones(dims, dtype=numpy.float) * numpy.inf + for i in range(N): + for j in range(i + 1, N): + dist[i, j] = cosine_distance(vectors[i], vectors[j]) + + while cluster_count > max(self._num_clusters, 1): + i, j = numpy.unravel_index(dist.argmin(), dims) + if trace: + print("merging %d and %d" % (i, j)) + + # update similarities for merging i and j + self._merge_similarities(dist, cluster_len, i, j) + + # remove j + dist[:, j] = numpy.inf + dist[j, :] = numpy.inf + + # merge the clusters + cluster_len[i] = cluster_len[i] + cluster_len[j] + self._dendrogram.merge(index_map[i], index_map[j]) + cluster_count -= 1 + + # update the index map to reflect the indexes if we + # had removed j + index_map[j + 1 :] -= 1 + index_map[j] = N + + self.update_clusters(self._num_clusters) + + def _merge_similarities(self, dist, cluster_len, i, j): + # the new cluster i merged from i and j adopts the average of + # i and j's similarity to each other cluster, weighted by the + # number of points in the clusters i and j + i_weight = cluster_len[i] + j_weight = cluster_len[j] + weight_sum = i_weight + j_weight + + # update for x 0 + if self._should_normalise: + centroid = self._normalise(cluster[0]) + else: + centroid = numpy.array(cluster[0]) + for vector in cluster[1:]: + if self._should_normalise: + centroid += self._normalise(vector) + else: + centroid += vector + centroid /= len(cluster) + self._centroids.append(centroid) + self._num_clusters = len(self._centroids) + + def classify_vectorspace(self, vector): + best = None + for i in range(self._num_clusters): + centroid = self._centroids[i] + dist = cosine_distance(vector, centroid) + if not best or dist < best[0]: + best = (dist, i) + return best[1] + + def dendrogram(self): + """ + :return: The dendrogram representing the current clustering + :rtype: Dendrogram + """ + return self._dendrogram + + def num_clusters(self): + return self._num_clusters + + def __repr__(self): + return '' % self._num_clusters + + +def demo(): + """ + Non-interactive demonstration of the clusterers with simple 2-D data. 
+ """ + + from nltk.cluster import GAAClusterer + + # use a set of tokens with 2D indices + vectors = [numpy.array(f) for f in [[3, 3], [1, 2], [4, 2], [4, 0], [2, 3], [3, 1]]] + + # test the GAAC clusterer with 4 clusters + clusterer = GAAClusterer(4) + clusters = clusterer.cluster(vectors, True) + + print('Clusterer:', clusterer) + print('Clustered:', vectors) + print('As:', clusters) + print() + + # show the dendrogram + clusterer.dendrogram().show() + + # classify a new vector + vector = numpy.array([3, 3]) + print('classify(%s):' % vector, end=' ') + print(clusterer.classify(vector)) + print() + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/cluster/kmeans.py b/venv.bak/lib/python3.7/site-packages/nltk/cluster/kmeans.py new file mode 100644 index 0000000..bfe1604 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/cluster/kmeans.py @@ -0,0 +1,234 @@ +# Natural Language Toolkit: K-Means Clusterer +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Trevor Cohn +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals, division + +import copy +import random +import sys + +try: + import numpy +except ImportError: + pass + + +from nltk.cluster.util import VectorSpaceClusterer +from nltk.compat import python_2_unicode_compatible + + +@python_2_unicode_compatible +class KMeansClusterer(VectorSpaceClusterer): + """ + The K-means clusterer starts with k arbitrary chosen means then allocates + each vector to the cluster with the closest mean. It then recalculates the + means of each cluster as the centroid of the vectors in the cluster. This + process repeats until the cluster memberships stabilise. This is a + hill-climbing algorithm which may converge to a local maximum. Hence the + clustering is often repeated with random initial means and the most + commonly occurring output means are chosen. 
+ """ + + def __init__( + self, + num_means, + distance, + repeats=1, + conv_test=1e-6, + initial_means=None, + normalise=False, + svd_dimensions=None, + rng=None, + avoid_empty_clusters=False, + ): + + """ + :param num_means: the number of means to use (may use fewer) + :type num_means: int + :param distance: measure of distance between two vectors + :type distance: function taking two vectors and returing a float + :param repeats: number of randomised clustering trials to use + :type repeats: int + :param conv_test: maximum variation in mean differences before + deemed convergent + :type conv_test: number + :param initial_means: set of k initial means + :type initial_means: sequence of vectors + :param normalise: should vectors be normalised to length 1 + :type normalise: boolean + :param svd_dimensions: number of dimensions to use in reducing vector + dimensionsionality with SVD + :type svd_dimensions: int + :param rng: random number generator (or None) + :type rng: Random + :param avoid_empty_clusters: include current centroid in computation + of next one; avoids undefined behavior + when clusters become empty + :type avoid_empty_clusters: boolean + """ + VectorSpaceClusterer.__init__(self, normalise, svd_dimensions) + self._num_means = num_means + self._distance = distance + self._max_difference = conv_test + assert not initial_means or len(initial_means) == num_means + self._means = initial_means + assert repeats >= 1 + assert not (initial_means and repeats > 1) + self._repeats = repeats + self._rng = rng if rng else random.Random() + self._avoid_empty_clusters = avoid_empty_clusters + + def cluster_vectorspace(self, vectors, trace=False): + if self._means and self._repeats > 1: + print('Warning: means will be discarded for subsequent trials') + + meanss = [] + for trial in range(self._repeats): + if trace: + print('k-means trial', trial) + if not self._means or trial > 1: + self._means = self._rng.sample(list(vectors), self._num_means) + self._cluster_vectorspace(vectors, trace) + meanss.append(self._means) + + if len(meanss) > 1: + # sort the means first (so that different cluster numbering won't + # effect the distance comparison) + for means in meanss: + means.sort(key=sum) + + # find the set of means that's minimally different from the others + min_difference = min_means = None + for i in range(len(meanss)): + d = 0 + for j in range(len(meanss)): + if i != j: + d += self._sum_distances(meanss[i], meanss[j]) + if min_difference is None or d < min_difference: + min_difference, min_means = d, meanss[i] + + # use the best means + self._means = min_means + + def _cluster_vectorspace(self, vectors, trace=False): + if self._num_means < len(vectors): + # perform k-means clustering + converged = False + while not converged: + # assign the tokens to clusters based on minimum distance to + # the cluster means + clusters = [[] for m in range(self._num_means)] + for vector in vectors: + index = self.classify_vectorspace(vector) + clusters[index].append(vector) + + if trace: + print('iteration') + # for i in range(self._num_means): + # print ' mean', i, 'allocated', len(clusters[i]), 'vectors' + + # recalculate cluster means by computing the centroid of each cluster + new_means = list(map(self._centroid, clusters, self._means)) + + # measure the degree of change from the previous step for convergence + difference = self._sum_distances(self._means, new_means) + if difference < self._max_difference: + converged = True + + # remember the new means + self._means = new_means + + def 
classify_vectorspace(self, vector): + # finds the closest cluster centroid + # returns that cluster's index + best_distance = best_index = None + for index in range(len(self._means)): + mean = self._means[index] + dist = self._distance(vector, mean) + if best_distance is None or dist < best_distance: + best_index, best_distance = index, dist + return best_index + + def num_clusters(self): + if self._means: + return len(self._means) + else: + return self._num_means + + def means(self): + """ + The means used for clustering. + """ + return self._means + + def _sum_distances(self, vectors1, vectors2): + difference = 0.0 + for u, v in zip(vectors1, vectors2): + difference += self._distance(u, v) + return difference + + def _centroid(self, cluster, mean): + if self._avoid_empty_clusters: + centroid = copy.copy(mean) + for vector in cluster: + centroid += vector + return centroid / (1 + len(cluster)) + else: + if not len(cluster): + sys.stderr.write('Error: no centroid defined for empty cluster.\n') + sys.stderr.write( + 'Try setting argument \'avoid_empty_clusters\' to True\n' + ) + assert False + centroid = copy.copy(cluster[0]) + for vector in cluster[1:]: + centroid += vector + return centroid / len(cluster) + + def __repr__(self): + return '' % (self._means, self._repeats) + + +################################################################################# + + +def demo(): + # example from figure 14.9, page 517, Manning and Schutze + + from nltk.cluster import KMeansClusterer, euclidean_distance + + vectors = [numpy.array(f) for f in [[2, 1], [1, 3], [4, 7], [6, 7]]] + means = [[4, 3], [5, 5]] + + clusterer = KMeansClusterer(2, euclidean_distance, initial_means=means) + clusters = clusterer.cluster(vectors, True, trace=True) + + print('Clustered:', vectors) + print('As:', clusters) + print('Means:', clusterer.means()) + print() + + vectors = [numpy.array(f) for f in [[3, 3], [1, 2], [4, 2], [4, 0], [2, 3], [3, 1]]] + + # test k-means using the euclidean distance metric, 2 means and repeat + # clustering 10 times with random seeds + + clusterer = KMeansClusterer(2, euclidean_distance, repeats=10) + clusters = clusterer.cluster(vectors, True) + print('Clustered:', vectors) + print('As:', clusters) + print('Means:', clusterer.means()) + print() + + # classify a new vector + vector = numpy.array([3, 3]) + print('classify(%s):' % vector, end=' ') + print(clusterer.classify(vector)) + print() + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/cluster/util.py b/venv.bak/lib/python3.7/site-packages/nltk/cluster/util.py new file mode 100644 index 0000000..a3576e7 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/cluster/util.py @@ -0,0 +1,303 @@ +# Natural Language Toolkit: Clusterer Utilities +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Trevor Cohn +# Contributor: J Richard Snape +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals, division +from abc import abstractmethod + +import copy +from sys import stdout +from math import sqrt + +try: + import numpy +except ImportError: + pass + +from nltk.cluster.api import ClusterI +from nltk.compat import python_2_unicode_compatible + + +class VectorSpaceClusterer(ClusterI): + """ + Abstract clusterer which takes tokens and maps them into a vector space. + Optionally performs singular value decomposition to reduce the + dimensionality. 
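The normalisation and SVD options below are shared by every concrete subclass; an editorial sketch using ``KMeansClusterer`` from this package::

    import numpy
    from nltk.cluster import KMeansClusterer, euclidean_distance

    vectors = [numpy.array(f) for f in [[3, 3], [1, 2], [4, 2], [4, 0], [2, 3], [3, 1]]]

    # vectors are normalised to unit length and reduced to one dimension by
    # SVD before clustering; both steps happen in this base class
    clusterer = KMeansClusterer(2, euclidean_distance, repeats=5,
                                normalise=True, svd_dimensions=1)
    print(clusterer.cluster(vectors, assign_clusters=True))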
+ """ + + def __init__(self, normalise=False, svd_dimensions=None): + """ + :param normalise: should vectors be normalised to length 1 + :type normalise: boolean + :param svd_dimensions: number of dimensions to use in reducing vector + dimensionsionality with SVD + :type svd_dimensions: int + """ + self._Tt = None + self._should_normalise = normalise + self._svd_dimensions = svd_dimensions + + def cluster(self, vectors, assign_clusters=False, trace=False): + assert len(vectors) > 0 + + # normalise the vectors + if self._should_normalise: + vectors = list(map(self._normalise, vectors)) + + # use SVD to reduce the dimensionality + if self._svd_dimensions and self._svd_dimensions < len(vectors[0]): + [u, d, vt] = numpy.linalg.svd(numpy.transpose(numpy.array(vectors))) + S = d[: self._svd_dimensions] * numpy.identity( + self._svd_dimensions, numpy.float64 + ) + T = u[:, : self._svd_dimensions] + Dt = vt[: self._svd_dimensions, :] + vectors = numpy.transpose(numpy.dot(S, Dt)) + self._Tt = numpy.transpose(T) + + # call abstract method to cluster the vectors + self.cluster_vectorspace(vectors, trace) + + # assign the vectors to clusters + if assign_clusters: + return [self.classify(vector) for vector in vectors] + + @abstractmethod + def cluster_vectorspace(self, vectors, trace): + """ + Finds the clusters using the given set of vectors. + """ + + def classify(self, vector): + if self._should_normalise: + vector = self._normalise(vector) + if self._Tt is not None: + vector = numpy.dot(self._Tt, vector) + cluster = self.classify_vectorspace(vector) + return self.cluster_name(cluster) + + @abstractmethod + def classify_vectorspace(self, vector): + """ + Returns the index of the appropriate cluster for the vector. + """ + + def likelihood(self, vector, label): + if self._should_normalise: + vector = self._normalise(vector) + if self._Tt is not None: + vector = numpy.dot(self._Tt, vector) + return self.likelihood_vectorspace(vector, label) + + def likelihood_vectorspace(self, vector, cluster): + """ + Returns the likelihood of the vector belonging to the cluster. + """ + predicted = self.classify_vectorspace(vector) + return 1.0 if cluster == predicted else 0.0 + + def vector(self, vector): + """ + Returns the vector after normalisation and dimensionality reduction + """ + if self._should_normalise: + vector = self._normalise(vector) + if self._Tt is not None: + vector = numpy.dot(self._Tt, vector) + return vector + + def _normalise(self, vector): + """ + Normalises the vector to unit length. + """ + return vector / sqrt(numpy.dot(vector, vector)) + + +def euclidean_distance(u, v): + """ + Returns the euclidean distance between vectors u and v. This is equivalent + to the length of the vector (u - v). + """ + diff = u - v + return sqrt(numpy.dot(diff, diff)) + + +def cosine_distance(u, v): + """ + Returns 1 minus the cosine of the angle between vectors v and u. This is + equal to 1 - (u.v / |u||v|). + """ + return 1 - (numpy.dot(u, v) / (sqrt(numpy.dot(u, u)) * sqrt(numpy.dot(v, v)))) + + +class _DendrogramNode(object): + """ Tree node of a dendrogram. 
""" + + def __init__(self, value, *children): + self._value = value + self._children = children + + def leaves(self, values=True): + if self._children: + leaves = [] + for child in self._children: + leaves.extend(child.leaves(values)) + return leaves + elif values: + return [self._value] + else: + return [self] + + def groups(self, n): + queue = [(self._value, self)] + + while len(queue) < n: + priority, node = queue.pop() + if not node._children: + queue.push((priority, node)) + break + for child in node._children: + if child._children: + queue.append((child._value, child)) + else: + queue.append((0, child)) + # makes the earliest merges at the start, latest at the end + queue.sort() + + groups = [] + for priority, node in queue: + groups.append(node.leaves()) + return groups + + def __lt__(self, comparator): + return cosine_distance(self._value, comparator._value) < 0 + + +@python_2_unicode_compatible +class Dendrogram(object): + """ + Represents a dendrogram, a tree with a specified branching order. This + must be initialised with the leaf items, then iteratively call merge for + each branch. This class constructs a tree representing the order of calls + to the merge function. + """ + + def __init__(self, items=[]): + """ + :param items: the items at the leaves of the dendrogram + :type items: sequence of (any) + """ + self._items = [_DendrogramNode(item) for item in items] + self._original_items = copy.copy(self._items) + self._merge = 1 + + def merge(self, *indices): + """ + Merges nodes at given indices in the dendrogram. The nodes will be + combined which then replaces the first node specified. All other nodes + involved in the merge will be removed. + + :param indices: indices of the items to merge (at least two) + :type indices: seq of int + """ + assert len(indices) >= 2 + node = _DendrogramNode(self._merge, *[self._items[i] for i in indices]) + self._merge += 1 + self._items[indices[0]] = node + for i in indices[1:]: + del self._items[i] + + def groups(self, n): + """ + Finds the n-groups of items (leaves) reachable from a cut at depth n. + :param n: number of groups + :type n: int + """ + if len(self._items) > 1: + root = _DendrogramNode(self._merge, *self._items) + else: + root = self._items[0] + return root.groups(n) + + def show(self, leaf_labels=[]): + """ + Print the dendrogram in ASCII art to standard out. 
+ :param leaf_labels: an optional list of strings to use for labeling the + leaves + :type leaf_labels: list + """ + + # ASCII rendering characters + JOIN, HLINK, VLINK = '+', '-', '|' + + # find the root (or create one) + if len(self._items) > 1: + root = _DendrogramNode(self._merge, *self._items) + else: + root = self._items[0] + leaves = self._original_items + + if leaf_labels: + last_row = leaf_labels + else: + last_row = ["%s" % leaf._value for leaf in leaves] + + # find the bottom row and the best cell width + width = max(map(len, last_row)) + 1 + lhalf = width // 2 + rhalf = int(width - lhalf - 1) + + # display functions + def format(centre, left=' ', right=' '): + return '%s%s%s' % (lhalf * left, centre, right * rhalf) + + def display(str): + stdout.write(str) + + # for each merge, top down + queue = [(root._value, root)] + verticals = [format(' ') for leaf in leaves] + while queue: + priority, node = queue.pop() + child_left_leaf = list(map(lambda c: c.leaves(False)[0], node._children)) + indices = list(map(leaves.index, child_left_leaf)) + if child_left_leaf: + min_idx = min(indices) + max_idx = max(indices) + for i in range(len(leaves)): + if leaves[i] in child_left_leaf: + if i == min_idx: + display(format(JOIN, ' ', HLINK)) + elif i == max_idx: + display(format(JOIN, HLINK, ' ')) + else: + display(format(JOIN, HLINK, HLINK)) + verticals[i] = format(VLINK) + elif min_idx <= i <= max_idx: + display(format(HLINK, HLINK, HLINK)) + else: + display(verticals[i]) + display('\n') + for child in node._children: + if child._children: + queue.append((child._value, child)) + queue.sort() + + for vertical in verticals: + display(vertical) + display('\n') + + # finally, display the last line + display(''.join(item.center(width) for item in last_row)) + display('\n') + + def __repr__(self): + if len(self._items) > 1: + root = _DendrogramNode(self._merge, *self._items) + else: + root = self._items[0] + leaves = root.leaves(False) + return '' % len(leaves) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/collections.py b/venv.bak/lib/python3.7/site-packages/nltk/collections.py new file mode 100644 index 0000000..882e15c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/collections.py @@ -0,0 +1,665 @@ +# Natural Language Toolkit: Collections +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function, absolute_import + +import bisect +from itertools import islice, chain +from functools import total_ordering +# this unused import is for python 2.7 +from collections import defaultdict, deque, Counter + +from six import text_type + +from nltk.internals import slice_bounds, raise_unorderable_types +from nltk.compat import python_2_unicode_compatible + + +########################################################################## +# Ordered Dictionary +########################################################################## + + +class OrderedDict(dict): + def __init__(self, data=None, **kwargs): + self._keys = self.keys(data, kwargs.get('keys')) + self._default_factory = kwargs.get('default_factory') + if data is None: + dict.__init__(self) + else: + dict.__init__(self, data) + + def __delitem__(self, key): + dict.__delitem__(self, key) + self._keys.remove(key) + + def __getitem__(self, key): + try: + return dict.__getitem__(self, key) + except KeyError: + return self.__missing__(key) + + def __iter__(self): + return (key for key in self.keys()) + + def __missing__(self, key): + if 
not self._default_factory and key not in self._keys: + raise KeyError() + return self._default_factory() + + def __setitem__(self, key, item): + dict.__setitem__(self, key, item) + if key not in self._keys: + self._keys.append(key) + + def clear(self): + dict.clear(self) + self._keys.clear() + + def copy(self): + d = dict.copy(self) + d._keys = self._keys + return d + + def items(self): + # returns iterator under python 3 and list under python 2 + return zip(self.keys(), self.values()) + + def keys(self, data=None, keys=None): + if data: + if keys: + assert isinstance(keys, list) + assert len(data) == len(keys) + return keys + else: + assert ( + isinstance(data, dict) + or isinstance(data, OrderedDict) + or isinstance(data, list) + ) + if isinstance(data, dict) or isinstance(data, OrderedDict): + return data.keys() + elif isinstance(data, list): + return [key for (key, value) in data] + elif '_keys' in self.__dict__: + return self._keys + else: + return [] + + def popitem(self): + if not self._keys: + raise KeyError() + + key = self._keys.pop() + value = self[key] + del self[key] + return (key, value) + + def setdefault(self, key, failobj=None): + dict.setdefault(self, key, failobj) + if key not in self._keys: + self._keys.append(key) + + def update(self, data): + dict.update(self, data) + for key in self.keys(data): + if key not in self._keys: + self._keys.append(key) + + def values(self): + # returns iterator under python 3 + return map(self.get, self._keys) + + +###################################################################### +# Lazy Sequences +###################################################################### + + +@total_ordering +@python_2_unicode_compatible +class AbstractLazySequence(object): + """ + An abstract base class for read-only sequences whose values are + computed as needed. Lazy sequences act like tuples -- they can be + indexed, sliced, and iterated over; but they may not be modified. + + The most common application of lazy sequences in NLTK is for + corpus view objects, which provide access to the contents of a + corpus without loading the entire corpus into memory, by loading + pieces of the corpus from disk as needed. + + The result of modifying a mutable element of a lazy sequence is + undefined. In particular, the modifications made to the element + may or may not persist, depending on whether and when the lazy + sequence caches that element's value or reconstructs it from + scratch. + + Subclasses are required to define two methods: ``__len__()`` + and ``iterate_from()``. + """ + + def __len__(self): + """ + Return the number of tokens in the corpus file underlying this + corpus view. + """ + raise NotImplementedError('should be implemented by subclass') + + def iterate_from(self, start): + """ + Return an iterator that generates the tokens in the corpus + file underlying this corpus view, starting at the token number + ``start``. If ``start>=len(self)``, then this iterator will + generate no tokens. + """ + raise NotImplementedError('should be implemented by subclass') + + def __getitem__(self, i): + """ + Return the *i* th token in the corpus file underlying this + corpus view. Negative indices and spans are both supported. + """ + if isinstance(i, slice): + start, stop = slice_bounds(self, i) + return LazySubsequence(self, start, stop) + else: + # Handle negative indices + if i < 0: + i += len(self) + if i < 0: + raise IndexError('index out of range') + # Use iterate_from to extract it. 
+ try: + return next(self.iterate_from(i)) + except StopIteration: + raise IndexError('index out of range') + + def __iter__(self): + """Return an iterator that generates the tokens in the corpus + file underlying this corpus view.""" + return self.iterate_from(0) + + def count(self, value): + """Return the number of times this list contains ``value``.""" + return sum(1 for elt in self if elt == value) + + def index(self, value, start=None, stop=None): + """Return the index of the first occurrence of ``value`` in this + list that is greater than or equal to ``start`` and less than + ``stop``. Negative start and stop values are treated like negative + slice bounds -- i.e., they count from the end of the list.""" + start, stop = slice_bounds(self, slice(start, stop)) + for i, elt in enumerate(islice(self, start, stop)): + if elt == value: + return i + start + raise ValueError('index(x): x not in list') + + def __contains__(self, value): + """Return true if this list contains ``value``.""" + return bool(self.count(value)) + + def __add__(self, other): + """Return a list concatenating self with other.""" + return LazyConcatenation([self, other]) + + def __radd__(self, other): + """Return a list concatenating other with self.""" + return LazyConcatenation([other, self]) + + def __mul__(self, count): + """Return a list concatenating self with itself ``count`` times.""" + return LazyConcatenation([self] * count) + + def __rmul__(self, count): + """Return a list concatenating self with itself ``count`` times.""" + return LazyConcatenation([self] * count) + + _MAX_REPR_SIZE = 60 + + def __repr__(self): + """ + Return a string representation for this corpus view that is + similar to a list's representation; but if it would be more + than 60 characters long, it is truncated. + """ + pieces = [] + length = 5 + for elt in self: + pieces.append(repr(elt)) + length += len(pieces[-1]) + 2 + if length > self._MAX_REPR_SIZE and len(pieces) > 2: + return '[%s, ...]' % text_type(', ').join(pieces[:-1]) + return '[%s]' % text_type(', ').join(pieces) + + def __eq__(self, other): + return type(self) == type(other) and list(self) == list(other) + + def __ne__(self, other): + return not self == other + + def __lt__(self, other): + if type(other) != type(self): + raise_unorderable_types("<", self, other) + return list(self) < list(other) + + def __hash__(self): + """ + :raise ValueError: Corpus view objects are unhashable. + """ + raise ValueError('%s objects are unhashable' % self.__class__.__name__) + + +class LazySubsequence(AbstractLazySequence): + """ + A subsequence produced by slicing a lazy sequence. This slice + keeps a reference to its source sequence, and generates its values + by looking them up in the source sequence. + """ + + MIN_SIZE = 100 + """ + The minimum size for which lazy slices should be created. If + ``LazySubsequence()`` is called with a subsequence that is + shorter than ``MIN_SIZE``, then a tuple will be returned instead. + """ + + def __new__(cls, source, start, stop): + """ + Construct a new slice from a given underlying sequence. The + ``start`` and ``stop`` indices should be absolute indices -- + i.e., they should not be negative (for indexing from the back + of a list) or greater than the length of ``source``. + """ + # If the slice is small enough, just use a tuple. 
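+        # NB: despite the comment above and the ``MIN_SIZE`` docstring, this
+        # branch actually builds and returns a list (via ``islice``), not a tuple.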
+ if stop - start < cls.MIN_SIZE: + return list(islice(source.iterate_from(start), stop - start)) + else: + return object.__new__(cls) + + def __init__(self, source, start, stop): + self._source = source + self._start = start + self._stop = stop + + def __len__(self): + return self._stop - self._start + + def iterate_from(self, start): + return islice( + self._source.iterate_from(start + self._start), max(0, len(self) - start) + ) + + +class LazyConcatenation(AbstractLazySequence): + """ + A lazy sequence formed by concatenating a list of lists. This + underlying list of lists may itself be lazy. ``LazyConcatenation`` + maintains an index that it uses to keep track of the relationship + between offsets in the concatenated lists and offsets in the + sublists. + """ + + def __init__(self, list_of_lists): + self._list = list_of_lists + self._offsets = [0] + + def __len__(self): + if len(self._offsets) <= len(self._list): + for tok in self.iterate_from(self._offsets[-1]): + pass + return self._offsets[-1] + + def iterate_from(self, start_index): + if start_index < self._offsets[-1]: + sublist_index = bisect.bisect_right(self._offsets, start_index) - 1 + else: + sublist_index = len(self._offsets) - 1 + + index = self._offsets[sublist_index] + + # Construct an iterator over the sublists. + if isinstance(self._list, AbstractLazySequence): + sublist_iter = self._list.iterate_from(sublist_index) + else: + sublist_iter = islice(self._list, sublist_index, None) + + for sublist in sublist_iter: + if sublist_index == (len(self._offsets) - 1): + assert ( + index + len(sublist) >= self._offsets[-1] + ), 'offests not monotonic increasing!' + self._offsets.append(index + len(sublist)) + else: + assert self._offsets[sublist_index + 1] == index + len( + sublist + ), 'inconsistent list value (num elts)' + + for value in sublist[max(0, start_index - index) :]: + yield value + + index += len(sublist) + sublist_index += 1 + + +class LazyMap(AbstractLazySequence): + """ + A lazy sequence whose elements are formed by applying a given + function to each element in one or more underlying lists. The + function is applied lazily -- i.e., when you read a value from the + list, ``LazyMap`` will calculate that value by applying its + function to the underlying lists' value(s). ``LazyMap`` is + essentially a lazy version of the Python primitive function + ``map``. In particular, the following two expressions are + equivalent: + + >>> from nltk.collections import LazyMap + >>> function = str + >>> sequence = [1,2,3] + >>> map(function, sequence) # doctest: +SKIP + ['1', '2', '3'] + >>> list(LazyMap(function, sequence)) + ['1', '2', '3'] + + Like the Python ``map`` primitive, if the source lists do not have + equal size, then the value None will be supplied for the + 'missing' elements. + + Lazy maps can be useful for conserving memory, in cases where + individual values take up a lot of space. This is especially true + if the underlying list's values are constructed lazily, as is the + case with many corpus readers. + + A typical example of a use case for this class is performing + feature detection on the tokens in a corpus. Since featuresets + are encoded as dictionaries, which can take up a lot of memory, + using a ``LazyMap`` can significantly reduce memory usage when + training and running classifiers. + """ + + def __init__(self, function, *lists, **config): + """ + :param function: The function that should be applied to + elements of ``lists``. It should take as many arguments + as there are ``lists``. 
+ :param lists: The underlying lists. + :param cache_size: Determines the size of the cache used + by this lazy map. (default=5) + """ + if not lists: + raise TypeError('LazyMap requires at least two args') + + self._lists = lists + self._func = function + self._cache_size = config.get('cache_size', 5) + self._cache = {} if self._cache_size > 0 else None + + # If you just take bool() of sum() here _all_lazy will be true just + # in case n >= 1 list is an AbstractLazySequence. Presumably this + # isn't what's intended. + self._all_lazy = sum( + isinstance(lst, AbstractLazySequence) for lst in lists + ) == len(lists) + + def iterate_from(self, index): + # Special case: one lazy sublist + if len(self._lists) == 1 and self._all_lazy: + for value in self._lists[0].iterate_from(index): + yield self._func(value) + return + + # Special case: one non-lazy sublist + elif len(self._lists) == 1: + while True: + try: + yield self._func(self._lists[0][index]) + except IndexError: + return + index += 1 + + # Special case: n lazy sublists + elif self._all_lazy: + iterators = [lst.iterate_from(index) for lst in self._lists] + while True: + elements = [] + for iterator in iterators: + try: + elements.append(next(iterator)) + except: # FIXME: What is this except really catching? StopIteration? + elements.append(None) + if elements == [None] * len(self._lists): + return + yield self._func(*elements) + index += 1 + + # general case + else: + while True: + try: + elements = [lst[index] for lst in self._lists] + except IndexError: + elements = [None] * len(self._lists) + for i, lst in enumerate(self._lists): + try: + elements[i] = lst[index] + except IndexError: + pass + if elements == [None] * len(self._lists): + return + yield self._func(*elements) + index += 1 + + def __getitem__(self, index): + if isinstance(index, slice): + sliced_lists = [lst[index] for lst in self._lists] + return LazyMap(self._func, *sliced_lists) + else: + # Handle negative indices + if index < 0: + index += len(self) + if index < 0: + raise IndexError('index out of range') + # Check the cache + if self._cache is not None and index in self._cache: + return self._cache[index] + # Calculate the value + try: + val = next(self.iterate_from(index)) + except StopIteration: + raise IndexError('index out of range') + # Update the cache + if self._cache is not None: + if len(self._cache) > self._cache_size: + self._cache.popitem() # discard random entry + self._cache[index] = val + # Return the value + return val + + def __len__(self): + return max(len(lst) for lst in self._lists) + + +class LazyZip(LazyMap): + """ + A lazy sequence whose elements are tuples, each containing the i-th + element from each of the argument sequences. The returned list is + truncated in length to the length of the shortest argument sequence. The + tuples are constructed lazily -- i.e., when you read a value from the + list, ``LazyZip`` will calculate that value by forming a tuple from + the i-th element of each of the argument sequences. + + ``LazyZip`` is essentially a lazy version of the Python primitive function + ``zip``. 
In particular, an evaluated LazyZip is equivalent to a zip: + + >>> from nltk.collections import LazyZip + >>> sequence1, sequence2 = [1, 2, 3], ['a', 'b', 'c'] + >>> zip(sequence1, sequence2) # doctest: +SKIP + [(1, 'a'), (2, 'b'), (3, 'c')] + >>> list(LazyZip(sequence1, sequence2)) + [(1, 'a'), (2, 'b'), (3, 'c')] + >>> sequences = [sequence1, sequence2, [6,7,8,9]] + >>> list(zip(*sequences)) == list(LazyZip(*sequences)) + True + + Lazy zips can be useful for conserving memory in cases where the argument + sequences are particularly long. + + A typical example of a use case for this class is combining long sequences + of gold standard and predicted values in a classification or tagging task + in order to calculate accuracy. By constructing tuples lazily and + avoiding the creation of an additional long sequence, memory usage can be + significantly reduced. + """ + + def __init__(self, *lists): + """ + :param lists: the underlying lists + :type lists: list(list) + """ + LazyMap.__init__(self, lambda *elts: elts, *lists) + + def iterate_from(self, index): + iterator = LazyMap.iterate_from(self, index) + while index < len(self): + yield next(iterator) + index += 1 + return + + def __len__(self): + return min(len(lst) for lst in self._lists) + + +class LazyEnumerate(LazyZip): + """ + A lazy sequence whose elements are tuples, each ontaining a count (from + zero) and a value yielded by underlying sequence. ``LazyEnumerate`` is + useful for obtaining an indexed list. The tuples are constructed lazily + -- i.e., when you read a value from the list, ``LazyEnumerate`` will + calculate that value by forming a tuple from the count of the i-th + element and the i-th element of the underlying sequence. + + ``LazyEnumerate`` is essentially a lazy version of the Python primitive + function ``enumerate``. In particular, the following two expressions are + equivalent: + + >>> from nltk.collections import LazyEnumerate + >>> sequence = ['first', 'second', 'third'] + >>> list(enumerate(sequence)) + [(0, 'first'), (1, 'second'), (2, 'third')] + >>> list(LazyEnumerate(sequence)) + [(0, 'first'), (1, 'second'), (2, 'third')] + + Lazy enumerations can be useful for conserving memory in cases where the + argument sequences are particularly long. + + A typical example of a use case for this class is obtaining an indexed + list for a long sequence of values. By constructing tuples lazily and + avoiding the creation of an additional long sequence, memory usage can be + significantly reduced. + """ + + def __init__(self, lst): + """ + :param lst: the underlying list + :type lst: list + """ + LazyZip.__init__(self, range(len(lst)), lst) + + +class LazyIteratorList(AbstractLazySequence): + """ + Wraps an iterator, loading its elements on demand + and making them subscriptable. + __repr__ displays only the first few elements. 
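+
+    A small usage sketch (an illustrative addition; it relies only on the
+    methods defined below):
+
+    >>> from nltk.collections import LazyIteratorList
+    >>> lazy = LazyIteratorList(iter(range(10)))
+    >>> lazy[3]   # consumes the iterator up to index 3, caching as it goes
+    3
+    >>> lazy[0]   # earlier elements are answered from the internal cache
+    0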
+ """ + + def __init__(self, it, known_len=None): + self._it = it + self._len = known_len + self._cache = [] + + def __len__(self): + if self._len: + return self._len + for x in self.iterate_from(len(self._cache)): + pass + self._len = len(self._cache) + return self._len + + def iterate_from(self, start): + """Create a new iterator over this list starting at the given offset.""" + while len(self._cache) < start: + v = next(self._it) + self._cache.append(v) + i = start + while i < len(self._cache): + yield self._cache[i] + i += 1 + while True: + v = next(self._it) + self._cache.append(v) + yield v + i += 1 + + def __add__(self, other): + """Return a list concatenating self with other.""" + return type(self)(chain(self, other)) + + def __radd__(self, other): + """Return a list concatenating other with self.""" + return type(self)(chain(other, self)) + + +###################################################################### +# Trie Implementation +###################################################################### +class Trie(dict): + """A Trie implementation for strings""" + + LEAF = True + + def __init__(self, strings=None): + """Builds a Trie object, which is built around a ``dict`` + + If ``strings`` is provided, it will add the ``strings``, which + consist of a ``list`` of ``strings``, to the Trie. + Otherwise, it'll construct an empty Trie. + + :param strings: List of strings to insert into the trie + (Default is ``None``) + :type strings: list(str) + + """ + super(Trie, self).__init__() + if strings: + for string in strings: + self.insert(string) + + def insert(self, string): + """Inserts ``string`` into the Trie + + :param string: String to insert into the trie + :type string: str + + :Example: + + >>> from nltk.collections import Trie + >>> trie = Trie(["abc", "def"]) + >>> expected = {'a': {'b': {'c': {True: None}}}, \ + 'd': {'e': {'f': {True: None}}}} + >>> trie == expected + True + + """ + if len(string): + self[string[0]].insert(string[1:]) + else: + # mark the string is complete + self[Trie.LEAF] = None + + def __missing__(self, key): + self[key] = Trie() + return self[key] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/collocations.py b/venv.bak/lib/python3.7/site-packages/nltk/collocations.py new file mode 100644 index 0000000..a3676fa --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/collocations.py @@ -0,0 +1,408 @@ +# Natural Language Toolkit: Collocations and Association Measures +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Joel Nothman +# URL: +# For license information, see LICENSE.TXT +# +""" +Tools to identify collocations --- words that often appear consecutively +--- within corpora. They may also be used to find other associations between +word occurrences. +See Manning and Schutze ch. 5 at http://nlp.stanford.edu/fsnlp/promo/colloc.pdf +and the Text::NSP Perl package at http://ngram.sourceforge.net + +Finding collocations requires first calculating the frequencies of words and +their appearance in the context of other words. Often the collection of words +will then requiring filtering to only retain useful content terms. Each ngram +of words may then be scored according to some association measure, in order +to determine the relative likelihood of each ngram being a collocation. + +The ``BigramCollocationFinder`` and ``TrigramCollocationFinder`` classes provide +these functionalities, dependent on being provided a function which scores a +ngram given appropriate frequency counts. 
A number of standard association +measures are provided in bigram_measures and trigram_measures. +""" +from __future__ import print_function + +# Possible TODOs: +# - consider the distinction between f(x,_) and f(x) and whether our +# approximation is good enough for fragmented data, and mention it +# - add a n-gram collocation finder with measures which only utilise n-gram +# and unigram counts (raw_freq, pmi, student_t) + +import itertools as _itertools +from six import iteritems + +from nltk.probability import FreqDist +from nltk.util import ngrams +# these two unused imports are referenced in collocations.doctest +from nltk.metrics import ContingencyMeasures, BigramAssocMeasures, TrigramAssocMeasures, QuadgramAssocMeasures +from nltk.metrics.spearman import ranks_from_scores, spearman_correlation + + +class AbstractCollocationFinder(object): + """ + An abstract base class for collocation finders whose purpose is to + collect collocation candidate frequencies, filter and rank them. + + As a minimum, collocation finders require the frequencies of each + word in a corpus, and the joint frequency of word tuples. This data + should be provided through nltk.probability.FreqDist objects or an + identical interface. + """ + + def __init__(self, word_fd, ngram_fd): + self.word_fd = word_fd + self.N = word_fd.N() + self.ngram_fd = ngram_fd + + @classmethod + def _build_new_documents( + cls, documents, window_size, pad_left=False, pad_right=False, pad_symbol=None + ): + ''' + Pad the document with the place holder according to the window_size + ''' + padding = (pad_symbol,) * (window_size - 1) + if pad_right: + return _itertools.chain.from_iterable( + _itertools.chain(doc, padding) for doc in documents + ) + if pad_left: + return _itertools.chain.from_iterable( + _itertools.chain(padding, doc) for doc in documents + ) + + @classmethod + def from_documents(cls, documents): + """Constructs a collocation finder given a collection of documents, + each of which is a list (or iterable) of tokens. + """ + # return cls.from_words(_itertools.chain(*documents)) + return cls.from_words( + cls._build_new_documents(documents, cls.default_ws, pad_right=True) + ) + + @staticmethod + def _ngram_freqdist(words, n): + return FreqDist(tuple(words[i : i + n]) for i in range(len(words) - 1)) + + def _apply_filter(self, fn=lambda ngram, freq: False): + """Generic filter removes ngrams from the frequency distribution + if the function returns True when passed an ngram tuple. + """ + tmp_ngram = FreqDist() + for ngram, freq in iteritems(self.ngram_fd): + if not fn(ngram, freq): + tmp_ngram[ngram] = freq + self.ngram_fd = tmp_ngram + + def apply_freq_filter(self, min_freq): + """Removes candidate ngrams which have frequency less than min_freq.""" + self._apply_filter(lambda ng, freq: freq < min_freq) + + def apply_ngram_filter(self, fn): + """Removes candidate ngrams (w1, w2, ...) where fn(w1, w2, ...) + evaluates to True. + """ + self._apply_filter(lambda ng, f: fn(*ng)) + + def apply_word_filter(self, fn): + """Removes candidate ngrams (w1, w2, ...) where any of (fn(w1), fn(w2), + ...) evaluates to True. + """ + self._apply_filter(lambda ng, f: any(fn(w) for w in ng)) + + def _score_ngrams(self, score_fn): + """Generates of (ngram, score) pairs as determined by the scoring + function provided. 
+ """ + for tup in self.ngram_fd: + score = self.score_ngram(score_fn, *tup) + if score is not None: + yield tup, score + + def score_ngrams(self, score_fn): + """Returns a sequence of (ngram, score) pairs ordered from highest to + lowest score, as determined by the scoring function provided. + """ + return sorted(self._score_ngrams(score_fn), key=lambda t: (-t[1], t[0])) + + def nbest(self, score_fn, n): + """Returns the top n ngrams when scored by the given function.""" + return [p for p, s in self.score_ngrams(score_fn)[:n]] + + def above_score(self, score_fn, min_score): + """Returns a sequence of ngrams, ordered by decreasing score, whose + scores each exceed the given minimum score. + """ + for ngram, score in self.score_ngrams(score_fn): + if score > min_score: + yield ngram + else: + break + + +class BigramCollocationFinder(AbstractCollocationFinder): + """A tool for the finding and ranking of bigram collocations or other + association measures. It is often useful to use from_words() rather than + constructing an instance directly. + """ + + default_ws = 2 + + def __init__(self, word_fd, bigram_fd, window_size=2): + """Construct a BigramCollocationFinder, given FreqDists for + appearances of words and (possibly non-contiguous) bigrams. + """ + AbstractCollocationFinder.__init__(self, word_fd, bigram_fd) + self.window_size = window_size + + @classmethod + def from_words(cls, words, window_size=2): + """Construct a BigramCollocationFinder for all bigrams in the given + sequence. When window_size > 2, count non-contiguous bigrams, in the + style of Church and Hanks's (1990) association ratio. + """ + wfd = FreqDist() + bfd = FreqDist() + + if window_size < 2: + raise ValueError("Specify window_size at least 2") + + for window in ngrams(words, window_size, pad_right=True): + w1 = window[0] + if w1 is None: + continue + wfd[w1] += 1 + for w2 in window[1:]: + if w2 is not None: + bfd[(w1, w2)] += 1 + return cls(wfd, bfd, window_size=window_size) + + def score_ngram(self, score_fn, w1, w2): + """Returns the score for a given bigram using the given scoring + function. Following Church and Hanks (1990), counts are scaled by + a factor of 1/(window_size - 1). + """ + n_all = self.N + n_ii = self.ngram_fd[(w1, w2)] / (self.window_size - 1.0) + if not n_ii: + return + n_ix = self.word_fd[w1] + n_xi = self.word_fd[w2] + return score_fn(n_ii, (n_ix, n_xi), n_all) + + +class TrigramCollocationFinder(AbstractCollocationFinder): + """A tool for the finding and ranking of trigram collocations or other + association measures. It is often useful to use from_words() rather than + constructing an instance directly. + """ + + default_ws = 3 + + def __init__(self, word_fd, bigram_fd, wildcard_fd, trigram_fd): + """Construct a TrigramCollocationFinder, given FreqDists for + appearances of words, bigrams, two words with any word between them, + and trigrams. + """ + AbstractCollocationFinder.__init__(self, word_fd, trigram_fd) + self.wildcard_fd = wildcard_fd + self.bigram_fd = bigram_fd + + @classmethod + def from_words(cls, words, window_size=3): + """Construct a TrigramCollocationFinder for all trigrams in the given + sequence. 
+ """ + if window_size < 3: + raise ValueError("Specify window_size at least 3") + + wfd = FreqDist() + wildfd = FreqDist() + bfd = FreqDist() + tfd = FreqDist() + for window in ngrams(words, window_size, pad_right=True): + w1 = window[0] + if w1 is None: + continue + for w2, w3 in _itertools.combinations(window[1:], 2): + wfd[w1] += 1 + if w2 is None: + continue + bfd[(w1, w2)] += 1 + if w3 is None: + continue + wildfd[(w1, w3)] += 1 + tfd[(w1, w2, w3)] += 1 + return cls(wfd, bfd, wildfd, tfd) + + def bigram_finder(self): + """Constructs a bigram collocation finder with the bigram and unigram + data from this finder. Note that this does not include any filtering + applied to this finder. + """ + return BigramCollocationFinder(self.word_fd, self.bigram_fd) + + def score_ngram(self, score_fn, w1, w2, w3): + """Returns the score for a given trigram using the given scoring + function. + """ + n_all = self.N + n_iii = self.ngram_fd[(w1, w2, w3)] + if not n_iii: + return + n_iix = self.bigram_fd[(w1, w2)] + n_ixi = self.wildcard_fd[(w1, w3)] + n_xii = self.bigram_fd[(w2, w3)] + n_ixx = self.word_fd[w1] + n_xix = self.word_fd[w2] + n_xxi = self.word_fd[w3] + return score_fn(n_iii, (n_iix, n_ixi, n_xii), (n_ixx, n_xix, n_xxi), n_all) + + +class QuadgramCollocationFinder(AbstractCollocationFinder): + """A tool for the finding and ranking of quadgram collocations or other association measures. + It is often useful to use from_words() rather than constructing an instance directly. + """ + + default_ws = 4 + + def __init__(self, word_fd, quadgram_fd, ii, iii, ixi, ixxi, iixi, ixii): + """Construct a QuadgramCollocationFinder, given FreqDists for appearances of words, + bigrams, trigrams, two words with one word and two words between them, three words + with a word between them in both variations. 
+ """ + AbstractCollocationFinder.__init__(self, word_fd, quadgram_fd) + self.iii = iii + self.ii = ii + self.ixi = ixi + self.ixxi = ixxi + self.iixi = iixi + self.ixii = ixii + + @classmethod + def from_words(cls, words, window_size=4): + if window_size < 4: + raise ValueError("Specify window_size at least 4") + ixxx = FreqDist() + iiii = FreqDist() + ii = FreqDist() + iii = FreqDist() + ixi = FreqDist() + ixxi = FreqDist() + iixi = FreqDist() + ixii = FreqDist() + + for window in ngrams(words, window_size, pad_right=True): + w1 = window[0] + if w1 is None: + continue + for w2, w3, w4 in _itertools.combinations(window[1:], 3): + ixxx[w1] += 1 + if w2 is None: + continue + ii[(w1, w2)] += 1 + if w3 is None: + continue + iii[(w1, w2, w3)] += 1 + ixi[(w1, w3)] += 1 + if w4 is None: + continue + iiii[(w1, w2, w3, w4)] += 1 + ixxi[(w1, w4)] += 1 + ixii[(w1, w3, w4)] += 1 + iixi[(w1, w2, w4)] += 1 + + return cls(ixxx, iiii, ii, iii, ixi, ixxi, iixi, ixii) + + def score_ngram(self, score_fn, w1, w2, w3, w4): + n_all = self.N + n_iiii = self.ngram_fd[(w1, w2, w3, w4)] + if not n_iiii: + return + n_iiix = self.iii[(w1, w2, w3)] + n_xiii = self.iii[(w2, w3, w4)] + n_iixi = self.iixi[(w1, w2, w4)] + n_ixii = self.ixii[(w1, w3, w4)] + + n_iixx = self.ii[(w1, w2)] + n_xxii = self.ii[(w3, w4)] + n_xiix = self.ii[(w2, w3)] + n_ixix = self.ixi[(w1, w3)] + n_ixxi = self.ixxi[(w1, w4)] + n_xixi = self.ixi[(w2, w4)] + + n_ixxx = self.word_fd[w1] + n_xixx = self.word_fd[w2] + n_xxix = self.word_fd[w3] + n_xxxi = self.word_fd[w4] + return score_fn( + n_iiii, + (n_iiix, n_iixi, n_ixii, n_xiii), + (n_iixx, n_ixix, n_ixxi, n_xixi, n_xxii, n_xiix), + (n_ixxx, n_xixx, n_xxix, n_xxxi), + n_all, + ) + + +def demo(scorer=None, compare_scorer=None): + """Finds bigram collocations in the files of the WebText corpus.""" + from nltk.metrics import ( + BigramAssocMeasures, + spearman_correlation, + ranks_from_scores, + ) + + if scorer is None: + scorer = BigramAssocMeasures.likelihood_ratio + if compare_scorer is None: + compare_scorer = BigramAssocMeasures.raw_freq + + from nltk.corpus import stopwords, webtext + + ignored_words = stopwords.words('english') + word_filter = lambda w: len(w) < 3 or w.lower() in ignored_words + + for file in webtext.fileids(): + words = [word.lower() for word in webtext.words(file)] + + cf = BigramCollocationFinder.from_words(words) + cf.apply_freq_filter(3) + cf.apply_word_filter(word_filter) + + corr = spearman_correlation( + ranks_from_scores(cf.score_ngrams(scorer)), + ranks_from_scores(cf.score_ngrams(compare_scorer)), + ) + print(file) + print('\t', [' '.join(tup) for tup in cf.nbest(scorer, 15)]) + print('\t Correlation to %s: %0.4f' % (compare_scorer.__name__, corr)) + + +# Slows down loading too much +# bigram_measures = BigramAssocMeasures() +# trigram_measures = TrigramAssocMeasures() + +if __name__ == '__main__': + import sys + from nltk.metrics import BigramAssocMeasures + + try: + scorer = eval('BigramAssocMeasures.' + sys.argv[1]) + except IndexError: + scorer = None + try: + compare_scorer = eval('BigramAssocMeasures.' 
+ sys.argv[2]) + except IndexError: + compare_scorer = None + + demo(scorer, compare_scorer) + +__all__ = [ + 'BigramCollocationFinder', + 'TrigramCollocationFinder', + 'QuadgramCollocationFinder', +] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/compat.py b/venv.bak/lib/python3.7/site-packages/nltk/compat.py new file mode 100644 index 0000000..fef28a6 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/compat.py @@ -0,0 +1,373 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Compatibility +# +# Copyright (C) 2001-2019 NLTK Project +# +# URL: +# For license information, see LICENSE.TXT + +from __future__ import absolute_import, print_function +import os +import sys +from functools import update_wrapper, wraps +import fractions +import unicodedata + +from six import string_types, text_type + +# Python 2/3 compatibility layer. Based on six. + +PY3 = sys.version_info[0] == 3 + +if PY3: + + def get_im_class(meth): + return meth.__self__.__class__ + + import io + + StringIO = io.StringIO + BytesIO = io.BytesIO + + from datetime import timezone + + UTC = timezone.utc + + from tempfile import TemporaryDirectory + +else: + + def get_im_class(meth): + return meth.im_class + + try: + from cStringIO import StringIO + except ImportError: + from StringIO import StringIO + BytesIO = StringIO + + from datetime import tzinfo, timedelta + + ZERO = timedelta(0) + HOUR = timedelta(hours=1) + + # A UTC class for python 2.7 + class UTC(tzinfo): + """UTC""" + + def utcoffset(self, dt): + return ZERO + + def tzname(self, dt): + return "UTC" + + def dst(self, dt): + return ZERO + + UTC = UTC() + + import csv + import codecs + import cStringIO + + class UnicodeWriter: + """ + A CSV writer which will write rows to CSV file "f", + which is encoded in the given encoding. + see https://docs.python.org/2/library/csv.html + """ + + def __init__( + self, f, dialect=csv.excel, encoding="utf-8", errors='replace', **kwds + ): + # Redirect output to a queue + self.queue = cStringIO.StringIO() + self.writer = csv.writer(self.queue, dialect=dialect, **kwds) + self.stream = f + encoder_cls = codecs.getincrementalencoder(encoding) + self.encoder = encoder_cls(errors=errors) + + def encode(self, data): + if isinstance(data, string_types): + return data.encode("utf-8") + else: + return data + + def writerow(self, row): + self.writer.writerow([self.encode(s) for s in row]) + # Fetch UTF-8 output from the queue ... + data = self.queue.getvalue() + data = data.decode("utf-8") + # ... and reencode it into the target encoding + data = self.encoder.encode(data, 'replace') + # write to the target stream + self.stream.write(data) + # empty queue + self.queue.truncate(0) + + import warnings as _warnings + import os as _os + from tempfile import mkdtemp + + class TemporaryDirectory(object): + """Create and return a temporary directory. This has the same + behavior as mkdtemp but can be used as a context manager. For + example: + + with TemporaryDirectory() as tmpdir: + ... + + Upon exiting the context, the directory and everything contained + in it are removed. 
+ + http://stackoverflow.com/questions/19296146/tempfile-temporarydirectory-context-manager-in-python-2-7 + """ + + def __init__(self, suffix="", prefix="tmp", dir=None): + self._closed = False + self.name = None # Handle mkdtemp raising an exception + self.name = mkdtemp(suffix, prefix, dir) + + def __repr__(self): + return "<{} {!r}>".format(self.__class__.__name__, self.name) + + def __enter__(self): + return self.name + + def cleanup(self, _warn=False): + if self.name and not self._closed: + try: + self._rmtree(self.name) + except (TypeError, AttributeError) as ex: + # Issue #10188: Emit a warning on stderr + # if the directory could not be cleaned + # up due to missing globals + if "None" not in str(ex): + raise + print( + "ERROR: {!r} while cleaning up {!r}".format(ex, self), + file=sys.stderr, + ) + return + self._closed = True + if _warn: + self._warn("Implicitly cleaning up {!r}".format(self), Warning) + + def __exit__(self, exc, value, tb): + self.cleanup() + + def __del__(self): + # Issue a Warning if implicit cleanup needed + self.cleanup(_warn=True) + + # XXX (ncoghlan): The following code attempts to make + # this class tolerant of the module nulling out process + # that happens during CPython interpreter shutdown + # Alas, it doesn't actually manage it. See issue #10188 + _listdir = staticmethod(_os.listdir) + _path_join = staticmethod(_os.path.join) + _isdir = staticmethod(_os.path.isdir) + _islink = staticmethod(_os.path.islink) + _remove = staticmethod(_os.remove) + _rmdir = staticmethod(_os.rmdir) + _warn = _warnings.warn + + def _rmtree(self, path): + # Essentially a stripped down version of shutil.rmtree. We can't + # use globals because they may be None'ed out at shutdown. + for name in self._listdir(path): + fullname = self._path_join(path, name) + try: + isdir = self._isdir(fullname) and not self._islink(fullname) + except OSError: + isdir = False + if isdir: + self._rmtree(fullname) + else: + try: + self._remove(fullname) + except OSError: + pass + try: + self._rmdir(path) + except OSError: + pass + + +# ======= Compatibility for datasets that care about Python versions ======== + +# The following datasets have a /PY3 subdirectory containing +# a full copy of the data which has been re-encoded or repickled. 
+DATA_UPDATES = [ + ("chunkers", "maxent_ne_chunker"), + ("help", "tagsets"), + ("taggers", "maxent_treebank_pos_tagger"), + ("tokenizers", "punkt"), +] + +_PY3_DATA_UPDATES = [os.path.join(*path_list) for path_list in DATA_UPDATES] + + +def add_py3_data(path): + if PY3: + for item in _PY3_DATA_UPDATES: + if item in str(path) and "/PY3" not in str(path): + pos = path.index(item) + len(item) + if path[pos : pos + 4] == ".zip": + pos += 4 + path = path[:pos] + "/PY3" + path[pos:] + break + return path + + +# for use in adding /PY3 to the second (filename) argument +# of the file pointers in data.py +def py3_data(init_func): + def _decorator(*args, **kwargs): + args = (args[0], add_py3_data(args[1])) + args[2:] + return init_func(*args, **kwargs) + + return wraps(init_func)(_decorator) + + +# ======= Compatibility layer for __str__ and __repr__ ========== +def remove_accents(text): + + if isinstance(text, bytes): + text = text.decode('ascii') + + category = unicodedata.category # this gives a small (~10%) speedup + return ''.join( + c for c in unicodedata.normalize('NFKD', text) if category(c) != 'Mn' + ) + + +# Select the best transliteration method: +try: + # Older versions of Unidecode are licensed under Artistic License; + # assume an older version is installed. + from unidecode import unidecode as transliterate +except ImportError: + try: + # text-unidecode implementation is worse than Unidecode + # implementation so Unidecode is preferred. + from text_unidecode import unidecode as transliterate + except ImportError: + # This transliteration method should be enough + # for many Western languages. + transliterate = remove_accents + + +def python_2_unicode_compatible(klass): + """ + This decorator defines __unicode__ method and fixes + __repr__ and __str__ methods under Python 2. + + To support Python 2 and 3 with a single code base, + define __str__ and __repr__ methods returning unicode + text and apply this decorator to the class. + + Original __repr__ and __str__ would be available + as unicode_repr and __unicode__ (under both Python 2 + and Python 3). + """ + + if not issubclass(klass, object): + raise ValueError("This decorator doesn't work for old-style classes") + + # both __unicode__ and unicode_repr are public because they + # may be useful in console under Python 2.x + + # if __str__ or __repr__ are not overriden in a subclass, + # they may be already fixed by this decorator in a parent class + # and we shouldn't them again + + if not _was_fixed(klass.__str__): + klass.__unicode__ = klass.__str__ + if not PY3: + klass.__str__ = _7bit(_transliterated(klass.__unicode__)) + + if not _was_fixed(klass.__repr__): + klass.unicode_repr = klass.__repr__ + if not PY3: + klass.__repr__ = _7bit(klass.unicode_repr) + + return klass + + +def unicode_repr(obj): + """ + For classes that was fixed with @python_2_unicode_compatible + ``unicode_repr`` returns ``obj.unicode_repr()``; for unicode strings + the result is returned without "u" letter (to make output the + same under Python 2.x and Python 3.x); for other variables + it is the same as ``repr``. 
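+
+    A one-line illustration (the result is the same under Python 2 and 3):
+
+    >>> from nltk.compat import unicode_repr
+    >>> unicode_repr(u'words')
+    "'words'"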
+ """ + if PY3: + return repr(obj) + + # Python 2.x + if hasattr(obj, 'unicode_repr'): + return obj.unicode_repr() + + if isinstance(obj, text_type): + return repr(obj)[1:] # strip "u" letter from output + + return repr(obj) + + +def _transliterated(method): + def wrapper(self): + return transliterate(method(self)) + + update_wrapper(wrapper, method, ["__name__", "__doc__"]) + if hasattr(method, "_nltk_compat_7bit"): + wrapper._nltk_compat_7bit = method._nltk_compat_7bit + + wrapper._nltk_compat_transliterated = True + return wrapper + + +def _7bit(method): + def wrapper(self): + return method(self).encode('ascii', 'backslashreplace') + + update_wrapper(wrapper, method, ["__name__", "__doc__"]) + + if hasattr(method, "_nltk_compat_transliterated"): + wrapper._nltk_compat_transliterated = method._nltk_compat_transliterated + + wrapper._nltk_compat_7bit = True + return wrapper + + +def _was_fixed(method): + return getattr(method, "_nltk_compat_7bit", False) or getattr( + method, "_nltk_compat_transliterated", False + ) + + +class Fraction(fractions.Fraction): + """ + This is a simplified backwards compatible version of fractions.Fraction + from Python >=3.5. It adds the `_normalize` parameter such that it does + not normalize the denominator to the Greatest Common Divisor (gcd) when + the numerator is 0. + + This is most probably only used by the nltk.translate.bleu_score.py where + numerator and denominator of the different ngram precisions are mutable. + But the idea of "mutable" fraction might not be applicable to other usages, + See http://stackoverflow.com/questions/34561265 + + This objects should be deprecated once NLTK stops supporting Python < 3.5 + See https://github.com/nltk/nltk/issues/1330 + """ + + def __new__(cls, numerator=0, denominator=None, _normalize=True): + cls = super(Fraction, cls).__new__(cls, numerator, denominator) + # To emulate fraction.Fraction.from_float across Python >=2.7, + # check that numerator is an integer and denominator is not None. + if not _normalize and type(numerator) == int and denominator: + cls._numerator = numerator + cls._denominator = denominator + return cls diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/__init__.py new file mode 100644 index 0000000..54b4d36 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/__init__.py @@ -0,0 +1,493 @@ +# Natural Language Toolkit: Corpus Readers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +# TODO this docstring isn't up-to-date! +""" +NLTK corpus readers. The modules in this package provide functions +that can be used to read corpus files in a variety of formats. These +functions can be used to read both the corpus files that are +distributed in the NLTK corpus package, and corpus files that are part +of external corpora. + +Available Corpora +================= + +Please see http://www.nltk.org/nltk_data/ for a complete list. +Install corpora using nltk.download(). + +Corpus Reader Functions +======================= +Each corpus module defines one or more "corpus reader functions", +which can be used to read documents from that corpus. These functions +take an argument, ``item``, which is used to indicate which document +should be read from the corpus: + +- If ``item`` is one of the unique identifiers listed in the corpus + module's ``items`` variable, then the corresponding document will + be loaded from the NLTK corpus package. 
+- If ``item`` is a filename, then that file will be read. + +Additionally, corpus reader functions can be given lists of item +names; in which case, they will return a concatenation of the +corresponding documents. + +Corpus reader functions are named based on the type of information +they return. Some common examples, and their return types, are: + +- words(): list of str +- sents(): list of (list of str) +- paras(): list of (list of (list of str)) +- tagged_words(): list of (str,str) tuple +- tagged_sents(): list of (list of (str,str)) +- tagged_paras(): list of (list of (list of (str,str))) +- chunked_sents(): list of (Tree w/ (str,str) leaves) +- parsed_sents(): list of (Tree with str leaves) +- parsed_paras(): list of (list of (Tree with str leaves)) +- xml(): A single xml ElementTree +- raw(): unprocessed corpus contents + +For example, to read a list of the words in the Brown Corpus, use +``nltk.corpus.brown.words()``: + + >>> from nltk.corpus import brown + >>> print(", ".join(brown.words())) + The, Fulton, County, Grand, Jury, said, ... + +""" + +import re + +from nltk.tokenize import RegexpTokenizer +from nltk.corpus.util import LazyCorpusLoader +from nltk.corpus.reader import * + +abc = LazyCorpusLoader( + 'abc', + PlaintextCorpusReader, + r'(?!\.).*\.txt', + encoding=[('science', 'latin_1'), ('rural', 'utf8')], +) +alpino = LazyCorpusLoader('alpino', AlpinoCorpusReader, tagset='alpino') +brown = LazyCorpusLoader( + 'brown', + CategorizedTaggedCorpusReader, + r'c[a-z]\d\d', + cat_file='cats.txt', + tagset='brown', + encoding="ascii", +) +cess_cat = LazyCorpusLoader( + 'cess_cat', + BracketParseCorpusReader, + r'(?!\.).*\.tbf', + tagset='unknown', + encoding='ISO-8859-15', +) +cess_esp = LazyCorpusLoader( + 'cess_esp', + BracketParseCorpusReader, + r'(?!\.).*\.tbf', + tagset='unknown', + encoding='ISO-8859-15', +) +cmudict = LazyCorpusLoader('cmudict', CMUDictCorpusReader, ['cmudict']) +comtrans = LazyCorpusLoader('comtrans', AlignedCorpusReader, r'(?!\.).*\.txt') +comparative_sentences = LazyCorpusLoader( + 'comparative_sentences', + ComparativeSentencesCorpusReader, + r'labeledSentences\.txt', + encoding='latin-1', +) +conll2000 = LazyCorpusLoader( + 'conll2000', + ConllChunkCorpusReader, + ['train.txt', 'test.txt'], + ('NP', 'VP', 'PP'), + tagset='wsj', + encoding='ascii', +) +conll2002 = LazyCorpusLoader( + 'conll2002', + ConllChunkCorpusReader, + '.*\.(test|train).*', + ('LOC', 'PER', 'ORG', 'MISC'), + encoding='utf-8', +) +conll2007 = LazyCorpusLoader( + 'conll2007', + DependencyCorpusReader, + '.*\.(test|train).*', + encoding=[('eus', 'ISO-8859-2'), ('esp', 'utf8')], +) +crubadan = LazyCorpusLoader('crubadan', CrubadanCorpusReader, '.*\.txt') +dependency_treebank = LazyCorpusLoader( + 'dependency_treebank', DependencyCorpusReader, '.*\.dp', encoding='ascii' +) +floresta = LazyCorpusLoader( + 'floresta', + BracketParseCorpusReader, + r'(?!\.).*\.ptb', + '#', + tagset='unknown', + encoding='ISO-8859-15', +) +framenet15 = LazyCorpusLoader( + 'framenet_v15', + FramenetCorpusReader, + [ + 'frRelation.xml', + 'frameIndex.xml', + 'fulltextIndex.xml', + 'luIndex.xml', + 'semTypes.xml', + ], +) +framenet = LazyCorpusLoader( + 'framenet_v17', + FramenetCorpusReader, + [ + 'frRelation.xml', + 'frameIndex.xml', + 'fulltextIndex.xml', + 'luIndex.xml', + 'semTypes.xml', + ], +) +gazetteers = LazyCorpusLoader( + 'gazetteers', WordListCorpusReader, r'(?!LICENSE|\.).*\.txt', encoding='ISO-8859-2' +) +genesis = LazyCorpusLoader( + 'genesis', + PlaintextCorpusReader, + r'(?!\.).*\.txt', + 
encoding=[ + ('finnish|french|german', 'latin_1'), + ('swedish', 'cp865'), + ('.*', 'utf_8'), + ], +) +gutenberg = LazyCorpusLoader( + 'gutenberg', PlaintextCorpusReader, r'(?!\.).*\.txt', encoding='latin1' +) +ieer = LazyCorpusLoader('ieer', IEERCorpusReader, r'(?!README|\.).*') +inaugural = LazyCorpusLoader( + 'inaugural', PlaintextCorpusReader, r'(?!\.).*\.txt', encoding='latin1' +) +# [XX] This should probably just use TaggedCorpusReader: +indian = LazyCorpusLoader( + 'indian', IndianCorpusReader, r'(?!\.).*\.pos', tagset='unknown', encoding='utf8' +) + +jeita = LazyCorpusLoader('jeita', ChasenCorpusReader, r'.*\.chasen', encoding='utf-8') +knbc = LazyCorpusLoader('knbc/corpus1', KNBCorpusReader, r'.*/KN.*', encoding='euc-jp') +lin_thesaurus = LazyCorpusLoader('lin_thesaurus', LinThesaurusCorpusReader, r'.*\.lsp') +mac_morpho = LazyCorpusLoader( + 'mac_morpho', + MacMorphoCorpusReader, + r'(?!\.).*\.txt', + tagset='unknown', + encoding='latin-1', +) +machado = LazyCorpusLoader( + 'machado', + PortugueseCategorizedPlaintextCorpusReader, + r'(?!\.).*\.txt', + cat_pattern=r'([a-z]*)/.*', + encoding='latin-1', +) +masc_tagged = LazyCorpusLoader( + 'masc_tagged', + CategorizedTaggedCorpusReader, + r'(spoken|written)/.*\.txt', + cat_file='categories.txt', + tagset='wsj', + encoding="utf-8", + sep="_", +) +movie_reviews = LazyCorpusLoader( + 'movie_reviews', + CategorizedPlaintextCorpusReader, + r'(?!\.).*\.txt', + cat_pattern=r'(neg|pos)/.*', + encoding='ascii', +) +multext_east = LazyCorpusLoader( + 'mte_teip5', MTECorpusReader, r'(oana).*\.xml', encoding="utf-8" +) +names = LazyCorpusLoader( + 'names', WordListCorpusReader, r'(?!\.).*\.txt', encoding='ascii' +) +nps_chat = LazyCorpusLoader( + 'nps_chat', NPSChatCorpusReader, r'(?!README|\.).*\.xml', tagset='wsj' +) +opinion_lexicon = LazyCorpusLoader( + 'opinion_lexicon', + OpinionLexiconCorpusReader, + r'(\w+)\-words\.txt', + encoding='ISO-8859-2', +) +ppattach = LazyCorpusLoader( + 'ppattach', PPAttachmentCorpusReader, ['training', 'test', 'devset'] +) +product_reviews_1 = LazyCorpusLoader( + 'product_reviews_1', ReviewsCorpusReader, r'^(?!Readme).*\.txt', encoding='utf8' +) +product_reviews_2 = LazyCorpusLoader( + 'product_reviews_2', ReviewsCorpusReader, r'^(?!Readme).*\.txt', encoding='utf8' +) +pros_cons = LazyCorpusLoader( + 'pros_cons', + ProsConsCorpusReader, + r'Integrated(Cons|Pros)\.txt', + cat_pattern=r'Integrated(Cons|Pros)\.txt', + encoding='ISO-8859-2', +) +ptb = LazyCorpusLoader( # Penn Treebank v3: WSJ and Brown portions + 'ptb', + CategorizedBracketParseCorpusReader, + r'(WSJ/\d\d/WSJ_\d\d|BROWN/C[A-Z]/C[A-Z])\d\d.MRG', + cat_file='allcats.txt', + tagset='wsj', +) +qc = LazyCorpusLoader( + 'qc', StringCategoryCorpusReader, ['train.txt', 'test.txt'], encoding='ISO-8859-2' +) +reuters = LazyCorpusLoader( + 'reuters', + CategorizedPlaintextCorpusReader, + '(training|test).*', + cat_file='cats.txt', + encoding='ISO-8859-2', +) +rte = LazyCorpusLoader('rte', RTECorpusReader, r'(?!\.).*\.xml') +senseval = LazyCorpusLoader('senseval', SensevalCorpusReader, r'(?!\.).*\.pos') +sentence_polarity = LazyCorpusLoader( + 'sentence_polarity', + CategorizedSentencesCorpusReader, + r'rt-polarity\.(neg|pos)', + cat_pattern=r'rt-polarity\.(neg|pos)', + encoding='utf-8', +) +sentiwordnet = LazyCorpusLoader( + 'sentiwordnet', SentiWordNetCorpusReader, 'SentiWordNet_3.0.0.txt', encoding='utf-8' +) +shakespeare = LazyCorpusLoader('shakespeare', XMLCorpusReader, r'(?!\.).*\.xml') +sinica_treebank = LazyCorpusLoader( + 'sinica_treebank', + 
SinicaTreebankCorpusReader, + ['parsed'], + tagset='unknown', + encoding='utf-8', +) +state_union = LazyCorpusLoader( + 'state_union', PlaintextCorpusReader, r'(?!\.).*\.txt', encoding='ISO-8859-2' +) +stopwords = LazyCorpusLoader( + 'stopwords', WordListCorpusReader, r'(?!README|\.).*', encoding='utf8' +) +subjectivity = LazyCorpusLoader( + 'subjectivity', + CategorizedSentencesCorpusReader, + r'(quote.tok.gt9|plot.tok.gt9)\.5000', + cat_map={'quote.tok.gt9.5000': ['subj'], 'plot.tok.gt9.5000': ['obj']}, + encoding='latin-1', +) +swadesh = LazyCorpusLoader( + 'swadesh', SwadeshCorpusReader, r'(?!README|\.).*', encoding='utf8' +) +swadesh110 = LazyCorpusLoader( + 'panlex_swadesh', PanlexSwadeshCorpusReader, r'swadesh110/.*\.txt', encoding='utf8' +) +swadesh207 = LazyCorpusLoader( + 'panlex_swadesh', PanlexSwadeshCorpusReader, r'swadesh207/.*\.txt', encoding='utf8' +) +switchboard = LazyCorpusLoader('switchboard', SwitchboardCorpusReader, tagset='wsj') +timit = LazyCorpusLoader('timit', TimitCorpusReader) +timit_tagged = LazyCorpusLoader( + 'timit', TimitTaggedCorpusReader, '.+\.tags', tagset='wsj', encoding='ascii' +) +toolbox = LazyCorpusLoader( + 'toolbox', ToolboxCorpusReader, r'(?!.*(README|\.)).*\.(dic|txt)' +) +treebank = LazyCorpusLoader( + 'treebank/combined', + BracketParseCorpusReader, + r'wsj_.*\.mrg', + tagset='wsj', + encoding='ascii', +) +treebank_chunk = LazyCorpusLoader( + 'treebank/tagged', + ChunkedCorpusReader, + r'wsj_.*\.pos', + sent_tokenizer=RegexpTokenizer(r'(?<=/\.)\s*(?![^\[]*\])', gaps=True), + para_block_reader=tagged_treebank_para_block_reader, + tagset='wsj', + encoding='ascii', +) +treebank_raw = LazyCorpusLoader( + 'treebank/raw', PlaintextCorpusReader, r'wsj_.*', encoding='ISO-8859-2' +) +twitter_samples = LazyCorpusLoader('twitter_samples', TwitterCorpusReader, '.*\.json') +udhr = LazyCorpusLoader('udhr', UdhrCorpusReader) +udhr2 = LazyCorpusLoader('udhr2', PlaintextCorpusReader, r'.*\.txt', encoding='utf8') +universal_treebanks = LazyCorpusLoader( + 'universal_treebanks_v20', + ConllCorpusReader, + r'.*\.conll', + columntypes=( + 'ignore', + 'words', + 'ignore', + 'ignore', + 'pos', + 'ignore', + 'ignore', + 'ignore', + 'ignore', + 'ignore', + ), +) +verbnet = LazyCorpusLoader('verbnet', VerbnetCorpusReader, r'(?!\.).*\.xml') +webtext = LazyCorpusLoader( + 'webtext', PlaintextCorpusReader, r'(?!README|\.).*\.txt', encoding='ISO-8859-2' +) +wordnet = LazyCorpusLoader( + 'wordnet', + WordNetCorpusReader, + LazyCorpusLoader('omw', CorpusReader, r'.*/wn-data-.*\.tab', encoding='utf8'), +) +wordnet_ic = LazyCorpusLoader('wordnet_ic', WordNetICCorpusReader, '.*\.dat') +words = LazyCorpusLoader( + 'words', WordListCorpusReader, r'(?!README|\.).*', encoding='ascii' +) + +# defined after treebank +propbank = LazyCorpusLoader( + 'propbank', + PropbankCorpusReader, + 'prop.txt', + 'frames/.*\.xml', + 'verbs.txt', + lambda filename: re.sub(r'^wsj/\d\d/', '', filename), + treebank, +) # Must be defined *after* treebank corpus. +nombank = LazyCorpusLoader( + 'nombank.1.0', + NombankCorpusReader, + 'nombank.1.0', + 'frames/.*\.xml', + 'nombank.1.0.words', + lambda filename: re.sub(r'^wsj/\d\d/', '', filename), + treebank, +) # Must be defined *after* treebank corpus. +propbank_ptb = LazyCorpusLoader( + 'propbank', + PropbankCorpusReader, + 'prop.txt', + 'frames/.*\.xml', + 'verbs.txt', + lambda filename: filename.upper(), + ptb, +) # Must be defined *after* ptb corpus. 
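+# An illustrative, commented-out registration in the same style as the loaders
+# above; 'my_corpus' and its file pattern are placeholders and are not part of
+# the nltk_data distribution:
+# my_corpus = LazyCorpusLoader(
+#     'my_corpus', PlaintextCorpusReader, r'(?!\.).*\.txt', encoding='utf8'
+# )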
+nombank_ptb = LazyCorpusLoader( + 'nombank.1.0', + NombankCorpusReader, + 'nombank.1.0', + 'frames/.*\.xml', + 'nombank.1.0.words', + lambda filename: filename.upper(), + ptb, +) # Must be defined *after* ptb corpus. +semcor = LazyCorpusLoader( + 'semcor', SemcorCorpusReader, r'brown./tagfiles/br-.*\.xml', wordnet +) # Must be defined *after* wordnet corpus. + +nonbreaking_prefixes = LazyCorpusLoader( + 'nonbreaking_prefixes', + NonbreakingPrefixesCorpusReader, + r'(?!README|\.).*', + encoding='utf8', +) +perluniprops = LazyCorpusLoader( + 'perluniprops', + UnicharsCorpusReader, + r'(?!README|\.).*', + nltk_data_subdir='misc', + encoding='utf8', +) + +# mwa_ppdb = LazyCorpusLoader( +# 'mwa_ppdb', MWAPPDBCorpusReader, r'(?!README|\.).*', nltk_data_subdir='misc', encoding='utf8') + +# See https://github.com/nltk/nltk/issues/1579 +# and https://github.com/nltk/nltk/issues/1716 +# +# pl196x = LazyCorpusLoader( +# 'pl196x', Pl196xCorpusReader, r'[a-z]-.*\.xml', +# cat_file='cats.txt', textid_file='textids.txt', encoding='utf8') +# +# ipipan = LazyCorpusLoader( +# 'ipipan', IPIPANCorpusReader, r'(?!\.).*morph\.xml') +# +# nkjp = LazyCorpusLoader( +# 'nkjp', NKJPCorpusReader, r'', encoding='utf8') +# +# panlex_lite = LazyCorpusLoader( +# 'panlex_lite', PanLexLiteCorpusReader) +# +# ycoe = LazyCorpusLoader( +# 'ycoe', YCOECorpusReader) +# +# corpus not available with NLTK; these lines caused help(nltk.corpus) to break +# hebrew_treebank = LazyCorpusLoader( +# 'hebrew_treebank', BracketParseCorpusReader, r'.*\.txt') + +# FIXME: override any imported demo from various corpora, see https://github.com/nltk/nltk/issues/2116 +def demo(): + # This is out-of-date: + abc.demo() + brown.demo() + # chat80.demo() + cmudict.demo() + conll2000.demo() + conll2002.demo() + genesis.demo() + gutenberg.demo() + ieer.demo() + inaugural.demo() + indian.demo() + names.demo() + ppattach.demo() + senseval.demo() + shakespeare.demo() + sinica_treebank.demo() + state_union.demo() + stopwords.demo() + timit.demo() + toolbox.demo() + treebank.demo() + udhr.demo() + webtext.demo() + words.demo() + + +# ycoe.demo() + +if __name__ == '__main__': + # demo() + pass + +# ** this is for nose ** +# unload all corpus after tests +def teardown_module(module=None): + import nltk.corpus + + for name in dir(nltk.corpus): + obj = getattr(nltk.corpus, name, None) + if isinstance(obj, CorpusReader) and hasattr(obj, '_unload'): + obj._unload() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..368f3b5 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/__pycache__/europarl_raw.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/__pycache__/europarl_raw.cpython-37.pyc new file mode 100644 index 0000000..57ae00b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/__pycache__/europarl_raw.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000..a64ed14 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/__pycache__/util.cpython-37.pyc differ diff --git 
a/venv.bak/lib/python3.7/site-packages/nltk/corpus/europarl_raw.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/europarl_raw.py new file mode 100644 index 0000000..b03011c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/europarl_raw.py @@ -0,0 +1,55 @@ +# Natural Language Toolkit: Europarl Corpus Readers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Nitin Madnani +# URL: +# For license information, see LICENSE.TXT + +import re +from nltk.corpus.util import LazyCorpusLoader +from nltk.corpus.reader import * + +# Create a new corpus reader instance for each European language +danish = LazyCorpusLoader( + 'europarl_raw/danish', EuroparlCorpusReader, r'ep-.*\.da', encoding='utf-8' +) + +dutch = LazyCorpusLoader( + 'europarl_raw/dutch', EuroparlCorpusReader, r'ep-.*\.nl', encoding='utf-8' +) + +english = LazyCorpusLoader( + 'europarl_raw/english', EuroparlCorpusReader, r'ep-.*\.en', encoding='utf-8' +) + +finnish = LazyCorpusLoader( + 'europarl_raw/finnish', EuroparlCorpusReader, r'ep-.*\.fi', encoding='utf-8' +) + +french = LazyCorpusLoader( + 'europarl_raw/french', EuroparlCorpusReader, r'ep-.*\.fr', encoding='utf-8' +) + +german = LazyCorpusLoader( + 'europarl_raw/german', EuroparlCorpusReader, r'ep-.*\.de', encoding='utf-8' +) + +greek = LazyCorpusLoader( + 'europarl_raw/greek', EuroparlCorpusReader, r'ep-.*\.el', encoding='utf-8' +) + +italian = LazyCorpusLoader( + 'europarl_raw/italian', EuroparlCorpusReader, r'ep-.*\.it', encoding='utf-8' +) + +portuguese = LazyCorpusLoader( + 'europarl_raw/portuguese', EuroparlCorpusReader, r'ep-.*\.pt', encoding='utf-8' +) + +spanish = LazyCorpusLoader( + 'europarl_raw/spanish', EuroparlCorpusReader, r'ep-.*\.es', encoding='utf-8' +) + +swedish = LazyCorpusLoader( + 'europarl_raw/swedish', EuroparlCorpusReader, r'ep-.*\.sv', encoding='utf-8' +) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__init__.py new file mode 100644 index 0000000..f8c9585 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__init__.py @@ -0,0 +1,183 @@ +# Natural Language Toolkit: Corpus Readers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +NLTK corpus readers. The modules in this package provide functions +that can be used to read corpus fileids in a variety of formats. These +functions can be used to read both the corpus fileids that are +distributed in the NLTK corpus package, and corpus fileids that are part +of external corpora. + +Corpus Reader Functions +======================= +Each corpus module defines one or more "corpus reader functions", +which can be used to read documents from that corpus. These functions +take an argument, ``item``, which is used to indicate which document +should be read from the corpus: + +- If ``item`` is one of the unique identifiers listed in the corpus + module's ``items`` variable, then the corresponding document will + be loaded from the NLTK corpus package. +- If ``item`` is a fileid, then that file will be read. + +Additionally, corpus reader functions can be given lists of item +names; in which case, they will return a concatenation of the +corresponding documents. + +Corpus reader functions are named based on the type of information +they return. 
Some common examples, and their return types, are: + +- words(): list of str +- sents(): list of (list of str) +- paras(): list of (list of (list of str)) +- tagged_words(): list of (str,str) tuple +- tagged_sents(): list of (list of (str,str)) +- tagged_paras(): list of (list of (list of (str,str))) +- chunked_sents(): list of (Tree w/ (str,str) leaves) +- parsed_sents(): list of (Tree with str leaves) +- parsed_paras(): list of (list of (Tree with str leaves)) +- xml(): A single xml ElementTree +- raw(): unprocessed corpus contents + +For example, to read a list of the words in the Brown Corpus, use +``nltk.corpus.brown.words()``: + + >>> from nltk.corpus import brown + >>> print(", ".join(brown.words())) + The, Fulton, County, Grand, Jury, said, ... + +""" + +from nltk.corpus.reader.plaintext import * +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * +from nltk.corpus.reader.tagged import * +from nltk.corpus.reader.cmudict import * +from nltk.corpus.reader.conll import * +from nltk.corpus.reader.chunked import * +from nltk.corpus.reader.wordlist import * +from nltk.corpus.reader.xmldocs import * +from nltk.corpus.reader.ppattach import * +from nltk.corpus.reader.senseval import * +from nltk.corpus.reader.ieer import * +from nltk.corpus.reader.sinica_treebank import * +from nltk.corpus.reader.bracket_parse import * +from nltk.corpus.reader.indian import * +from nltk.corpus.reader.toolbox import * +from nltk.corpus.reader.timit import * +from nltk.corpus.reader.ycoe import * +from nltk.corpus.reader.rte import * +from nltk.corpus.reader.string_category import * +from nltk.corpus.reader.propbank import * +from nltk.corpus.reader.verbnet import * +from nltk.corpus.reader.bnc import * +from nltk.corpus.reader.nps_chat import * +from nltk.corpus.reader.wordnet import * +from nltk.corpus.reader.switchboard import * +from nltk.corpus.reader.dependency import * +from nltk.corpus.reader.nombank import * +from nltk.corpus.reader.ipipan import * +from nltk.corpus.reader.pl196x import * +from nltk.corpus.reader.knbc import * +from nltk.corpus.reader.chasen import * +from nltk.corpus.reader.childes import * +from nltk.corpus.reader.aligned import * +from nltk.corpus.reader.lin import * +from nltk.corpus.reader.semcor import * +from nltk.corpus.reader.framenet import * +from nltk.corpus.reader.udhr import * +from nltk.corpus.reader.bnc import * +from nltk.corpus.reader.sentiwordnet import * +from nltk.corpus.reader.twitter import * +from nltk.corpus.reader.nkjp import * +from nltk.corpus.reader.crubadan import * +from nltk.corpus.reader.mte import * +from nltk.corpus.reader.reviews import * +from nltk.corpus.reader.opinion_lexicon import * +from nltk.corpus.reader.pros_cons import * +from nltk.corpus.reader.categorized_sents import * +from nltk.corpus.reader.comparative_sents import * +from nltk.corpus.reader.panlex_lite import * +from nltk.corpus.reader.panlex_swadesh import * + +# Make sure that nltk.corpus.reader.bracket_parse gives the module, not +# the function bracket_parse() defined in nltk.tree: +from nltk.corpus.reader import bracket_parse + +__all__ = [ + 'CorpusReader', + 'CategorizedCorpusReader', + 'PlaintextCorpusReader', + 'find_corpus_fileids', + 'TaggedCorpusReader', + 'CMUDictCorpusReader', + 'ConllChunkCorpusReader', + 'WordListCorpusReader', + 'PPAttachmentCorpusReader', + 'SensevalCorpusReader', + 'IEERCorpusReader', + 'ChunkedCorpusReader', + 'SinicaTreebankCorpusReader', + 'BracketParseCorpusReader', + 'IndianCorpusReader', + 'ToolboxCorpusReader', + 
'TimitCorpusReader', + 'YCOECorpusReader', + 'MacMorphoCorpusReader', + 'SyntaxCorpusReader', + 'AlpinoCorpusReader', + 'RTECorpusReader', + 'StringCategoryCorpusReader', + 'EuroparlCorpusReader', + 'CategorizedBracketParseCorpusReader', + 'CategorizedTaggedCorpusReader', + 'CategorizedPlaintextCorpusReader', + 'PortugueseCategorizedPlaintextCorpusReader', + 'tagged_treebank_para_block_reader', + 'PropbankCorpusReader', + 'VerbnetCorpusReader', + 'BNCCorpusReader', + 'ConllCorpusReader', + 'XMLCorpusReader', + 'NPSChatCorpusReader', + 'SwadeshCorpusReader', + 'WordNetCorpusReader', + 'WordNetICCorpusReader', + 'SwitchboardCorpusReader', + 'DependencyCorpusReader', + 'NombankCorpusReader', + 'IPIPANCorpusReader', + 'Pl196xCorpusReader', + 'TEICorpusView', + 'KNBCorpusReader', + 'ChasenCorpusReader', + 'CHILDESCorpusReader', + 'AlignedCorpusReader', + 'TimitTaggedCorpusReader', + 'LinThesaurusCorpusReader', + 'SemcorCorpusReader', + 'FramenetCorpusReader', + 'UdhrCorpusReader', + 'BNCCorpusReader', + 'SentiWordNetCorpusReader', + 'SentiSynset', + 'TwitterCorpusReader', + 'NKJPCorpusReader', + 'CrubadanCorpusReader', + 'MTECorpusReader', + 'ReviewsCorpusReader', + 'OpinionLexiconCorpusReader', + 'ProsConsCorpusReader', + 'CategorizedSentencesCorpusReader', + 'ComparativeSentencesCorpusReader', + 'PanLexLiteCorpusReader', + 'NonbreakingPrefixesCorpusReader', + 'UnicharsCorpusReader', + 'MWAPPDBCorpusReader', + 'PanlexSwadeshCorpusReader', +] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..ce114b6 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/aligned.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/aligned.cpython-37.pyc new file mode 100644 index 0000000..9a4f031 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/aligned.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/api.cpython-37.pyc new file mode 100644 index 0000000..2ce8352 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/api.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/bnc.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/bnc.cpython-37.pyc new file mode 100644 index 0000000..7b1b4fd Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/bnc.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/bracket_parse.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/bracket_parse.cpython-37.pyc new file mode 100644 index 0000000..879512b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/bracket_parse.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/categorized_sents.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/categorized_sents.cpython-37.pyc new file mode 100644 index 
0000000..ab83f35 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/categorized_sents.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/chasen.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/chasen.cpython-37.pyc new file mode 100644 index 0000000..44791c4 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/chasen.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/childes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/childes.cpython-37.pyc new file mode 100644 index 0000000..fcf3e2b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/childes.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/chunked.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/chunked.cpython-37.pyc new file mode 100644 index 0000000..3ed99f6 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/chunked.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/cmudict.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/cmudict.cpython-37.pyc new file mode 100644 index 0000000..f2f2c4e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/cmudict.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/comparative_sents.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/comparative_sents.cpython-37.pyc new file mode 100644 index 0000000..a7400ec Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/comparative_sents.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/conll.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/conll.cpython-37.pyc new file mode 100644 index 0000000..ee85229 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/conll.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/crubadan.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/crubadan.cpython-37.pyc new file mode 100644 index 0000000..9b59a28 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/crubadan.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/dependency.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/dependency.cpython-37.pyc new file mode 100644 index 0000000..bced0a9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/dependency.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/framenet.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/framenet.cpython-37.pyc new file mode 100644 index 0000000..9132bdc Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/framenet.cpython-37.pyc differ diff --git 
a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/ieer.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/ieer.cpython-37.pyc new file mode 100644 index 0000000..dd397fa Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/ieer.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/indian.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/indian.cpython-37.pyc new file mode 100644 index 0000000..736e268 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/indian.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/ipipan.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/ipipan.cpython-37.pyc new file mode 100644 index 0000000..3b16e79 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/ipipan.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/knbc.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/knbc.cpython-37.pyc new file mode 100644 index 0000000..2dcccab Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/knbc.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/lin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/lin.cpython-37.pyc new file mode 100644 index 0000000..6d4ba1b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/lin.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/mte.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/mte.cpython-37.pyc new file mode 100644 index 0000000..e4a236e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/mte.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/nkjp.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/nkjp.cpython-37.pyc new file mode 100644 index 0000000..7839efb Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/nkjp.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/nombank.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/nombank.cpython-37.pyc new file mode 100644 index 0000000..7377475 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/nombank.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/nps_chat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/nps_chat.cpython-37.pyc new file mode 100644 index 0000000..0b97425 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/nps_chat.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/opinion_lexicon.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/opinion_lexicon.cpython-37.pyc new file mode 100644 index 0000000..ea507fb Binary files /dev/null and 
b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/opinion_lexicon.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/panlex_lite.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/panlex_lite.cpython-37.pyc new file mode 100644 index 0000000..e420169 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/panlex_lite.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/panlex_swadesh.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/panlex_swadesh.cpython-37.pyc new file mode 100644 index 0000000..1cb0a47 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/panlex_swadesh.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/pl196x.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/pl196x.cpython-37.pyc new file mode 100644 index 0000000..a5ecdc4 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/pl196x.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/plaintext.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/plaintext.cpython-37.pyc new file mode 100644 index 0000000..601a5bd Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/plaintext.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/ppattach.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/ppattach.cpython-37.pyc new file mode 100644 index 0000000..f000631 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/ppattach.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/propbank.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/propbank.cpython-37.pyc new file mode 100644 index 0000000..15afcba Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/propbank.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/pros_cons.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/pros_cons.cpython-37.pyc new file mode 100644 index 0000000..f708fad Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/pros_cons.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/reviews.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/reviews.cpython-37.pyc new file mode 100644 index 0000000..f7ad2f5 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/reviews.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/rte.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/rte.cpython-37.pyc new file mode 100644 index 0000000..fd9e2ec Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/rte.cpython-37.pyc differ diff --git 
a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/semcor.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/semcor.cpython-37.pyc new file mode 100644 index 0000000..1568f2d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/semcor.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/senseval.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/senseval.cpython-37.pyc new file mode 100644 index 0000000..e318c4e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/senseval.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/sentiwordnet.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/sentiwordnet.cpython-37.pyc new file mode 100644 index 0000000..547a258 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/sentiwordnet.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/sinica_treebank.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/sinica_treebank.cpython-37.pyc new file mode 100644 index 0000000..f03dd76 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/sinica_treebank.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/string_category.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/string_category.cpython-37.pyc new file mode 100644 index 0000000..7035c0e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/string_category.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/switchboard.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/switchboard.cpython-37.pyc new file mode 100644 index 0000000..f7efbb1 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/switchboard.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/tagged.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/tagged.cpython-37.pyc new file mode 100644 index 0000000..e6d137d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/tagged.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/timit.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/timit.cpython-37.pyc new file mode 100644 index 0000000..f82d223 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/timit.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/toolbox.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/toolbox.cpython-37.pyc new file mode 100644 index 0000000..240728c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/toolbox.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/twitter.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/twitter.cpython-37.pyc new file mode 100644 index 0000000..6a4214d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/twitter.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/udhr.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/udhr.cpython-37.pyc new file mode 100644 index 0000000..9c277ae Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/udhr.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000..e1e363e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/util.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/verbnet.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/verbnet.cpython-37.pyc new file mode 100644 index 0000000..0ca0a4f Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/verbnet.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/wordlist.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/wordlist.cpython-37.pyc new file mode 100644 index 0000000..33edb86 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/wordlist.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/wordnet.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/wordnet.cpython-37.pyc new file mode 100644 index 0000000..55532e9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/wordnet.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/xmldocs.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/xmldocs.cpython-37.pyc new file mode 100644 index 0000000..19f1eaf Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/xmldocs.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/ycoe.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/ycoe.cpython-37.pyc new file mode 100644 index 0000000..65883f5 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/__pycache__/ycoe.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/aligned.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/aligned.py new file mode 100644 index 0000000..0d8a67a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/aligned.py @@ -0,0 +1,168 @@ +# Natural Language Toolkit: Aligned Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# Author: Steven Bird +# For license information, see LICENSE.TXT + +from six import string_types + +from nltk.tokenize import WhitespaceTokenizer, RegexpTokenizer +from nltk.translate import AlignedSent, Alignment + +from nltk.corpus.reader.api import CorpusReader +from nltk.corpus.reader.util import ( + StreamBackedCorpusView, + 
concat, + read_alignedsent_block, +) + + +class AlignedCorpusReader(CorpusReader): + """ + Reader for corpora of word-aligned sentences. Tokens are assumed + to be separated by whitespace. Sentences begin on separate lines. + """ + + def __init__( + self, + root, + fileids, + sep='/', + word_tokenizer=WhitespaceTokenizer(), + sent_tokenizer=RegexpTokenizer('\n', gaps=True), + alignedsent_block_reader=read_alignedsent_block, + encoding='latin1', + ): + """ + Construct a new Aligned Corpus reader for a set of documents + located at the given root directory. Example usage: + + >>> root = '/...path to corpus.../' + >>> reader = AlignedCorpusReader(root, '.*', '.txt') # doctest: +SKIP + + :param root: The root directory for this corpus. + :param fileids: A list or regexp specifying the fileids in this corpus. + """ + CorpusReader.__init__(self, root, fileids, encoding) + self._sep = sep + self._word_tokenizer = word_tokenizer + self._sent_tokenizer = sent_tokenizer + self._alignedsent_block_reader = alignedsent_block_reader + + def raw(self, fileids=None): + """ + :return: the given file(s) as a single string. + :rtype: str + """ + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + def words(self, fileids=None): + """ + :return: the given file(s) as a list of words + and punctuation symbols. + :rtype: list(str) + """ + return concat( + [ + AlignedSentCorpusView( + fileid, + enc, + False, + False, + self._word_tokenizer, + self._sent_tokenizer, + self._alignedsent_block_reader, + ) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def sents(self, fileids=None): + """ + :return: the given file(s) as a list of + sentences or utterances, each encoded as a list of word + strings. + :rtype: list(list(str)) + """ + return concat( + [ + AlignedSentCorpusView( + fileid, + enc, + False, + True, + self._word_tokenizer, + self._sent_tokenizer, + self._alignedsent_block_reader, + ) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def aligned_sents(self, fileids=None): + """ + :return: the given file(s) as a list of AlignedSent objects. + :rtype: list(AlignedSent) + """ + return concat( + [ + AlignedSentCorpusView( + fileid, + enc, + True, + True, + self._word_tokenizer, + self._sent_tokenizer, + self._alignedsent_block_reader, + ) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + +class AlignedSentCorpusView(StreamBackedCorpusView): + """ + A specialized corpus view for aligned sentences. + ``AlignedSentCorpusView`` objects are typically created by + ``AlignedCorpusReader`` (not directly by nltk users). 
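A typical consumer of ``AlignedCorpusReader`` is the ``comtrans`` corpus; a brief sketch, assuming that data package is installed:

    >>> from nltk.corpus import comtrans
    >>> als = comtrans.aligned_sents()[0]   # an nltk.translate.AlignedSent
    >>> als.words, als.mots, als.alignment  # doctest: +SKIP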
+ """ + + def __init__( + self, + corpus_file, + encoding, + aligned, + group_by_sent, + word_tokenizer, + sent_tokenizer, + alignedsent_block_reader, + ): + self._aligned = aligned + self._group_by_sent = group_by_sent + self._word_tokenizer = word_tokenizer + self._sent_tokenizer = sent_tokenizer + self._alignedsent_block_reader = alignedsent_block_reader + StreamBackedCorpusView.__init__(self, corpus_file, encoding=encoding) + + def read_block(self, stream): + block = [ + self._word_tokenizer.tokenize(sent_str) + for alignedsent_str in self._alignedsent_block_reader(stream) + for sent_str in self._sent_tokenizer.tokenize(alignedsent_str) + ] + if self._aligned: + block[2] = Alignment.fromstring( + " ".join(block[2]) + ) # kludge; we shouldn't have tokenized the alignment string + block = [AlignedSent(*block)] + elif self._group_by_sent: + block = [block[0]] + else: + block = block[0] + + return block diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/api.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/api.py new file mode 100644 index 0000000..0b30f5a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/api.py @@ -0,0 +1,484 @@ +# Natural Language Toolkit: API for Corpus Readers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +API for corpus readers. +""" +from __future__ import unicode_literals + +import os +import re +from collections import defaultdict +from itertools import chain + +from six import string_types + +from nltk import compat +from nltk.data import PathPointer, FileSystemPathPointer, ZipFilePathPointer + +from nltk.corpus.reader.util import * + + +@compat.python_2_unicode_compatible +class CorpusReader(object): + """ + A base class for "corpus reader" classes, each of which can be + used to read a specific corpus format. Each individual corpus + reader instance is used to read a specific corpus, consisting of + one or more files under a common root directory. Each file is + identified by its ``file identifier``, which is the relative path + to the file from the root directory. + + A separate subclass is defined for each corpus format. These + subclasses define one or more methods that provide 'views' on the + corpus contents, such as ``words()`` (for a list of words) and + ``parsed_sents()`` (for a list of parsed sentences). Called with + no arguments, these methods will return the contents of the entire + corpus. For most corpora, these methods define one or more + selection arguments, such as ``fileids`` or ``categories``, which can + be used to select which portion of the corpus should be returned. + """ + + def __init__(self, root, fileids, encoding='utf8', tagset=None): + """ + :type root: PathPointer or str + :param root: A path pointer identifying the root directory for + this corpus. If a string is specified, then it will be + converted to a ``PathPointer`` automatically. + :param fileids: A list of the files that make up this corpus. + This list can either be specified explicitly, as a list of + strings; or implicitly, as a regular expression over file + paths. The absolute path for each file will be constructed + by joining the reader's root to each file name. + :param encoding: The default unicode encoding for the files + that make up the corpus. The value of ``encoding`` can be any + of the following: + - A string: ``encoding`` is the encoding name for all files. 
+ - A dictionary: ``encoding[file_id]`` is the encoding + name for the file whose identifier is ``file_id``. If + ``file_id`` is not in ``encoding``, then the file + contents will be processed using non-unicode byte strings. + - A list: ``encoding`` should be a list of ``(regexp, encoding)`` + tuples. The encoding for a file whose identifier is ``file_id`` + will be the ``encoding`` value for the first tuple whose + ``regexp`` matches the ``file_id``. If no tuple's ``regexp`` + matches the ``file_id``, the file contents will be processed + using non-unicode byte strings. + - None: the file contents of all files will be + processed using non-unicode byte strings. + :param tagset: The name of the tagset used by this corpus, to be used + for normalizing or converting the POS tags returned by the + tagged_...() methods. + """ + # Convert the root to a path pointer, if necessary. + if isinstance(root, string_types) and not isinstance(root, PathPointer): + m = re.match('(.*\.zip)/?(.*)$|', root) + zipfile, zipentry = m.groups() + if zipfile: + root = ZipFilePathPointer(zipfile, zipentry) + else: + root = FileSystemPathPointer(root) + elif not isinstance(root, PathPointer): + raise TypeError('CorpusReader: expected a string or a PathPointer') + + # If `fileids` is a regexp, then expand it. + if isinstance(fileids, string_types): + fileids = find_corpus_fileids(root, fileids) + + self._fileids = fileids + """A list of the relative paths for the fileids that make up + this corpus.""" + + self._root = root + """The root directory for this corpus.""" + + # If encoding was specified as a list of regexps, then convert + # it to a dictionary. + if isinstance(encoding, list): + encoding_dict = {} + for fileid in self._fileids: + for x in encoding: + (regexp, enc) = x + if re.match(regexp, fileid): + encoding_dict[fileid] = enc + break + encoding = encoding_dict + + self._encoding = encoding + """The default unicode encoding for the fileids that make up + this corpus. If ``encoding`` is None, then the file + contents are processed using byte strings.""" + self._tagset = tagset + + def __repr__(self): + if isinstance(self._root, ZipFilePathPointer): + path = '%s/%s' % (self._root.zipfile.filename, self._root.entry) + else: + path = '%s' % self._root.path + return '<%s in %r>' % (self.__class__.__name__, path) + + def ensure_loaded(self): + """ + Load this corpus (if it has not already been loaded). This is + used by LazyCorpusLoader as a simple method that can be used to + make sure a corpus is loaded -- e.g., in case a user wants to + do help(some_corpus). + """ + pass # no need to actually do anything. + + def readme(self): + """ + Return the contents of the corpus README file, if it exists. + """ + return self.open("README").read() + + def license(self): + """ + Return the contents of the corpus LICENSE file, if it exists. + """ + return self.open("LICENSE").read() + + def citation(self): + """ + Return the contents of the corpus citation.bib file, if it exists. + """ + return self.open("citation.bib").read() + + def fileids(self): + """ + Return a list of file identifiers for the fileids that make up + this corpus. + """ + return self._fileids + + def abspath(self, fileid): + """ + Return the absolute path for the given file. + + :type fileid: str + :param fileid: The file identifier for the file whose path + should be returned. 
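A hedged sketch of these file-level helpers, with the Brown corpus standing in for any reader (assuming its data package is installed):

    >>> from nltk.corpus import brown
    >>> brown.fileids()[:3]
    ['ca01', 'ca02', 'ca03']
    >>> brown.abspath('ca01')               # doctest: +SKIP
    >>> brown.readme()[:40]                 # doctest: +SKIP
    >>> brown.encoding('ca01')              # doctest: +SKIP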
+ :rtype: PathPointer + """ + return self._root.join(fileid) + + def abspaths(self, fileids=None, include_encoding=False, include_fileid=False): + """ + Return a list of the absolute paths for all fileids in this corpus; + or for the given list of fileids, if specified. + + :type fileids: None or str or list + :param fileids: Specifies the set of fileids for which paths should + be returned. Can be None, for all fileids; a list of + file identifiers, for a specified set of fileids; or a single + file identifier, for a single file. Note that the return + value is always a list of paths, even if ``fileids`` is a + single file identifier. + + :param include_encoding: If true, then return a list of + ``(path_pointer, encoding)`` tuples. + + :rtype: list(PathPointer) + """ + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + + paths = [self._root.join(f) for f in fileids] + + if include_encoding and include_fileid: + return list(zip(paths, [self.encoding(f) for f in fileids], fileids)) + elif include_fileid: + return list(zip(paths, fileids)) + elif include_encoding: + return list(zip(paths, [self.encoding(f) for f in fileids])) + else: + return paths + + def open(self, file): + """ + Return an open stream that can be used to read the given file. + If the file's encoding is not None, then the stream will + automatically decode the file's contents into unicode. + + :param file: The file identifier of the file to read. + """ + encoding = self.encoding(file) + stream = self._root.join(file).open(encoding) + return stream + + def encoding(self, file): + """ + Return the unicode encoding for the given corpus file, if known. + If the encoding is unknown, or if the given file should be + processed using byte strings (str), then return None. + """ + if isinstance(self._encoding, dict): + return self._encoding.get(file) + else: + return self._encoding + + def _get_root(self): + return self._root + + root = property( + _get_root, + doc=""" + The directory where this corpus is stored. + + :type: PathPointer""", + ) + + +###################################################################### +# { Corpora containing categorized items +###################################################################### + + +class CategorizedCorpusReader(object): + """ + A mixin class used to aid in the implementation of corpus readers + for categorized corpora. This class defines the method + ``categories()``, which returns a list of the categories for the + corpus or for a specified set of fileids; and overrides ``fileids()`` + to take a ``categories`` argument, restricting the set of fileids to + be returned. + + Subclasses are expected to: + + - Call ``__init__()`` to set up the mapping. + + - Override all view methods to accept a ``categories`` parameter, + which can be used *instead* of the ``fileids`` parameter, to + select which fileids should be included in the returned view. + """ + + def __init__(self, kwargs): + """ + Initialize this mapping based on keyword arguments, as + follows: + + - cat_pattern: A regular expression pattern used to find the + category for each file identifier. The pattern will be + applied to each file identifier, and the first matching + group will be used as the category label for that file. + + - cat_map: A dictionary, mapping from file identifiers to + category labels. + + - cat_file: The name of a file that contains the mapping + from file identifiers to categories. 
The argument + ``cat_delimiter`` can be used to specify a delimiter. + + The corresponding argument will be deleted from ``kwargs``. If + more than one argument is specified, an exception will be + raised. + """ + self._f2c = None #: file-to-category mapping + self._c2f = None #: category-to-file mapping + + self._pattern = None #: regexp specifying the mapping + self._map = None #: dict specifying the mapping + self._file = None #: fileid of file containing the mapping + self._delimiter = None #: delimiter for ``self._file`` + + if 'cat_pattern' in kwargs: + self._pattern = kwargs['cat_pattern'] + del kwargs['cat_pattern'] + elif 'cat_map' in kwargs: + self._map = kwargs['cat_map'] + del kwargs['cat_map'] + elif 'cat_file' in kwargs: + self._file = kwargs['cat_file'] + del kwargs['cat_file'] + if 'cat_delimiter' in kwargs: + self._delimiter = kwargs['cat_delimiter'] + del kwargs['cat_delimiter'] + else: + raise ValueError( + 'Expected keyword argument cat_pattern or ' 'cat_map or cat_file.' + ) + + if 'cat_pattern' in kwargs or 'cat_map' in kwargs or 'cat_file' in kwargs: + raise ValueError( + 'Specify exactly one of: cat_pattern, ' 'cat_map, cat_file.' + ) + + def _init(self): + self._f2c = defaultdict(set) + self._c2f = defaultdict(set) + + if self._pattern is not None: + for file_id in self._fileids: + category = re.match(self._pattern, file_id).group(1) + self._add(file_id, category) + + elif self._map is not None: + for (file_id, categories) in self._map.items(): + for category in categories: + self._add(file_id, category) + + elif self._file is not None: + for line in self.open(self._file).readlines(): + line = line.strip() + file_id, categories = line.split(self._delimiter, 1) + if file_id not in self.fileids(): + raise ValueError( + 'In category mapping file %s: %s ' + 'not found' % (self._file, file_id) + ) + for category in categories.split(self._delimiter): + self._add(file_id, category) + + def _add(self, file_id, category): + self._f2c[file_id].add(category) + self._c2f[category].add(file_id) + + def categories(self, fileids=None): + """ + Return a list of the categories that are defined for this corpus, + or for the file(s) if it is given. + """ + if self._f2c is None: + self._init() + if fileids is None: + return sorted(self._c2f) + if isinstance(fileids, string_types): + fileids = [fileids] + return sorted(set.union(*[self._f2c[d] for d in fileids])) + + def fileids(self, categories=None): + """ + Return a list of file identifiers for the files that make up + this corpus, or that make up the given category(s) if specified. + """ + if categories is None: + return super(CategorizedCorpusReader, self).fileids() + elif isinstance(categories, string_types): + if self._f2c is None: + self._init() + if categories in self._c2f: + return sorted(self._c2f[categories]) + else: + raise ValueError('Category %s not found' % categories) + else: + if self._f2c is None: + self._init() + return sorted(set.union(*[self._c2f[c] for c in categories])) + + +###################################################################### +# { Treebank readers +###################################################################### + +# [xx] is it worth it to factor this out? +class SyntaxCorpusReader(CorpusReader): + """ + An abstract base class for reading corpora consisting of + syntactically parsed text. Subclasses should define: + + - ``__init__``, which specifies the location of the corpus + and a method for detecting the sentence blocks in corpus files. 
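The categorization machinery above is what gives packaged readers such as ``brown`` (whose mapping comes from a ``cat_file``) their ``categories`` keyword; a small sketch, assuming the ``brown`` data package is installed:

    >>> from nltk.corpus import brown
    >>> brown.categories()[:3]
    ['adventure', 'belles_lettres', 'editorial']
    >>> brown.fileids(categories='news')[:2]
    ['ca01', 'ca02']
    >>> brown.words(categories=['news', 'editorial'])   # doctest: +SKIP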
+ - ``_read_block``, which reads a block from the input stream. + - ``_word``, which takes a block and returns a list of list of words. + - ``_tag``, which takes a block and returns a list of list of tagged + words. + - ``_parse``, which takes a block and returns a list of parsed + sentences. + """ + + def _parse(self, s): + raise NotImplementedError() + + def _word(self, s): + raise NotImplementedError() + + def _tag(self, s): + raise NotImplementedError() + + def _read_block(self, stream): + raise NotImplementedError() + + def raw(self, fileids=None): + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + def parsed_sents(self, fileids=None): + reader = self._read_parsed_sent_block + return concat( + [ + StreamBackedCorpusView(fileid, reader, encoding=enc) + for fileid, enc in self.abspaths(fileids, True) + ] + ) + + def tagged_sents(self, fileids=None, tagset=None): + def reader(stream): + return self._read_tagged_sent_block(stream, tagset) + + return concat( + [ + StreamBackedCorpusView(fileid, reader, encoding=enc) + for fileid, enc in self.abspaths(fileids, True) + ] + ) + + def sents(self, fileids=None): + reader = self._read_sent_block + return concat( + [ + StreamBackedCorpusView(fileid, reader, encoding=enc) + for fileid, enc in self.abspaths(fileids, True) + ] + ) + + def tagged_words(self, fileids=None, tagset=None): + def reader(stream): + return self._read_tagged_word_block(stream, tagset) + + return concat( + [ + StreamBackedCorpusView(fileid, reader, encoding=enc) + for fileid, enc in self.abspaths(fileids, True) + ] + ) + + def words(self, fileids=None): + return concat( + [ + StreamBackedCorpusView(fileid, self._read_word_block, encoding=enc) + for fileid, enc in self.abspaths(fileids, True) + ] + ) + + # ------------------------------------------------------------ + # { Block Readers + + def _read_word_block(self, stream): + return list(chain(*self._read_sent_block(stream))) + + def _read_tagged_word_block(self, stream, tagset=None): + return list(chain(*self._read_tagged_sent_block(stream, tagset))) + + def _read_sent_block(self, stream): + return list(filter(None, [self._word(t) for t in self._read_block(stream)])) + + def _read_tagged_sent_block(self, stream, tagset=None): + return list( + filter(None, [self._tag(t, tagset) for t in self._read_block(stream)]) + ) + + def _read_parsed_sent_block(self, stream): + return list(filter(None, [self._parse(t) for t in self._read_block(stream)])) + + # } End of Block Readers + # ------------------------------------------------------------ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/bnc.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/bnc.py new file mode 100644 index 0000000..9d02754 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/bnc.py @@ -0,0 +1,258 @@ +# Natural Language Toolkit: Plaintext Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +"""Corpus reader for the XML version of the British National Corpus.""" + +from nltk.corpus.reader.util import concat +from nltk.corpus.reader.xmldocs import XMLCorpusReader, XMLCorpusView, ElementTree + + +class BNCCorpusReader(XMLCorpusReader): + """Corpus reader for the XML version of the British National Corpus. + + For access to the complete XML data structure, use the ``xml()`` + method. 
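``SyntaxCorpusReader`` is the base of, among others, the bracketed Penn Treebank sample reader; a minimal sketch of the views it defines, assuming the ``treebank`` data package is installed:

    >>> from nltk.corpus import treebank
    >>> treebank.words('wsj_0001.mrg')[:4]
    ['Pierre', 'Vinken', ',', '61']
    >>> treebank.tagged_words('wsj_0001.mrg')[0]
    ('Pierre', 'NNP')
    >>> treebank.parsed_sents('wsj_0001.mrg')[0].label()
    'S'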
For access to simple word lists and tagged word lists, use + ``words()``, ``sents()``, ``tagged_words()``, and ``tagged_sents()``. + + You can obtain the full version of the BNC corpus at + http://www.ota.ox.ac.uk/desc/2554 + + If you extracted the archive to a directory called `BNC`, then you can + instantiate the reader as:: + + BNCCorpusReader(root='BNC/Texts/', fileids=r'[A-K]/\w*/\w*\.xml') + + """ + + def __init__(self, root, fileids, lazy=True): + XMLCorpusReader.__init__(self, root, fileids) + self._lazy = lazy + + def words(self, fileids=None, strip_space=True, stem=False): + """ + :return: the given file(s) as a list of words + and punctuation symbols. + :rtype: list(str) + + :param strip_space: If true, then strip trailing spaces from + word tokens. Otherwise, leave the spaces on the tokens. + :param stem: If true, then use word stems instead of word strings. + """ + return self._views(fileids, False, None, strip_space, stem) + + def tagged_words(self, fileids=None, c5=False, strip_space=True, stem=False): + """ + :return: the given file(s) as a list of tagged + words and punctuation symbols, encoded as tuples + ``(word,tag)``. + :rtype: list(tuple(str,str)) + + :param c5: If true, then the tags used will be the more detailed + c5 tags. Otherwise, the simplified tags will be used. + :param strip_space: If true, then strip trailing spaces from + word tokens. Otherwise, leave the spaces on the tokens. + :param stem: If true, then use word stems instead of word strings. + """ + tag = 'c5' if c5 else 'pos' + return self._views(fileids, False, tag, strip_space, stem) + + def sents(self, fileids=None, strip_space=True, stem=False): + """ + :return: the given file(s) as a list of + sentences or utterances, each encoded as a list of word + strings. + :rtype: list(list(str)) + + :param strip_space: If true, then strip trailing spaces from + word tokens. Otherwise, leave the spaces on the tokens. + :param stem: If true, then use word stems instead of word strings. + """ + return self._views(fileids, True, None, strip_space, stem) + + def tagged_sents(self, fileids=None, c5=False, strip_space=True, stem=False): + """ + :return: the given file(s) as a list of + sentences, each encoded as a list of ``(word,tag)`` tuples. + :rtype: list(list(tuple(str,str))) + + :param c5: If true, then the tags used will be the more detailed + c5 tags. Otherwise, the simplified tags will be used. + :param strip_space: If true, then strip trailing spaces from + word tokens. Otherwise, leave the spaces on the tokens. + :param stem: If true, then use word stems instead of word strings. + """ + tag = 'c5' if c5 else 'pos' + return self._views( + fileids, sent=True, tag=tag, strip_space=strip_space, stem=stem + ) + + def _views(self, fileids=None, sent=False, tag=False, strip_space=True, stem=False): + """A helper function that instantiates BNCWordViews or the list of words/sentences.""" + f = BNCWordView if self._lazy else self._words + return concat( + [ + f(fileid, sent, tag, strip_space, stem) + for fileid in self.abspaths(fileids) + ] + ) + + def _words(self, fileid, bracket_sent, tag, strip_space, stem): + """ + Helper used to implement the view methods -- returns a list of + words or a list of sentences, optionally tagged. + + :param fileid: The name of the underlying file. + :param bracket_sent: If true, include sentence bracketing. + :param tag: The name of the tagset to use, or None for no tags. + :param strip_space: If true, strip spaces from word tokens. 
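A short sketch of the view methods above, assuming a locally extracted copy of the BNC under ``BNC/Texts/`` (the BNC data itself is not distributed with NLTK, so the path is an assumption):

    from nltk.corpus.reader.bnc import BNCCorpusReader
    bnc = BNCCorpusReader(root='BNC/Texts/', fileids=r'[A-K]/\w*/\w*\.xml')
    words = bnc.words()                   # plain tokens
    simplified = bnc.tagged_words()       # (word, simplified pos) pairs
    detailed = bnc.tagged_words(c5=True)  # (word, C5 tag) pairs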
+ :param stem: If true, then substitute stems for words. + """ + result = [] + + xmldoc = ElementTree.parse(fileid).getroot() + for xmlsent in xmldoc.findall('.//s'): + sent = [] + for xmlword in _all_xmlwords_in(xmlsent): + word = xmlword.text + if not word: + word = "" # fixes issue 337? + if strip_space or stem: + word = word.strip() + if stem: + word = xmlword.get('hw', word) + if tag == 'c5': + word = (word, xmlword.get('c5')) + elif tag == 'pos': + word = (word, xmlword.get('pos', xmlword.get('c5'))) + sent.append(word) + if bracket_sent: + result.append(BNCSentence(xmlsent.attrib['n'], sent)) + else: + result.extend(sent) + + assert None not in result + return result + + +def _all_xmlwords_in(elt, result=None): + if result is None: + result = [] + for child in elt: + if child.tag in ('c', 'w'): + result.append(child) + else: + _all_xmlwords_in(child, result) + return result + + +class BNCSentence(list): + """ + A list of words, augmented by an attribute ``num`` used to record + the sentence identifier (the ``n`` attribute from the XML). + """ + + def __init__(self, num, items): + self.num = num + list.__init__(self, items) + + +class BNCWordView(XMLCorpusView): + """ + A stream backed corpus view specialized for use with the BNC corpus. + """ + + tags_to_ignore = set( + ['pb', 'gap', 'vocal', 'event', 'unclear', 'shift', 'pause', 'align'] + ) + """These tags are ignored. For their description refer to the + technical documentation, for example, + http://www.natcorp.ox.ac.uk/docs/URG/ref-vocal.html + + """ + + def __init__(self, fileid, sent, tag, strip_space, stem): + """ + :param fileid: The name of the underlying file. + :param sent: If true, include sentence bracketing. + :param tag: The name of the tagset to use, or None for no tags. + :param strip_space: If true, strip spaces from word tokens. + :param stem: If true, then substitute stems for words. + """ + if sent: + tagspec = '.*/s' + else: + tagspec = '.*/s/(.*/)?(c|w)' + self._sent = sent + self._tag = tag + self._strip_space = strip_space + self._stem = stem + + self.title = None #: Title of the document. + self.author = None #: Author of the document. + self.editor = None #: Editor + self.resps = None #: Statement of responsibility + + XMLCorpusView.__init__(self, fileid, tagspec) + + # Read in a tasty header. + self._open() + self.read_block(self._stream, '.*/teiHeader$', self.handle_header) + self.close() + + # Reset tag context. + self._tag_context = {0: ()} + + def handle_header(self, elt, context): + # Set up some metadata! + titles = elt.findall('titleStmt/title') + if titles: + self.title = '\n'.join(title.text.strip() for title in titles) + + authors = elt.findall('titleStmt/author') + if authors: + self.author = '\n'.join(author.text.strip() for author in authors) + + editors = elt.findall('titleStmt/editor') + if editors: + self.editor = '\n'.join(editor.text.strip() for editor in editors) + + resps = elt.findall('titleStmt/respStmt') + if resps: + self.resps = '\n\n'.join( + '\n'.join(resp_elt.text.strip() for resp_elt in resp) for resp in resps + ) + + def handle_elt(self, elt, context): + if self._sent: + return self.handle_sent(elt) + else: + return self.handle_word(elt) + + def handle_word(self, elt): + word = elt.text + if not word: + word = "" # fixes issue 337? 
+ if self._strip_space or self._stem: + word = word.strip() + if self._stem: + word = elt.get('hw', word) + if self._tag == 'c5': + word = (word, elt.get('c5')) + elif self._tag == 'pos': + word = (word, elt.get('pos', elt.get('c5'))) + return word + + def handle_sent(self, elt): + sent = [] + for child in elt: + if child.tag in ('mw', 'hi', 'corr', 'trunc'): + sent += [self.handle_word(w) for w in child] + elif child.tag in ('w', 'c'): + sent.append(self.handle_word(child)) + elif child.tag not in self.tags_to_ignore: + raise ValueError('Unexpected element %s' % child.tag) + return BNCSentence(elt.attrib['n'], sent) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/bracket_parse.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/bracket_parse.py new file mode 100644 index 0000000..55093af --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/bracket_parse.py @@ -0,0 +1,271 @@ +# Natural Language Toolkit: Penn Treebank Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT +""" +Corpus reader for corpora that consist of parenthesis-delineated parse trees. +""" + +import sys + +from nltk.tree import Tree +from nltk.tag import map_tag + +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + +# we use [^\s()]+ instead of \S+? to avoid matching () +SORTTAGWRD = re.compile(r'\((\d+) ([^\s()]+) ([^\s()]+)\)') +TAGWORD = re.compile(r'\(([^\s()]+) ([^\s()]+)\)') +WORD = re.compile(r'\([^\s()]+ ([^\s()]+)\)') +EMPTY_BRACKETS = re.compile(r'\s*\(\s*\(') + + +class BracketParseCorpusReader(SyntaxCorpusReader): + """ + Reader for corpora that consist of parenthesis-delineated parse trees, + like those found in the "combined" section of the Penn Treebank, + e.g. "(S (NP (DT the) (JJ little) (NN dog)) (VP (VBD barked)))". + + """ + + def __init__( + self, + root, + fileids, + comment_char=None, + detect_blocks='unindented_paren', + encoding='utf8', + tagset=None, + ): + """ + :param root: The root directory for this corpus. + :param fileids: A list or regexp specifying the fileids in this corpus. + :param comment_char: The character which can appear at the start of + a line to indicate that the rest of the line is a comment. + :param detect_blocks: The method that is used to find blocks + in the corpus; can be 'unindented_paren' (every unindented + parenthesis starts a new parse) or 'sexpr' (brackets are + matched). + :param tagset: The name of the tagset used by this corpus, to be used + for normalizing or converting the POS tags returned by the + tagged_...() methods. + """ + # FIXME: Why is it inheritting from SyntaxCorpusReader but initializing + # from CorpusReader? + CorpusReader.__init__(self, root, fileids, encoding) + self._comment_char = comment_char + self._detect_blocks = detect_blocks + self._tagset = tagset + + def _read_block(self, stream): + if self._detect_blocks == 'sexpr': + return read_sexpr_block(stream, comment_char=self._comment_char) + elif self._detect_blocks == 'blankline': + return read_blankline_block(stream) + elif self._detect_blocks == 'unindented_paren': + # Tokens start with unindented left parens. + toks = read_regexp_block(stream, start_re=r'^\(') + # Strip any comments out of the tokens. 
+ if self._comment_char: + toks = [ + re.sub('(?m)^%s.*' % re.escape(self._comment_char), '', tok) + for tok in toks + ] + return toks + else: + assert 0, 'bad block type' + + def _normalize(self, t): + # If there's an empty set of brackets surrounding the actual + # parse, then strip them off. + if EMPTY_BRACKETS.match(t): + t = t.strip()[1:-1] + # Replace leaves of the form (!), (,), with (! !), (, ,) + t = re.sub(r"\((.)\)", r"(\1 \1)", t) + # Replace leaves of the form (tag word root) with (tag word) + t = re.sub(r"\(([^\s()]+) ([^\s()]+) [^\s()]+\)", r"(\1 \2)", t) + return t + + def _parse(self, t): + try: + return Tree.fromstring(self._normalize(t)) + + except ValueError as e: + sys.stderr.write("Bad tree detected; trying to recover...\n") + # Try to recover, if we can: + if e.args == ('mismatched parens',): + for n in range(1, 5): + try: + v = Tree(self._normalize(t + ')' * n)) + sys.stderr.write( + " Recovered by adding %d close " "paren(s)\n" % n + ) + return v + except ValueError: + pass + # Try something else: + sys.stderr.write(" Recovered by returning a flat parse.\n") + # sys.stderr.write(' '.join(t.split())+'\n') + return Tree('S', self._tag(t)) + + def _tag(self, t, tagset=None): + tagged_sent = [(w, p) for (p, w) in TAGWORD.findall(self._normalize(t))] + if tagset and tagset != self._tagset: + tagged_sent = [ + (w, map_tag(self._tagset, tagset, p)) for (w, p) in tagged_sent + ] + return tagged_sent + + def _word(self, t): + return WORD.findall(self._normalize(t)) + + +class CategorizedBracketParseCorpusReader( + CategorizedCorpusReader, BracketParseCorpusReader +): + """ + A reader for parsed corpora whose documents are + divided into categories based on their file identifiers. + @author: Nathan Schneider + """ + + def __init__(self, *args, **kwargs): + """ + Initialize the corpus reader. Categorization arguments + (C{cat_pattern}, C{cat_map}, and C{cat_file}) are passed to + the L{CategorizedCorpusReader constructor + }. The remaining arguments + are passed to the L{BracketParseCorpusReader constructor + }. 
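``BracketParseCorpusReader`` can also be pointed directly at a local directory of bracketed parse files; a sketch under the assumption that ``/path/to/parses`` holds Penn Treebank style ``.mrg`` files (the path and pattern are placeholders):

    from nltk.corpus.reader import BracketParseCorpusReader
    reader = BracketParseCorpusReader('/path/to/parses', r'.*\.mrg')
    trees = reader.parsed_sents()         # nltk.tree.Tree objects
    tagged = reader.tagged_sents()        # lists of (word, tag) pairs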
+ """ + CategorizedCorpusReader.__init__(self, kwargs) + BracketParseCorpusReader.__init__(self, *args, **kwargs) + + def _resolve(self, fileids, categories): + if fileids is not None and categories is not None: + raise ValueError('Specify fileids or categories, not both') + if categories is not None: + return self.fileids(categories) + else: + return fileids + + def raw(self, fileids=None, categories=None): + return BracketParseCorpusReader.raw(self, self._resolve(fileids, categories)) + + def words(self, fileids=None, categories=None): + return BracketParseCorpusReader.words(self, self._resolve(fileids, categories)) + + def sents(self, fileids=None, categories=None): + return BracketParseCorpusReader.sents(self, self._resolve(fileids, categories)) + + def paras(self, fileids=None, categories=None): + return BracketParseCorpusReader.paras(self, self._resolve(fileids, categories)) + + def tagged_words(self, fileids=None, categories=None, tagset=None): + return BracketParseCorpusReader.tagged_words( + self, self._resolve(fileids, categories), tagset + ) + + def tagged_sents(self, fileids=None, categories=None, tagset=None): + return BracketParseCorpusReader.tagged_sents( + self, self._resolve(fileids, categories), tagset + ) + + def tagged_paras(self, fileids=None, categories=None, tagset=None): + return BracketParseCorpusReader.tagged_paras( + self, self._resolve(fileids, categories), tagset + ) + + def parsed_words(self, fileids=None, categories=None): + return BracketParseCorpusReader.parsed_words( + self, self._resolve(fileids, categories) + ) + + def parsed_sents(self, fileids=None, categories=None): + return BracketParseCorpusReader.parsed_sents( + self, self._resolve(fileids, categories) + ) + + def parsed_paras(self, fileids=None, categories=None): + return BracketParseCorpusReader.parsed_paras( + self, self._resolve(fileids, categories) + ) + + +class AlpinoCorpusReader(BracketParseCorpusReader): + """ + Reader for the Alpino Dutch Treebank. + This corpus has a lexical breakdown structure embedded, as read by _parse + Unfortunately this puts punctuation and some other words out of the sentence + order in the xml element tree. This is no good for tag_ and word_ + _tag and _word will be overridden to use a non-default new parameter 'ordered' + to the overridden _normalize function. The _parse function can then remain + untouched. + """ + + def __init__(self, root, encoding='ISO-8859-1', tagset=None): + BracketParseCorpusReader.__init__( + self, + root, + 'alpino\.xml', + detect_blocks='blankline', + encoding=encoding, + tagset=tagset, + ) + + def _normalize(self, t, ordered=False): + """Normalize the xml sentence element in t. + The sentence elements , although embedded in a few overall + xml elements, are seperated by blank lines. That's how the reader can + deliver them one at a time. + Each sentence has a few category subnodes that are of no use to us. + The remaining word nodes may or may not appear in the proper order. + Each word node has attributes, among which: + - begin : the position of the word in the sentence + - pos : Part of Speech: the Tag + - word : the actual word + The return value is a string with all xml elementes replaced by + clauses: either a cat clause with nested clauses, or a word clause. + The order of the bracket clauses closely follows the xml. + If ordered == True, the word clauses include an order sequence number. + If ordered == False, the word clauses only have pos and word parts. 
+ """ + if t[:10] != "', r"(\1", t) + if ordered: + t = re.sub( + r' ', + r"(\1 \2 \3)", + t, + ) + else: + t = re.sub(r' ', r"(\1 \2)", t) + t = re.sub(r" ", r")", t) + t = re.sub(r".*", r"", t) + t = re.sub(r"", r"", t) + return t + + def _tag(self, t, tagset=None): + tagged_sent = [ + (int(o), w, p) + for (o, p, w) in SORTTAGWRD.findall(self._normalize(t, ordered=True)) + ] + tagged_sent.sort() + if tagset and tagset != self._tagset: + tagged_sent = [ + (w, map_tag(self._tagset, tagset, p)) for (o, w, p) in tagged_sent + ] + else: + tagged_sent = [(w, p) for (o, w, p) in tagged_sent] + return tagged_sent + + def _word(self, t): + """Return a correctly ordered list if words""" + tagged_sent = self._tag(t) + return [w for (w, p) in tagged_sent] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/categorized_sents.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/categorized_sents.py new file mode 100644 index 0000000..e0a3034 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/categorized_sents.py @@ -0,0 +1,199 @@ +# Natural Language Toolkit: Categorized Sentences Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Pierpaolo Pantone <24alsecondo@gmail.com> +# URL: +# For license information, see LICENSE.TXT + +""" +CorpusReader structured for corpora that contain one instance on each row. +This CorpusReader is specifically used for the Subjectivity Dataset and the +Sentence Polarity Dataset. + +- Subjectivity Dataset information - + +Authors: Bo Pang and Lillian Lee. +Url: http://www.cs.cornell.edu/people/pabo/movie-review-data + +Distributed with permission. + +Related papers: + +- Bo Pang and Lillian Lee. "A Sentimental Education: Sentiment Analysis Using + Subjectivity Summarization Based on Minimum Cuts". Proceedings of the ACL, + 2004. + +- Sentence Polarity Dataset information - + +Authors: Bo Pang and Lillian Lee. +Url: http://www.cs.cornell.edu/people/pabo/movie-review-data + +Related papers: + +- Bo Pang and Lillian Lee. "Seeing stars: Exploiting class relationships for + sentiment categorization with respect to rating scales". Proceedings of the + ACL, 2005. +""" +from six import string_types + +from nltk.corpus.reader.api import * +from nltk.tokenize import * + + +class CategorizedSentencesCorpusReader(CategorizedCorpusReader, CorpusReader): + """ + A reader for corpora in which each row represents a single instance, mainly + a sentence. Istances are divided into categories based on their file identifiers + (see CategorizedCorpusReader). + Since many corpora allow rows that contain more than one sentence, it is + possible to specify a sentence tokenizer to retrieve all sentences instead + than all rows. + + Examples using the Subjectivity Dataset: + + >>> from nltk.corpus import subjectivity + >>> subjectivity.sents()[23] + ['television', 'made', 'him', 'famous', ',', 'but', 'his', 'biggest', 'hits', + 'happened', 'off', 'screen', '.'] + >>> subjectivity.categories() + ['obj', 'subj'] + >>> subjectivity.words(categories='subj') + ['smart', 'and', 'alert', ',', 'thirteen', ...] + + Examples using the Sentence Polarity Dataset: + + >>> from nltk.corpus import sentence_polarity + >>> sentence_polarity.sents() + [['simplistic', ',', 'silly', 'and', 'tedious', '.'], ["it's", 'so', 'laddish', + 'and', 'juvenile', ',', 'only', 'teenage', 'boys', 'could', 'possibly', 'find', + 'it', 'funny', '.'], ...] 
+ >>> sentence_polarity.categories() + ['neg', 'pos'] + """ + + CorpusView = StreamBackedCorpusView + + def __init__( + self, + root, + fileids, + word_tokenizer=WhitespaceTokenizer(), + sent_tokenizer=None, + encoding='utf8', + **kwargs + ): + """ + :param root: The root directory for the corpus. + :param fileids: a list or regexp specifying the fileids in the corpus. + :param word_tokenizer: a tokenizer for breaking sentences or paragraphs + into words. Default: `WhitespaceTokenizer` + :param sent_tokenizer: a tokenizer for breaking paragraphs into sentences. + :param encoding: the encoding that should be used to read the corpus. + :param kwargs: additional parameters passed to CategorizedCorpusReader. + """ + + CorpusReader.__init__(self, root, fileids, encoding) + CategorizedCorpusReader.__init__(self, kwargs) + self._word_tokenizer = word_tokenizer + self._sent_tokenizer = sent_tokenizer + + def _resolve(self, fileids, categories): + if fileids is not None and categories is not None: + raise ValueError('Specify fileids or categories, not both') + if categories is not None: + return self.fileids(categories) + else: + return fileids + + def raw(self, fileids=None, categories=None): + """ + :param fileids: a list or regexp specifying the fileids that have to be + returned as a raw string. + :param categories: a list specifying the categories whose files have to + be returned as a raw string. + :return: the given file(s) as a single string. + :rtype: str + """ + fileids = self._resolve(fileids, categories) + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + def readme(self): + """ + Return the contents of the corpus Readme.txt file. + """ + return self.open("README").read() + + def sents(self, fileids=None, categories=None): + """ + Return all sentences in the corpus or in the specified file(s). + + :param fileids: a list or regexp specifying the ids of the files whose + sentences have to be returned. + :param categories: a list specifying the categories whose sentences have + to be returned. + :return: the given file(s) as a list of sentences. + Each sentence is tokenized using the specified word_tokenizer. + :rtype: list(list(str)) + """ + fileids = self._resolve(fileids, categories) + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat( + [ + self.CorpusView(path, self._read_sent_block, encoding=enc) + for (path, enc, fileid) in self.abspaths(fileids, True, True) + ] + ) + + def words(self, fileids=None, categories=None): + """ + Return all words and punctuation symbols in the corpus or in the specified + file(s). + + :param fileids: a list or regexp specifying the ids of the files whose + words have to be returned. + :param categories: a list specifying the categories whose words have to + be returned. + :return: the given file(s) as a list of words and punctuation symbols. + :rtype: list(str) + """ + fileids = self._resolve(fileids, categories) + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat( + [ + self.CorpusView(path, self._read_word_block, encoding=enc) + for (path, enc, fileid) in self.abspaths(fileids, True, True) + ] + ) + + def _read_sent_block(self, stream): + sents = [] + for i in range(20): # Read 20 lines at a time. 
+ line = stream.readline() + if not line: + continue + if self._sent_tokenizer: + sents.extend( + [ + self._word_tokenizer.tokenize(sent) + for sent in self._sent_tokenizer.tokenize(line) + ] + ) + else: + sents.append(self._word_tokenizer.tokenize(line)) + return sents + + def _read_word_block(self, stream): + words = [] + for sent in self._read_sent_block(stream): + words.extend(sent) + return words diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/chasen.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/chasen.py new file mode 100644 index 0000000..ef60b0d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/chasen.py @@ -0,0 +1,171 @@ +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Masato Hagiwara +# URL: +# For license information, see LICENSE.TXT + +# For more information, see http://lilyx.net/pages/nltkjapanesecorpus.html +from __future__ import print_function + +import sys + +from six import string_types + +from nltk.corpus.reader import util + +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + + +class ChasenCorpusReader(CorpusReader): + def __init__(self, root, fileids, encoding='utf8', sent_splitter=None): + self._sent_splitter = sent_splitter + CorpusReader.__init__(self, root, fileids, encoding) + + def raw(self, fileids=None): + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + def words(self, fileids=None): + return concat( + [ + ChasenCorpusView(fileid, enc, False, False, False, self._sent_splitter) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def tagged_words(self, fileids=None): + return concat( + [ + ChasenCorpusView(fileid, enc, True, False, False, self._sent_splitter) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def sents(self, fileids=None): + return concat( + [ + ChasenCorpusView(fileid, enc, False, True, False, self._sent_splitter) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def tagged_sents(self, fileids=None): + return concat( + [ + ChasenCorpusView(fileid, enc, True, True, False, self._sent_splitter) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def paras(self, fileids=None): + return concat( + [ + ChasenCorpusView(fileid, enc, False, True, True, self._sent_splitter) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def tagged_paras(self, fileids=None): + return concat( + [ + ChasenCorpusView(fileid, enc, True, True, True, self._sent_splitter) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + +class ChasenCorpusView(StreamBackedCorpusView): + """ + A specialized corpus view for ChasenReader. Similar to ``TaggedCorpusView``, + but this'll use fixed sets of word and sentence tokenizer. 
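+    The boolean flags select the shape of the output; for example,
+    ``tagged_sents()`` above builds views with
+    ``ChasenCorpusView(fileid, enc, True, True, False, sent_splitter)``
+    (tagged tokens, grouped by sentence but not by paragraph), while
+    ``paras()`` uses ``(False, True, True)`` for the same three flags.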
+ """ + + def __init__( + self, + corpus_file, + encoding, + tagged, + group_by_sent, + group_by_para, + sent_splitter=None, + ): + self._tagged = tagged + self._group_by_sent = group_by_sent + self._group_by_para = group_by_para + self._sent_splitter = sent_splitter + StreamBackedCorpusView.__init__(self, corpus_file, encoding=encoding) + + def read_block(self, stream): + """Reads one paragraph at a time.""" + block = [] + for para_str in read_regexp_block(stream, r".", r"^EOS\n"): + + para = [] + + sent = [] + for line in para_str.splitlines(): + + _eos = line.strip() == 'EOS' + _cells = line.split('\t') + w = (_cells[0], '\t'.join(_cells[1:])) + if not _eos: + sent.append(w) + + if _eos or (self._sent_splitter and self._sent_splitter(w)): + if not self._tagged: + sent = [w for (w, t) in sent] + if self._group_by_sent: + para.append(sent) + else: + para.extend(sent) + sent = [] + + if len(sent) > 0: + if not self._tagged: + sent = [w for (w, t) in sent] + + if self._group_by_sent: + para.append(sent) + else: + para.extend(sent) + + if self._group_by_para: + block.append(para) + else: + block.extend(para) + + return block + + +def demo(): + + import nltk + from nltk.corpus.util import LazyCorpusLoader + + jeita = LazyCorpusLoader('jeita', ChasenCorpusReader, r'.*chasen', encoding='utf-8') + print('/'.join(jeita.words()[22100:22140])) + + print( + '\nEOS\n'.join( + '\n'.join("%s/%s" % (w[0], w[1].split('\t')[2]) for w in sent) + for sent in jeita.tagged_sents()[2170:2173] + ) + ) + + +def test(): + + from nltk.corpus.util import LazyCorpusLoader + + jeita = LazyCorpusLoader('jeita', ChasenCorpusReader, r'.*chasen', encoding='utf-8') + + assert isinstance(jeita.tagged_words()[0][1], string_types) + + +if __name__ == '__main__': + demo() + test() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/childes.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/childes.py new file mode 100644 index 0000000..b6ff220 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/childes.py @@ -0,0 +1,633 @@ +# CHILDES XML Corpus Reader + +# Copyright (C) 2001-2019 NLTK Project +# Author: Tomonori Nagano +# Alexis Dimitriadis +# URL: +# For license information, see LICENSE.TXT + +""" +Corpus reader for the XML version of the CHILDES corpus. +""" +from __future__ import print_function, division + +__docformat__ = 'epytext en' + +import re +from collections import defaultdict +from six import string_types + +from nltk.util import flatten, LazyMap, LazyConcatenation + +from nltk.corpus.reader.util import concat +from nltk.corpus.reader.xmldocs import XMLCorpusReader, ElementTree + +# to resolve the namespace issue +NS = 'http://www.talkbank.org/ns/talkbank' + + +class CHILDESCorpusReader(XMLCorpusReader): + """ + Corpus reader for the XML version of the CHILDES corpus. + The CHILDES corpus is available at ``https://childes.talkbank.org/``. The XML + version of CHILDES is located at ``https://childes.talkbank.org/data-xml/``. + Copy the needed parts of the CHILDES XML corpus into the NLTK data directory + (``nltk_data/corpora/CHILDES/``). + + For access to the file text use the usual nltk functions, + ``words()``, ``sents()``, ``tagged_words()`` and ``tagged_sents()``. 
+ """ + + def __init__(self, root, fileids, lazy=True): + XMLCorpusReader.__init__(self, root, fileids) + self._lazy = lazy + + def words( + self, + fileids=None, + speaker='ALL', + stem=False, + relation=False, + strip_space=True, + replace=False, + ): + """ + :return: the given file(s) as a list of words + :rtype: list(str) + + :param speaker: If specified, select specific speaker(s) defined + in the corpus. Default is 'ALL' (all participants). Common choices + are 'CHI' (the child), 'MOT' (mother), ['CHI','MOT'] (exclude + researchers) + :param stem: If true, then use word stems instead of word strings. + :param relation: If true, then return tuples of (stem, index, + dependent_index) + :param strip_space: If true, then strip trailing spaces from word + tokens. Otherwise, leave the spaces on the tokens. + :param replace: If true, then use the replaced (intended) word instead + of the original word (e.g., 'wat' will be replaced with 'watch') + """ + sent = None + pos = False + if not self._lazy: + return [ + self._get_words( + fileid, speaker, sent, stem, relation, pos, strip_space, replace + ) + for fileid in self.abspaths(fileids) + ] + + get_words = lambda fileid: self._get_words( + fileid, speaker, sent, stem, relation, pos, strip_space, replace + ) + return LazyConcatenation(LazyMap(get_words, self.abspaths(fileids))) + + def tagged_words( + self, + fileids=None, + speaker='ALL', + stem=False, + relation=False, + strip_space=True, + replace=False, + ): + """ + :return: the given file(s) as a list of tagged + words and punctuation symbols, encoded as tuples + ``(word,tag)``. + :rtype: list(tuple(str,str)) + + :param speaker: If specified, select specific speaker(s) defined + in the corpus. Default is 'ALL' (all participants). Common choices + are 'CHI' (the child), 'MOT' (mother), ['CHI','MOT'] (exclude + researchers) + :param stem: If true, then use word stems instead of word strings. + :param relation: If true, then return tuples of (stem, index, + dependent_index) + :param strip_space: If true, then strip trailing spaces from word + tokens. Otherwise, leave the spaces on the tokens. + :param replace: If true, then use the replaced (intended) word instead + of the original word (e.g., 'wat' will be replaced with 'watch') + """ + sent = None + pos = True + if not self._lazy: + return [ + self._get_words( + fileid, speaker, sent, stem, relation, pos, strip_space, replace + ) + for fileid in self.abspaths(fileids) + ] + + get_words = lambda fileid: self._get_words( + fileid, speaker, sent, stem, relation, pos, strip_space, replace + ) + return LazyConcatenation(LazyMap(get_words, self.abspaths(fileids))) + + def sents( + self, + fileids=None, + speaker='ALL', + stem=False, + relation=None, + strip_space=True, + replace=False, + ): + """ + :return: the given file(s) as a list of sentences or utterances, each + encoded as a list of word strings. + :rtype: list(list(str)) + + :param speaker: If specified, select specific speaker(s) defined + in the corpus. Default is 'ALL' (all participants). Common choices + are 'CHI' (the child), 'MOT' (mother), ['CHI','MOT'] (exclude + researchers) + :param stem: If true, then use word stems instead of word strings. + :param relation: If true, then return tuples of ``(str,pos,relation_list)``. + If there is manually-annotated relation info, it will return + tuples of ``(str,pos,test_relation_list,str,pos,gold_relation_list)`` + :param strip_space: If true, then strip trailing spaces from word + tokens. Otherwise, leave the spaces on the tokens. 
+ :param replace: If true, then use the replaced (intended) word instead + of the original word (e.g., 'wat' will be replaced with 'watch') + """ + sent = True + pos = False + if not self._lazy: + return [ + self._get_words( + fileid, speaker, sent, stem, relation, pos, strip_space, replace + ) + for fileid in self.abspaths(fileids) + ] + + get_words = lambda fileid: self._get_words( + fileid, speaker, sent, stem, relation, pos, strip_space, replace + ) + return LazyConcatenation(LazyMap(get_words, self.abspaths(fileids))) + + def tagged_sents( + self, + fileids=None, + speaker='ALL', + stem=False, + relation=None, + strip_space=True, + replace=False, + ): + """ + :return: the given file(s) as a list of + sentences, each encoded as a list of ``(word,tag)`` tuples. + :rtype: list(list(tuple(str,str))) + + :param speaker: If specified, select specific speaker(s) defined + in the corpus. Default is 'ALL' (all participants). Common choices + are 'CHI' (the child), 'MOT' (mother), ['CHI','MOT'] (exclude + researchers) + :param stem: If true, then use word stems instead of word strings. + :param relation: If true, then return tuples of ``(str,pos,relation_list)``. + If there is manually-annotated relation info, it will return + tuples of ``(str,pos,test_relation_list,str,pos,gold_relation_list)`` + :param strip_space: If true, then strip trailing spaces from word + tokens. Otherwise, leave the spaces on the tokens. + :param replace: If true, then use the replaced (intended) word instead + of the original word (e.g., 'wat' will be replaced with 'watch') + """ + sent = True + pos = True + if not self._lazy: + return [ + self._get_words( + fileid, speaker, sent, stem, relation, pos, strip_space, replace + ) + for fileid in self.abspaths(fileids) + ] + + get_words = lambda fileid: self._get_words( + fileid, speaker, sent, stem, relation, pos, strip_space, replace + ) + return LazyConcatenation(LazyMap(get_words, self.abspaths(fileids))) + + def corpus(self, fileids=None): + """ + :return: the given file(s) as a dict of ``(corpus_property_key, value)`` + :rtype: list(dict) + """ + if not self._lazy: + return [self._get_corpus(fileid) for fileid in self.abspaths(fileids)] + return LazyMap(self._get_corpus, self.abspaths(fileids)) + + def _get_corpus(self, fileid): + results = dict() + xmldoc = ElementTree.parse(fileid).getroot() + for key, value in xmldoc.items(): + results[key] = value + return results + + def participants(self, fileids=None): + """ + :return: the given file(s) as a dict of + ``(participant_property_key, value)`` + :rtype: list(dict) + """ + if not self._lazy: + return [self._get_participants(fileid) for fileid in self.abspaths(fileids)] + return LazyMap(self._get_participants, self.abspaths(fileids)) + + def _get_participants(self, fileid): + # multidimensional dicts + def dictOfDicts(): + return defaultdict(dictOfDicts) + + xmldoc = ElementTree.parse(fileid).getroot() + # getting participants' data + pat = dictOfDicts() + for participant in xmldoc.findall( + './/{%s}Participants/{%s}participant' % (NS, NS) + ): + for (key, value) in participant.items(): + pat[participant.get('id')][key] = value + return pat + + def age(self, fileids=None, speaker='CHI', month=False): + """ + :return: the given file(s) as string or int + :rtype: list or int + + :param month: If true, return months instead of year-month-date + """ + if not self._lazy: + return [ + self._get_age(fileid, speaker, month) + for fileid in self.abspaths(fileids) + ] + get_age = lambda fileid: self._get_age(fileid, speaker, 
month) + return LazyMap(get_age, self.abspaths(fileids)) + + def _get_age(self, fileid, speaker, month): + xmldoc = ElementTree.parse(fileid).getroot() + for pat in xmldoc.findall('.//{%s}Participants/{%s}participant' % (NS, NS)): + try: + if pat.get('id') == speaker: + age = pat.get('age') + if month: + age = self.convert_age(age) + return age + # some files don't have age data + except (TypeError, AttributeError) as e: + return None + + def convert_age(self, age_year): + "Caclculate age in months from a string in CHILDES format" + m = re.match("P(\d+)Y(\d+)M?(\d?\d?)D?", age_year) + age_month = int(m.group(1)) * 12 + int(m.group(2)) + try: + if int(m.group(3)) > 15: + age_month += 1 + # some corpora don't have age information? + except ValueError as e: + pass + return age_month + + def MLU(self, fileids=None, speaker='CHI'): + """ + :return: the given file(s) as a floating number + :rtype: list(float) + """ + if not self._lazy: + return [ + self._getMLU(fileid, speaker=speaker) + for fileid in self.abspaths(fileids) + ] + get_MLU = lambda fileid: self._getMLU(fileid, speaker=speaker) + return LazyMap(get_MLU, self.abspaths(fileids)) + + def _getMLU(self, fileid, speaker): + sents = self._get_words( + fileid, + speaker=speaker, + sent=True, + stem=True, + relation=False, + pos=True, + strip_space=True, + replace=True, + ) + results = [] + lastSent = [] + numFillers = 0 + sentDiscount = 0 + for sent in sents: + posList = [pos for (word, pos) in sent] + # if any part of the sentence is intelligible + if any(pos == 'unk' for pos in posList): + continue + # if the sentence is null + elif sent == []: + continue + # if the sentence is the same as the last sent + elif sent == lastSent: + continue + else: + results.append([word for (word, pos) in sent]) + # count number of fillers + if len(set(['co', None]).intersection(posList)) > 0: + numFillers += posList.count('co') + numFillers += posList.count(None) + sentDiscount += 1 + lastSent = sent + try: + thisWordList = flatten(results) + # count number of morphemes + # (e.g., 'read' = 1 morpheme but 'read-PAST' is 2 morphemes) + numWords = ( + len(flatten([word.split('-') for word in thisWordList])) - numFillers + ) + numSents = len(results) - sentDiscount + mlu = numWords / numSents + except ZeroDivisionError: + mlu = 0 + # return {'mlu':mlu,'wordNum':numWords,'sentNum':numSents} + return mlu + + def _get_words( + self, fileid, speaker, sent, stem, relation, pos, strip_space, replace + ): + if ( + isinstance(speaker, string_types) and speaker != 'ALL' + ): # ensure we have a list of speakers + speaker = [speaker] + xmldoc = ElementTree.parse(fileid).getroot() + # processing each xml doc + results = [] + for xmlsent in xmldoc.findall('.//{%s}u' % NS): + sents = [] + # select speakers + if speaker == 'ALL' or xmlsent.get('who') in speaker: + for xmlword in xmlsent.findall('.//{%s}w' % NS): + infl = None + suffixStem = None + suffixTag = None + # getting replaced words + if replace and xmlsent.find('.//{%s}w/{%s}replacement' % (NS, NS)): + xmlword = xmlsent.find( + './/{%s}w/{%s}replacement/{%s}w' % (NS, NS, NS) + ) + elif replace and xmlsent.find('.//{%s}w/{%s}wk' % (NS, NS)): + xmlword = xmlsent.find('.//{%s}w/{%s}wk' % (NS, NS)) + # get text + if xmlword.text: + word = xmlword.text + else: + word = '' + # strip tailing space + if strip_space: + word = word.strip() + # stem + if relation or stem: + try: + xmlstem = xmlword.find('.//{%s}stem' % NS) + word = xmlstem.text + except AttributeError as e: + pass + # if there is an inflection + try: + 
xmlinfl = xmlword.find( + './/{%s}mor/{%s}mw/{%s}mk' % (NS, NS, NS) + ) + word += '-' + xmlinfl.text + except: + pass + # if there is a suffix + try: + xmlsuffix = xmlword.find( + './/{%s}mor/{%s}mor-post/{%s}mw/{%s}stem' + % (NS, NS, NS, NS) + ) + suffixStem = xmlsuffix.text + except AttributeError: + suffixStem = "" + if suffixStem: + word += "~" + suffixStem + # pos + if relation or pos: + try: + xmlpos = xmlword.findall(".//{%s}c" % NS) + xmlpos2 = xmlword.findall(".//{%s}s" % NS) + if xmlpos2 != []: + tag = xmlpos[0].text + ":" + xmlpos2[0].text + else: + tag = xmlpos[0].text + except (AttributeError, IndexError) as e: + tag = "" + try: + xmlsuffixpos = xmlword.findall( + './/{%s}mor/{%s}mor-post/{%s}mw/{%s}pos/{%s}c' + % (NS, NS, NS, NS, NS) + ) + xmlsuffixpos2 = xmlword.findall( + './/{%s}mor/{%s}mor-post/{%s}mw/{%s}pos/{%s}s' + % (NS, NS, NS, NS, NS) + ) + if xmlsuffixpos2: + suffixTag = ( + xmlsuffixpos[0].text + ":" + xmlsuffixpos2[0].text + ) + else: + suffixTag = xmlsuffixpos[0].text + except: + pass + if suffixTag: + tag += "~" + suffixTag + word = (word, tag) + # relational + # the gold standard is stored in + # + if relation == True: + for xmlstem_rel in xmlword.findall( + './/{%s}mor/{%s}gra' % (NS, NS) + ): + if not xmlstem_rel.get('type') == 'grt': + word = ( + word[0], + word[1], + xmlstem_rel.get('index') + + "|" + + xmlstem_rel.get('head') + + "|" + + xmlstem_rel.get('relation'), + ) + else: + word = ( + word[0], + word[1], + word[2], + word[0], + word[1], + xmlstem_rel.get('index') + + "|" + + xmlstem_rel.get('head') + + "|" + + xmlstem_rel.get('relation'), + ) + try: + for xmlpost_rel in xmlword.findall( + './/{%s}mor/{%s}mor-post/{%s}gra' % (NS, NS, NS) + ): + if not xmlpost_rel.get('type') == 'grt': + suffixStem = ( + suffixStem[0], + suffixStem[1], + xmlpost_rel.get('index') + + "|" + + xmlpost_rel.get('head') + + "|" + + xmlpost_rel.get('relation'), + ) + else: + suffixStem = ( + suffixStem[0], + suffixStem[1], + suffixStem[2], + suffixStem[0], + suffixStem[1], + xmlpost_rel.get('index') + + "|" + + xmlpost_rel.get('head') + + "|" + + xmlpost_rel.get('relation'), + ) + except: + pass + sents.append(word) + if sent or relation: + results.append(sents) + else: + results.extend(sents) + return LazyMap(lambda x: x, results) + + # Ready-to-use browser opener + + """ + The base URL for viewing files on the childes website. This + shouldn't need to be changed, unless CHILDES changes the configuration + of their server or unless the user sets up their own corpus webserver. + """ + childes_url_base = r'https://childes.talkbank.org/browser/index.php?url=' + + def webview_file(self, fileid, urlbase=None): + """Map a corpus file to its web version on the CHILDES website, + and open it in a web browser. + + The complete URL to be used is: + childes.childes_url_base + urlbase + fileid.replace('.xml', '.cha') + + If no urlbase is passed, we try to calculate it. This + requires that the childes corpus was set up to mirror the + folder hierarchy under childes.psy.cmu.edu/data-xml/, e.g.: + nltk_data/corpora/childes/Eng-USA/Cornell/??? or + nltk_data/corpora/childes/Romance/Spanish/Aguirre/??? + + The function first looks (as a special case) if "Eng-USA" is + on the path consisting of +fileid; then if + "childes", possibly followed by "data-xml", appears. If neither + one is found, we use the unmodified fileid and hope for the best. + If this is not right, specify urlbase explicitly, e.g., if the + corpus root points to the Cornell folder, urlbase='Eng-USA/Cornell'. 
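+
+        A hypothetical example, purely to illustrate the rule above: with
+        urlbase='Eng-USA/Cornell' and fileid='mom01.xml', the URL opened
+        would be
+        https://childes.talkbank.org/browser/index.php?url=Eng-USA/Cornell/mom01.cha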
+ """ + + import webbrowser + + if urlbase: + path = urlbase + "/" + fileid + else: + full = self.root + "/" + fileid + full = re.sub(r'\\', '/', full) + if '/childes/' in full.lower(): + # Discard /data-xml/ if present + path = re.findall(r'(?i)/childes(?:/data-xml)?/(.*)\.xml', full)[0] + elif 'eng-usa' in full.lower(): + path = 'Eng-USA/' + re.findall(r'/(?i)Eng-USA/(.*)\.xml', full)[0] + else: + path = fileid + + # Strip ".xml" and add ".cha", as necessary: + if path.endswith('.xml'): + path = path[:-4] + + if not path.endswith('.cha'): + path = path + '.cha' + + url = self.childes_url_base + path + + webbrowser.open_new_tab(url) + print("Opening in browser:", url) + # Pausing is a good idea, but it's up to the user... + # raw_input("Hit Return to continue") + + +def demo(corpus_root=None): + """ + The CHILDES corpus should be manually downloaded and saved + to ``[NLTK_Data_Dir]/corpora/childes/`` + """ + if not corpus_root: + from nltk.data import find + + corpus_root = find('corpora/childes/data-xml/Eng-USA/') + + try: + childes = CHILDESCorpusReader(corpus_root, '.*.xml') + # describe all corpus + for file in childes.fileids()[:5]: + corpus = '' + corpus_id = '' + for (key, value) in childes.corpus(file)[0].items(): + if key == "Corpus": + corpus = value + if key == "Id": + corpus_id = value + print('Reading', corpus, corpus_id, ' .....') + print("words:", childes.words(file)[:7], "...") + print( + "words with replaced words:", + childes.words(file, replace=True)[:7], + " ...", + ) + print("words with pos tags:", childes.tagged_words(file)[:7], " ...") + print("words (only MOT):", childes.words(file, speaker='MOT')[:7], "...") + print("words (only CHI):", childes.words(file, speaker='CHI')[:7], "...") + print("stemmed words:", childes.words(file, stem=True)[:7], " ...") + print( + "words with relations and pos-tag:", + childes.words(file, relation=True)[:5], + " ...", + ) + print("sentence:", childes.sents(file)[:2], " ...") + for (participant, values) in childes.participants(file)[0].items(): + for (key, value) in values.items(): + print("\tparticipant", participant, key, ":", value) + print("num of sent:", len(childes.sents(file))) + print("num of morphemes:", len(childes.words(file, stem=True))) + print("age:", childes.age(file)) + print("age in month:", childes.age(file, month=True)) + print("MLU:", childes.MLU(file)) + print() + + except LookupError as e: + print( + """The CHILDES corpus, or the parts you need, should be manually + downloaded from https://childes.talkbank.org/data-xml/ and saved at + [NLTK_Data_Dir]/corpora/childes/ + Alternately, you can call the demo with the path to a portion of the CHILDES corpus, e.g.: + demo('/path/to/childes/data-xml/Eng-USA/") + """ + ) + # corpus_root_http = urllib2.urlopen('https://childes.talkbank.org/data-xml/Eng-USA/Bates.zip') + # corpus_root_http_bates = zipfile.ZipFile(cStringIO.StringIO(corpus_root_http.read())) + ##this fails + # childes = CHILDESCorpusReader(corpus_root_http_bates,corpus_root_http_bates.namelist()) + + +if __name__ == "__main__": + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/chunked.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/chunked.py new file mode 100644 index 0000000..0edd0ea --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/chunked.py @@ -0,0 +1,285 @@ +# Natural Language Toolkit: Chunked Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# URL: +# For license information, see 
LICENSE.TXT + +""" +A reader for corpora that contain chunked (and optionally tagged) +documents. +""" + +import os.path, codecs + +from six import string_types + +import nltk +from nltk.corpus.reader.bracket_parse import BracketParseCorpusReader +from nltk.tree import Tree +from nltk.tokenize import * +from nltk.chunk import tagstr2tree +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + + +class ChunkedCorpusReader(CorpusReader): + """ + Reader for chunked (and optionally tagged) corpora. Paragraphs + are split using a block reader. They are then tokenized into + sentences using a sentence tokenizer. Finally, these sentences + are parsed into chunk trees using a string-to-chunktree conversion + function. Each of these steps can be performed using a default + function or a custom function. By default, paragraphs are split + on blank lines; sentences are listed one per line; and sentences + are parsed into chunk trees using ``nltk.chunk.tagstr2tree``. + """ + + def __init__( + self, + root, + fileids, + extension='', + str2chunktree=tagstr2tree, + sent_tokenizer=RegexpTokenizer('\n', gaps=True), + para_block_reader=read_blankline_block, + encoding='utf8', + tagset=None, + ): + """ + :param root: The root directory for this corpus. + :param fileids: A list or regexp specifying the fileids in this corpus. + """ + CorpusReader.__init__(self, root, fileids, encoding) + self._cv_args = (str2chunktree, sent_tokenizer, para_block_reader, tagset) + """Arguments for corpus views generated by this corpus: a tuple + (str2chunktree, sent_tokenizer, para_block_tokenizer)""" + + def raw(self, fileids=None): + """ + :return: the given file(s) as a single string. + :rtype: str + """ + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + def words(self, fileids=None): + """ + :return: the given file(s) as a list of words + and punctuation symbols. + :rtype: list(str) + """ + return concat( + [ + ChunkedCorpusView(f, enc, 0, 0, 0, 0, *self._cv_args) + for (f, enc) in self.abspaths(fileids, True) + ] + ) + + def sents(self, fileids=None): + """ + :return: the given file(s) as a list of + sentences or utterances, each encoded as a list of word + strings. + :rtype: list(list(str)) + """ + return concat( + [ + ChunkedCorpusView(f, enc, 0, 1, 0, 0, *self._cv_args) + for (f, enc) in self.abspaths(fileids, True) + ] + ) + + def paras(self, fileids=None): + """ + :return: the given file(s) as a list of + paragraphs, each encoded as a list of sentences, which are + in turn encoded as lists of word strings. + :rtype: list(list(list(str))) + """ + return concat( + [ + ChunkedCorpusView(f, enc, 0, 1, 1, 0, *self._cv_args) + for (f, enc) in self.abspaths(fileids, True) + ] + ) + + def tagged_words(self, fileids=None, tagset=None): + """ + :return: the given file(s) as a list of tagged + words and punctuation symbols, encoded as tuples + ``(word,tag)``. + :rtype: list(tuple(str,str)) + """ + return concat( + [ + ChunkedCorpusView( + f, enc, 1, 0, 0, 0, *self._cv_args, target_tagset=tagset + ) + for (f, enc) in self.abspaths(fileids, True) + ] + ) + + def tagged_sents(self, fileids=None, tagset=None): + """ + :return: the given file(s) as a list of + sentences, each encoded as a list of ``(word,tag)`` tuples. 
+ + :rtype: list(list(tuple(str,str))) + """ + return concat( + [ + ChunkedCorpusView( + f, enc, 1, 1, 0, 0, *self._cv_args, target_tagset=tagset + ) + for (f, enc) in self.abspaths(fileids, True) + ] + ) + + def tagged_paras(self, fileids=None, tagset=None): + """ + :return: the given file(s) as a list of + paragraphs, each encoded as a list of sentences, which are + in turn encoded as lists of ``(word,tag)`` tuples. + :rtype: list(list(list(tuple(str,str)))) + """ + return concat( + [ + ChunkedCorpusView( + f, enc, 1, 1, 1, 0, *self._cv_args, target_tagset=tagset + ) + for (f, enc) in self.abspaths(fileids, True) + ] + ) + + def chunked_words(self, fileids=None, tagset=None): + """ + :return: the given file(s) as a list of tagged + words and chunks. Words are encoded as ``(word, tag)`` + tuples (if the corpus has tags) or word strings (if the + corpus has no tags). Chunks are encoded as depth-one + trees over ``(word,tag)`` tuples or word strings. + :rtype: list(tuple(str,str) and Tree) + """ + return concat( + [ + ChunkedCorpusView( + f, enc, 1, 0, 0, 1, *self._cv_args, target_tagset=tagset + ) + for (f, enc) in self.abspaths(fileids, True) + ] + ) + + def chunked_sents(self, fileids=None, tagset=None): + """ + :return: the given file(s) as a list of + sentences, each encoded as a shallow Tree. The leaves + of these trees are encoded as ``(word, tag)`` tuples (if + the corpus has tags) or word strings (if the corpus has no + tags). + :rtype: list(Tree) + """ + return concat( + [ + ChunkedCorpusView( + f, enc, 1, 1, 0, 1, *self._cv_args, target_tagset=tagset + ) + for (f, enc) in self.abspaths(fileids, True) + ] + ) + + def chunked_paras(self, fileids=None, tagset=None): + """ + :return: the given file(s) as a list of + paragraphs, each encoded as a list of sentences, which are + in turn encoded as a shallow Tree. The leaves of these + trees are encoded as ``(word, tag)`` tuples (if the corpus + has tags) or word strings (if the corpus has no tags). + :rtype: list(list(Tree)) + """ + return concat( + [ + ChunkedCorpusView( + f, enc, 1, 1, 1, 1, *self._cv_args, target_tagset=tagset + ) + for (f, enc) in self.abspaths(fileids, True) + ] + ) + + def _read_block(self, stream): + return [tagstr2tree(t) for t in read_blankline_block(stream)] + + +class ChunkedCorpusView(StreamBackedCorpusView): + def __init__( + self, + fileid, + encoding, + tagged, + group_by_sent, + group_by_para, + chunked, + str2chunktree, + sent_tokenizer, + para_block_reader, + source_tagset=None, + target_tagset=None, + ): + StreamBackedCorpusView.__init__(self, fileid, encoding=encoding) + self._tagged = tagged + self._group_by_sent = group_by_sent + self._group_by_para = group_by_para + self._chunked = chunked + self._str2chunktree = str2chunktree + self._sent_tokenizer = sent_tokenizer + self._para_block_reader = para_block_reader + self._source_tagset = source_tagset + self._target_tagset = target_tagset + + def read_block(self, stream): + block = [] + for para_str in self._para_block_reader(stream): + para = [] + for sent_str in self._sent_tokenizer.tokenize(para_str): + sent = self._str2chunktree( + sent_str, + source_tagset=self._source_tagset, + target_tagset=self._target_tagset, + ) + + # If requested, throw away the tags. + if not self._tagged: + sent = self._untag(sent) + + # If requested, throw away the chunks. + if not self._chunked: + sent = sent.leaves() + + # Add the sentence to `para`. + if self._group_by_sent: + para.append(sent) + else: + para.extend(sent) + + # Add the paragraph to `block`. 
+ if self._group_by_para: + block.append(para) + else: + block.extend(para) + + # Return the block + return block + + def _untag(self, tree): + for i, child in enumerate(tree): + if isinstance(child, Tree): + self._untag(child) + elif isinstance(child, tuple): + tree[i] = child[0] + else: + raise ValueError('expected child to be Tree or tuple') + return tree diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/cmudict.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/cmudict.py new file mode 100644 index 0000000..a4aef7d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/cmudict.py @@ -0,0 +1,99 @@ +# Natural Language Toolkit: Carnegie Mellon Pronouncing Dictionary Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# URL: +# For license information, see LICENSE.TXT + +""" +The Carnegie Mellon Pronouncing Dictionary [cmudict.0.6] +ftp://ftp.cs.cmu.edu/project/speech/dict/ +Copyright 1998 Carnegie Mellon University + +File Format: Each line consists of an uppercased word, a counter +(for alternative pronunciations), and a transcription. Vowels are +marked for stress (1=primary, 2=secondary, 0=no stress). E.g.: +NATURAL 1 N AE1 CH ER0 AH0 L + +The dictionary contains 127069 entries. Of these, 119400 words are assigned +a unique pronunciation, 6830 words have two pronunciations, and 839 words have +three or more pronunciations. Many of these are fast-speech variants. + +Phonemes: There are 39 phonemes, as shown below: + +Phoneme Example Translation Phoneme Example Translation +------- ------- ----------- ------- ------- ----------- +AA odd AA D AE at AE T +AH hut HH AH T AO ought AO T +AW cow K AW AY hide HH AY D +B be B IY CH cheese CH IY Z +D dee D IY DH thee DH IY +EH Ed EH D ER hurt HH ER T +EY ate EY T F fee F IY +G green G R IY N HH he HH IY +IH it IH T IY eat IY T +JH gee JH IY K key K IY +L lee L IY M me M IY +N knee N IY NG ping P IH NG +OW oat OW T OY toy T OY +P pee P IY R read R IY D +S sea S IY SH she SH IY +T tea T IY TH theta TH EY T AH +UH hood HH UH D UW two T UW +V vee V IY W we W IY +Y yield Y IY L D Z zee Z IY +ZH seizure S IY ZH ER +""" + +from nltk import compat +from nltk.util import Index + +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + + +class CMUDictCorpusReader(CorpusReader): + def entries(self): + """ + :return: the cmudict lexicon as a list of entries + containing (word, transcriptions) tuples. + """ + return concat( + [ + StreamBackedCorpusView(fileid, read_cmudict_block, encoding=enc) + for fileid, enc in self.abspaths(None, True) + ] + ) + + def raw(self): + """ + :return: the cmudict lexicon as a raw string. + """ + fileids = self._fileids + if isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + def words(self): + """ + :return: a list of all words defined in the cmudict lexicon. + """ + return [word.lower() for (word, _) in self.entries()] + + def dict(self): + """ + :return: the cmudict lexicon as a dictionary, whose keys are + lowercase words and whose values are lists of pronunciations. + """ + return dict(Index(self.entries())) + + +def read_cmudict_block(stream): + entries = [] + while len(entries) < 100: # Read 100 at a time. + line = stream.readline() + if line == '': + return entries # end of file. 
+        pieces = line.split()
+        entries.append((pieces[0].lower(), pieces[2:]))
+    return entries
diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/comparative_sents.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/comparative_sents.py
new file mode 100644
index 0000000..30d00cc
--- /dev/null
+++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/comparative_sents.py
@@ -0,0 +1,328 @@
+# Natural Language Toolkit: Comparative Sentence Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Pierpaolo Pantone <24alsecondo@gmail.com>
+# URL:
+# For license information, see LICENSE.TXT
+
+"""
+CorpusReader for the Comparative Sentence Dataset.
+
+- Comparative Sentence Dataset information -
+
+Annotated by: Nitin Jindal and Bing Liu, 2006.
+              Department of Computer Science
+              University of Illinois at Chicago
+
+Contact: Nitin Jindal, njindal@cs.uic.edu
+         Bing Liu, liub@cs.uic.edu (http://www.cs.uic.edu/~liub)
+
+Distributed with permission.
+
+Related papers:
+
+- Nitin Jindal and Bing Liu. "Identifying Comparative Sentences in Text Documents".
+  Proceedings of the ACM SIGIR International Conference on Information Retrieval
+  (SIGIR-06), 2006.
+
+- Nitin Jindal and Bing Liu. "Mining Comparative Sentences and Relations".
+  Proceedings of Twenty First National Conference on Artificial Intelligence
+  (AAAI-2006), 2006.
+
+- Murthy Ganapathibhotla and Bing Liu. "Mining Opinions in Comparative Sentences".
+  Proceedings of the 22nd International Conference on Computational Linguistics
+  (Coling-2008), Manchester, 18-22 August, 2008.
+"""
+import re
+
+from six import string_types
+
+from nltk.corpus.reader.api import *
+from nltk.tokenize import *
+
+# Regular expressions for dataset components
+STARS = re.compile(r'^\*+$')
+COMPARISON = re.compile(r'<cs-[1234]>')
+CLOSE_COMPARISON = re.compile(r'</cs-[1234]>')
+GRAD_COMPARISON = re.compile(r'<cs-[123]>')
+NON_GRAD_COMPARISON = re.compile(r'<cs-4>')
+ENTITIES_FEATS = re.compile(r"(\d)_((?:[\.\w\s/-](?!\d_))+)")
+KEYWORD = re.compile(r'\((?!.*\()(.*)\)$')
+
+
+class Comparison(object):
+    """
+    A Comparison represents a comparative sentence and its constituents.
+    """
+
+    def __init__(
+        self,
+        text=None,
+        comp_type=None,
+        entity_1=None,
+        entity_2=None,
+        feature=None,
+        keyword=None,
+    ):
+        """
+        :param text: a string (optionally tokenized) containing a comparison.
+        :param comp_type: an integer defining the type of comparison expressed.
+            Values can be: 1 (Non-equal gradable), 2 (Equative), 3 (Superlative),
+            4 (Non-gradable).
+        :param entity_1: the first entity considered in the comparison relation.
+        :param entity_2: the second entity considered in the comparison relation.
+        :param feature: the feature considered in the comparison relation.
+        :param keyword: the word or phrase which is used for that comparative relation.
+        """
+        self.text = text
+        self.comp_type = comp_type
+        self.entity_1 = entity_1
+        self.entity_2 = entity_2
+        self.feature = feature
+        self.keyword = keyword
+
+    def __repr__(self):
+        return (
+            "Comparison(text=\"{}\", comp_type={}, entity_1=\"{}\", entity_2=\"{}\", "
+            "feature=\"{}\", keyword=\"{}\")"
+        ).format(
+            self.text,
+            self.comp_type,
+            self.entity_1,
+            self.entity_2,
+            self.feature,
+            self.keyword,
+        )
+
+
+class ComparativeSentencesCorpusReader(CorpusReader):
+    """
+    Reader for the Comparative Sentence Dataset by Jindal and Liu (2006).
+ + >>> from nltk.corpus import comparative_sentences + >>> comparison = comparative_sentences.comparisons()[0] + >>> comparison.text + ['its', 'fast-forward', 'and', 'rewind', 'work', 'much', 'more', 'smoothly', + 'and', 'consistently', 'than', 'those', 'of', 'other', 'models', 'i', "'ve", + 'had', '.'] + >>> comparison.entity_2 + 'models' + >>> (comparison.feature, comparison.keyword) + ('rewind', 'more') + >>> len(comparative_sentences.comparisons()) + 853 + """ + + CorpusView = StreamBackedCorpusView + + def __init__( + self, + root, + fileids, + word_tokenizer=WhitespaceTokenizer(), + sent_tokenizer=None, + encoding='utf8', + ): + """ + :param root: The root directory for this corpus. + :param fileids: a list or regexp specifying the fileids in this corpus. + :param word_tokenizer: tokenizer for breaking sentences or paragraphs + into words. Default: `WhitespaceTokenizer` + :param sent_tokenizer: tokenizer for breaking paragraphs into sentences. + :param encoding: the encoding that should be used to read the corpus. + """ + + CorpusReader.__init__(self, root, fileids, encoding) + self._word_tokenizer = word_tokenizer + self._sent_tokenizer = sent_tokenizer + + def comparisons(self, fileids=None): + """ + Return all comparisons in the corpus. + + :param fileids: a list or regexp specifying the ids of the files whose + comparisons have to be returned. + :return: the given file(s) as a list of Comparison objects. + :rtype: list(Comparison) + """ + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat( + [ + self.CorpusView(path, self._read_comparison_block, encoding=enc) + for (path, enc, fileid) in self.abspaths(fileids, True, True) + ] + ) + + def keywords(self, fileids=None): + """ + Return a set of all keywords used in the corpus. + + :param fileids: a list or regexp specifying the ids of the files whose + keywords have to be returned. + :return: the set of keywords and comparative phrases used in the corpus. + :rtype: set(str) + """ + all_keywords = concat( + [ + self.CorpusView(path, self._read_keyword_block, encoding=enc) + for (path, enc, fileid) in self.abspaths(fileids, True, True) + ] + ) + + keywords_set = set(keyword.lower() for keyword in all_keywords if keyword) + return keywords_set + + def keywords_readme(self): + """ + Return the list of words and constituents considered as clues of a + comparison (from listOfkeywords.txt). + """ + keywords = [] + raw_text = self.open("listOfkeywords.txt").read() + for line in raw_text.split("\n"): + if not line or line.startswith("//"): + continue + keywords.append(line.strip()) + return keywords + + def raw(self, fileids=None): + """ + :param fileids: a list or regexp specifying the fileids that have to be + returned as a raw string. + :return: the given file(s) as a single string. + :rtype: str + """ + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + def readme(self): + """ + Return the contents of the corpus readme file. + """ + return self.open("README.txt").read() + + def sents(self, fileids=None): + """ + Return all sentences in the corpus. + + :param fileids: a list or regexp specifying the ids of the files whose + sentences have to be returned. + :return: all sentences of the corpus as lists of tokens (or as plain + strings, if no word tokenizer is specified). 
+        :rtype: list(list(str)) or list(str)
+        """
+        return concat(
+            [
+                self.CorpusView(path, self._read_sent_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
+
+    def words(self, fileids=None):
+        """
+        Return all words and punctuation symbols in the corpus.
+
+        :param fileids: a list or regexp specifying the ids of the files whose
+            words have to be returned.
+        :return: the given file(s) as a list of words and punctuation symbols.
+        :rtype: list(str)
+        """
+        return concat(
+            [
+                self.CorpusView(path, self._read_word_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
+
+    def _read_comparison_block(self, stream):
+        while True:
+            line = stream.readline()
+            if not line:
+                return []  # end of file.
+            comparison_tags = re.findall(COMPARISON, line)
+            if comparison_tags:
+                grad_comparisons = re.findall(GRAD_COMPARISON, line)
+                non_grad_comparisons = re.findall(NON_GRAD_COMPARISON, line)
+                # Advance to the next line (it contains the comparative sentence)
+                comparison_text = stream.readline().strip()
+                if self._word_tokenizer:
+                    comparison_text = self._word_tokenizer.tokenize(comparison_text)
+                # Skip the next line (it contains closing comparison tags)
+                stream.readline()
+                # If gradable comparisons are found, create Comparison instances
+                # and populate their fields
+                comparison_bundle = []
+                if grad_comparisons:
+                    # Each comparison tag has its own relations on a separate line
+                    for comp in grad_comparisons:
+                        comp_type = int(re.match(r'<cs-(\d)>', comp).group(1))
+                        comparison = Comparison(
+                            text=comparison_text, comp_type=comp_type
+                        )
+                        line = stream.readline()
+                        entities_feats = ENTITIES_FEATS.findall(line)
+                        if entities_feats:
+                            for (code, entity_feat) in entities_feats:
+                                if code == '1':
+                                    comparison.entity_1 = entity_feat.strip()
+                                elif code == '2':
+                                    comparison.entity_2 = entity_feat.strip()
+                                elif code == '3':
+                                    comparison.feature = entity_feat.strip()
+                        keyword = KEYWORD.findall(line)
+                        if keyword:
+                            comparison.keyword = keyword[0]
+                        comparison_bundle.append(comparison)
+                # If non-gradable comparisons are found, create a simple Comparison
+                # instance for each one
+                if non_grad_comparisons:
+                    for comp in non_grad_comparisons:
+                        # comp_type in this case should always be 4.
+                        comp_type = int(re.match(r'<cs-(\d)>', comp).group(1))
+                        comparison = Comparison(
+                            text=comparison_text, comp_type=comp_type
+                        )
+                        comparison_bundle.append(comparison)
+                # Flatten the list of comparisons before returning them
+                # return concat([comparison_bundle])
+                return comparison_bundle
+
+    def _read_keyword_block(self, stream):
+        keywords = []
+        for comparison in self._read_comparison_block(stream):
+            keywords.append(comparison.keyword)
+        return keywords
+
+    def _read_sent_block(self, stream):
+        while True:
+            line = stream.readline()
+            if re.match(STARS, line):
+                while True:
+                    line = stream.readline()
+                    if re.match(STARS, line):
+                        break
+                continue
+            if (
+                not re.findall(COMPARISON, line)
+                and not ENTITIES_FEATS.findall(line)
+                and not re.findall(CLOSE_COMPARISON, line)
+            ):
+                if self._sent_tokenizer:
+                    return [
+                        self._word_tokenizer.tokenize(sent)
+                        for sent in self._sent_tokenizer.tokenize(line)
+                    ]
+                else:
+                    return [self._word_tokenizer.tokenize(line)]
+
+    def _read_word_block(self, stream):
+        words = []
+        for sent in self._read_sent_block(stream):
+            words.extend(sent)
+        return words
diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/conll.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/conll.py
new file mode 100644
index 0000000..26849be
--- /dev/null
+++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/conll.py
@@ -0,0 +1,592 @@
+# Natural Language Toolkit: CONLL Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Steven Bird
+#         Edward Loper
+# URL:
+# For license information, see LICENSE.TXT
+
+"""
+Read CoNLL-style chunk fileids.
+"""
+
+from __future__ import unicode_literals
+
+import textwrap
+
+from nltk import compat
+from nltk.tree import Tree
+from nltk.util import LazyMap, LazyConcatenation
+from nltk.tag import map_tag
+
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+
+class ConllCorpusReader(CorpusReader):
+    """
+    A corpus reader for CoNLL-style files. These files consist of a
+    series of sentences, separated by blank lines. Each sentence is
+    encoded using a table (or "grid") of values, where each line
+    corresponds to a single word, and each column corresponds to an
+    annotation type. The set of columns used by CoNLL-style files can
+    vary from corpus to corpus; the ``ConllCorpusReader`` constructor
+    therefore takes an argument, ``columntypes``, which is used to
+    specify the columns that are used by a given corpus. By default,
+    columns are split on consecutive whitespace; with the ``separator``
+    argument you can set a string to split on instead (e.g. ``'\t'``).
+
+    @todo: Add support for reading from corpora where different
+        parallel files contain different columns.
+    @todo: Possibly add caching of the grid corpus view? This would
+        allow the same grid view to be used by different data access
+        methods (e.g. words() and parsed_sents() could both share the
+        same grid corpus view object).
+    @todo: Better support for -DOCSTART-. Currently, we just ignore
+        it, but it could be used to define methods that retrieve a
+        document at a time (e.g. parsed_documents()).
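+
+    A minimal construction sketch (the path, file pattern and column layout
+    here are illustrative assumptions, not properties of any particular
+    corpus)::
+
+        reader = ConllCorpusReader(
+            'path/to/corpus', r'.*\.conll',
+            columntypes=('words', 'pos', 'chunk'),
+        )
+        reader.tagged_sents()   # [[(word, pos), ...], ...]
+        reader.chunked_sents()  # shallow chunk Trees built from the 'chunk' column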
+ """ + + # ///////////////////////////////////////////////////////////////// + # Column Types + # ///////////////////////////////////////////////////////////////// + + WORDS = 'words' #: column type for words + POS = 'pos' #: column type for part-of-speech tags + TREE = 'tree' #: column type for parse trees + CHUNK = 'chunk' #: column type for chunk structures + NE = 'ne' #: column type for named entities + SRL = 'srl' #: column type for semantic role labels + IGNORE = 'ignore' #: column type for column that should be ignored + + #: A list of all column types supported by the conll corpus reader. + COLUMN_TYPES = (WORDS, POS, TREE, CHUNK, NE, SRL, IGNORE) + + # ///////////////////////////////////////////////////////////////// + # Constructor + # ///////////////////////////////////////////////////////////////// + + def __init__( + self, + root, + fileids, + columntypes, + chunk_types=None, + root_label='S', + pos_in_tree=False, + srl_includes_roleset=True, + encoding='utf8', + tree_class=Tree, + tagset=None, + separator=None, + ): + for columntype in columntypes: + if columntype not in self.COLUMN_TYPES: + raise ValueError('Bad column type %r' % columntype) + if isinstance(chunk_types, string_types): + chunk_types = [chunk_types] + self._chunk_types = chunk_types + self._colmap = dict((c, i) for (i, c) in enumerate(columntypes)) + self._pos_in_tree = pos_in_tree + self._root_label = root_label # for chunks + self._srl_includes_roleset = srl_includes_roleset + self._tree_class = tree_class + CorpusReader.__init__(self, root, fileids, encoding) + self._tagset = tagset + self.sep = separator + + # ///////////////////////////////////////////////////////////////// + # Data Access Methods + # ///////////////////////////////////////////////////////////////// + + def raw(self, fileids=None): + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + def words(self, fileids=None): + self._require(self.WORDS) + return LazyConcatenation(LazyMap(self._get_words, self._grids(fileids))) + + def sents(self, fileids=None): + self._require(self.WORDS) + return LazyMap(self._get_words, self._grids(fileids)) + + def tagged_words(self, fileids=None, tagset=None): + self._require(self.WORDS, self.POS) + + def get_tagged_words(grid): + return self._get_tagged_words(grid, tagset) + + return LazyConcatenation(LazyMap(get_tagged_words, self._grids(fileids))) + + def tagged_sents(self, fileids=None, tagset=None): + self._require(self.WORDS, self.POS) + + def get_tagged_words(grid): + return self._get_tagged_words(grid, tagset) + + return LazyMap(get_tagged_words, self._grids(fileids)) + + def chunked_words(self, fileids=None, chunk_types=None, tagset=None): + self._require(self.WORDS, self.POS, self.CHUNK) + if chunk_types is None: + chunk_types = self._chunk_types + + def get_chunked_words(grid): # capture chunk_types as local var + return self._get_chunked_words(grid, chunk_types, tagset) + + return LazyConcatenation(LazyMap(get_chunked_words, self._grids(fileids))) + + def chunked_sents(self, fileids=None, chunk_types=None, tagset=None): + self._require(self.WORDS, self.POS, self.CHUNK) + if chunk_types is None: + chunk_types = self._chunk_types + + def get_chunked_words(grid): # capture chunk_types as local var + return self._get_chunked_words(grid, chunk_types, tagset) + + return LazyMap(get_chunked_words, self._grids(fileids)) + + def parsed_sents(self, fileids=None, pos_in_tree=None, tagset=None): 
+ self._require(self.WORDS, self.POS, self.TREE) + if pos_in_tree is None: + pos_in_tree = self._pos_in_tree + + def get_parsed_sent(grid): # capture pos_in_tree as local var + return self._get_parsed_sent(grid, pos_in_tree, tagset) + + return LazyMap(get_parsed_sent, self._grids(fileids)) + + def srl_spans(self, fileids=None): + self._require(self.SRL) + return LazyMap(self._get_srl_spans, self._grids(fileids)) + + def srl_instances(self, fileids=None, pos_in_tree=None, flatten=True): + self._require(self.WORDS, self.POS, self.TREE, self.SRL) + if pos_in_tree is None: + pos_in_tree = self._pos_in_tree + + def get_srl_instances(grid): # capture pos_in_tree as local var + return self._get_srl_instances(grid, pos_in_tree) + + result = LazyMap(get_srl_instances, self._grids(fileids)) + if flatten: + result = LazyConcatenation(result) + return result + + def iob_words(self, fileids=None, tagset=None): + """ + :return: a list of word/tag/IOB tuples + :rtype: list(tuple) + :param fileids: the list of fileids that make up this corpus + :type fileids: None or str or list + """ + self._require(self.WORDS, self.POS, self.CHUNK) + + def get_iob_words(grid): + return self._get_iob_words(grid, tagset) + + return LazyConcatenation(LazyMap(get_iob_words, self._grids(fileids))) + + def iob_sents(self, fileids=None, tagset=None): + """ + :return: a list of lists of word/tag/IOB tuples + :rtype: list(list) + :param fileids: the list of fileids that make up this corpus + :type fileids: None or str or list + """ + self._require(self.WORDS, self.POS, self.CHUNK) + + def get_iob_words(grid): + return self._get_iob_words(grid, tagset) + + return LazyMap(get_iob_words, self._grids(fileids)) + + # ///////////////////////////////////////////////////////////////// + # Grid Reading + # ///////////////////////////////////////////////////////////////// + + def _grids(self, fileids=None): + # n.b.: we could cache the object returned here (keyed on + # fileids), which would let us reuse the same corpus view for + # different things (eg srl and parse trees). + return concat( + [ + StreamBackedCorpusView(fileid, self._read_grid_block, encoding=enc) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def _read_grid_block(self, stream): + grids = [] + for block in read_blankline_block(stream): + block = block.strip() + if not block: + continue + + grid = [line.split(self.sep) for line in block.split('\n')] + + # If there's a docstart row, then discard. ([xx] eventually it + # would be good to actually use it) + if grid[0][self._colmap.get('words', 0)] == '-DOCSTART-': + del grid[0] + + # Check that the grid is consistent. + for row in grid: + if len(row) != len(grid[0]): + raise ValueError('Inconsistent number of columns:\n%s' % block) + grids.append(grid) + return grids + + # ///////////////////////////////////////////////////////////////// + # Transforms + # ///////////////////////////////////////////////////////////////// + # given a grid, transform it into some representation (e.g., + # a list of words or a parse tree). 
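# Minimal usage sketch for the ConllCorpusReader defined in this file, based on the
# constructor and accessor methods above. The corpus directory, file id, and chunk
# types used here are placeholder assumptions for illustration only.
from nltk.corpus.reader import ConllCorpusReader

# Assumes a whitespace-separated, three-column CoNLL-style file
# ("word POS chunk-IOB"), with blank lines between sentences.
reader = ConllCorpusReader(
    root='corpora/my_conll',                # placeholder directory
    fileids=['train.conll'],                # placeholder file id
    columntypes=('words', 'pos', 'chunk'),
    chunk_types=('NP', 'VP', 'PP'),         # chunk types to keep; others are read as 'O'
)

print(reader.iob_sents()[0])      # [(word, pos, iob_chunk_tag), ...] for the first sentence
print(reader.chunked_sents()[0])  # an nltk Tree rooted at 'S' containing chunk subtrees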
+ + def _get_words(self, grid): + return self._get_column(grid, self._colmap['words']) + + def _get_tagged_words(self, grid, tagset=None): + pos_tags = self._get_column(grid, self._colmap['pos']) + if tagset and tagset != self._tagset: + pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags] + return list(zip(self._get_column(grid, self._colmap['words']), pos_tags)) + + def _get_iob_words(self, grid, tagset=None): + pos_tags = self._get_column(grid, self._colmap['pos']) + if tagset and tagset != self._tagset: + pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags] + return list( + zip( + self._get_column(grid, self._colmap['words']), + pos_tags, + self._get_column(grid, self._colmap['chunk']), + ) + ) + + def _get_chunked_words(self, grid, chunk_types, tagset=None): + # n.b.: this method is very similar to conllstr2tree. + words = self._get_column(grid, self._colmap['words']) + pos_tags = self._get_column(grid, self._colmap['pos']) + if tagset and tagset != self._tagset: + pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags] + chunk_tags = self._get_column(grid, self._colmap['chunk']) + + stack = [Tree(self._root_label, [])] + + for (word, pos_tag, chunk_tag) in zip(words, pos_tags, chunk_tags): + if chunk_tag == 'O': + state, chunk_type = 'O', '' + else: + (state, chunk_type) = chunk_tag.split('-') + # If it's a chunk we don't care about, treat it as O. + if chunk_types is not None and chunk_type not in chunk_types: + state = 'O' + # Treat a mismatching I like a B. + if state == 'I' and chunk_type != stack[-1].label(): + state = 'B' + # For B or I: close any open chunks + if state in 'BO' and len(stack) == 2: + stack.pop() + # For B: start a new chunk. + if state == 'B': + new_chunk = Tree(chunk_type, []) + stack[-1].append(new_chunk) + stack.append(new_chunk) + # Add the word token. + stack[-1].append((word, pos_tag)) + + return stack[0] + + def _get_parsed_sent(self, grid, pos_in_tree, tagset=None): + words = self._get_column(grid, self._colmap['words']) + pos_tags = self._get_column(grid, self._colmap['pos']) + if tagset and tagset != self._tagset: + pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags] + parse_tags = self._get_column(grid, self._colmap['tree']) + + treestr = '' + for (word, pos_tag, parse_tag) in zip(words, pos_tags, parse_tags): + if word == '(': + word = '-LRB-' + if word == ')': + word = '-RRB-' + if pos_tag == '(': + pos_tag = '-LRB-' + if pos_tag == ')': + pos_tag = '-RRB-' + (left, right) = parse_tag.split('*') + right = right.count(')') * ')' # only keep ')'. + treestr += '%s (%s %s) %s' % (left, pos_tag, word, right) + try: + tree = self._tree_class.fromstring(treestr) + except (ValueError, IndexError): + tree = self._tree_class.fromstring('(%s %s)' % (self._root_label, treestr)) + + if not pos_in_tree: + for subtree in tree.subtrees(): + for i, child in enumerate(subtree): + if ( + isinstance(child, Tree) + and len(child) == 1 + and isinstance(child[0], string_types) + ): + subtree[i] = (child[0], child.label()) + + return tree + + def _get_srl_spans(self, grid): + """ + list of list of (start, end), tag) tuples + """ + if self._srl_includes_roleset: + predicates = self._get_column(grid, self._colmap['srl'] + 1) + start_col = self._colmap['srl'] + 2 + else: + predicates = self._get_column(grid, self._colmap['srl']) + start_col = self._colmap['srl'] + 1 + + # Count how many predicates there are. This tells us how many + # columns to expect for SRL data. 
+ num_preds = len([p for p in predicates if p != '-']) + + spanlists = [] + for i in range(num_preds): + col = self._get_column(grid, start_col + i) + spanlist = [] + stack = [] + for wordnum, srl_tag in enumerate(col): + (left, right) = srl_tag.split('*') + for tag in left.split('('): + if tag: + stack.append((tag, wordnum)) + for i in range(right.count(')')): + (tag, start) = stack.pop() + spanlist.append(((start, wordnum + 1), tag)) + spanlists.append(spanlist) + + return spanlists + + def _get_srl_instances(self, grid, pos_in_tree): + tree = self._get_parsed_sent(grid, pos_in_tree) + spanlists = self._get_srl_spans(grid) + if self._srl_includes_roleset: + predicates = self._get_column(grid, self._colmap['srl'] + 1) + rolesets = self._get_column(grid, self._colmap['srl']) + else: + predicates = self._get_column(grid, self._colmap['srl']) + rolesets = [None] * len(predicates) + + instances = ConllSRLInstanceList(tree) + for wordnum, predicate in enumerate(predicates): + if predicate == '-': + continue + # Decide which spanlist to use. Don't assume that they're + # sorted in the same order as the predicates (even though + # they usually are). + for spanlist in spanlists: + for (start, end), tag in spanlist: + if wordnum in range(start, end) and tag in ('V', 'C-V'): + break + else: + continue + break + else: + raise ValueError('No srl column found for %r' % predicate) + instances.append( + ConllSRLInstance(tree, wordnum, predicate, rolesets[wordnum], spanlist) + ) + + return instances + + # ///////////////////////////////////////////////////////////////// + # Helper Methods + # ///////////////////////////////////////////////////////////////// + + def _require(self, *columntypes): + for columntype in columntypes: + if columntype not in self._colmap: + raise ValueError( + 'This corpus does not contain a %s ' 'column.' % columntype + ) + + @staticmethod + def _get_column(grid, column_index): + return [grid[i][column_index] for i in range(len(grid))] + + +@compat.python_2_unicode_compatible +class ConllSRLInstance(object): + """ + An SRL instance from a CoNLL corpus, which identifies and + providing labels for the arguments of a single verb. + """ + + # [xx] add inst.core_arguments, inst.argm_arguments? + + def __init__(self, tree, verb_head, verb_stem, roleset, tagged_spans): + self.verb = [] + """A list of the word indices of the words that compose the + verb whose arguments are identified by this instance. + This will contain multiple word indices when multi-word + verbs are used (e.g. 'turn on').""" + + self.verb_head = verb_head + """The word index of the head word of the verb whose arguments + are identified by this instance. E.g., for a sentence that + uses the verb 'turn on,' ``verb_head`` will be the word index + of the word 'turn'.""" + + self.verb_stem = verb_stem + + self.roleset = roleset + + self.arguments = [] + """A list of ``(argspan, argid)`` tuples, specifying the location + and type for each of the arguments identified by this + instance. 
``argspan`` is a tuple ``start, end``, indicating
+        that the argument consists of the ``words[start:end]``."""
+
+        self.tagged_spans = tagged_spans
+        """A list of ``(span, id)`` tuples, specifying the location and
+        type for each of the arguments, as well as the verb pieces,
+        that make up this instance."""
+
+        self.tree = tree
+        """The parse tree for the sentence containing this instance."""
+
+        self.words = tree.leaves()
+        """A list of the words in the sentence containing this
+        instance."""
+
+        # Fill in the self.verb and self.arguments values.
+        for (start, end), tag in tagged_spans:
+            if tag in ('V', 'C-V'):
+                self.verb += list(range(start, end))
+            else:
+                self.arguments.append(((start, end), tag))
+
+    def __repr__(self):
+        # Originally, its:
+        ##plural = 's' if len(self.arguments) != 1 else ''
+        plural = 's' if len(self.arguments) != 1 else ''
+        return '<ConllSRLInstance for %r, with %d argument%s>' % (
+            (self.verb_stem, len(self.arguments), plural)
+        )
+
+    def pprint(self):
+        verbstr = ' '.join(self.words[i][0] for i in self.verb)
+        hdr = 'SRL for %r (stem=%r):\n' % (verbstr, self.verb_stem)
+        s = ''
+        for i, word in enumerate(self.words):
+            if isinstance(word, tuple):
+                word = word[0]
+            for (start, end), argid in self.arguments:
+                if i == start:
+                    s += '[%s ' % argid
+                if i == end:
+                    s += '] '
+            if i in self.verb:
+                word = '<<%s>>' % word
+            s += word + ' '
+        return hdr + textwrap.fill(
+            s.replace(' ]', ']'), initial_indent=' ', subsequent_indent=' '
+        )
+
+
+@compat.python_2_unicode_compatible
+class ConllSRLInstanceList(list):
+    """
+    Set of instances for a single sentence
+    """
+
+    def __init__(self, tree, instances=()):
+        self.tree = tree
+        list.__init__(self, instances)
+
+    def __str__(self):
+        return self.pprint()
+
+    def pprint(self, include_tree=False):
+        # Sanity check: trees should be the same
+        for inst in self:
+            if inst.tree != self.tree:
+                raise ValueError('Tree mismatch!')
+
+        # If desired, add trees:
+        if include_tree:
+            words = self.tree.leaves()
+            pos = [None] * len(words)
+            synt = ['*'] * len(words)
+            self._tree2conll(self.tree, 0, words, pos, synt)
+
+        s = ''
+        for i in range(len(words)):
+            # optional tree columns
+            if include_tree:
+                s += '%-20s ' % words[i]
+                s += '%-8s ' % pos[i]
+                s += '%15s*%-8s ' % tuple(synt[i].split('*'))
+
+            # verb head column
+            for inst in self:
+                if i == inst.verb_head:
+                    s += '%-20s ' % inst.verb_stem
+                    break
+            else:
+                s += '%-20s ' % '-'
+            # Remaining columns: self
+            for inst in self:
+                argstr = '*'
+                for (start, end), argid in inst.tagged_spans:
+                    if i == start:
+                        argstr = '(%s%s' % (argid, argstr)
+                    if i == (end - 1):
+                        argstr += ')'
+                s += '%-12s ' % argstr
+            s += '\n'
+        return s
+
+    def _tree2conll(self, tree, wordnum, words, pos, synt):
+        assert isinstance(tree, Tree)
+        if len(tree) == 1 and isinstance(tree[0], string_types):
+            pos[wordnum] = tree.label()
+            assert words[wordnum] == tree[0]
+            return wordnum + 1
+        elif len(tree) == 1 and isinstance(tree[0], tuple):
+            assert len(tree[0]) == 2
+            pos[wordnum], pos[wordnum] = tree[0]
+            return wordnum + 1
+        else:
+            synt[wordnum] = '(%s%s' % (tree.label(), synt[wordnum])
+            for child in tree:
+                wordnum = self._tree2conll(child, wordnum, words, pos, synt)
+            synt[wordnum - 1] += ')'
+            return wordnum
+
+
+class ConllChunkCorpusReader(ConllCorpusReader):
+    """
+    A ConllCorpusReader whose data file contains three columns: words,
+    pos, and chunk.
+ """ + + def __init__( + self, root, fileids, chunk_types, encoding='utf8', tagset=None, separator=None + ): + ConllCorpusReader.__init__( + self, + root, + fileids, + ('words', 'pos', 'chunk'), + chunk_types=chunk_types, + encoding=encoding, + tagset=tagset, + separator=separator, + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/crubadan.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/crubadan.py new file mode 100644 index 0000000..8470b06 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/crubadan.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: An Crubadan N-grams Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Avital Pekker +# +# URL: +# For license information, see LICENSE.TXT + +""" +An NLTK interface for the n-gram statistics gathered from +the corpora for each language using An Crubadan. + +There are multiple potential applications for the data but +this reader was created with the goal of using it in the +context of language identification. + +For details about An Crubadan, this data, and its potential uses, see: +http://borel.slu.edu/crubadan/index.html +""" + +from __future__ import print_function, unicode_literals + +import re +from os import path + +from nltk.compat import PY3 +from nltk.corpus.reader import CorpusReader +from nltk.probability import FreqDist +from nltk.data import ZipFilePathPointer + + +class CrubadanCorpusReader(CorpusReader): + """ + A corpus reader used to access language An Crubadan n-gram files. + """ + + _LANG_MAPPER_FILE = 'table.txt' + _all_lang_freq = {} + + def __init__(self, root, fileids, encoding='utf8', tagset=None): + super(CrubadanCorpusReader, self).__init__(root, fileids, encoding='utf8') + self._lang_mapping_data = [] + self._load_lang_mapping_data() + + def lang_freq(self, lang): + ''' Return n-gram FreqDist for a specific language + given ISO 639-3 language code ''' + + if lang not in self._all_lang_freq: + self._all_lang_freq[lang] = self._load_lang_ngrams(lang) + + return self._all_lang_freq[lang] + + def langs(self): + ''' Return a list of supported languages as ISO 639-3 codes ''' + return [row[1] for row in self._lang_mapping_data] + + def iso_to_crubadan(self, lang): + ''' Return internal Crubadan code based on ISO 639-3 code ''' + for i in self._lang_mapping_data: + if i[1].lower() == lang.lower(): + return i[0] + + def crubadan_to_iso(self, lang): + ''' Return ISO 639-3 code given internal Crubadan code ''' + for i in self._lang_mapping_data: + if i[0].lower() == lang.lower(): + return i[1] + + def _load_lang_mapping_data(self): + ''' Load language mappings between codes and description from table.txt ''' + if isinstance(self.root, ZipFilePathPointer): + raise RuntimeError( + "Please install the 'crubadan' corpus first, use nltk.download()" + ) + + mapper_file = path.join(self.root, self._LANG_MAPPER_FILE) + if self._LANG_MAPPER_FILE not in self.fileids(): + raise RuntimeError("Could not find language mapper file: " + mapper_file) + + if PY3: + raw = open(mapper_file, 'r', encoding='utf-8').read().strip() + else: + raw = open(mapper_file, 'rU').read().decode('utf-8').strip() + + self._lang_mapping_data = [row.split('\t') for row in raw.split('\n')] + + def _load_lang_ngrams(self, lang): + ''' Load single n-gram language file given the ISO 639-3 language code + and return its FreqDist ''' + + if lang not in self.langs(): + raise RuntimeError("Unsupported language.") + + crubadan_code = self.iso_to_crubadan(lang) + ngram_file = 
path.join(self.root, crubadan_code + '-3grams.txt') + + if not path.isfile(ngram_file): + raise RuntimeError("No N-gram file found for requested language.") + + counts = FreqDist() + if PY3: + f = open(ngram_file, 'r', encoding='utf-8') + else: + f = open(ngram_file, 'rU') + + for line in f: + if PY3: + data = line.split(' ') + else: + data = line.decode('utf8').split(' ') + + ngram = data[1].strip('\n') + freq = int(data[0]) + + counts[ngram] = freq + + return counts diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/dependency.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/dependency.py new file mode 100644 index 0000000..49e7423 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/dependency.py @@ -0,0 +1,134 @@ +# Natural Language Toolkit: Dependency Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Kepa Sarasola +# Iker Manterola +# +# URL: +# For license information, see LICENSE.TXT + +import codecs + +from nltk.parse import DependencyGraph +from nltk.tokenize import * + +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + + +class DependencyCorpusReader(SyntaxCorpusReader): + def __init__( + self, + root, + fileids, + encoding='utf8', + word_tokenizer=TabTokenizer(), + sent_tokenizer=RegexpTokenizer('\n', gaps=True), + para_block_reader=read_blankline_block, + ): + # FIXME: Why is it inheritting from SyntaxCorpusReader but initializing + # from CorpusReader? + CorpusReader.__init__(self, root, fileids, encoding) + + ######################################################### + + def raw(self, fileids=None): + """ + :return: the given file(s) as a single string. + :rtype: str + """ + result = [] + for fileid, encoding in self.abspaths(fileids, include_encoding=True): + if isinstance(fileid, PathPointer): + result.append(fileid.open(encoding=encoding).read()) + else: + with codecs.open(fileid, "r", encoding) as fp: + result.append(fp.read()) + return concat(result) + + def words(self, fileids=None): + return concat( + [ + DependencyCorpusView(fileid, False, False, False, encoding=enc) + for fileid, enc in self.abspaths(fileids, include_encoding=True) + ] + ) + + def tagged_words(self, fileids=None): + return concat( + [ + DependencyCorpusView(fileid, True, False, False, encoding=enc) + for fileid, enc in self.abspaths(fileids, include_encoding=True) + ] + ) + + def sents(self, fileids=None): + return concat( + [ + DependencyCorpusView(fileid, False, True, False, encoding=enc) + for fileid, enc in self.abspaths(fileids, include_encoding=True) + ] + ) + + def tagged_sents(self, fileids=None): + return concat( + [ + DependencyCorpusView(fileid, True, True, False, encoding=enc) + for fileid, enc in self.abspaths(fileids, include_encoding=True) + ] + ) + + def parsed_sents(self, fileids=None): + sents = concat( + [ + DependencyCorpusView(fileid, False, True, True, encoding=enc) + for fileid, enc in self.abspaths(fileids, include_encoding=True) + ] + ) + return [DependencyGraph(sent) for sent in sents] + + +class DependencyCorpusView(StreamBackedCorpusView): + _DOCSTART = '-DOCSTART- -DOCSTART- O\n' # dokumentu hasiera definitzen da + + def __init__( + self, + corpus_file, + tagged, + group_by_sent, + dependencies, + chunk_types=None, + encoding='utf8', + ): + self._tagged = tagged + self._dependencies = dependencies + self._group_by_sent = group_by_sent + self._chunk_types = chunk_types + StreamBackedCorpusView.__init__(self, corpus_file, encoding=encoding) + + def read_block(self, stream): + # 
Read the next sentence. + sent = read_blankline_block(stream)[0].strip() + # Strip off the docstart marker, if present. + if sent.startswith(self._DOCSTART): + sent = sent[len(self._DOCSTART) :].lstrip() + + # extract word and tag from any of the formats + if not self._dependencies: + lines = [line.split('\t') for line in sent.split('\n')] + if len(lines[0]) == 3 or len(lines[0]) == 4: + sent = [(line[0], line[1]) for line in lines] + elif len(lines[0]) == 10: + sent = [(line[1], line[4]) for line in lines] + else: + raise ValueError('Unexpected number of fields in dependency tree file') + + # discard tags if they weren't requested + if not self._tagged: + sent = [word for (word, tag) in sent] + + # Return the result. + if self._group_by_sent: + return [sent] + else: + return list(sent) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/framenet.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/framenet.py new file mode 100644 index 0000000..9705f4a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/framenet.py @@ -0,0 +1,3459 @@ +# Natural Language Toolkit: Framenet Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Authors: Chuck Wooters , +# Nathan Schneider +# URL: +# For license information, see LICENSE.TXT + + +""" +Corpus reader for the FrameNet 1.7 lexicon and corpus. +""" +from __future__ import print_function, unicode_literals + +import os +import re +import textwrap +import itertools +import sys +import types +from collections import defaultdict, OrderedDict +from operator import itemgetter + +from six import string_types, text_type +from six.moves import zip_longest +from pprint import pprint + +from nltk.corpus.reader import XMLCorpusReader, XMLCorpusView +from nltk.compat import python_2_unicode_compatible +from nltk.util import LazyConcatenation, LazyMap, LazyIteratorList + +__docformat__ = 'epytext en' + + +def mimic_wrap(lines, wrap_at=65, **kwargs): + """ + Wrap the first of 'lines' with textwrap and the remaining lines at exactly the same + positions as the first. + """ + l0 = textwrap.fill(lines[0], wrap_at, drop_whitespace=False).split('\n') + yield l0 + + def _(line): + il0 = 0 + while line and il0 < len(l0) - 1: + yield line[: len(l0[il0])] + line = line[len(l0[il0]) :] + il0 += 1 + if line: # Remaining stuff on this line past the end of the mimicked line. + # So just textwrap this line. + for ln in textwrap.fill(line, wrap_at, drop_whitespace=False).split('\n'): + yield ln + + for l in lines[1:]: + yield list(_(l)) + + +def _pretty_longstring(defstr, prefix='', wrap_at=65): + + """ + Helper function for pretty-printing a long string. + + :param defstr: The string to be printed. + :type defstr: str + :return: A nicely formated string representation of the long string. + :rtype: str + """ + + outstr = "" + for line in textwrap.fill(defstr, wrap_at).split('\n'): + outstr += prefix + line + '\n' + return outstr + + +def _pretty_any(obj): + + """ + Helper function for pretty-printing any AttrDict object. + + :param obj: The obj to be printed. + :type obj: AttrDict + :return: A nicely formated string representation of the AttrDict object. 
+ :rtype: str + """ + + outstr = "" + for k in obj: + if isinstance(obj[k], string_types) and len(obj[k]) > 65: + outstr += "[{0}]\n".format(k) + outstr += "{0}".format(_pretty_longstring(obj[k], prefix=' ')) + outstr += '\n' + else: + outstr += "[{0}] {1}\n".format(k, obj[k]) + + return outstr + + +def _pretty_semtype(st): + + """ + Helper function for pretty-printing a semantic type. + + :param st: The semantic type to be printed. + :type st: AttrDict + :return: A nicely formated string representation of the semantic type. + :rtype: str + """ + + semkeys = st.keys() + if len(semkeys) == 1: + return "" + + outstr = "" + outstr += "semantic type ({0.ID}): {0.name}\n".format(st) + if 'abbrev' in semkeys: + outstr += "[abbrev] {0}\n".format(st.abbrev) + if 'definition' in semkeys: + outstr += "[definition]\n" + outstr += _pretty_longstring(st.definition, ' ') + outstr += "[rootType] {0}({1})\n".format(st.rootType.name, st.rootType.ID) + if st.superType is None: + outstr += "[superType] \n" + else: + outstr += "[superType] {0}({1})\n".format(st.superType.name, st.superType.ID) + outstr += "[subTypes] {0} subtypes\n".format(len(st.subTypes)) + outstr += ( + " " + + ", ".join('{0}({1})'.format(x.name, x.ID) for x in st.subTypes) + + '\n' * (len(st.subTypes) > 0) + ) + return outstr + + +def _pretty_frame_relation_type(freltyp): + + """ + Helper function for pretty-printing a frame relation type. + + :param freltyp: The frame relation type to be printed. + :type freltyp: AttrDict + :return: A nicely formated string representation of the frame relation type. + :rtype: str + """ + outstr = " {0.subFrameName}>".format( + freltyp + ) + return outstr + + +def _pretty_frame_relation(frel): + + """ + Helper function for pretty-printing a frame relation. + + :param frel: The frame relation to be printed. + :type frel: AttrDict + :return: A nicely formated string representation of the frame relation. + :rtype: str + """ + outstr = "<{0.type.superFrameName}={0.superFrameName} -- {0.type.name} -> {0.type.subFrameName}={0.subFrameName}>".format( + frel + ) + return outstr + + +def _pretty_fe_relation(ferel): + + """ + Helper function for pretty-printing an FE relation. + + :param ferel: The FE relation to be printed. + :type ferel: AttrDict + :return: A nicely formated string representation of the FE relation. + :rtype: str + """ + outstr = "<{0.type.superFrameName}={0.frameRelation.superFrameName}.{0.superFEName} -- {0.type.name} -> {0.type.subFrameName}={0.frameRelation.subFrameName}.{0.subFEName}>".format( + ferel + ) + return outstr + + +def _pretty_lu(lu): + + """ + Helper function for pretty-printing a lexical unit. + + :param lu: The lu to be printed. + :type lu: AttrDict + :return: A nicely formated string representation of the lexical unit. 
+ :rtype: str + """ + + lukeys = lu.keys() + outstr = "" + outstr += "lexical unit ({0.ID}): {0.name}\n\n".format(lu) + if 'definition' in lukeys: + outstr += "[definition]\n" + outstr += _pretty_longstring(lu.definition, ' ') + if 'frame' in lukeys: + outstr += "\n[frame] {0}({1})\n".format(lu.frame.name, lu.frame.ID) + if 'incorporatedFE' in lukeys: + outstr += "\n[incorporatedFE] {0}\n".format(lu.incorporatedFE) + if 'POS' in lukeys: + outstr += "\n[POS] {0}\n".format(lu.POS) + if 'status' in lukeys: + outstr += "\n[status] {0}\n".format(lu.status) + if 'totalAnnotated' in lukeys: + outstr += "\n[totalAnnotated] {0} annotated examples\n".format( + lu.totalAnnotated + ) + if 'lexemes' in lukeys: + outstr += "\n[lexemes] {0}\n".format( + ' '.join('{0}/{1}'.format(lex.name, lex.POS) for lex in lu.lexemes) + ) + if 'semTypes' in lukeys: + outstr += "\n[semTypes] {0} semantic types\n".format(len(lu.semTypes)) + outstr += ( + " " * (len(lu.semTypes) > 0) + + ", ".join('{0}({1})'.format(x.name, x.ID) for x in lu.semTypes) + + '\n' * (len(lu.semTypes) > 0) + ) + if 'URL' in lukeys: + outstr += "\n[URL] {0}\n".format(lu.URL) + if 'subCorpus' in lukeys: + subc = [x.name for x in lu.subCorpus] + outstr += "\n[subCorpus] {0} subcorpora\n".format(len(lu.subCorpus)) + for line in textwrap.fill(", ".join(sorted(subc)), 60).split('\n'): + outstr += " {0}\n".format(line) + if 'exemplars' in lukeys: + outstr += "\n[exemplars] {0} sentences across all subcorpora\n".format( + len(lu.exemplars) + ) + + return outstr + + +def _pretty_exemplars(exemplars, lu): + """ + Helper function for pretty-printing a list of exemplar sentences for a lexical unit. + + :param sent: The list of exemplar sentences to be printed. + :type sent: list(AttrDict) + :return: An index of the text of the exemplar sentences. + :rtype: str + """ + + outstr = "" + outstr += "exemplar sentences for {0.name} in {0.frame.name}:\n\n".format(lu) + for i, sent in enumerate(exemplars): + outstr += "[{0}] {1}\n".format(i, sent.text) + outstr += "\n" + return outstr + + +def _pretty_fulltext_sentences(sents): + """ + Helper function for pretty-printing a list of annotated sentences for a full-text document. + + :param sent: The list of sentences to be printed. + :type sent: list(AttrDict) + :return: An index of the text of the sentences. + :rtype: str + """ + + outstr = "" + outstr += "full-text document ({0.ID}) {0.name}:\n\n".format(sents) + outstr += "[corpid] {0.corpid}\n[corpname] {0.corpname}\n[description] {0.description}\n[URL] {0.URL}\n\n".format( + sents + ) + outstr += "[sentence]\n".format(sents) + for i, sent in enumerate(sents.sentence): + outstr += "[{0}] {1}\n".format(i, sent.text) + outstr += "\n" + return outstr + + +def _pretty_fulltext_sentence(sent): + """ + Helper function for pretty-printing an annotated sentence from a full-text document. + + :param sent: The sentence to be printed. + :type sent: list(AttrDict) + :return: The text of the sentence with annotation set indices on frame targets. + :rtype: str + """ + + outstr = "" + outstr += "full-text sentence ({0.ID}) in {1}:\n\n".format( + sent, sent.doc.get('name', sent.doc.description) + ) + outstr += "\n[POS] {0} tags\n".format(len(sent.POS)) + outstr += "\n[POS_tagset] {0}\n\n".format(sent.POS_tagset) + outstr += "[text] + [annotationSet]\n\n" + outstr += sent._ascii() # -> _annotation_ascii() + outstr += "\n" + return outstr + + +def _pretty_pos(aset): + """ + Helper function for pretty-printing a sentence with its POS tags. 
+ + :param aset: The POS annotation set of the sentence to be printed. + :type sent: list(AttrDict) + :return: The text of the sentence and its POS tags. + :rtype: str + """ + + outstr = "" + outstr += "POS annotation set ({0.ID}) {0.POS_tagset} in sentence {0.sent.ID}:\n\n".format( + aset + ) + + # list the target spans and their associated aset index + overt = sorted(aset.POS) + + sent = aset.sent + s0 = sent.text + s1 = '' + s2 = '' + i = 0 + adjust = 0 + for j, k, lbl in overt: + assert j >= i, ('Overlapping targets?', (j, k, lbl)) + s1 += ' ' * (j - i) + '-' * (k - j) + if len(lbl) > (k - j): + # add space in the sentence to make room for the annotation index + amt = len(lbl) - (k - j) + s0 = ( + s0[: k + adjust] + '~' * amt + s0[k + adjust :] + ) # '~' to prevent line wrapping + s1 = s1[: k + adjust] + ' ' * amt + s1[k + adjust :] + adjust += amt + s2 += ' ' * (j - i) + lbl.ljust(k - j) + i = k + + long_lines = [s0, s1, s2] + + outstr += '\n\n'.join( + map('\n'.join, zip_longest(*mimic_wrap(long_lines), fillvalue=' ')) + ).replace('~', ' ') + outstr += "\n" + return outstr + + +def _pretty_annotation(sent, aset_level=False): + """ + Helper function for pretty-printing an exemplar sentence for a lexical unit. + + :param sent: An annotation set or exemplar sentence to be printed. + :param aset_level: If True, 'sent' is actually an annotation set within a sentence. + :type sent: AttrDict + :return: A nicely formated string representation of the exemplar sentence + with its target, frame, and FE annotations. + :rtype: str + """ + + sentkeys = sent.keys() + outstr = "annotation set" if aset_level else "exemplar sentence" + outstr += " ({0.ID}):\n".format(sent) + if aset_level: # TODO: any UNANN exemplars? + outstr += "\n[status] {0}\n".format(sent.status) + for k in ('corpID', 'docID', 'paragNo', 'sentNo', 'aPos'): + if k in sentkeys: + outstr += "[{0}] {1}\n".format(k, sent[k]) + outstr += ( + "\n[LU] ({0.ID}) {0.name} in {0.frame.name}\n".format(sent.LU) + if sent.LU + else '\n[LU] Not found!' + ) + outstr += "\n[frame] ({0.ID}) {0.name}\n".format( + sent.frame + ) # redundant with above, but .frame is convenient + if not aset_level: + outstr += "\n[annotationSet] {0} annotation sets\n".format( + len(sent.annotationSet) + ) + outstr += "\n[POS] {0} tags\n".format(len(sent.POS)) + outstr += "\n[POS_tagset] {0}\n".format(sent.POS_tagset) + outstr += "\n[GF] {0} relation{1}\n".format( + len(sent.GF), "s" if len(sent.GF) != 1 else "" + ) + outstr += "\n[PT] {0} phrase{1}\n".format( + len(sent.PT), "s" if len(sent.PT) != 1 else "" + ) + """ + Special Layers + -------------- + + The 'NER' layer contains, for some of the data, named entity labels. + + The 'WSL' (word status layer) contains, for some of the data, + spans which should not in principle be considered targets (NT). + + The 'Other' layer records relative clause constructions (Rel=relativizer, Ant=antecedent), + pleonastic 'it' (Null), and existential 'there' (Exist). + On occasion they are duplicated by accident (e.g., annotationSet 1467275 in lu6700.xml). + + The 'Sent' layer appears to contain labels that the annotator has flagged the + sentence with for their convenience: values include + 'sense1', 'sense2', 'sense3', etc.; + 'Blend', 'Canonical', 'Idiom', 'Metaphor', 'Special-Sent', + 'keepS', 'deleteS', 'reexamine' + (sometimes they are duplicated for no apparent reason). 
+ + The POS-specific layers may contain the following kinds of spans: + Asp (aspectual particle), Non-Asp (non-aspectual particle), + Cop (copula), Supp (support), Ctrlr (controller), + Gov (governor), X. Gov and X always cooccur. + + >>> from nltk.corpus import framenet as fn + >>> def f(luRE, lyr, ignore=set()): + ... for i,ex in enumerate(fn.exemplars(luRE)): + ... if lyr in ex and ex[lyr] and set(zip(*ex[lyr])[2]) - ignore: + ... print(i,ex[lyr]) + + - Verb: Asp, Non-Asp + - Noun: Cop, Supp, Ctrlr, Gov, X + - Adj: Cop, Supp, Ctrlr, Gov, X + - Prep: Cop, Supp, Ctrlr + - Adv: Ctrlr + - Scon: (none) + - Art: (none) + """ + for lyr in ('NER', 'WSL', 'Other', 'Sent'): + if lyr in sent and sent[lyr]: + outstr += "\n[{0}] {1} entr{2}\n".format( + lyr, len(sent[lyr]), "ies" if len(sent[lyr]) != 1 else "y" + ) + outstr += "\n[text] + [Target] + [FE]" + # POS-specific layers: syntactically important words that are neither the target + # nor the FEs. Include these along with the first FE layer but with '^' underlining. + for lyr in ('Verb', 'Noun', 'Adj', 'Adv', 'Prep', 'Scon', 'Art'): + if lyr in sent and sent[lyr]: + outstr += " + [{0}]".format(lyr) + if 'FE2' in sentkeys: + outstr += " + [FE2]" + if 'FE3' in sentkeys: + outstr += " + [FE3]" + outstr += "\n\n" + outstr += sent._ascii() # -> _annotation_ascii() + outstr += "\n" + + return outstr + + +def _annotation_ascii(sent): + ''' + Given a sentence or FE annotation set, construct the width-limited string showing + an ASCII visualization of the sentence's annotations, calling either + _annotation_ascii_frames() or _annotation_ascii_FEs() as appropriate. + This will be attached as a method to appropriate AttrDict instances + and called in the full pretty-printing of the instance. + ''' + if sent._type == 'fulltext_sentence' or ( + 'annotationSet' in sent and len(sent.annotationSet) > 2 + ): + # a full-text sentence OR sentence with multiple targets. + # (multiple targets = >2 annotation sets, because the first annotation set is POS.) + return _annotation_ascii_frames(sent) + else: # an FE annotation set, or an LU sentence with 1 target + return _annotation_ascii_FEs(sent) + + +def _annotation_ascii_frames(sent): + ''' + ASCII string rendering of the sentence along with its targets and frame names. + Called for all full-text sentences, as well as the few LU sentences with multiple + targets (e.g., fn.lu(6412).exemplars[82] has two want.v targets). + Line-wrapped to limit the display width. + ''' + # list the target spans and their associated aset index + overt = [] + for a, aset in enumerate(sent.annotationSet[1:]): + for j, k in aset.Target: + indexS = "[{0}]".format(a + 1) + if aset.status == 'UNANN' or aset.LU.status == 'Problem': + indexS += " " + if aset.status == 'UNANN': + indexS += ( + "!" + ) # warning indicator that there is a frame annotation but no FE annotation + if aset.LU.status == 'Problem': + indexS += ( + "?" + ) # warning indicator that there is a missing LU definition (because the LU has Problem status) + overt.append((j, k, aset.LU.frame.name, indexS)) + overt = sorted(overt) + + duplicates = set() + for o, (j, k, fname, asetIndex) in enumerate(overt): + if o > 0 and j <= overt[o - 1][1]: + # multiple annotation sets on the same target + # (e.g. due to a coordination construction or multiple annotators) + if ( + overt[o - 1][:2] == (j, k) and overt[o - 1][2] == fname + ): # same target, same frame + # splice indices together + combinedIndex = ( + overt[o - 1][3] + asetIndex + ) # e.g., '[1][2]', '[1]! 
[2]' + combinedIndex = combinedIndex.replace(' !', '! ').replace(' ?', '? ') + overt[o - 1] = overt[o - 1][:3] + (combinedIndex,) + duplicates.add(o) + else: # different frames, same or overlapping targets + s = sent.text + for j, k, fname, asetIndex in overt: + s += '\n' + asetIndex + ' ' + sent.text[j:k] + ' :: ' + fname + s += '\n(Unable to display sentence with targets marked inline due to overlap)' + return s + for o in reversed(sorted(duplicates)): + del overt[o] + + s0 = sent.text + s1 = '' + s11 = '' + s2 = '' + i = 0 + adjust = 0 + fAbbrevs = OrderedDict() + for j, k, fname, asetIndex in overt: + if not j >= i: + assert j >= i, ( + 'Overlapping targets?' + + ( + ' UNANN' + if any(aset.status == 'UNANN' for aset in sent.annotationSet[1:]) + else '' + ), + (j, k, asetIndex), + ) + s1 += ' ' * (j - i) + '*' * (k - j) + short = fname[: k - j] + if (k - j) < len(fname): + r = 0 + while short in fAbbrevs: + if fAbbrevs[short] == fname: + break + r += 1 + short = fname[: k - j - 1] + str(r) + else: # short not in fAbbrevs + fAbbrevs[short] = fname + s11 += ' ' * (j - i) + short.ljust(k - j) + if len(asetIndex) > (k - j): + # add space in the sentence to make room for the annotation index + amt = len(asetIndex) - (k - j) + s0 = ( + s0[: k + adjust] + '~' * amt + s0[k + adjust :] + ) # '~' to prevent line wrapping + s1 = s1[: k + adjust] + ' ' * amt + s1[k + adjust :] + s11 = s11[: k + adjust] + ' ' * amt + s11[k + adjust :] + adjust += amt + s2 += ' ' * (j - i) + asetIndex.ljust(k - j) + i = k + + long_lines = [s0, s1, s11, s2] + + outstr = '\n\n'.join( + map('\n'.join, zip_longest(*mimic_wrap(long_lines), fillvalue=' ')) + ).replace('~', ' ') + outstr += '\n' + if fAbbrevs: + outstr += ' (' + ', '.join('='.join(pair) for pair in fAbbrevs.items()) + ')' + assert len(fAbbrevs) == len(dict(fAbbrevs)), 'Abbreviation clash' + + return outstr + + +def _annotation_ascii_FE_layer(overt, ni, feAbbrevs): + '''Helper for _annotation_ascii_FEs().''' + s1 = '' + s2 = '' + i = 0 + for j, k, fename in overt: + s1 += ' ' * (j - i) + ('^' if fename.islower() else '-') * (k - j) + short = fename[: k - j] + if len(fename) > len(short): + r = 0 + while short in feAbbrevs: + if feAbbrevs[short] == fename: + break + r += 1 + short = fename[: k - j - 1] + str(r) + else: # short not in feAbbrevs + feAbbrevs[short] = fename + s2 += ' ' * (j - i) + short.ljust(k - j) + i = k + + sNI = '' + if ni: + sNI += ' [' + ', '.join(':'.join(x) for x in sorted(ni.items())) + ']' + return [s1, s2, sNI] + + +def _annotation_ascii_FEs(sent): + ''' + ASCII string rendering of the sentence along with a single target and its FEs. + Secondary and tertiary FE layers are included if present. + 'sent' can be an FE annotation set or an LU sentence with a single target. + Line-wrapped to limit the display width. 
+ ''' + feAbbrevs = OrderedDict() + posspec = [] # POS-specific layer spans (e.g., Supp[ort], Cop[ula]) + posspec_separate = False + for lyr in ('Verb', 'Noun', 'Adj', 'Adv', 'Prep', 'Scon', 'Art'): + if lyr in sent and sent[lyr]: + for a, b, lbl in sent[lyr]: + if ( + lbl == 'X' + ): # skip this, which covers an entire phrase typically containing the target and all its FEs + # (but do display the Gov) + continue + if any(1 for x, y, felbl in sent.FE[0] if x <= a < y or a <= x < b): + # overlap between one of the POS-specific layers and first FE layer + posspec_separate = ( + True + ) # show POS-specific layers on a separate line + posspec.append( + (a, b, lbl.lower().replace('-', '')) + ) # lowercase Cop=>cop, Non-Asp=>nonasp, etc. to distinguish from FE names + if posspec_separate: + POSSPEC = _annotation_ascii_FE_layer(posspec, {}, feAbbrevs) + FE1 = _annotation_ascii_FE_layer( + sorted(sent.FE[0] + (posspec if not posspec_separate else [])), + sent.FE[1], + feAbbrevs, + ) + FE2 = FE3 = None + if 'FE2' in sent: + FE2 = _annotation_ascii_FE_layer(sent.FE2[0], sent.FE2[1], feAbbrevs) + if 'FE3' in sent: + FE3 = _annotation_ascii_FE_layer(sent.FE3[0], sent.FE3[1], feAbbrevs) + + for i, j in sent.Target: + FE1span, FE1name, FE1exp = FE1 + if len(FE1span) < j: + FE1span += ' ' * (j - len(FE1span)) + if len(FE1name) < j: + FE1name += ' ' * (j - len(FE1name)) + FE1[1] = FE1name + FE1[0] = ( + FE1span[:i] + FE1span[i:j].replace(' ', '*').replace('-', '=') + FE1span[j:] + ) + long_lines = [sent.text] + if posspec_separate: + long_lines.extend(POSSPEC[:2]) + long_lines.extend([FE1[0], FE1[1] + FE1[2]]) # lines with no length limit + if FE2: + long_lines.extend([FE2[0], FE2[1] + FE2[2]]) + if FE3: + long_lines.extend([FE3[0], FE3[1] + FE3[2]]) + long_lines.append('') + outstr = '\n'.join( + map('\n'.join, zip_longest(*mimic_wrap(long_lines), fillvalue=' ')) + ) + if feAbbrevs: + outstr += '(' + ', '.join('='.join(pair) for pair in feAbbrevs.items()) + ')' + assert len(feAbbrevs) == len(dict(feAbbrevs)), 'Abbreviation clash' + outstr += "\n" + + return outstr + + +def _pretty_fe(fe): + + """ + Helper function for pretty-printing a frame element. + + :param fe: The frame element to be printed. + :type fe: AttrDict + :return: A nicely formated string representation of the frame element. + :rtype: str + """ + fekeys = fe.keys() + outstr = "" + outstr += "frame element ({0.ID}): {0.name}\n of {1.name}({1.ID})\n".format( + fe, fe.frame + ) + if 'definition' in fekeys: + outstr += "[definition]\n" + outstr += _pretty_longstring(fe.definition, ' ') + if 'abbrev' in fekeys: + outstr += "[abbrev] {0}\n".format(fe.abbrev) + if 'coreType' in fekeys: + outstr += "[coreType] {0}\n".format(fe.coreType) + if 'requiresFE' in fekeys: + outstr += "[requiresFE] " + if fe.requiresFE is None: + outstr += "\n" + else: + outstr += "{0}({1})\n".format(fe.requiresFE.name, fe.requiresFE.ID) + if 'excludesFE' in fekeys: + outstr += "[excludesFE] " + if fe.excludesFE is None: + outstr += "\n" + else: + outstr += "{0}({1})\n".format(fe.excludesFE.name, fe.excludesFE.ID) + if 'semType' in fekeys: + outstr += "[semType] " + if fe.semType is None: + outstr += "\n" + else: + outstr += "\n " + "{0}({1})".format(fe.semType.name, fe.semType.ID) + '\n' + + return outstr + + +def _pretty_frame(frame): + + """ + Helper function for pretty-printing a frame. + + :param frame: The frame to be printed. + :type frame: AttrDict + :return: A nicely formated string representation of the frame. 
+ :rtype: str + """ + + outstr = "" + outstr += "frame ({0.ID}): {0.name}\n\n".format(frame) + outstr += "[URL] {0}\n\n".format(frame.URL) + outstr += "[definition]\n" + outstr += _pretty_longstring(frame.definition, ' ') + '\n' + + outstr += "[semTypes] {0} semantic types\n".format(len(frame.semTypes)) + outstr += ( + " " * (len(frame.semTypes) > 0) + + ", ".join("{0}({1})".format(x.name, x.ID) for x in frame.semTypes) + + '\n' * (len(frame.semTypes) > 0) + ) + + outstr += "\n[frameRelations] {0} frame relations\n".format( + len(frame.frameRelations) + ) + outstr += ' ' + '\n '.join(repr(frel) for frel in frame.frameRelations) + '\n' + + outstr += "\n[lexUnit] {0} lexical units\n".format(len(frame.lexUnit)) + lustrs = [] + for luName, lu in sorted(frame.lexUnit.items()): + tmpstr = '{0} ({1})'.format(luName, lu.ID) + lustrs.append(tmpstr) + outstr += "{0}\n".format(_pretty_longstring(', '.join(lustrs), prefix=' ')) + + outstr += "\n[FE] {0} frame elements\n".format(len(frame.FE)) + fes = {} + for feName, fe in sorted(frame.FE.items()): + try: + fes[fe.coreType].append("{0} ({1})".format(feName, fe.ID)) + except KeyError: + fes[fe.coreType] = [] + fes[fe.coreType].append("{0} ({1})".format(feName, fe.ID)) + for ct in sorted( + fes.keys(), + key=lambda ct2: [ + 'Core', + 'Core-Unexpressed', + 'Peripheral', + 'Extra-Thematic', + ].index(ct2), + ): + outstr += "{0:>16}: {1}\n".format(ct, ', '.join(sorted(fes[ct]))) + + outstr += "\n[FEcoreSets] {0} frame element core sets\n".format( + len(frame.FEcoreSets) + ) + outstr += ( + " " + + '\n '.join( + ", ".join([x.name for x in coreSet]) for coreSet in frame.FEcoreSets + ) + + '\n' + ) + + return outstr + + +class FramenetError(Exception): + + """An exception class for framenet-related errors.""" + + +@python_2_unicode_compatible +class AttrDict(dict): + + """A class that wraps a dict and allows accessing the keys of the + dict as if they were attributes. 
Taken from here: + http://stackoverflow.com/a/14620633/8879 + + >>> foo = {'a':1, 'b':2, 'c':3} + >>> bar = AttrDict(foo) + >>> pprint(dict(bar)) + {'a': 1, 'b': 2, 'c': 3} + >>> bar.b + 2 + >>> bar.d = 4 + >>> pprint(dict(bar)) + {'a': 1, 'b': 2, 'c': 3, 'd': 4} + """ + + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + # self.__dict__ = self + + def __setattr__(self, name, value): + self[name] = value + + def __getattr__(self, name): + if name == '_short_repr': + return self._short_repr + return self[name] + + def __getitem__(self, name): + v = super(AttrDict, self).__getitem__(name) + if isinstance(v, Future): + return v._data() + return v + + def _short_repr(self): + if '_type' in self: + if self['_type'].endswith('relation'): + return self.__repr__() + try: + return "<{0} ID={1} name={2}>".format( + self['_type'], self['ID'], self['name'] + ) + except KeyError: + try: # no ID--e.g., for _type=lusubcorpus + return "<{0} name={1}>".format(self['_type'], self['name']) + except KeyError: # no name--e.g., for _type=lusentence + return "<{0} ID={1}>".format(self['_type'], self['ID']) + else: + return self.__repr__() + + def _str(self): + outstr = "" + + if '_type' not in self: + outstr = _pretty_any(self) + elif self['_type'] == 'frame': + outstr = _pretty_frame(self) + elif self['_type'] == 'fe': + outstr = _pretty_fe(self) + elif self['_type'] == 'lu': + outstr = _pretty_lu(self) + elif self['_type'] == 'luexemplars': # list of ALL exemplars for LU + outstr = _pretty_exemplars(self, self[0].LU) + elif ( + self['_type'] == 'fulltext_annotation' + ): # list of all sentences for full-text doc + outstr = _pretty_fulltext_sentences(self) + elif self['_type'] == 'lusentence': + outstr = _pretty_annotation(self) + elif self['_type'] == 'fulltext_sentence': + outstr = _pretty_fulltext_sentence(self) + elif self['_type'] in ('luannotationset', 'fulltext_annotationset'): + outstr = _pretty_annotation(self, aset_level=True) + elif self['_type'] == 'posannotationset': + outstr = _pretty_pos(self) + elif self['_type'] == 'semtype': + outstr = _pretty_semtype(self) + elif self['_type'] == 'framerelationtype': + outstr = _pretty_frame_relation_type(self) + elif self['_type'] == 'framerelation': + outstr = _pretty_frame_relation(self) + elif self['_type'] == 'ferelation': + outstr = _pretty_fe_relation(self) + else: + outstr = _pretty_any(self) + + # ensure result is unicode string prior to applying the + # @python_2_unicode_compatible decorator (because non-ASCII characters + # could in principle occur in the data and would trigger an encoding error when + # passed as arguments to str.format()). + # assert isinstance(outstr, unicode) # not in Python 3.2 + return outstr + + def __str__(self): + return self._str() + + def __repr__(self): + return self.__str__() + + +@python_2_unicode_compatible +class SpecialList(list): + """ + A list subclass which adds a '_type' attribute for special printing + (similar to an AttrDict, though this is NOT an AttrDict subclass). 
+ """ + + def __init__(self, typ, *args, **kwargs): + super(SpecialList, self).__init__(*args, **kwargs) + self._type = typ + + def _str(self): + outstr = "" + + assert self._type + if len(self) == 0: + outstr = "[]" + elif self._type == 'luexemplars': # list of ALL exemplars for LU + outstr = _pretty_exemplars(self, self[0].LU) + else: + assert False, self._type + return outstr + + def __str__(self): + return self._str() + + def __repr__(self): + return self.__str__() + + +class Future(object): + """ + Wraps and acts as a proxy for a value to be loaded lazily (on demand). + Adapted from https://gist.github.com/sergey-miryanov/2935416 + """ + + def __init__(self, loader, *args, **kwargs): + """ + :param loader: when called with no arguments, returns the value to be stored + :type loader: callable + """ + super(Future, self).__init__(*args, **kwargs) + self._loader = loader + self._d = None + + def _data(self): + if callable(self._loader): + self._d = self._loader() + self._loader = None # the data is now cached + return self._d + + def __nonzero__(self): + return bool(self._data()) + + def __len__(self): + return len(self._data()) + + def __setitem__(self, key, value): + return self._data().__setitem__(key, value) + + def __getitem__(self, key): + return self._data().__getitem__(key) + + def __getattr__(self, key): + return self._data().__getattr__(key) + + def __str__(self): + return self._data().__str__() + + def __repr__(self): + return self._data().__repr__() + + +@python_2_unicode_compatible +class PrettyDict(AttrDict): + """ + Displays an abbreviated repr of values where possible. + Inherits from AttrDict, so a callable value will + be lazily converted to an actual value. + """ + + def __init__(self, *args, **kwargs): + _BREAK_LINES = kwargs.pop('breakLines', False) + super(PrettyDict, self).__init__(*args, **kwargs) + dict.__setattr__(self, '_BREAK_LINES', _BREAK_LINES) + + def __repr__(self): + parts = [] + for k, v in sorted(self.items()): + kv = repr(k) + ': ' + try: + kv += v._short_repr() + except AttributeError: + kv += repr(v) + parts.append(kv) + return '{' + (',\n ' if self._BREAK_LINES else ', ').join(parts) + '}' + + +@python_2_unicode_compatible +class PrettyList(list): + """ + Displays an abbreviated repr of only the first several elements, not the whole list. + """ + + # from nltk.util + def __init__(self, *args, **kwargs): + self._MAX_REPR_SIZE = kwargs.pop('maxReprSize', 60) + self._BREAK_LINES = kwargs.pop('breakLines', False) + super(PrettyList, self).__init__(*args, **kwargs) + + def __repr__(self): + """ + Return a string representation for this corpus view that is + similar to a list's representation; but if it would be more + than 60 characters long, it is truncated. + """ + pieces = [] + length = 5 + + for elt in self: + pieces.append( + elt._short_repr() + ) # key difference from inherited version: call to _short_repr() + length += len(pieces[-1]) + 2 + if self._MAX_REPR_SIZE and length > self._MAX_REPR_SIZE and len(pieces) > 2: + return "[%s, ...]" % text_type( + ',\n ' if self._BREAK_LINES else ', ' + ).join(pieces[:-1]) + return "[%s]" % text_type(',\n ' if self._BREAK_LINES else ', ').join(pieces) + + +@python_2_unicode_compatible +class PrettyLazyMap(LazyMap): + """ + Displays an abbreviated repr of only the first several elements, not the whole list. 
+ """ + + # from nltk.util + _MAX_REPR_SIZE = 60 + + def __repr__(self): + """ + Return a string representation for this corpus view that is + similar to a list's representation; but if it would be more + than 60 characters long, it is truncated. + """ + pieces = [] + length = 5 + for elt in self: + pieces.append( + elt._short_repr() + ) # key difference from inherited version: call to _short_repr() + length += len(pieces[-1]) + 2 + if length > self._MAX_REPR_SIZE and len(pieces) > 2: + return "[%s, ...]" % text_type(', ').join(pieces[:-1]) + return "[%s]" % text_type(', ').join(pieces) + + +@python_2_unicode_compatible +class PrettyLazyIteratorList(LazyIteratorList): + """ + Displays an abbreviated repr of only the first several elements, not the whole list. + """ + + # from nltk.util + _MAX_REPR_SIZE = 60 + + def __repr__(self): + """ + Return a string representation for this corpus view that is + similar to a list's representation; but if it would be more + than 60 characters long, it is truncated. + """ + pieces = [] + length = 5 + for elt in self: + pieces.append( + elt._short_repr() + ) # key difference from inherited version: call to _short_repr() + length += len(pieces[-1]) + 2 + if length > self._MAX_REPR_SIZE and len(pieces) > 2: + return "[%s, ...]" % text_type(', ').join(pieces[:-1]) + return "[%s]" % text_type(', ').join(pieces) + + +@python_2_unicode_compatible +class PrettyLazyConcatenation(LazyConcatenation): + """ + Displays an abbreviated repr of only the first several elements, not the whole list. + """ + + # from nltk.util + _MAX_REPR_SIZE = 60 + + def __repr__(self): + """ + Return a string representation for this corpus view that is + similar to a list's representation; but if it would be more + than 60 characters long, it is truncated. + """ + pieces = [] + length = 5 + for elt in self: + pieces.append( + elt._short_repr() + ) # key difference from inherited version: call to _short_repr() + length += len(pieces[-1]) + 2 + if length > self._MAX_REPR_SIZE and len(pieces) > 2: + return "[%s, ...]" % text_type(', ').join(pieces[:-1]) + return "[%s]" % text_type(', ').join(pieces) + + def __add__(self, other): + """Return a list concatenating self with other.""" + return PrettyLazyIteratorList(itertools.chain(self, other)) + + def __radd__(self, other): + """Return a list concatenating other with self.""" + return PrettyLazyIteratorList(itertools.chain(other, self)) + + +class FramenetCorpusReader(XMLCorpusReader): + """A corpus reader for the Framenet Corpus. + + >>> from nltk.corpus import framenet as fn + >>> fn.lu(3238).frame.lexUnit['glint.v'] is fn.lu(3238) + True + >>> fn.frame_by_name('Replacing') is fn.lus('replace.v')[0].frame + True + >>> fn.lus('prejudice.n')[0].frame.frameRelations == fn.frame_relations('Partiality') + True + """ + + _bad_statuses = ['Problem'] + """ + When loading LUs for a frame, those whose status is in this list will be ignored. + Due to caching, if user code modifies this, it should do so before loading any data. + 'Problem' should always be listed for FrameNet 1.5, as these LUs are not included + in the XML index. + """ + + _warnings = False + + def warnings(self, v): + """Enable or disable warnings of data integrity issues as they are encountered. + If v is truthy, warnings will be enabled. + + (This is a function rather than just an attribute/property to ensure that if + enabling warnings is the first action taken, the corpus reader is instantiated first.) 
+ """ + self._warnings = v + + def __init__(self, root, fileids): + XMLCorpusReader.__init__(self, root, fileids) + + # framenet corpus sub dirs + # sub dir containing the xml files for frames + self._frame_dir = "frame" + # sub dir containing the xml files for lexical units + self._lu_dir = "lu" + # sub dir containing the xml files for fulltext annotation files + self._fulltext_dir = "fulltext" + + # location of latest development version of FrameNet + self._fnweb_url = "https://framenet2.icsi.berkeley.edu/fnReports/data" + + # Indexes used for faster look-ups + self._frame_idx = None + self._cached_frames = {} # name -> ID + self._lu_idx = None + self._fulltext_idx = None + self._semtypes = None + self._freltyp_idx = None # frame relation types (Inheritance, Using, etc.) + self._frel_idx = None # frame-to-frame relation instances + self._ferel_idx = None # FE-to-FE relation instances + self._frel_f_idx = None # frame-to-frame relations associated with each frame + + def help(self, attrname=None): + """Display help information summarizing the main methods.""" + + if attrname is not None: + return help(self.__getattribute__(attrname)) + + # No need to mention frame_by_name() or frame_by_id(), + # as it's easier to just call frame(). + # Also not mentioning lu_basic(). + + msg = """ +Citation: Nathan Schneider and Chuck Wooters (2017), +"The NLTK FrameNet API: Designing for Discoverability with a Rich Linguistic Resource". +Proceedings of EMNLP: System Demonstrations. https://arxiv.org/abs/1703.07438 + +Use the following methods to access data in FrameNet. +Provide a method name to `help()` for more information. + +FRAMES +====== + +frame() to look up a frame by its exact name or ID +frames() to get frames matching a name pattern +frames_by_lemma() to get frames containing an LU matching a name pattern +frame_ids_and_names() to get a mapping from frame IDs to names + +FRAME ELEMENTS +============== + +fes() to get frame elements (a.k.a. roles) matching a name pattern, optionally constrained + by a frame name pattern + +LEXICAL UNITS +============= + +lu() to look up an LU by its ID +lus() to get lexical units matching a name pattern, optionally constrained by frame +lu_ids_and_names() to get a mapping from LU IDs to names + +RELATIONS +========= + +frame_relation_types() to get the different kinds of frame-to-frame relations + (Inheritance, Subframe, Using, etc.). +frame_relations() to get the relation instances, optionally constrained by + frame(s) or relation type +fe_relations() to get the frame element pairs belonging to a frame-to-frame relation + +SEMANTIC TYPES +============== + +semtypes() to get the different kinds of semantic types that can be applied to + FEs, LUs, and entire frames +semtype() to look up a particular semtype by name, ID, or abbreviation +semtype_inherits() to check whether two semantic types have a subtype-supertype + relationship in the semtype hierarchy +propagate_semtypes() to apply inference rules that distribute semtypes over relations + between FEs + +ANNOTATIONS +=========== + +annotations() to get annotation sets, in which a token in a sentence is annotated + with a lexical unit in a frame, along with its frame elements and their syntactic properties; + can be constrained by LU name pattern and limited to lexicographic exemplars or full-text. + Sentences of full-text annotation can have multiple annotation sets. 
+sents() to get annotated sentences illustrating one or more lexical units +exemplars() to get sentences of lexicographic annotation, most of which have + just 1 annotation set; can be constrained by LU name pattern, frame, and overt FE(s) +doc() to look up a document of full-text annotation by its ID +docs() to get documents of full-text annotation that match a name pattern +docs_metadata() to get metadata about all full-text documents without loading them +ft_sents() to iterate over sentences of full-text annotation + +UTILITIES +========= + +buildindexes() loads metadata about all frames, LUs, etc. into memory to avoid + delay when one is accessed for the first time. It does not load annotations. +readme() gives the text of the FrameNet README file +warnings(True) to display corpus consistency warnings when loading data + """ + print(msg) + + def _buildframeindex(self): + # The total number of Frames in Framenet is fairly small (~1200) so + # this index should not be very large + if not self._frel_idx: + self._buildrelationindex() # always load frame relations before frames, + # otherwise weird ordering effects might result in incomplete information + self._frame_idx = {} + for f in XMLCorpusView( + self.abspath("frameIndex.xml"), 'frameIndex/frame', self._handle_elt + ): + self._frame_idx[f['ID']] = f + + def _buildcorpusindex(self): + # The total number of fulltext annotated documents in Framenet + # is fairly small (~90) so this index should not be very large + self._fulltext_idx = {} + for doclist in XMLCorpusView( + self.abspath("fulltextIndex.xml"), + 'fulltextIndex/corpus', + self._handle_fulltextindex_elt, + ): + for doc in doclist: + self._fulltext_idx[doc.ID] = doc + + def _buildluindex(self): + # The number of LUs in Framenet is about 13,000 so this index + # should not be very large + self._lu_idx = {} + for lu in XMLCorpusView( + self.abspath("luIndex.xml"), 'luIndex/lu', self._handle_elt + ): + self._lu_idx[ + lu['ID'] + ] = lu # populate with LU index entries. if any of these + # are looked up they will be replaced by full LU objects. + + def _buildrelationindex(self): + # print('building relation index...', file=sys.stderr) + freltypes = PrettyList( + x + for x in XMLCorpusView( + self.abspath("frRelation.xml"), + 'frameRelations/frameRelationType', + self._handle_framerelationtype_elt, + ) + ) + self._freltyp_idx = {} + self._frel_idx = {} + self._frel_f_idx = defaultdict(set) + self._ferel_idx = {} + + for freltyp in freltypes: + self._freltyp_idx[freltyp.ID] = freltyp + for frel in freltyp.frameRelations: + supF = frel.superFrame = frel[freltyp.superFrameName] = Future( + (lambda fID: lambda: self.frame_by_id(fID))(frel.supID) + ) + subF = frel.subFrame = frel[freltyp.subFrameName] = Future( + (lambda fID: lambda: self.frame_by_id(fID))(frel.subID) + ) + self._frel_idx[frel.ID] = frel + self._frel_f_idx[frel.supID].add(frel.ID) + self._frel_f_idx[frel.subID].add(frel.ID) + for ferel in frel.feRelations: + ferel.superFrame = supF + ferel.subFrame = subF + ferel.superFE = Future( + (lambda fer: lambda: fer.superFrame.FE[fer.superFEName])(ferel) + ) + ferel.subFE = Future( + (lambda fer: lambda: fer.subFrame.FE[fer.subFEName])(ferel) + ) + self._ferel_idx[ferel.ID] = ferel + # print('...done building relation index', file=sys.stderr) + + def _warn(self, *message, **kwargs): + if self._warnings: + kwargs.setdefault('file', sys.stderr) + print(*message, **kwargs) + + def readme(self): + """ + Return the contents of the corpus README.txt (or README) file. 
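+
+        Illustrative usage (a sketch; the exact text depends on the installed
+        FrameNet release)::
+
+            from nltk.corpus import framenet as fn
+            print(fn.readme()[:200])  # opening lines of the release's README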
+ """ + try: + return self.open("README.txt").read() + except IOError: + return self.open("README").read() + + def buildindexes(self): + """ + Build the internal indexes to make look-ups faster. + """ + # Frames + self._buildframeindex() + # LUs + self._buildluindex() + # Fulltext annotation corpora index + self._buildcorpusindex() + # frame and FE relations + self._buildrelationindex() + + def doc(self, fn_docid): + """ + Returns the annotated document whose id number is + ``fn_docid``. This id number can be obtained by calling the + Documents() function. + + The dict that is returned from this function will contain the + following keys: + + - '_type' : 'fulltextannotation' + - 'sentence' : a list of sentences in the document + - Each item in the list is a dict containing the following keys: + - 'ID' : the ID number of the sentence + - '_type' : 'sentence' + - 'text' : the text of the sentence + - 'paragNo' : the paragraph number + - 'sentNo' : the sentence number + - 'docID' : the document ID number + - 'corpID' : the corpus ID number + - 'aPos' : the annotation position + - 'annotationSet' : a list of annotation layers for the sentence + - Each item in the list is a dict containing the following keys: + - 'ID' : the ID number of the annotation set + - '_type' : 'annotationset' + - 'status' : either 'MANUAL' or 'UNANN' + - 'luName' : (only if status is 'MANUAL') + - 'luID' : (only if status is 'MANUAL') + - 'frameID' : (only if status is 'MANUAL') + - 'frameName': (only if status is 'MANUAL') + - 'layer' : a list of labels for the layer + - Each item in the layer is a dict containing the + following keys: + - '_type': 'layer' + - 'rank' + - 'name' + - 'label' : a list of labels in the layer + - Each item is a dict containing the following keys: + - 'start' + - 'end' + - 'name' + - 'feID' (optional) + + :param fn_docid: The Framenet id number of the document + :type fn_docid: int + :return: Information about the annotated document + :rtype: dict + """ + try: + xmlfname = self._fulltext_idx[fn_docid].filename + except TypeError: # happens when self._fulltext_idx == None + # build the index + self._buildcorpusindex() + xmlfname = self._fulltext_idx[fn_docid].filename + except KeyError: # probably means that fn_docid was not in the index + raise FramenetError("Unknown document id: {0}".format(fn_docid)) + + # construct the path name for the xml file containing the document info + locpath = os.path.join("{0}".format(self._root), self._fulltext_dir, xmlfname) + + # Grab the top-level xml element containing the fulltext annotation + elt = XMLCorpusView(locpath, 'fullTextAnnotation')[0] + info = self._handle_fulltextannotation_elt(elt) + # add metadata + for k, v in self._fulltext_idx[fn_docid].items(): + info[k] = v + return info + + def frame_by_id(self, fn_fid, ignorekeys=[]): + """ + Get the details for the specified Frame using the frame's id + number. + + Usage examples: + + >>> from nltk.corpus import framenet as fn + >>> f = fn.frame_by_id(256) + >>> f.ID + 256 + >>> f.name + 'Medical_specialties' + >>> f.definition + "This frame includes words that name ..." + + :param fn_fid: The Framenet id number of the frame + :type fn_fid: int + :param ignorekeys: The keys to ignore. These keys will not be + included in the output. (optional) + :type ignorekeys: list(str) + :return: Information about a frame + :rtype: dict + + Also see the ``frame()`` function for details about what is + contained in the dict that is returned. 
+ """ + + # get the name of the frame with this id number + try: + fentry = self._frame_idx[fn_fid] + if '_type' in fentry: + return fentry # full frame object is cached + name = fentry['name'] + except TypeError: + self._buildframeindex() + name = self._frame_idx[fn_fid]['name'] + except KeyError: + raise FramenetError('Unknown frame id: {0}'.format(fn_fid)) + + return self.frame_by_name(name, ignorekeys, check_cache=False) + + def frame_by_name(self, fn_fname, ignorekeys=[], check_cache=True): + """ + Get the details for the specified Frame using the frame's name. + + Usage examples: + + >>> from nltk.corpus import framenet as fn + >>> f = fn.frame_by_name('Medical_specialties') + >>> f.ID + 256 + >>> f.name + 'Medical_specialties' + >>> f.definition + "This frame includes words that name ..." + + :param fn_fname: The name of the frame + :type fn_fname: str + :param ignorekeys: The keys to ignore. These keys will not be + included in the output. (optional) + :type ignorekeys: list(str) + :return: Information about a frame + :rtype: dict + + Also see the ``frame()`` function for details about what is + contained in the dict that is returned. + """ + + if check_cache and fn_fname in self._cached_frames: + return self._frame_idx[self._cached_frames[fn_fname]] + elif not self._frame_idx: + self._buildframeindex() + + # construct the path name for the xml file containing the Frame info + locpath = os.path.join( + "{0}".format(self._root), self._frame_dir, fn_fname + ".xml" + ) + # print(locpath, file=sys.stderr) + # Grab the xml for the frame + try: + elt = XMLCorpusView(locpath, 'frame')[0] + except IOError: + raise FramenetError('Unknown frame: {0}'.format(fn_fname)) + + fentry = self._handle_frame_elt(elt, ignorekeys) + assert fentry + + fentry.URL = self._fnweb_url + '/' + self._frame_dir + '/' + fn_fname + '.xml' + + # INFERENCE RULE: propagate lexical semtypes from the frame to all its LUs + for st in fentry.semTypes: + if st.rootType.name == 'Lexical_type': + for lu in fentry.lexUnit.values(): + if not any( + x is st for x in lu.semTypes + ): # identity containment check + lu.semTypes.append(st) + + self._frame_idx[fentry.ID] = fentry + self._cached_frames[fentry.name] = fentry.ID + ''' + # now set up callables to resolve the LU pointers lazily. + # (could also do this here--caching avoids infinite recursion.) + for luName,luinfo in fentry.lexUnit.items(): + fentry.lexUnit[luName] = (lambda luID: Future(lambda: self.lu(luID)))(luinfo.ID) + ''' + return fentry + + def frame(self, fn_fid_or_fname, ignorekeys=[]): + """ + Get the details for the specified Frame using the frame's name + or id number. + + Usage examples: + + >>> from nltk.corpus import framenet as fn + >>> f = fn.frame(256) + >>> f.name + 'Medical_specialties' + >>> f = fn.frame('Medical_specialties') + >>> f.ID + 256 + >>> # ensure non-ASCII character in definition doesn't trigger an encoding error: + >>> fn.frame('Imposing_obligation') + frame (1494): Imposing_obligation... + + The dict that is returned from this function will contain the + following information about the Frame: + + - 'name' : the name of the Frame (e.g. 'Birth', 'Apply_heat', etc.) 
+ - 'definition' : textual definition of the Frame + - 'ID' : the internal ID number of the Frame + - 'semTypes' : a list of semantic types for this frame + - Each item in the list is a dict containing the following keys: + - 'name' : can be used with the semtype() function + - 'ID' : can be used with the semtype() function + + - 'lexUnit' : a dict containing all of the LUs for this frame. + The keys in this dict are the names of the LUs and + the value for each key is itself a dict containing + info about the LU (see the lu() function for more info.) + + - 'FE' : a dict containing the Frame Elements that are part of this frame + The keys in this dict are the names of the FEs (e.g. 'Body_system') + and the values are dicts containing the following keys + - 'definition' : The definition of the FE + - 'name' : The name of the FE e.g. 'Body_system' + - 'ID' : The id number + - '_type' : 'fe' + - 'abbrev' : Abbreviation e.g. 'bod' + - 'coreType' : one of "Core", "Peripheral", or "Extra-Thematic" + - 'semType' : if not None, a dict with the following two keys: + - 'name' : name of the semantic type. can be used with + the semtype() function + - 'ID' : id number of the semantic type. can be used with + the semtype() function + - 'requiresFE' : if not None, a dict with the following two keys: + - 'name' : the name of another FE in this frame + - 'ID' : the id of the other FE in this frame + - 'excludesFE' : if not None, a dict with the following two keys: + - 'name' : the name of another FE in this frame + - 'ID' : the id of the other FE in this frame + + - 'frameRelation' : a list of objects describing frame relations + - 'FEcoreSets' : a list of Frame Element core sets for this frame + - Each item in the list is a list of FE objects + + :param fn_fid_or_fname: The Framenet name or id number of the frame + :type fn_fid_or_fname: int or str + :param ignorekeys: The keys to ignore. These keys will not be + included in the output. (optional) + :type ignorekeys: list(str) + :return: Information about a frame + :rtype: dict + """ + + # get the frame info by name or id number + if isinstance(fn_fid_or_fname, string_types): + f = self.frame_by_name(fn_fid_or_fname, ignorekeys) + else: + f = self.frame_by_id(fn_fid_or_fname, ignorekeys) + + return f + + def frames_by_lemma(self, pat): + """ + Returns a list of all frames that contain LUs in which the + ``name`` attribute of the LU matchs the given regular expression + ``pat``. Note that LU names are composed of "lemma.POS", where + the "lemma" part can be made up of either a single lexeme + (e.g. 'run') or multiple lexemes (e.g. 'a little'). + + Note: if you are going to be doing a lot of this type of + searching, you'd want to build an index that maps from lemmas to + frames because each time frames_by_lemma() is called, it has to + search through ALL of the frame XML files in the db. + + >>> from nltk.corpus import framenet as fn + >>> from nltk.corpus.reader.framenet import PrettyList + >>> PrettyList(sorted(fn.frames_by_lemma(r'(?i)a little'), key=itemgetter('ID'))) # doctest: +ELLIPSIS + [, ] + + :return: A list of frame objects. + :rtype: list(AttrDict) + """ + return PrettyList( + f + for f in self.frames() + if any(re.search(pat, luName) for luName in f.lexUnit) + ) + + def lu_basic(self, fn_luid): + """ + Returns basic information about the LU whose id is + ``fn_luid``. This is basically just a wrapper around the + ``lu()`` function with "subCorpus" info excluded. 
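+
+        Equivalently, this is just the following call (an illustrative
+        restatement of what the wrapper does)::
+
+            fn.lu(fn_luid, ignorekeys=['subCorpus', 'exemplars'])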
+ + >>> from nltk.corpus import framenet as fn + >>> lu = PrettyDict(fn.lu_basic(256), breakLines=True) + >>> # ellipses account for differences between FN 1.5 and 1.7 + >>> lu # doctest: +ELLIPSIS + {'ID': 256, + 'POS': 'V', + 'URL': u'https://framenet2.icsi.berkeley.edu/fnReports/data/lu/lu256.xml', + '_type': 'lu', + 'cBy': ..., + 'cDate': '02/08/2001 01:27:50 PST Thu', + 'definition': 'COD: be aware of beforehand; predict.', + 'definitionMarkup': 'COD: be aware of beforehand; predict.', + 'frame': , + 'lemmaID': 15082, + 'lexemes': [{'POS': 'V', 'breakBefore': 'false', 'headword': 'false', 'name': 'foresee', 'order': 1}], + 'name': 'foresee.v', + 'semTypes': [], + 'sentenceCount': {'annotated': ..., 'total': ...}, + 'status': 'FN1_Sent'} + + :param fn_luid: The id number of the desired LU + :type fn_luid: int + :return: Basic information about the lexical unit + :rtype: dict + """ + return self.lu(fn_luid, ignorekeys=['subCorpus', 'exemplars']) + + def lu(self, fn_luid, ignorekeys=[], luName=None, frameID=None, frameName=None): + """ + Access a lexical unit by its ID. luName, frameID, and frameName are used + only in the event that the LU does not have a file in the database + (which is the case for LUs with "Problem" status); in this case, + a placeholder LU is created which just contains its name, ID, and frame. + + + Usage examples: + + >>> from nltk.corpus import framenet as fn + >>> fn.lu(256).name + 'foresee.v' + >>> fn.lu(256).definition + 'COD: be aware of beforehand; predict.' + >>> fn.lu(256).frame.name + 'Expectation' + >>> pprint(list(map(PrettyDict, fn.lu(256).lexemes))) + [{'POS': 'V', 'breakBefore': 'false', 'headword': 'false', 'name': 'foresee', 'order': 1}] + + >>> fn.lu(227).exemplars[23] + exemplar sentence (352962): + [sentNo] 0 + [aPos] 59699508 + + [LU] (227) guess.v in Coming_to_believe + + [frame] (23) Coming_to_believe + + [annotationSet] 2 annotation sets + + [POS] 18 tags + + [POS_tagset] BNC + + [GF] 3 relations + + [PT] 3 phrases + + [Other] 1 entry + + [text] + [Target] + [FE] + + When he was inside the house , Culley noticed the characteristic + ------------------ + Content + + he would n't have guessed at . + -- ******* -- + Co C1 [Evidence:INI] + (Co=Cognizer, C1=Content) + + + + The dict that is returned from this function will contain most of the + following information about the LU. Note that some LUs do not contain + all of these pieces of information - particularly 'totalAnnotated' and + 'incorporatedFE' may be missing in some LUs: + + - 'name' : the name of the LU (e.g. 'merger.n') + - 'definition' : textual definition of the LU + - 'ID' : the internal ID number of the LU + - '_type' : 'lu' + - 'status' : e.g. 'Created' + - 'frame' : Frame that this LU belongs to + - 'POS' : the part of speech of this LU (e.g. 'N') + - 'totalAnnotated' : total number of examples annotated with this LU + - 'incorporatedFE' : FE that incorporates this LU (e.g. 'Ailment') + - 'sentenceCount' : a dict with the following two keys: + - 'annotated': number of sentences annotated with this LU + - 'total' : total number of sentences with this LU + + - 'lexemes' : a list of dicts describing the lemma of this LU. + Each dict in the list contains these keys: + - 'POS' : part of speech e.g. 'N' + - 'name' : either single-lexeme e.g. 'merger' or + multi-lexeme e.g. 'a little' + - 'order': the order of the lexeme in the lemma (starting from 1) + - 'headword': a boolean ('true' or 'false') + - 'breakBefore': Can this lexeme be separated from the previous lexeme? 
+ Consider: "take over.v" as in: + Germany took over the Netherlands in 2 days. + Germany took the Netherlands over in 2 days. + In this case, 'breakBefore' would be "true" for the lexeme + "over". Contrast this with "take after.v" as in: + Mary takes after her grandmother. + *Mary takes her grandmother after. + In this case, 'breakBefore' would be "false" for the lexeme "after" + + - 'lemmaID' : Can be used to connect lemmas in different LUs + - 'semTypes' : a list of semantic type objects for this LU + - 'subCorpus' : a list of subcorpora + - Each item in the list is a dict containing the following keys: + - 'name' : + - 'sentence' : a list of sentences in the subcorpus + - each item in the list is a dict with the following keys: + - 'ID': + - 'sentNo': + - 'text': the text of the sentence + - 'aPos': + - 'annotationSet': a list of annotation sets + - each item in the list is a dict with the following keys: + - 'ID': + - 'status': + - 'layer': a list of layers + - each layer is a dict containing the following keys: + - 'name': layer name (e.g. 'BNC') + - 'rank': + - 'label': a list of labels for the layer + - each label is a dict containing the following keys: + - 'start': start pos of label in sentence 'text' (0-based) + - 'end': end pos of label in sentence 'text' (0-based) + - 'name': name of label (e.g. 'NN1') + + Under the hood, this implementation looks up the lexical unit information + in the *frame* definition file. That file does not contain + corpus annotations, so the LU files will be accessed on demand if those are + needed. In principle, valence patterns could be loaded here too, + though these are not currently supported. + + :param fn_luid: The id number of the lexical unit + :type fn_luid: int + :param ignorekeys: The keys to ignore. These keys will not be + included in the output. (optional) + :type ignorekeys: list(str) + :return: All information about the lexical unit + :rtype: dict + """ + # look for this LU in cache + if not self._lu_idx: + self._buildluindex() + OOV = object() + luinfo = self._lu_idx.get(fn_luid, OOV) + if luinfo is OOV: + # LU not in the index. We create a placeholder by falling back to + # luName, frameID, and frameName. However, this will not be listed + # among the LUs for its frame. + self._warn( + 'LU ID not found: {0} ({1}) in {2} ({3})'.format( + luName, fn_luid, frameName, frameID + ) + ) + luinfo = AttrDict( + { + '_type': 'lu', + 'ID': fn_luid, + 'name': luName, + 'frameID': frameID, + 'status': 'Problem', + } + ) + f = self.frame_by_id(luinfo.frameID) + assert f.name == frameName, (f.name, frameName) + luinfo['frame'] = f + self._lu_idx[fn_luid] = luinfo + elif '_type' not in luinfo: + # we only have an index entry for the LU. loading the frame will replace this. + f = self.frame_by_id(luinfo.frameID) + luinfo = self._lu_idx[fn_luid] + if ignorekeys: + return AttrDict( + dict((k, v) for k, v in luinfo.items() if k not in ignorekeys) + ) + + return luinfo + + def _lu_file(self, lu, ignorekeys=[]): + """ + Augment the LU information that was loaded from the frame file + with additional information from the LU file. 
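+
+        This is normally reached lazily: ``_handle_frame_elt()`` wraps each
+        LU's ``subCorpus`` and ``exemplars`` attributes in ``Future`` objects,
+        so the LU file is only parsed when those attributes are first used.
+        A hedged sketch of the effect::
+
+            lu = fn.frame('Expectation').lexUnit['foresee.v']
+            sents = lu.exemplars   # a Future; resolved via _lu_file() when used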
+ """ + fn_luid = lu.ID + + fname = "lu{0}.xml".format(fn_luid) + locpath = os.path.join("{0}".format(self._root), self._lu_dir, fname) + # print(locpath, file=sys.stderr) + if not self._lu_idx: + self._buildluindex() + + try: + elt = XMLCorpusView(locpath, 'lexUnit')[0] + except IOError: + raise FramenetError('Unknown LU id: {0}'.format(fn_luid)) + + lu2 = self._handle_lexunit_elt(elt, ignorekeys) + lu.URL = self._fnweb_url + '/' + self._lu_dir + '/' + fname + lu.subCorpus = lu2.subCorpus + lu.exemplars = SpecialList( + 'luexemplars', [sent for subc in lu.subCorpus for sent in subc.sentence] + ) + for sent in lu.exemplars: + sent['LU'] = lu + sent['frame'] = lu.frame + for aset in sent.annotationSet: + aset['LU'] = lu + aset['frame'] = lu.frame + + return lu + + def _loadsemtypes(self): + """Create the semantic types index.""" + self._semtypes = AttrDict() + semtypeXML = [ + x + for x in XMLCorpusView( + self.abspath("semTypes.xml"), + 'semTypes/semType', + self._handle_semtype_elt, + ) + ] + for st in semtypeXML: + n = st['name'] + a = st['abbrev'] + i = st['ID'] + # Both name and abbrev should be able to retrieve the + # ID. The ID will retrieve the semantic type dict itself. + self._semtypes[n] = i + self._semtypes[a] = i + self._semtypes[i] = st + # now that all individual semtype XML is loaded, we can link them together + roots = [] + for st in self.semtypes(): + if st.superType: + st.superType = self.semtype(st.superType.supID) + st.superType.subTypes.append(st) + else: + if st not in roots: + roots.append(st) + st.rootType = st + queue = list(roots) + assert queue + while queue: + st = queue.pop(0) + for child in st.subTypes: + child.rootType = st.rootType + queue.append(child) + # self.propagate_semtypes() # apply inferencing over FE relations + + def propagate_semtypes(self): + """ + Apply inference rules to distribute semtypes over relations between FEs. + For FrameNet 1.5, this results in 1011 semtypes being propagated. + (Not done by default because it requires loading all frame files, + which takes several seconds. If this needed to be fast, it could be rewritten + to traverse the neighboring relations on demand for each FE semtype.) + + >>> from nltk.corpus import framenet as fn + >>> x = sum(1 for f in fn.frames() for fe in f.FE.values() if fe.semType) + >>> fn.propagate_semtypes() + >>> y = sum(1 for f in fn.frames() for fe in f.FE.values() if fe.semType) + >>> y-x > 1000 + True + """ + if not self._semtypes: + self._loadsemtypes() + if not self._ferel_idx: + self._buildrelationindex() + changed = True + i = 0 + nPropagations = 0 + while changed: + # make a pass and see if anything needs to be propagated + i += 1 + changed = False + for ferel in self.fe_relations(): + superST = ferel.superFE.semType + subST = ferel.subFE.semType + try: + if superST and superST is not subST: + # propagate downward + assert subST is None or self.semtype_inherits(subST, superST), ( + superST.name, + ferel, + subST.name, + ) + if subST is None: + ferel.subFE.semType = subST = superST + changed = True + nPropagations += 1 + if ( + ferel.type.name in ['Perspective_on', 'Subframe', 'Precedes'] + and subST + and subST is not superST + ): + # propagate upward + assert superST is None, (superST.name, ferel, subST.name) + ferel.superFE.semType = superST = subST + changed = True + nPropagations += 1 + except AssertionError as ex: + # bug in the data! 
ignore + # print(ex, file=sys.stderr) + continue + # print(i, nPropagations, file=sys.stderr) + + def semtype(self, key): + """ + >>> from nltk.corpus import framenet as fn + >>> fn.semtype(233).name + 'Temperature' + >>> fn.semtype(233).abbrev + 'Temp' + >>> fn.semtype('Temperature').ID + 233 + + :param key: The name, abbreviation, or id number of the semantic type + :type key: string or int + :return: Information about a semantic type + :rtype: dict + """ + if isinstance(key, int): + stid = key + else: + try: + stid = self._semtypes[key] + except TypeError: + self._loadsemtypes() + stid = self._semtypes[key] + + try: + st = self._semtypes[stid] + except TypeError: + self._loadsemtypes() + st = self._semtypes[stid] + + return st + + def semtype_inherits(self, st, superST): + if not isinstance(st, dict): + st = self.semtype(st) + if not isinstance(superST, dict): + superST = self.semtype(superST) + par = st.superType + while par: + if par is superST: + return True + par = par.superType + return False + + def frames(self, name=None): + """ + Obtain details for a specific frame. + + >>> from nltk.corpus import framenet as fn + >>> len(fn.frames()) in (1019, 1221) # FN 1.5 and 1.7, resp. + True + >>> x = PrettyList(fn.frames(r'(?i)crim'), maxReprSize=0, breakLines=True) + >>> x.sort(key=itemgetter('ID')) + >>> x + [, + , + , + ] + + A brief intro to Frames (excerpted from "FrameNet II: Extended + Theory and Practice" by Ruppenhofer et. al., 2010): + + A Frame is a script-like conceptual structure that describes a + particular type of situation, object, or event along with the + participants and props that are needed for that Frame. For + example, the "Apply_heat" frame describes a common situation + involving a Cook, some Food, and a Heating_Instrument, and is + evoked by words such as bake, blanch, boil, broil, brown, + simmer, steam, etc. + + We call the roles of a Frame "frame elements" (FEs) and the + frame-evoking words are called "lexical units" (LUs). + + FrameNet includes relations between Frames. Several types of + relations are defined, of which the most important are: + + - Inheritance: An IS-A relation. The child frame is a subtype + of the parent frame, and each FE in the parent is bound to + a corresponding FE in the child. An example is the + "Revenge" frame which inherits from the + "Rewards_and_punishments" frame. + + - Using: The child frame presupposes the parent frame as + background, e.g the "Speed" frame "uses" (or presupposes) + the "Motion" frame; however, not all parent FEs need to be + bound to child FEs. + + - Subframe: The child frame is a subevent of a complex event + represented by the parent, e.g. the "Criminal_process" frame + has subframes of "Arrest", "Arraignment", "Trial", and + "Sentencing". + + - Perspective_on: The child frame provides a particular + perspective on an un-perspectivized parent frame. A pair of + examples consists of the "Hiring" and "Get_a_job" frames, + which perspectivize the "Employment_start" frame from the + Employer's and the Employee's point of view, respectively. + + :param name: A regular expression pattern used to match against + Frame names. If 'name' is None, then a list of all + Framenet Frames will be returned. + :type name: str + :return: A list of matching Frames (or all Frames). 
+ :rtype: list(AttrDict) + """ + try: + fIDs = list(self._frame_idx.keys()) + except AttributeError: + self._buildframeindex() + fIDs = list(self._frame_idx.keys()) + + if name is not None: + return PrettyList( + self.frame(fID) for fID, finfo in self.frame_ids_and_names(name).items() + ) + else: + return PrettyLazyMap(self.frame, fIDs) + + def frame_ids_and_names(self, name=None): + """ + Uses the frame index, which is much faster than looking up each frame definition + if only the names and IDs are needed. + """ + if not self._frame_idx: + self._buildframeindex() + return dict( + (fID, finfo.name) + for fID, finfo in self._frame_idx.items() + if name is None or re.search(name, finfo.name) is not None + ) + + def fes(self, name=None, frame=None): + ''' + Lists frame element objects. If 'name' is provided, this is treated as + a case-insensitive regular expression to filter by frame name. + (Case-insensitivity is because casing of frame element names is not always + consistent across frames.) Specify 'frame' to filter by a frame name pattern, + ID, or object. + + >>> from nltk.corpus import framenet as fn + >>> fn.fes('Noise_maker') + [] + >>> sorted([(fe.frame.name,fe.name) for fe in fn.fes('sound')]) + [('Cause_to_make_noise', 'Sound_maker'), ('Make_noise', 'Sound'), + ('Make_noise', 'Sound_source'), ('Sound_movement', 'Location_of_sound_source'), + ('Sound_movement', 'Sound'), ('Sound_movement', 'Sound_source'), + ('Sounds', 'Component_sound'), ('Sounds', 'Location_of_sound_source'), + ('Sounds', 'Sound_source'), ('Vocalizations', 'Location_of_sound_source'), + ('Vocalizations', 'Sound_source')] + >>> sorted([(fe.frame.name,fe.name) for fe in fn.fes('sound',r'(?i)make_noise')]) + [('Cause_to_make_noise', 'Sound_maker'), + ('Make_noise', 'Sound'), + ('Make_noise', 'Sound_source')] + >>> sorted(set(fe.name for fe in fn.fes('^sound'))) + ['Sound', 'Sound_maker', 'Sound_source'] + >>> len(fn.fes('^sound$')) + 2 + + :param name: A regular expression pattern used to match against + frame element names. If 'name' is None, then a list of all + frame elements will be returned. + :type name: str + :return: A list of matching frame elements + :rtype: list(AttrDict) + ''' + # what frames are we searching in? + if frame is not None: + if isinstance(frame, int): + frames = [self.frame(frame)] + elif isinstance(frame, string_types): + frames = self.frames(frame) + else: + frames = [frame] + else: + frames = self.frames() + + return PrettyList( + fe + for f in frames + for fename, fe in f.FE.items() + if name is None or re.search(name, fename, re.I) + ) + + def lus(self, name=None, frame=None): + """ + Obtain details for lexical units. + Optionally restrict by lexical unit name pattern, and/or to a certain frame + or frames whose name matches a pattern. + + >>> from nltk.corpus import framenet as fn + >>> len(fn.lus()) in (11829, 13572) # FN 1.5 and 1.7, resp. + True + >>> PrettyList(sorted(fn.lus(r'(?i)a little'), key=itemgetter('ID')), maxReprSize=0, breakLines=True) + [, + , + ] + >>> PrettyList(sorted(fn.lus(r'interest', r'(?i)stimulus'), key=itemgetter('ID'))) + [, ] + + A brief intro to Lexical Units (excerpted from "FrameNet II: + Extended Theory and Practice" by Ruppenhofer et. al., 2010): + + A lexical unit (LU) is a pairing of a word with a meaning. For + example, the "Apply_heat" Frame describes a common situation + involving a Cook, some Food, and a Heating Instrument, and is + _evoked_ by words such as bake, blanch, boil, broil, brown, + simmer, steam, etc. 
These frame-evoking words are the LUs in the + Apply_heat frame. Each sense of a polysemous word is a different + LU. + + We have used the word "word" in talking about LUs. The reality + is actually rather complex. When we say that the word "bake" is + polysemous, we mean that the lemma "bake.v" (which has the + word-forms "bake", "bakes", "baked", and "baking") is linked to + three different frames: + + - Apply_heat: "Michelle baked the potatoes for 45 minutes." + + - Cooking_creation: "Michelle baked her mother a cake for her birthday." + + - Absorb_heat: "The potatoes have to bake for more than 30 minutes." + + These constitute three different LUs, with different + definitions. + + Multiword expressions such as "given name" and hyphenated words + like "shut-eye" can also be LUs. Idiomatic phrases such as + "middle of nowhere" and "give the slip (to)" are also defined as + LUs in the appropriate frames ("Isolated_places" and "Evading", + respectively), and their internal structure is not analyzed. + + Framenet provides multiple annotated examples of each sense of a + word (i.e. each LU). Moreover, the set of examples + (approximately 20 per LU) illustrates all of the combinatorial + possibilities of the lexical unit. + + Each LU is linked to a Frame, and hence to the other words which + evoke that Frame. This makes the FrameNet database similar to a + thesaurus, grouping together semantically similar words. + + In the simplest case, frame-evoking words are verbs such as + "fried" in: + + "Matilde fried the catfish in a heavy iron skillet." + + Sometimes event nouns may evoke a Frame. For example, + "reduction" evokes "Cause_change_of_scalar_position" in: + + "...the reduction of debt levels to $665 million from $2.6 billion." + + Adjectives may also evoke a Frame. For example, "asleep" may + evoke the "Sleep" frame as in: + + "They were asleep for hours." + + Many common nouns, such as artifacts like "hat" or "tower", + typically serve as dependents rather than clearly evoking their + own frames. + + :param name: A regular expression pattern used to search the LU + names. Note that LU names take the form of a dotted + string (e.g. "run.v" or "a little.adv") in which a + lemma preceeds the "." and a POS follows the + dot. The lemma may be composed of a single lexeme + (e.g. "run") or of multiple lexemes (e.g. "a + little"). If 'name' is not given, then all LUs will + be returned. + + The valid POSes are: + + v - verb + n - noun + a - adjective + adv - adverb + prep - preposition + num - numbers + intj - interjection + art - article + c - conjunction + scon - subordinating conjunction + + :type name: str + :type frame: str or int or frame + :return: A list of selected (or all) lexical units + :rtype: list of LU objects (dicts). See the lu() function for info + about the specifics of LU objects. 
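+
+        A further illustrative call (a sketch combining the name and frame
+        filters; the patterns reuse the Apply_heat/bake example above)::
+
+            fn.lus(r'(?i)bake', r'(?i)apply_heat')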
+ + """ + if not self._lu_idx: + self._buildluindex() + + if name is not None: # match LUs, then restrict by frame + result = PrettyList( + self.lu(luID) for luID, luName in self.lu_ids_and_names(name).items() + ) + if frame is not None: + if isinstance(frame, int): + frameIDs = {frame} + elif isinstance(frame, string_types): + frameIDs = {f.ID for f in self.frames(frame)} + else: + frameIDs = {frame.ID} + result = PrettyList(lu for lu in result if lu.frame.ID in frameIDs) + elif frame is not None: # all LUs in matching frames + if isinstance(frame, int): + frames = [self.frame(frame)] + elif isinstance(frame, string_types): + frames = self.frames(frame) + else: + frames = [frame] + result = PrettyLazyIteratorList( + iter(LazyConcatenation(list(f.lexUnit.values()) for f in frames)) + ) + else: # all LUs + luIDs = [ + luID + for luID, lu in self._lu_idx.items() + if lu.status not in self._bad_statuses + ] + result = PrettyLazyMap(self.lu, luIDs) + return result + + def lu_ids_and_names(self, name=None): + """ + Uses the LU index, which is much faster than looking up each LU definition + if only the names and IDs are needed. + """ + if not self._lu_idx: + self._buildluindex() + return { + luID: luinfo.name + for luID, luinfo in self._lu_idx.items() + if luinfo.status not in self._bad_statuses + and (name is None or re.search(name, luinfo.name) is not None) + } + + def docs_metadata(self, name=None): + """ + Return an index of the annotated documents in Framenet. + + Details for a specific annotated document can be obtained using this + class's doc() function and pass it the value of the 'ID' field. + + >>> from nltk.corpus import framenet as fn + >>> len(fn.docs()) in (78, 107) # FN 1.5 and 1.7, resp. + True + >>> set([x.corpname for x in fn.docs_metadata()])>=set(['ANC', 'KBEval', \ + 'LUCorpus-v0.3', 'Miscellaneous', 'NTI', 'PropBank']) + True + + :param name: A regular expression pattern used to search the + file name of each annotated document. The document's + file name contains the name of the corpus that the + document is from, followed by two underscores "__" + followed by the document name. So, for example, the + file name "LUCorpus-v0.3__20000410_nyt-NEW.xml" is + from the corpus named "LUCorpus-v0.3" and the + document name is "20000410_nyt-NEW.xml". + :type name: str + :return: A list of selected (or all) annotated documents + :rtype: list of dicts, where each dict object contains the following + keys: + + - 'name' + - 'ID' + - 'corpid' + - 'corpname' + - 'description' + - 'filename' + """ + try: + ftlist = PrettyList(self._fulltext_idx.values()) + except AttributeError: + self._buildcorpusindex() + ftlist = PrettyList(self._fulltext_idx.values()) + + if name is None: + return ftlist + else: + return PrettyList( + x for x in ftlist if re.search(name, x['filename']) is not None + ) + + def docs(self, name=None): + """ + Return a list of the annotated full-text documents in FrameNet, + optionally filtered by a regex to be matched against the document name. + """ + return PrettyLazyMap((lambda x: self.doc(x.ID)), self.docs_metadata(name)) + + def sents(self, exemplars=True, full_text=True): + """ + Annotated sentences matching the specified criteria. + """ + if exemplars: + if full_text: + return self.exemplars() + self.ft_sents() + else: + return self.exemplars() + elif full_text: + return self.ft_sents() + + def annotations(self, luNamePattern=None, exemplars=True, full_text=True): + """ + Frame annotation sets matching the specified criteria. 
+ """ + + if exemplars: + epart = PrettyLazyIteratorList( + sent.frameAnnotation for sent in self.exemplars(luNamePattern) + ) + else: + epart = [] + + if full_text: + if luNamePattern is not None: + matchedLUIDs = set(self.lu_ids_and_names(luNamePattern).keys()) + ftpart = PrettyLazyIteratorList( + aset + for sent in self.ft_sents() + for aset in sent.annotationSet[1:] + if luNamePattern is None or aset.get('luID', 'CXN_ASET') in matchedLUIDs + ) + else: + ftpart = [] + + if exemplars: + if full_text: + return epart + ftpart + else: + return epart + elif full_text: + return ftpart + + def exemplars(self, luNamePattern=None, frame=None, fe=None, fe2=None): + """ + Lexicographic exemplar sentences, optionally filtered by LU name and/or 1-2 FEs that + are realized overtly. 'frame' may be a name pattern, frame ID, or frame instance. + 'fe' may be a name pattern or FE instance; if specified, 'fe2' may also + be specified to retrieve sentences with both overt FEs (in either order). + """ + if fe is None and fe2 is not None: + raise FramenetError('exemplars(..., fe=None, fe2=) is not allowed') + elif fe is not None and fe2 is not None: + if not isinstance(fe2, string_types): + if isinstance(fe, string_types): + # fe2 is specific to a particular frame. swap fe and fe2 so fe is always used to determine the frame. + fe, fe2 = fe2, fe + elif fe.frame is not fe2.frame: # ensure frames match + raise FramenetError( + 'exemplars() call with inconsistent `fe` and `fe2` specification (frames must match)' + ) + if frame is None and fe is not None and not isinstance(fe, string_types): + frame = fe.frame + + # narrow down to frames matching criteria + + lusByFrame = defaultdict( + list + ) # frame name -> matching LUs, if luNamePattern is specified + if frame is not None or luNamePattern is not None: + if frame is None or isinstance(frame, string_types): + if luNamePattern is not None: + frames = set() + for lu in self.lus(luNamePattern, frame=frame): + frames.add(lu.frame.ID) + lusByFrame[lu.frame.name].append(lu) + frames = LazyMap(self.frame, list(frames)) + else: + frames = self.frames(frame) + else: + if isinstance(frame, int): + frames = [self.frame(frame)] + else: # frame object + frames = [frame] + + if luNamePattern is not None: + lusByFrame = {frame.name: self.lus(luNamePattern, frame=frame)} + + if fe is not None: # narrow to frames that define this FE + if isinstance(fe, string_types): + frames = PrettyLazyIteratorList( + f + for f in frames + if fe in f.FE + or any(re.search(fe, ffe, re.I) for ffe in f.FE.keys()) + ) + else: + if fe.frame not in frames: + raise FramenetError( + 'exemplars() call with inconsistent `frame` and `fe` specification' + ) + frames = [fe.frame] + + if fe2 is not None: # narrow to frames that ALSO define this FE + if isinstance(fe2, string_types): + frames = PrettyLazyIteratorList( + f + for f in frames + if fe2 in f.FE + or any(re.search(fe2, ffe, re.I) for ffe in f.FE.keys()) + ) + # else we already narrowed it to a single frame + else: # frame, luNamePattern are None. 
fe, fe2 are None or strings + if fe is not None: + frames = {ffe.frame.ID for ffe in self.fes(fe)} + if fe2 is not None: + frames2 = {ffe.frame.ID for ffe in self.fes(fe2)} + frames = frames & frames2 + frames = LazyMap(self.frame, list(frames)) + else: + frames = self.frames() + + # we've narrowed down 'frames' + # now get exemplars for relevant LUs in those frames + + def _matching_exs(): + for f in frames: + fes = fes2 = None # FEs of interest + if fe is not None: + fes = ( + {ffe for ffe in f.FE.keys() if re.search(fe, ffe, re.I)} + if isinstance(fe, string_types) + else {fe.name} + ) + if fe2 is not None: + fes2 = ( + {ffe for ffe in f.FE.keys() if re.search(fe2, ffe, re.I)} + if isinstance(fe2, string_types) + else {fe2.name} + ) + + for lu in ( + lusByFrame[f.name] + if luNamePattern is not None + else f.lexUnit.values() + ): + for ex in lu.exemplars: + if (fes is None or self._exemplar_of_fes(ex, fes)) and ( + fes2 is None or self._exemplar_of_fes(ex, fes2) + ): + yield ex + + return PrettyLazyIteratorList(_matching_exs()) + + def _exemplar_of_fes(self, ex, fes=None): + """ + Given an exemplar sentence and a set of FE names, return the subset of FE names + that are realized overtly in the sentence on the FE, FE2, or FE3 layer. + + If 'fes' is None, returns all overt FE names. + """ + overtNames = set(list(zip(*ex.FE[0]))[2]) if ex.FE[0] else set() + if 'FE2' in ex: + overtNames |= set(list(zip(*ex.FE2[0]))[2]) if ex.FE2[0] else set() + if 'FE3' in ex: + overtNames |= set(list(zip(*ex.FE3[0]))[2]) if ex.FE3[0] else set() + return overtNames & fes if fes is not None else overtNames + + def ft_sents(self, docNamePattern=None): + """ + Full-text annotation sentences, optionally filtered by document name. + """ + return PrettyLazyIteratorList( + sent for d in self.docs(docNamePattern) for sent in d.sentence + ) + + def frame_relation_types(self): + """ + Obtain a list of frame relation types. + + >>> from nltk.corpus import framenet as fn + >>> frts = sorted(fn.frame_relation_types(), key=itemgetter('ID')) + >>> isinstance(frts, list) + True + >>> len(frts) in (9, 10) # FN 1.5 and 1.7, resp. + True + >>> PrettyDict(frts[0], breakLines=True) + {'ID': 1, + '_type': 'framerelationtype', + 'frameRelations': [ Child=Change_of_consistency>, Child=Rotting>, ...], + 'name': 'Inheritance', + 'subFrameName': 'Child', + 'superFrameName': 'Parent'} + + :return: A list of all of the frame relation types in framenet + :rtype: list(dict) + """ + if not self._freltyp_idx: + self._buildrelationindex() + return self._freltyp_idx.values() + + def frame_relations(self, frame=None, frame2=None, type=None): + """ + :param frame: (optional) frame object, name, or ID; only relations involving + this frame will be returned + :param frame2: (optional; 'frame' must be a different frame) only show relations + between the two specified frames, in either direction + :param type: (optional) frame relation type (name or object); show only relations + of this type + :type frame: int or str or AttrDict + :return: A list of all of the frame relations in framenet + :rtype: list(dict) + + >>> from nltk.corpus import framenet as fn + >>> frels = fn.frame_relations() + >>> isinstance(frels, list) + True + >>> len(frels) in (1676, 2070) # FN 1.5 and 1.7, resp. 
+ True + >>> PrettyList(fn.frame_relations('Cooking_creation'), maxReprSize=0, breakLines=True) + [ Child=Cooking_creation>, + Child=Cooking_creation>, + ReferringEntry=Cooking_creation>] + >>> PrettyList(fn.frame_relations(274), breakLines=True) + [ Child=Dodging>, + Child=Evading>, ...] + >>> PrettyList(fn.frame_relations(fn.frame('Cooking_creation')), breakLines=True) + [ Child=Cooking_creation>, + Child=Cooking_creation>, ...] + >>> PrettyList(fn.frame_relations('Cooking_creation', type='Inheritance')) + [ Child=Cooking_creation>] + >>> PrettyList(fn.frame_relations('Cooking_creation', 'Apply_heat'), breakLines=True) + [ Child=Cooking_creation>, + ReferringEntry=Cooking_creation>] + """ + relation_type = type + + if not self._frel_idx: + self._buildrelationindex() + + rels = None + + if relation_type is not None: + if not isinstance(relation_type, dict): + type = [rt for rt in self.frame_relation_types() if rt.name == type][0] + assert isinstance(type, dict) + + # lookup by 'frame' + if frame is not None: + if isinstance(frame, dict) and 'frameRelations' in frame: + rels = PrettyList(frame.frameRelations) + else: + if not isinstance(frame, int): + if isinstance(frame, dict): + frame = frame.ID + else: + frame = self.frame_by_name(frame).ID + rels = [self._frel_idx[frelID] for frelID in self._frel_f_idx[frame]] + + # filter by 'type' + if type is not None: + rels = [rel for rel in rels if rel.type is type] + elif type is not None: + # lookup by 'type' + rels = type.frameRelations + else: + rels = self._frel_idx.values() + + # filter by 'frame2' + if frame2 is not None: + if frame is None: + raise FramenetError( + "frame_relations(frame=None, frame2=) is not allowed" + ) + if not isinstance(frame2, int): + if isinstance(frame2, dict): + frame2 = frame2.ID + else: + frame2 = self.frame_by_name(frame2).ID + if frame == frame2: + raise FramenetError( + "The two frame arguments to frame_relations() must be different frames" + ) + rels = [ + rel + for rel in rels + if rel.superFrame.ID == frame2 or rel.subFrame.ID == frame2 + ] + + return PrettyList( + sorted( + rels, + key=lambda frel: (frel.type.ID, frel.superFrameName, frel.subFrameName), + ) + ) + + def fe_relations(self): + """ + Obtain a list of frame element relations. + + >>> from nltk.corpus import framenet as fn + >>> ferels = fn.fe_relations() + >>> isinstance(ferels, list) + True + >>> len(ferels) in (10020, 12393) # FN 1.5 and 1.7, resp. + True + >>> PrettyDict(ferels[0], breakLines=True) + {'ID': 14642, + '_type': 'ferelation', + 'frameRelation': Child=Lively_place>, + 'subFE': , + 'subFEName': 'Degree', + 'subFrame': , + 'subID': 11370, + 'supID': 2271, + 'superFE': , + 'superFEName': 'Degree', + 'superFrame': , + 'type': } + + :return: A list of all of the frame element relations in framenet + :rtype: list(dict) + """ + if not self._ferel_idx: + self._buildrelationindex() + return PrettyList( + sorted( + self._ferel_idx.values(), + key=lambda ferel: ( + ferel.type.ID, + ferel.frameRelation.superFrameName, + ferel.superFEName, + ferel.frameRelation.subFrameName, + ferel.subFEName, + ), + ) + ) + + def semtypes(self): + """ + Obtain a list of semantic types. + + >>> from nltk.corpus import framenet as fn + >>> stypes = fn.semtypes() + >>> len(stypes) in (73, 109) # FN 1.5 and 1.7, resp. 
+ True + >>> sorted(stypes[0].keys()) + ['ID', '_type', 'abbrev', 'definition', 'definitionMarkup', 'name', 'rootType', 'subTypes', 'superType'] + + :return: A list of all of the semantic types in framenet + :rtype: list(dict) + """ + if not self._semtypes: + self._loadsemtypes() + return PrettyList( + self._semtypes[i] for i in self._semtypes if isinstance(i, int) + ) + + def _load_xml_attributes(self, d, elt): + """ + Extracts a subset of the attributes from the given element and + returns them in a dictionary. + + :param d: A dictionary in which to store the attributes. + :type d: dict + :param elt: An ElementTree Element + :type elt: Element + :return: Returns the input dict ``d`` possibly including attributes from ``elt`` + :rtype: dict + """ + + d = type(d)(d) + + try: + attr_dict = elt.attrib + except AttributeError: + return d + + if attr_dict is None: + return d + + # Ignore these attributes when loading attributes from an xml node + ignore_attrs = [ #'cBy', 'cDate', 'mDate', # <-- annotation metadata that could be of interest + 'xsi', + 'schemaLocation', + 'xmlns', + 'bgColor', + 'fgColor', + ] + + for attr in attr_dict: + + if any(attr.endswith(x) for x in ignore_attrs): + continue + + val = attr_dict[attr] + if val.isdigit(): + d[attr] = int(val) + else: + d[attr] = val + + return d + + def _strip_tags(self, data): + """ + Gets rid of all tags and newline characters from the given input + + :return: A cleaned-up version of the input string + :rtype: str + """ + + try: + ''' + # Look for boundary issues in markup. (Sometimes FEs are pluralized in definitions.) + m = re.search(r'\w[<][^/]|[<][/][^>]+[>](s\w|[a-rt-z0-9])', data) + if m: + print('Markup boundary:', data[max(0,m.start(0)-10):m.end(0)+10].replace('\n',' '), file=sys.stderr) + ''' + + data = data.replace('', '') + data = data.replace('', '') + data = re.sub('', '', data) + data = data.replace('', '') + data = data.replace('', '') + data = data.replace('', '') + data = data.replace('', '') + data = data.replace('', '') + data = data.replace('', '') + data = data.replace('', '') + data = data.replace('', "'") + data = data.replace('', "'") + data = data.replace('', '') + data = data.replace('', '') + data = data.replace('', '') + data = data.replace('', '') + + # Get rid of and tags + data = data.replace('', '') + data = data.replace('', '') + + data = data.replace('\n', ' ') + except AttributeError: + pass + + return data + + def _handle_elt(self, elt, tagspec=None): + """Extracts and returns the attributes of the given element""" + return self._load_xml_attributes(AttrDict(), elt) + + def _handle_fulltextindex_elt(self, elt, tagspec=None): + """ + Extracts corpus/document info from the fulltextIndex.xml file. + + Note that this function "flattens" the information contained + in each of the "corpus" elements, so that each "document" + element will contain attributes for the corpus and + corpusid. Also, each of the "document" items will contain a + new attribute called "filename" that is the base file name of + the xml file for the document in the "fulltext" subdir of the + Framenet corpus. 
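+
+        For instance (reusing the file name cited in ``docs_metadata()``), a
+        "document" element named "20000410_nyt-NEW" inside the corpus
+        "LUCorpus-v0.3" ends up with::
+
+            doc.filename = "LUCorpus-v0.3__20000410_nyt-NEW.xml"
+            doc.corpname = "LUCorpus-v0.3"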
+ """ + ftinfo = self._load_xml_attributes(AttrDict(), elt) + corpname = ftinfo.name + corpid = ftinfo.ID + retlist = [] + for sub in elt: + if sub.tag.endswith('document'): + doc = self._load_xml_attributes(AttrDict(), sub) + if 'name' in doc: + docname = doc.name + else: + docname = doc.description + doc.filename = "{0}__{1}.xml".format(corpname, docname) + doc.URL = ( + self._fnweb_url + '/' + self._fulltext_dir + '/' + doc.filename + ) + doc.corpname = corpname + doc.corpid = corpid + retlist.append(doc) + + return retlist + + def _handle_frame_elt(self, elt, ignorekeys=[]): + """Load the info for a Frame from a frame xml file""" + frinfo = self._load_xml_attributes(AttrDict(), elt) + + frinfo['_type'] = 'frame' + frinfo['definition'] = "" + frinfo['definitionMarkup'] = "" + frinfo['FE'] = PrettyDict() + frinfo['FEcoreSets'] = [] + frinfo['lexUnit'] = PrettyDict() + frinfo['semTypes'] = [] + for k in ignorekeys: + if k in frinfo: + del frinfo[k] + + for sub in elt: + if sub.tag.endswith('definition') and 'definition' not in ignorekeys: + frinfo['definitionMarkup'] = sub.text + frinfo['definition'] = self._strip_tags(sub.text) + elif sub.tag.endswith('FE') and 'FE' not in ignorekeys: + feinfo = self._handle_fe_elt(sub) + frinfo['FE'][feinfo.name] = feinfo + feinfo['frame'] = frinfo # backpointer + elif sub.tag.endswith('FEcoreSet') and 'FEcoreSet' not in ignorekeys: + coreset = self._handle_fecoreset_elt(sub) + # assumes all FEs have been loaded before coresets + frinfo['FEcoreSets'].append( + PrettyList(frinfo['FE'][fe.name] for fe in coreset) + ) + elif sub.tag.endswith('lexUnit') and 'lexUnit' not in ignorekeys: + luentry = self._handle_framelexunit_elt(sub) + if luentry['status'] in self._bad_statuses: + # problematic LU entry; ignore it + continue + luentry['frame'] = frinfo + luentry['URL'] = ( + self._fnweb_url + + '/' + + self._lu_dir + + '/' + + "lu{0}.xml".format(luentry['ID']) + ) + luentry['subCorpus'] = Future( + (lambda lu: lambda: self._lu_file(lu).subCorpus)(luentry) + ) + luentry['exemplars'] = Future( + (lambda lu: lambda: self._lu_file(lu).exemplars)(luentry) + ) + frinfo['lexUnit'][luentry.name] = luentry + if not self._lu_idx: + self._buildluindex() + self._lu_idx[luentry.ID] = luentry + elif sub.tag.endswith('semType') and 'semTypes' not in ignorekeys: + semtypeinfo = self._load_xml_attributes(AttrDict(), sub) + frinfo['semTypes'].append(self.semtype(semtypeinfo.ID)) + + frinfo['frameRelations'] = self.frame_relations(frame=frinfo) + + # resolve 'requires' and 'excludes' links between FEs of this frame + for fe in frinfo.FE.values(): + if fe.requiresFE: + name, ID = fe.requiresFE.name, fe.requiresFE.ID + fe.requiresFE = frinfo.FE[name] + assert fe.requiresFE.ID == ID + if fe.excludesFE: + name, ID = fe.excludesFE.name, fe.excludesFE.ID + fe.excludesFE = frinfo.FE[name] + assert fe.excludesFE.ID == ID + + return frinfo + + def _handle_fecoreset_elt(self, elt): + """Load fe coreset info from xml.""" + info = self._load_xml_attributes(AttrDict(), elt) + tmp = [] + for sub in elt: + tmp.append(self._load_xml_attributes(AttrDict(), sub)) + + return tmp + + def _handle_framerelationtype_elt(self, elt, *args): + """Load frame-relation element and its child fe-relation elements from frRelation.xml.""" + info = self._load_xml_attributes(AttrDict(), elt) + info['_type'] = 'framerelationtype' + info['frameRelations'] = PrettyList() + + for sub in elt: + if sub.tag.endswith('frameRelation'): + frel = self._handle_framerelation_elt(sub) + frel['type'] = info # backpointer + for 
ferel in frel.feRelations: + ferel['type'] = info + info['frameRelations'].append(frel) + + return info + + def _handle_framerelation_elt(self, elt): + """Load frame-relation element and its child fe-relation elements from frRelation.xml.""" + info = self._load_xml_attributes(AttrDict(), elt) + assert info['superFrameName'] != info['subFrameName'], (elt, info) + info['_type'] = 'framerelation' + info['feRelations'] = PrettyList() + + for sub in elt: + if sub.tag.endswith('FERelation'): + ferel = self._handle_elt(sub) + ferel['_type'] = 'ferelation' + ferel['frameRelation'] = info # backpointer + info['feRelations'].append(ferel) + + return info + + def _handle_fulltextannotation_elt(self, elt): + """Load full annotation info for a document from its xml + file. The main element (fullTextAnnotation) contains a 'header' + element (which we ignore here) and a bunch of 'sentence' + elements.""" + info = AttrDict() + info['_type'] = 'fulltext_annotation' + info['sentence'] = [] + + for sub in elt: + if sub.tag.endswith('header'): + continue # not used + elif sub.tag.endswith('sentence'): + s = self._handle_fulltext_sentence_elt(sub) + s.doc = info + info['sentence'].append(s) + + return info + + def _handle_fulltext_sentence_elt(self, elt): + """Load information from the given 'sentence' element. Each + 'sentence' element contains a "text" and "annotationSet" sub + elements.""" + info = self._load_xml_attributes(AttrDict(), elt) + info['_type'] = "fulltext_sentence" + info['annotationSet'] = [] + info['targets'] = [] + target_spans = set() + info['_ascii'] = types.MethodType( + _annotation_ascii, info + ) # attach a method for this instance + info['text'] = "" + + for sub in elt: + if sub.tag.endswith('text'): + info['text'] = self._strip_tags(sub.text) + elif sub.tag.endswith('annotationSet'): + a = self._handle_fulltextannotationset_elt( + sub, is_pos=(len(info['annotationSet']) == 0) + ) + if 'cxnID' in a: # ignoring construction annotations for now + continue + a.sent = info + a.text = info.text + info['annotationSet'].append(a) + if 'Target' in a: + for tspan in a.Target: + if tspan in target_spans: + self._warn( + 'Duplicate target span "{0}"'.format( + info.text[slice(*tspan)] + ), + tspan, + 'in sentence', + info['ID'], + info.text, + ) + # this can happen in cases like "chemical and biological weapons" + # being annotated as "chemical weapons" and "biological weapons" + else: + target_spans.add(tspan) + info['targets'].append((a.Target, a.luName, a.frameName)) + + assert info['annotationSet'][0].status == 'UNANN' + info['POS'] = info['annotationSet'][0].POS + info['POS_tagset'] = info['annotationSet'][0].POS_tagset + return info + + def _handle_fulltextannotationset_elt(self, elt, is_pos=False): + """Load information from the given 'annotationSet' element. Each + 'annotationSet' contains several "layer" elements.""" + + info = self._handle_luannotationset_elt(elt, is_pos=is_pos) + if not is_pos: + info['_type'] = 'fulltext_annotationset' + if 'cxnID' not in info: # ignoring construction annotations for now + info['LU'] = self.lu( + info.luID, + luName=info.luName, + frameID=info.frameID, + frameName=info.frameName, + ) + info['frame'] = info.LU.frame + return info + + def _handle_fulltextlayer_elt(self, elt): + """Load information from the given 'layer' element. 
Each + 'layer' contains several "label" elements.""" + info = self._load_xml_attributes(AttrDict(), elt) + info['_type'] = 'layer' + info['label'] = [] + + for sub in elt: + if sub.tag.endswith('label'): + l = self._load_xml_attributes(AttrDict(), sub) + info['label'].append(l) + + return info + + def _handle_framelexunit_elt(self, elt): + """Load the lexical unit info from an xml element in a frame's xml file.""" + luinfo = AttrDict() + luinfo['_type'] = 'lu' + luinfo = self._load_xml_attributes(luinfo, elt) + luinfo["definition"] = "" + luinfo["definitionMarkup"] = "" + luinfo["sentenceCount"] = PrettyDict() + luinfo['lexemes'] = PrettyList() # multiword LUs have multiple lexemes + luinfo['semTypes'] = PrettyList() # an LU can have multiple semtypes + + for sub in elt: + if sub.tag.endswith('definition'): + luinfo['definitionMarkup'] = sub.text + luinfo['definition'] = self._strip_tags(sub.text) + elif sub.tag.endswith('sentenceCount'): + luinfo['sentenceCount'] = self._load_xml_attributes(PrettyDict(), sub) + elif sub.tag.endswith('lexeme'): + lexemeinfo = self._load_xml_attributes(PrettyDict(), sub) + if not isinstance(lexemeinfo.name, string_types): + # some lexeme names are ints by default: e.g., + # thousand.num has lexeme with name="1000" + lexemeinfo.name = str(lexemeinfo.name) + luinfo['lexemes'].append(lexemeinfo) + elif sub.tag.endswith('semType'): + semtypeinfo = self._load_xml_attributes(PrettyDict(), sub) + luinfo['semTypes'].append(self.semtype(semtypeinfo.ID)) + + # sort lexemes by 'order' attribute + # otherwise, e.g., 'write down.v' may have lexemes in wrong order + luinfo['lexemes'].sort(key=lambda x: x.order) + + return luinfo + + def _handle_lexunit_elt(self, elt, ignorekeys): + """ + Load full info for a lexical unit from its xml file. + This should only be called when accessing corpus annotations + (which are not included in frame files). 
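+
+        A brief summary of the handling below (descriptive only): "subCorpus"
+        children are parsed into ``luinfo['subCorpus']``, "lexeme" children
+        into ``luinfo['lexemes']``, "semType" references are resolved via
+        ``self.semtype()``, and the "header" and "valences" elements are
+        skipped.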
+ """ + luinfo = self._load_xml_attributes(AttrDict(), elt) + luinfo['_type'] = 'lu' + luinfo['definition'] = "" + luinfo['definitionMarkup'] = "" + luinfo['subCorpus'] = PrettyList() + luinfo['lexemes'] = PrettyList() # multiword LUs have multiple lexemes + luinfo['semTypes'] = PrettyList() # an LU can have multiple semtypes + for k in ignorekeys: + if k in luinfo: + del luinfo[k] + + for sub in elt: + if sub.tag.endswith('header'): + continue # not used + elif sub.tag.endswith('valences'): + continue # not used + elif sub.tag.endswith('definition') and 'definition' not in ignorekeys: + luinfo['definitionMarkup'] = sub.text + luinfo['definition'] = self._strip_tags(sub.text) + elif sub.tag.endswith('subCorpus') and 'subCorpus' not in ignorekeys: + sc = self._handle_lusubcorpus_elt(sub) + if sc is not None: + luinfo['subCorpus'].append(sc) + elif sub.tag.endswith('lexeme') and 'lexeme' not in ignorekeys: + luinfo['lexemes'].append(self._load_xml_attributes(PrettyDict(), sub)) + elif sub.tag.endswith('semType') and 'semType' not in ignorekeys: + semtypeinfo = self._load_xml_attributes(AttrDict(), sub) + luinfo['semTypes'].append(self.semtype(semtypeinfo.ID)) + + return luinfo + + def _handle_lusubcorpus_elt(self, elt): + """Load a subcorpus of a lexical unit from the given xml.""" + sc = AttrDict() + try: + sc['name'] = elt.get('name') + except AttributeError: + return None + sc['_type'] = "lusubcorpus" + sc['sentence'] = [] + + for sub in elt: + if sub.tag.endswith('sentence'): + s = self._handle_lusentence_elt(sub) + if s is not None: + sc['sentence'].append(s) + + return sc + + def _handle_lusentence_elt(self, elt): + """Load a sentence from a subcorpus of an LU from xml.""" + info = self._load_xml_attributes(AttrDict(), elt) + info['_type'] = 'lusentence' + info['annotationSet'] = [] + info['_ascii'] = types.MethodType( + _annotation_ascii, info + ) # attach a method for this instance + for sub in elt: + if sub.tag.endswith('text'): + info['text'] = self._strip_tags(sub.text) + elif sub.tag.endswith('annotationSet'): + annset = self._handle_luannotationset_elt( + sub, is_pos=(len(info['annotationSet']) == 0) + ) + if annset is not None: + assert annset.status == 'UNANN' or 'FE' in annset, annset + if annset.status != 'UNANN': + info['frameAnnotation'] = annset + # copy layer info up to current level + for k in ( + 'Target', + 'FE', + 'FE2', + 'FE3', + 'GF', + 'PT', + 'POS', + 'POS_tagset', + 'Other', + 'Sent', + 'Verb', + 'Noun', + 'Adj', + 'Adv', + 'Prep', + 'Scon', + 'Art', + ): + if k in annset: + info[k] = annset[k] + info['annotationSet'].append(annset) + annset['sent'] = info + annset['text'] = info.text + return info + + def _handle_luannotationset_elt(self, elt, is_pos=False): + """Load an annotation set from a sentence in an subcorpus of an LU""" + info = self._load_xml_attributes(AttrDict(), elt) + info['_type'] = 'posannotationset' if is_pos else 'luannotationset' + info['layer'] = [] + info['_ascii'] = types.MethodType( + _annotation_ascii, info + ) # attach a method for this instance + + if 'cxnID' in info: # ignoring construction annotations for now. 
+ return info + + for sub in elt: + if sub.tag.endswith('layer'): + l = self._handle_lulayer_elt(sub) + if l is not None: + overt = [] + ni = {} # null instantiations + + info['layer'].append(l) + for lbl in l.label: + if 'start' in lbl: + thespan = (lbl.start, lbl.end + 1, lbl.name) + if l.name not in ( + 'Sent', + 'Other', + ): # 'Sent' and 'Other' layers sometimes contain accidental duplicate spans + assert thespan not in overt, (info.ID, l.name, thespan) + overt.append(thespan) + else: # null instantiation + if lbl.name in ni: + self._warn( + 'FE with multiple NI entries:', + lbl.name, + ni[lbl.name], + lbl.itype, + ) + else: + ni[lbl.name] = lbl.itype + overt = sorted(overt) + + if l.name == 'Target': + if not overt: + self._warn( + 'Skipping empty Target layer in annotation set ID={0}'.format( + info.ID + ) + ) + continue + assert all(lblname == 'Target' for i, j, lblname in overt) + if 'Target' in info: + self._warn( + 'Annotation set {0} has multiple Target layers'.format( + info.ID + ) + ) + else: + info['Target'] = [(i, j) for (i, j, _) in overt] + elif l.name == 'FE': + if l.rank == 1: + assert 'FE' not in info + info['FE'] = (overt, ni) + # assert False,info + else: + # sometimes there are 3 FE layers! e.g. Change_position_on_a_scale.fall.v + assert 2 <= l.rank <= 3, l.rank + k = 'FE' + str(l.rank) + assert k not in info + info[k] = (overt, ni) + elif l.name in ('GF', 'PT'): + assert l.rank == 1 + info[l.name] = overt + elif l.name in ('BNC', 'PENN'): + assert l.rank == 1 + info['POS'] = overt + info['POS_tagset'] = l.name + else: + if is_pos: + if l.name not in ('NER', 'WSL'): + self._warn( + 'Unexpected layer in sentence annotationset:', + l.name, + ) + else: + if l.name not in ( + 'Sent', + 'Verb', + 'Noun', + 'Adj', + 'Adv', + 'Prep', + 'Scon', + 'Art', + 'Other', + ): + self._warn( + 'Unexpected layer in frame annotationset:', l.name + ) + info[l.name] = overt + if not is_pos and 'cxnID' not in info: + if 'Target' not in info: + self._warn('Missing target in annotation set ID={0}'.format(info.ID)) + assert 'FE' in info + if 'FE3' in info: + assert 'FE2' in info + + return info + + def _handle_lulayer_elt(self, elt): + """Load a layer from an annotation set""" + layer = self._load_xml_attributes(AttrDict(), elt) + layer['_type'] = 'lulayer' + layer['label'] = [] + + for sub in elt: + if sub.tag.endswith('label'): + l = self._load_xml_attributes(AttrDict(), sub) + if l is not None: + layer['label'].append(l) + return layer + + def _handle_fe_elt(self, elt): + feinfo = self._load_xml_attributes(AttrDict(), elt) + feinfo['_type'] = 'fe' + feinfo['definition'] = "" + feinfo['definitionMarkup'] = "" + feinfo['semType'] = None + feinfo['requiresFE'] = None + feinfo['excludesFE'] = None + for sub in elt: + if sub.tag.endswith('definition'): + feinfo['definitionMarkup'] = sub.text + feinfo['definition'] = self._strip_tags(sub.text) + elif sub.tag.endswith('semType'): + stinfo = self._load_xml_attributes(AttrDict(), sub) + feinfo['semType'] = self.semtype(stinfo.ID) + elif sub.tag.endswith('requiresFE'): + feinfo['requiresFE'] = self._load_xml_attributes(AttrDict(), sub) + elif sub.tag.endswith('excludesFE'): + feinfo['excludesFE'] = self._load_xml_attributes(AttrDict(), sub) + + return feinfo + + def _handle_semtype_elt(self, elt, tagspec=None): + semt = self._load_xml_attributes(AttrDict(), elt) + semt['_type'] = 'semtype' + semt['superType'] = None + semt['subTypes'] = PrettyList() + for sub in elt: + if sub.text is not None: + semt['definitionMarkup'] = sub.text + 
semt['definition'] = self._strip_tags(sub.text) + else: + supertypeinfo = self._load_xml_attributes(AttrDict(), sub) + semt['superType'] = supertypeinfo + # the supertype may not have been loaded yet + + return semt + + +# +# Demo +# +def demo(): + from nltk.corpus import framenet as fn + + # + # It is not necessary to explicitly build the indexes by calling + # buildindexes(). We do this here just for demo purposes. If the + # indexes are not built explicitely, they will be built as needed. + # + print('Building the indexes...') + fn.buildindexes() + + # + # Get some statistics about the corpus + # + print('Number of Frames:', len(fn.frames())) + print('Number of Lexical Units:', len(fn.lus())) + print('Number of annotated documents:', len(fn.docs())) + print() + + # + # Frames + # + print( + 'getting frames whose name matches the (case insensitive) regex: "(?i)medical"' + ) + medframes = fn.frames(r'(?i)medical') + print('Found {0} Frames whose name matches "(?i)medical":'.format(len(medframes))) + print([(f.name, f.ID) for f in medframes]) + + # + # store the first frame in the list of frames + # + tmp_id = medframes[0].ID + m_frame = fn.frame(tmp_id) # reads all info for the frame + + # + # get the frame relations + # + print( + '\nNumber of frame relations for the "{0}" ({1}) frame:'.format( + m_frame.name, m_frame.ID + ), + len(m_frame.frameRelations), + ) + for fr in m_frame.frameRelations: + print(' ', fr) + + # + # get the names of the Frame Elements + # + print( + '\nNumber of Frame Elements in the "{0}" frame:'.format(m_frame.name), + len(m_frame.FE), + ) + print(' ', [x for x in m_frame.FE]) + + # + # get the names of the "Core" Frame Elements + # + print('\nThe "core" Frame Elements in the "{0}" frame:'.format(m_frame.name)) + print(' ', [x.name for x in m_frame.FE.values() if x.coreType == "Core"]) + + # + # get all of the Lexical Units that are incorporated in the + # 'Ailment' FE of the 'Medical_conditions' frame (id=239) + # + print('\nAll Lexical Units that are incorporated in the "Ailment" FE:') + m_frame = fn.frame(239) + ailment_lus = [ + x + for x in m_frame.lexUnit.values() + if 'incorporatedFE' in x and x.incorporatedFE == 'Ailment' + ] + print(' ', [x.name for x in ailment_lus]) + + # + # get all of the Lexical Units for the frame + # + print( + '\nNumber of Lexical Units in the "{0}" frame:'.format(m_frame.name), + len(m_frame.lexUnit), + ) + print(' ', [x.name for x in m_frame.lexUnit.values()][:5], '...') + + # + # get basic info on the second LU in the frame + # + tmp_id = m_frame.lexUnit['ailment.n'].ID # grab the id of the specified LU + luinfo = fn.lu_basic(tmp_id) # get basic info on the LU + print('\nInformation on the LU: {0}'.format(luinfo.name)) + pprint(luinfo) + + # + # Get a list of all of the corpora used for fulltext annotation + # + print('\nNames of all of the corpora used for fulltext annotation:') + allcorpora = set(x.corpname for x in fn.docs_metadata()) + pprint(list(allcorpora)) + + # + # Get the names of the annotated documents in the first corpus + # + firstcorp = list(allcorpora)[0] + firstcorp_docs = fn.docs(firstcorp) + print('\nNames of the annotated documents in the "{0}" corpus:'.format(firstcorp)) + pprint([x.filename for x in firstcorp_docs]) + + # + # Search for frames containing LUs whose name attribute matches a + # regexp pattern. 
+ # + # Note: if you were going to be doing a lot of this type of + # searching, you'd want to build an index that maps from + # lemmas to frames because each time frames_by_lemma() is + # called, it has to search through ALL of the frame XML files + # in the db. + print( + '\nSearching for all Frames that have a lemma that matches the regexp: "^run.v$":' + ) + pprint(fn.frames_by_lemma(r'^run.v$')) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/ieer.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/ieer.py new file mode 100644 index 0000000..1628e9c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/ieer.py @@ -0,0 +1,129 @@ +# Natural Language Toolkit: IEER Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +Corpus reader for the Information Extraction and Entity Recognition Corpus. + +NIST 1999 Information Extraction: Entity Recognition Evaluation +http://www.itl.nist.gov/iad/894.01/tests/ie-er/er_99/er_99.htm + +This corpus contains the NEWSWIRE development test data for the +NIST 1999 IE-ER Evaluation. The files were taken from the +subdirectory: /ie_er_99/english/devtest/newswire/*.ref.nwt +and filenames were shortened. + +The corpus contains the following files: APW_19980314, APW_19980424, +APW_19980429, NYT_19980315, NYT_19980403, and NYT_19980407. +""" +from __future__ import unicode_literals + +from six import string_types + +import nltk +from nltk import compat +from nltk.corpus.reader.api import * + +#: A dictionary whose keys are the names of documents in this corpus; +#: and whose values are descriptions of those documents' contents. +titles = { + 'APW_19980314': 'Associated Press Weekly, 14 March 1998', + 'APW_19980424': 'Associated Press Weekly, 24 April 1998', + 'APW_19980429': 'Associated Press Weekly, 29 April 1998', + 'NYT_19980315': 'New York Times, 15 March 1998', + 'NYT_19980403': 'New York Times, 3 April 1998', + 'NYT_19980407': 'New York Times, 7 April 1998', +} + +#: A list of all documents in this corpus. +documents = sorted(titles) + + +@compat.python_2_unicode_compatible +class IEERDocument(object): + def __init__(self, text, docno=None, doctype=None, date_time=None, headline=''): + self.text = text + self.docno = docno + self.doctype = doctype + self.date_time = date_time + self.headline = headline + + def __repr__(self): + if self.headline: + headline = ' '.join(self.headline.leaves()) + else: + headline = ( + ' '.join([w for w in self.text.leaves() if w[:1] != '<'][:12]) + '...' 
+ ) + if self.docno is not None: + return '' % (self.docno, headline) + else: + return '' % headline + + +class IEERCorpusReader(CorpusReader): + """ + """ + + def raw(self, fileids=None): + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + def docs(self, fileids=None): + return concat( + [ + StreamBackedCorpusView(fileid, self._read_block, encoding=enc) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def parsed_docs(self, fileids=None): + return concat( + [ + StreamBackedCorpusView(fileid, self._read_parsed_block, encoding=enc) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def _read_parsed_block(self, stream): + # TODO: figure out while empty documents are being returned + return [ + self._parse(doc) + for doc in self._read_block(stream) + if self._parse(doc).docno is not None + ] + + def _parse(self, doc): + val = nltk.chunk.ieerstr2tree(doc, root_label="DOCUMENT") + if isinstance(val, dict): + return IEERDocument(**val) + else: + return IEERDocument(val) + + def _read_block(self, stream): + out = [] + # Skip any preamble. + while True: + line = stream.readline() + if not line: + break + if line.strip() == '': + break + out.append(line) + # Read the document + while True: + line = stream.readline() + if not line: + break + out.append(line) + if line.strip() == '': + break + # Return the document + return ['\n'.join(out)] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/indian.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/indian.py new file mode 100644 index 0000000..6f39754 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/indian.py @@ -0,0 +1,103 @@ +# Natural Language Toolkit: Indian Language POS-Tagged Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +Indian Language POS-Tagged Corpus +Collected by A Kumaran, Microsoft Research, India +Distributed with permission + +Contents: + - Bangla: IIT Kharagpur + - Hindi: Microsoft Research India + - Marathi: IIT Bombay + - Telugu: IIIT Hyderabad +""" + +from six import string_types + +from nltk.tag import str2tuple, map_tag + +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + + +class IndianCorpusReader(CorpusReader): + """ + List of words, one per line. Blank lines are ignored. 
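+
+    A short usage sketch (this assumes the corpus data is installed, e.g. via
+    ``nltk.download('indian')``; 'hindi.pos' is one of the distributed files):
+
+        >>> from nltk.corpus import indian
+        >>> indian.words('hindi.pos')[:5]            # doctest: +SKIP
+        >>> indian.tagged_sents('hindi.pos')[0]      # doctest: +SKIP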
+ """ + + def words(self, fileids=None): + return concat( + [ + IndianCorpusView(fileid, enc, False, False) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def tagged_words(self, fileids=None, tagset=None): + if tagset and tagset != self._tagset: + tag_mapping_function = lambda t: map_tag(self._tagset, tagset, t) + else: + tag_mapping_function = None + return concat( + [ + IndianCorpusView(fileid, enc, True, False, tag_mapping_function) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def sents(self, fileids=None): + return concat( + [ + IndianCorpusView(fileid, enc, False, True) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def tagged_sents(self, fileids=None, tagset=None): + if tagset and tagset != self._tagset: + tag_mapping_function = lambda t: map_tag(self._tagset, tagset, t) + else: + tag_mapping_function = None + return concat( + [ + IndianCorpusView(fileid, enc, True, True, tag_mapping_function) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def raw(self, fileids=None): + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + +class IndianCorpusView(StreamBackedCorpusView): + def __init__( + self, corpus_file, encoding, tagged, group_by_sent, tag_mapping_function=None + ): + self._tagged = tagged + self._group_by_sent = group_by_sent + self._tag_mapping_function = tag_mapping_function + StreamBackedCorpusView.__init__(self, corpus_file, encoding=encoding) + + def read_block(self, stream): + line = stream.readline() + if line.startswith('<'): + return [] + sent = [str2tuple(word, sep='_') for word in line.split()] + if self._tag_mapping_function: + sent = [(w, self._tag_mapping_function(t)) for (w, t) in sent] + if not self._tagged: + sent = [w for (w, t) in sent] + if self._group_by_sent: + return [sent] + else: + return sent diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/ipipan.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/ipipan.py new file mode 100644 index 0000000..47c509d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/ipipan.py @@ -0,0 +1,368 @@ +# Natural Language Toolkit: IPI PAN Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Konrad Goluchowski +# URL: +# For license information, see LICENSE.TXT + +import functools + +from six import string_types + +from nltk.corpus.reader.util import StreamBackedCorpusView, concat +from nltk.corpus.reader.api import CorpusReader + + +def _parse_args(fun): + @functools.wraps(fun) + def decorator(self, fileids=None, **kwargs): + kwargs.pop('tags', None) + if not fileids: + fileids = self.fileids() + return fun(self, fileids, **kwargs) + + return decorator + + +class IPIPANCorpusReader(CorpusReader): + """ + Corpus reader designed to work with corpus created by IPI PAN. + See http://korpus.pl/en/ for more details about IPI PAN corpus. + + The corpus includes information about text domain, channel and categories. + You can access possible values using ``domains()``, ``channels()`` and + ``categories()``. You can use also this metadata to filter files, e.g.: + ``fileids(channel='prasa')``, ``fileids(categories='publicystyczny')``. + + The reader supports methods: words, sents, paras and their tagged versions. + You can get part of speech instead of full tag by giving "simplify_tags=True" + parameter, e.g.: ``tagged_sents(simplify_tags=True)``. 
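+
+    For instance, given an ``IPIPANCorpusReader`` instance ``ipipan``, a
+    combined call could look like this (a sketch only; note that ``fileids()``
+    itself takes the plural keyword ``channels``):
+
+        >>> ids = ipipan.fileids(channels='prasa')              # doctest: +SKIP
+        >>> ipipan.tagged_sents(ids, simplify_tags=True)[0]     # doctest: +SKIP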
+ + Also you can get all tags disambiguated tags specifying parameter + "one_tag=False", e.g.: ``tagged_paras(one_tag=False)``. + + You can get all tags that were assigned by a morphological analyzer specifying + parameter "disamb_only=False", e.g. ``tagged_words(disamb_only=False)``. + + The IPIPAN Corpus contains tags indicating if there is a space between two + tokens. To add special "no space" markers, you should specify parameter + "append_no_space=True", e.g. ``tagged_words(append_no_space=True)``. + As a result in place where there should be no space between two tokens new + pair ('', 'no-space') will be inserted (for tagged data) and just '' for + methods without tags. + + The corpus reader can also try to append spaces between words. To enable this + option, specify parameter "append_space=True", e.g. ``words(append_space=True)``. + As a result either ' ' or (' ', 'space') will be inserted between tokens. + + By default, xml entities like " and & are replaced by corresponding + characters. You can turn off this feature, specifying parameter + "replace_xmlentities=False", e.g. ``words(replace_xmlentities=False)``. + """ + + def __init__(self, root, fileids): + CorpusReader.__init__(self, root, fileids, None, None) + + def raw(self, fileids=None): + if not fileids: + fileids = self.fileids() + + filecontents = [] + for fileid in self._list_morph_files(fileids): + with open(fileid, 'r') as infile: + filecontents.append(infile.read()) + return ''.join(filecontents) + + def channels(self, fileids=None): + if not fileids: + fileids = self.fileids() + return self._parse_header(fileids, 'channel') + + def domains(self, fileids=None): + if not fileids: + fileids = self.fileids() + return self._parse_header(fileids, 'domain') + + def categories(self, fileids=None): + if not fileids: + fileids = self.fileids() + return [ + self._map_category(cat) for cat in self._parse_header(fileids, 'keyTerm') + ] + + def fileids(self, channels=None, domains=None, categories=None): + if channels is not None and domains is not None and categories is not None: + raise ValueError( + 'You can specify only one of channels, domains ' + 'and categories parameter at once' + ) + if channels is None and domains is None and categories is None: + return CorpusReader.fileids(self) + if isinstance(channels, string_types): + channels = [channels] + if isinstance(domains, string_types): + domains = [domains] + if isinstance(categories, string_types): + categories = [categories] + if channels: + return self._list_morph_files_by('channel', channels) + elif domains: + return self._list_morph_files_by('domain', domains) + else: + return self._list_morph_files_by( + 'keyTerm', categories, map=self._map_category + ) + + @_parse_args + def sents(self, fileids=None, **kwargs): + return concat( + [ + self._view( + fileid, mode=IPIPANCorpusView.SENTS_MODE, tags=False, **kwargs + ) + for fileid in self._list_morph_files(fileids) + ] + ) + + @_parse_args + def paras(self, fileids=None, **kwargs): + return concat( + [ + self._view( + fileid, mode=IPIPANCorpusView.PARAS_MODE, tags=False, **kwargs + ) + for fileid in self._list_morph_files(fileids) + ] + ) + + @_parse_args + def words(self, fileids=None, **kwargs): + return concat( + [ + self._view(fileid, tags=False, **kwargs) + for fileid in self._list_morph_files(fileids) + ] + ) + + @_parse_args + def tagged_sents(self, fileids=None, **kwargs): + return concat( + [ + self._view(fileid, mode=IPIPANCorpusView.SENTS_MODE, **kwargs) + for fileid in self._list_morph_files(fileids) + ] + 
) + + @_parse_args + def tagged_paras(self, fileids=None, **kwargs): + return concat( + [ + self._view(fileid, mode=IPIPANCorpusView.PARAS_MODE, **kwargs) + for fileid in self._list_morph_files(fileids) + ] + ) + + @_parse_args + def tagged_words(self, fileids=None, **kwargs): + return concat( + [self._view(fileid, **kwargs) for fileid in self._list_morph_files(fileids)] + ) + + def _list_morph_files(self, fileids): + return [f for f in self.abspaths(fileids)] + + def _list_header_files(self, fileids): + return [ + f.replace('morph.xml', 'header.xml') + for f in self._list_morph_files(fileids) + ] + + def _parse_header(self, fileids, tag): + values = set() + for f in self._list_header_files(fileids): + values_list = self._get_tag(f, tag) + for v in values_list: + values.add(v) + return list(values) + + def _list_morph_files_by(self, tag, values, map=None): + fileids = self.fileids() + ret_fileids = set() + for f in fileids: + fp = self.abspath(f).replace('morph.xml', 'header.xml') + values_list = self._get_tag(fp, tag) + for value in values_list: + if map is not None: + value = map(value) + if value in values: + ret_fileids.add(f) + return list(ret_fileids) + + def _get_tag(self, f, tag): + tags = [] + with open(f, 'r') as infile: + header = infile.read() + tag_end = 0 + while True: + tag_pos = header.find('<' + tag, tag_end) + if tag_pos < 0: + return tags + tag_end = header.find('', tag_pos) + tags.append(header[tag_pos + len(tag) + 2 : tag_end]) + + def _map_category(self, cat): + pos = cat.find('>') + if pos == -1: + return cat + else: + return cat[pos + 1 :] + + def _view(self, filename, **kwargs): + tags = kwargs.pop('tags', True) + mode = kwargs.pop('mode', 0) + simplify_tags = kwargs.pop('simplify_tags', False) + one_tag = kwargs.pop('one_tag', True) + disamb_only = kwargs.pop('disamb_only', True) + append_no_space = kwargs.pop('append_no_space', False) + append_space = kwargs.pop('append_space', False) + replace_xmlentities = kwargs.pop('replace_xmlentities', True) + + if len(kwargs) > 0: + raise ValueError('Unexpected arguments: %s' % kwargs.keys()) + if not one_tag and not disamb_only: + raise ValueError( + 'You cannot specify both one_tag=False and ' 'disamb_only=False' + ) + if not tags and (simplify_tags or not one_tag or not disamb_only): + raise ValueError( + 'You cannot specify simplify_tags, one_tag or ' + 'disamb_only with functions other than tagged_*' + ) + + return IPIPANCorpusView( + filename, + tags=tags, + mode=mode, + simplify_tags=simplify_tags, + one_tag=one_tag, + disamb_only=disamb_only, + append_no_space=append_no_space, + append_space=append_space, + replace_xmlentities=replace_xmlentities, + ) + + +class IPIPANCorpusView(StreamBackedCorpusView): + + WORDS_MODE = 0 + SENTS_MODE = 1 + PARAS_MODE = 2 + + def __init__(self, filename, startpos=0, **kwargs): + StreamBackedCorpusView.__init__(self, filename, None, startpos, None) + self.in_sentence = False + self.position = 0 + + self.show_tags = kwargs.pop('tags', True) + self.disamb_only = kwargs.pop('disamb_only', True) + self.mode = kwargs.pop('mode', IPIPANCorpusView.WORDS_MODE) + self.simplify_tags = kwargs.pop('simplify_tags', False) + self.one_tag = kwargs.pop('one_tag', True) + self.append_no_space = kwargs.pop('append_no_space', False) + self.append_space = kwargs.pop('append_space', False) + self.replace_xmlentities = kwargs.pop('replace_xmlentities', True) + + def read_block(self, stream): + sentence = [] + sentences = [] + space = False + no_space = False + + tags = set() + + lines = 
self._read_data(stream) + + while True: + + # we may have only part of last line + if len(lines) <= 1: + self._seek(stream) + lines = self._read_data(stream) + + if lines == ['']: + assert not sentences + return [] + + line = lines.pop() + self.position += len(line) + 1 + + if line.startswith(''): + if self.append_space: + no_space = True + if self.append_no_space: + if self.show_tags: + sentence.append(('', 'no-space')) + else: + sentence.append('') + elif line.startswith(' +# URL: +# For license information, see LICENSE.TXT + +# For more information, see http://lilyx.net/pages/nltkjapanesecorpus.html +from __future__ import print_function + +import re +from six import string_types + +from nltk.parse import DependencyGraph + +from nltk.corpus.reader.util import ( + FileSystemPathPointer, + find_corpus_fileids, + read_blankline_block, +) +from nltk.corpus.reader.api import SyntaxCorpusReader, CorpusReader + +# default function to convert morphlist to str for tree representation +_morphs2str_default = lambda morphs: '/'.join(m[0] for m in morphs if m[0] != 'EOS') + + +class KNBCorpusReader(SyntaxCorpusReader): + """ + This class implements: + - ``__init__``, which specifies the location of the corpus + and a method for detecting the sentence blocks in corpus files. + - ``_read_block``, which reads a block from the input stream. + - ``_word``, which takes a block and returns a list of list of words. + - ``_tag``, which takes a block and returns a list of list of tagged + words. + - ``_parse``, which takes a block and returns a list of parsed + sentences. + + The structure of tagged words: + tagged_word = (word(str), tags(tuple)) + tags = (surface, reading, lemma, pos1, posid1, pos2, posid2, pos3, posid3, others ...) + + Usage example + ------------- + + >>> from nltk.corpus.util import LazyCorpusLoader + >>> knbc = LazyCorpusLoader( + ... 'knbc/corpus1', + ... KNBCorpusReader, + ... r'.*/KN.*', + ... encoding='euc-jp', + ... ) + + >>> len(knbc.sents()[0]) + 9 + + """ + + def __init__(self, root, fileids, encoding='utf8', morphs2str=_morphs2str_default): + """ + Initialize KNBCorpusReader + morphs2str is a function to convert morphlist to str for tree representation + for _parse() + """ + # FIXME: Why is it inheritting from SyntaxCorpusReader but initializing + # from CorpusReader? 
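+        # (CorpusReader.__init__ just records root, fileids and encoding;
+        # SyntaxCorpusReader does not appear to define its own __init__, so
+        # calling either should be equivalent here -- kept as in upstream.)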
+ CorpusReader.__init__(self, root, fileids, encoding) + self.morphs2str = morphs2str + + def _read_block(self, stream): + # blocks are split by blankline (or EOF) - default + return read_blankline_block(stream) + + def _word(self, t): + res = [] + for line in t.splitlines(): + # ignore the Bunsets headers + if not re.match(r"EOS|\*|\#|\+", line): + cells = line.strip().split(" ") + res.append(cells[0]) + + return res + + # ignores tagset argument + def _tag(self, t, tagset=None): + res = [] + for line in t.splitlines(): + # ignore the Bunsets headers + if not re.match(r"EOS|\*|\#|\+", line): + cells = line.strip().split(" ") + # convert cells to morph tuples + res.append((cells[0], ' '.join(cells[1:]))) + + return res + + def _parse(self, t): + dg = DependencyGraph() + i = 0 + for line in t.splitlines(): + if line[0] in '*+': + # start of bunsetsu or tag + + cells = line.strip().split(" ", 3) + m = re.match(r"([\-0-9]*)([ADIP])", cells[1]) + + assert m is not None + + node = dg.nodes[i] + node.update({'address': i, 'rel': m.group(2), 'word': []}) + + dep_parent = int(m.group(1)) + + if dep_parent == -1: + dg.root = node + else: + dg.nodes[dep_parent]['deps'].append(i) + + i += 1 + elif line[0] != '#': + # normal morph + cells = line.strip().split(" ") + # convert cells to morph tuples + morph = cells[0], ' '.join(cells[1:]) + dg.nodes[i - 1]['word'].append(morph) + + if self.morphs2str: + for node in dg.nodes.values(): + node['word'] = self.morphs2str(node['word']) + + return dg.tree() + + +###################################################################### +# Demo +###################################################################### + + +def demo(): + + import nltk + from nltk.corpus.util import LazyCorpusLoader + + root = nltk.data.find('corpora/knbc/corpus1') + fileids = [ + f + for f in find_corpus_fileids(FileSystemPathPointer(root), ".*") + if re.search(r"\d\-\d\-[\d]+\-[\d]+", f) + ] + + def _knbc_fileids_sort(x): + cells = x.split('-') + return (cells[0], int(cells[1]), int(cells[2]), int(cells[3])) + + knbc = LazyCorpusLoader( + 'knbc/corpus1', + KNBCorpusReader, + sorted(fileids, key=_knbc_fileids_sort), + encoding='euc-jp', + ) + + print(knbc.fileids()[:10]) + print(''.join(knbc.words()[:100])) + + print('\n\n'.join(str(tree) for tree in knbc.parsed_sents()[:2])) + + knbc.morphs2str = lambda morphs: '/'.join( + "%s(%s)" % (m[0], m[1].split(' ')[2]) for m in morphs if m[0] != 'EOS' + ).encode('utf-8') + + print('\n\n'.join('%s' % tree for tree in knbc.parsed_sents()[:2])) + + print( + '\n'.join( + ' '.join("%s/%s" % (w[0], w[1].split(' ')[2]) for w in sent) + for sent in knbc.tagged_sents()[0:2] + ) + ) + + +def test(): + + from nltk.corpus.util import LazyCorpusLoader + + knbc = LazyCorpusLoader( + 'knbc/corpus1', KNBCorpusReader, r'.*/KN.*', encoding='euc-jp' + ) + assert isinstance(knbc.words()[0], string_types) + assert isinstance(knbc.sents()[0][0], string_types) + assert isinstance(knbc.tagged_words()[0], tuple) + assert isinstance(knbc.tagged_sents()[0][0], tuple) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/lin.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/lin.py new file mode 100644 index 0000000..493b1b0 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/lin.py @@ -0,0 +1,184 @@ +# Natural Language Toolkit: Lin's Thesaurus +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Dan Blanchard +# URL: +# For license information, see LICENSE.txt +from __future__ 
import print_function + +import re +from collections import defaultdict +from functools import reduce + +from nltk.corpus.reader import CorpusReader + + +class LinThesaurusCorpusReader(CorpusReader): + """ Wrapper for the LISP-formatted thesauruses distributed by Dekang Lin. """ + + # Compiled regular expression for extracting the key from the first line of each + # thesaurus entry + _key_re = re.compile(r'\("?([^"]+)"? \(desc [0-9.]+\).+') + + @staticmethod + def __defaultdict_factory(): + ''' Factory for creating defaultdict of defaultdict(dict)s ''' + return defaultdict(dict) + + def __init__(self, root, badscore=0.0): + ''' + Initialize the thesaurus. + + :param root: root directory containing thesaurus LISP files + :type root: C{string} + :param badscore: the score to give to words which do not appear in each other's sets of synonyms + :type badscore: C{float} + ''' + + super(LinThesaurusCorpusReader, self).__init__(root, r'sim[A-Z]\.lsp') + self._thesaurus = defaultdict(LinThesaurusCorpusReader.__defaultdict_factory) + self._badscore = badscore + for path, encoding, fileid in self.abspaths( + include_encoding=True, include_fileid=True + ): + with open(path) as lin_file: + first = True + for line in lin_file: + line = line.strip() + # Start of entry + if first: + key = LinThesaurusCorpusReader._key_re.sub(r'\1', line) + first = False + # End of entry + elif line == '))': + first = True + # Lines with pairs of ngrams and scores + else: + split_line = line.split('\t') + if len(split_line) == 2: + ngram, score = split_line + self._thesaurus[fileid][key][ngram.strip('"')] = float( + score + ) + + def similarity(self, ngram1, ngram2, fileid=None): + ''' + Returns the similarity score for two ngrams. + + :param ngram1: first ngram to compare + :type ngram1: C{string} + :param ngram2: second ngram to compare + :type ngram2: C{string} + :param fileid: thesaurus fileid to search in. If None, search all fileids. + :type fileid: C{string} + :return: If fileid is specified, just the score for the two ngrams; otherwise, + list of tuples of fileids and scores. + ''' + # Entries don't contain themselves, so make sure similarity between item and itself is 1.0 + if ngram1 == ngram2: + if fileid: + return 1.0 + else: + return [(fid, 1.0) for fid in self._fileids] + else: + if fileid: + return ( + self._thesaurus[fileid][ngram1][ngram2] + if ngram2 in self._thesaurus[fileid][ngram1] + else self._badscore + ) + else: + return [ + ( + fid, + ( + self._thesaurus[fid][ngram1][ngram2] + if ngram2 in self._thesaurus[fid][ngram1] + else self._badscore + ), + ) + for fid in self._fileids + ] + + def scored_synonyms(self, ngram, fileid=None): + ''' + Returns a list of scored synonyms (tuples of synonyms and scores) for the current ngram + + :param ngram: ngram to lookup + :type ngram: C{string} + :param fileid: thesaurus fileid to search in. If None, search all fileids. + :type fileid: C{string} + :return: If fileid is specified, list of tuples of scores and synonyms; otherwise, + list of tuples of fileids and lists, where inner lists consist of tuples of + scores and synonyms. + ''' + if fileid: + return self._thesaurus[fileid][ngram].items() + else: + return [ + (fileid, self._thesaurus[fileid][ngram].items()) + for fileid in self._fileids + ] + + def synonyms(self, ngram, fileid=None): + ''' + Returns a list of synonyms for the current ngram. + + :param ngram: ngram to lookup + :type ngram: C{string} + :param fileid: thesaurus fileid to search in. If None, search all fileids. 
+ :type fileid: C{string} + :return: If fileid is specified, list of synonyms; otherwise, list of tuples of fileids and + lists, where inner lists contain synonyms. + ''' + if fileid: + return self._thesaurus[fileid][ngram].keys() + else: + return [ + (fileid, self._thesaurus[fileid][ngram].keys()) + for fileid in self._fileids + ] + + def __contains__(self, ngram): + ''' + Determines whether or not the given ngram is in the thesaurus. + + :param ngram: ngram to lookup + :type ngram: C{string} + :return: whether the given ngram is in the thesaurus. + ''' + return reduce( + lambda accum, fileid: accum or (ngram in self._thesaurus[fileid]), + self._fileids, + False, + ) + + +###################################################################### +# Demo +###################################################################### + + +def demo(): + from nltk.corpus import lin_thesaurus as thes + + word1 = "business" + word2 = "enterprise" + print("Getting synonyms for " + word1) + print(thes.synonyms(word1)) + + print("Getting scored synonyms for " + word1) + print(thes.scored_synonyms(word1)) + + print("Getting synonyms from simN.lsp (noun subsection) for " + word1) + print(thes.synonyms(word1, fileid="simN.lsp")) + + print("Getting synonyms from simN.lsp (noun subsection) for " + word1) + print(thes.synonyms(word1, fileid="simN.lsp")) + + print("Similarity score for %s and %s:" % (word1, word2)) + print(thes.similarity(word1, word2)) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/mte.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/mte.py new file mode 100644 index 0000000..4198d3f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/mte.py @@ -0,0 +1,414 @@ +""" +A reader for corpora whose documents are in MTE format. +""" +import os +import re +from functools import reduce + +from six import string_types + +from nltk.corpus.reader import concat, TaggedCorpusReader +from nltk.corpus.reader.xmldocs import XMLCorpusView + + +def xpath(root, path, ns): + return root.findall(path, ns) + + +class MTECorpusView(XMLCorpusView): + """ + Class for lazy viewing the MTE Corpus. + """ + + def __init__(self, fileid, tagspec, elt_handler=None): + XMLCorpusView.__init__(self, fileid, tagspec, elt_handler) + + def read_block(self, stream, tagspec=None, elt_handler=None): + return list( + filter( + lambda x: x is not None, + XMLCorpusView.read_block(self, stream, tagspec, elt_handler), + ) + ) + + +class MTEFileReader: + """ + Class for loading the content of the multext-east corpus. It + parses the xml files and does some tag-filtering depending on the + given method parameters. 
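+
+    A rough usage sketch (the path is a placeholder; ``tagged_words`` takes a
+    tagset name plus an MSD tag prefix used as a filter, '' meaning no filter):
+
+        >>> r = MTEFileReader('/path/to/oana-en.xml')      # doctest: +SKIP
+        >>> r.words()[:10]                                 # doctest: +SKIP
+        >>> r.tagged_words('universal', '')[:10]           # doctest: +SKIP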
+ """ + + ns = { + 'tei': 'http://www.tei-c.org/ns/1.0', + 'xml': 'http://www.w3.org/XML/1998/namespace', + } + tag_ns = '{http://www.tei-c.org/ns/1.0}' + xml_ns = '{http://www.w3.org/XML/1998/namespace}' + word_path = "TEI/text/body/div/div/p/s/(w|c)" + sent_path = "TEI/text/body/div/div/p/s" + para_path = "TEI/text/body/div/div/p" + + def __init__(self, file_path): + self.__file_path = file_path + + @classmethod + def _word_elt(cls, elt, context): + return elt.text + + @classmethod + def _sent_elt(cls, elt, context): + return [cls._word_elt(w, None) for w in xpath(elt, '*', cls.ns)] + + @classmethod + def _para_elt(cls, elt, context): + return [cls._sent_elt(s, None) for s in xpath(elt, '*', cls.ns)] + + @classmethod + def _tagged_word_elt(cls, elt, context): + if 'ana' not in elt.attrib: + return (elt.text, '') + + if cls.__tags == "" and cls.__tagset == "msd": + return (elt.text, elt.attrib['ana']) + elif cls.__tags == "" and cls.__tagset == "universal": + return (elt.text, MTETagConverter.msd_to_universal(elt.attrib['ana'])) + else: + tags = re.compile('^' + re.sub("-", ".", cls.__tags) + '.*$') + if tags.match(elt.attrib['ana']): + if cls.__tagset == "msd": + return (elt.text, elt.attrib['ana']) + else: + return ( + elt.text, + MTETagConverter.msd_to_universal(elt.attrib['ana']), + ) + else: + return None + + @classmethod + def _tagged_sent_elt(cls, elt, context): + return list( + filter( + lambda x: x is not None, + [cls._tagged_word_elt(w, None) for w in xpath(elt, '*', cls.ns)], + ) + ) + + @classmethod + def _tagged_para_elt(cls, elt, context): + return list( + filter( + lambda x: x is not None, + [cls._tagged_sent_elt(s, None) for s in xpath(elt, '*', cls.ns)], + ) + ) + + @classmethod + def _lemma_word_elt(cls, elt, context): + if 'lemma' not in elt.attrib: + return (elt.text, '') + else: + return (elt.text, elt.attrib['lemma']) + + @classmethod + def _lemma_sent_elt(cls, elt, context): + return [cls._lemma_word_elt(w, None) for w in xpath(elt, '*', cls.ns)] + + @classmethod + def _lemma_para_elt(cls, elt, context): + return [cls._lemma_sent_elt(s, None) for s in xpath(elt, '*', cls.ns)] + + def words(self): + return MTECorpusView( + self.__file_path, MTEFileReader.word_path, MTEFileReader._word_elt + ) + + def sents(self): + return MTECorpusView( + self.__file_path, MTEFileReader.sent_path, MTEFileReader._sent_elt + ) + + def paras(self): + return MTECorpusView( + self.__file_path, MTEFileReader.para_path, MTEFileReader._para_elt + ) + + def lemma_words(self): + return MTECorpusView( + self.__file_path, MTEFileReader.word_path, MTEFileReader._lemma_word_elt + ) + + def tagged_words(self, tagset, tags): + MTEFileReader.__tagset = tagset + MTEFileReader.__tags = tags + return MTECorpusView( + self.__file_path, MTEFileReader.word_path, MTEFileReader._tagged_word_elt + ) + + def lemma_sents(self): + return MTECorpusView( + self.__file_path, MTEFileReader.sent_path, MTEFileReader._lemma_sent_elt + ) + + def tagged_sents(self, tagset, tags): + MTEFileReader.__tagset = tagset + MTEFileReader.__tags = tags + return MTECorpusView( + self.__file_path, MTEFileReader.sent_path, MTEFileReader._tagged_sent_elt + ) + + def lemma_paras(self): + return MTECorpusView( + self.__file_path, MTEFileReader.para_path, MTEFileReader._lemma_para_elt + ) + + def tagged_paras(self, tagset, tags): + MTEFileReader.__tagset = tagset + MTEFileReader.__tags = tags + return MTECorpusView( + self.__file_path, MTEFileReader.para_path, MTEFileReader._tagged_para_elt + ) + + +class MTETagConverter: + """ + Class 
for converting msd tags to universal tags, more conversion + options are currently not implemented. + """ + + mapping_msd_universal = { + 'A': 'ADJ', + 'S': 'ADP', + 'R': 'ADV', + 'C': 'CONJ', + 'D': 'DET', + 'N': 'NOUN', + 'M': 'NUM', + 'Q': 'PRT', + 'P': 'PRON', + 'V': 'VERB', + '.': '.', + '-': 'X', + } + + @staticmethod + def msd_to_universal(tag): + """ + This function converts the annotation from the Multex-East to the universal tagset + as described in Chapter 5 of the NLTK-Book + + Unknown Tags will be mapped to X. Punctuation marks are not supported in MSD tags, so + """ + indicator = tag[0] if not tag[0] == "#" else tag[1] + + if not indicator in MTETagConverter.mapping_msd_universal: + indicator = '-' + + return MTETagConverter.mapping_msd_universal[indicator] + + +class MTECorpusReader(TaggedCorpusReader): + """ + Reader for corpora following the TEI-p5 xml scheme, such as MULTEXT-East. + MULTEXT-East contains part-of-speech-tagged words with a quite precise tagging + scheme. These tags can be converted to the Universal tagset + """ + + def __init__(self, root=None, fileids=None, encoding='utf8'): + """ + Construct a new MTECorpusreader for a set of documents + located at the given root directory. Example usage: + + >>> root = '/...path to corpus.../' + >>> reader = MTECorpusReader(root, 'oana-*.xml', 'utf8') # doctest: +SKIP + + :param root: The root directory for this corpus. (default points to location in multext config file) + :param fileids: A list or regexp specifying the fileids in this corpus. (default is oana-en.xml) + :param enconding: The encoding of the given files (default is utf8) + """ + TaggedCorpusReader.__init__(self, root, fileids, encoding) + + def __fileids(self, fileids): + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + # filter wrong userinput + fileids = filter(lambda x: x in self._fileids, fileids) + # filter multext-east sourcefiles that are not compatible to the teip5 specification + fileids = filter(lambda x: x not in ["oana-bg.xml", "oana-mk.xml"], fileids) + if not fileids: + print("No valid multext-east file specified") + return fileids + + def readme(self): + """ + Prints some information about this corpus. + :return: the content of the attached README file + :rtype: str + """ + return self.open("00README.txt").read() + + def raw(self, fileids=None): + """ + :param fileids: A list specifying the fileids that should be used. + :return: the given file(s) as a single string. + :rtype: str + """ + return reduce([self.open(f).read() for f in self.__fileids(fileids)], []) + + def words(self, fileids=None): + """ + :param fileids: A list specifying the fileids that should be used. + :return: the given file(s) as a list of words and punctuation symbols. + :rtype: list(str) + """ + return concat( + [ + MTEFileReader(os.path.join(self._root, f)).words() + for f in self.__fileids(fileids) + ] + ) + + def sents(self, fileids=None): + """ + :param fileids: A list specifying the fileids that should be used. + :return: the given file(s) as a list of sentences or utterances, + each encoded as a list of word strings + :rtype: list(list(str)) + """ + return concat( + [ + MTEFileReader(os.path.join(self._root, f)).sents() + for f in self.__fileids(fileids) + ] + ) + + def paras(self, fileids=None): + """ + :param fileids: A list specifying the fileids that should be used. 
+ :return: the given file(s) as a list of paragraphs, each encoded as a list + of sentences, which are in turn encoded as lists of word string + :rtype: list(list(list(str))) + """ + return concat( + [ + MTEFileReader(os.path.join(self._root, f)).paras() + for f in self.__fileids(fileids) + ] + ) + + def lemma_words(self, fileids=None): + """ + :param fileids: A list specifying the fileids that should be used. + :return: the given file(s) as a list of words, the corresponding lemmas + and punctuation symbols, encoded as tuples (word, lemma) + :rtype: list(tuple(str,str)) + """ + return concat( + [ + MTEFileReader(os.path.join(self._root, f)).lemma_words() + for f in self.__fileids(fileids) + ] + ) + + def tagged_words(self, fileids=None, tagset="msd", tags=""): + """ + :param fileids: A list specifying the fileids that should be used. + :param tagset: The tagset that should be used in the returned object, + either "universal" or "msd", "msd" is the default + :param tags: An MSD Tag that is used to filter all parts of the used corpus + that are not more precise or at least equal to the given tag + :return: the given file(s) as a list of tagged words and punctuation symbols + encoded as tuples (word, tag) + :rtype: list(tuple(str, str)) + """ + if tagset == "universal" or tagset == "msd": + return concat( + [ + MTEFileReader(os.path.join(self._root, f)).tagged_words( + tagset, tags + ) + for f in self.__fileids(fileids) + ] + ) + else: + print("Unknown tagset specified.") + + def lemma_sents(self, fileids=None): + """ + :param fileids: A list specifying the fileids that should be used. + :return: the given file(s) as a list of sentences or utterances, each + encoded as a list of tuples of the word and the corresponding + lemma (word, lemma) + :rtype: list(list(tuple(str, str))) + """ + return concat( + [ + MTEFileReader(os.path.join(self._root, f)).lemma_sents() + for f in self.__fileids(fileids) + ] + ) + + def tagged_sents(self, fileids=None, tagset="msd", tags=""): + """ + :param fileids: A list specifying the fileids that should be used. + :param tagset: The tagset that should be used in the returned object, + either "universal" or "msd", "msd" is the default + :param tags: An MSD Tag that is used to filter all parts of the used corpus + that are not more precise or at least equal to the given tag + :return: the given file(s) as a list of sentences or utterances, each + each encoded as a list of (word,tag) tuples + :rtype: list(list(tuple(str, str))) + """ + if tagset == "universal" or tagset == "msd": + return concat( + [ + MTEFileReader(os.path.join(self._root, f)).tagged_sents( + tagset, tags + ) + for f in self.__fileids(fileids) + ] + ) + else: + print("Unknown tagset specified.") + + def lemma_paras(self, fileids=None): + """ + :param fileids: A list specifying the fileids that should be used. + :return: the given file(s) as a list of paragraphs, each encoded as a + list of sentences, which are in turn encoded as a list of + tuples of the word and the corresponding lemma (word, lemma) + :rtype: list(List(List(tuple(str, str)))) + """ + return concat( + [ + MTEFileReader(os.path.join(self._root, f)).lemma_paras() + for f in self.__fileids(fileids) + ] + ) + + def tagged_paras(self, fileids=None, tagset="msd", tags=""): + """ + :param fileids: A list specifying the fileids that should be used. 
+ :param tagset: The tagset that should be used in the returned object, + either "universal" or "msd", "msd" is the default + :param tags: An MSD Tag that is used to filter all parts of the used corpus + that are not more precise or at least equal to the given tag + :return: the given file(s) as a list of paragraphs, each encoded as a + list of sentences, which are in turn encoded as a list + of (word,tag) tuples + :rtype: list(list(list(tuple(str, str)))) + """ + if tagset == "universal" or tagset == "msd": + return concat( + [ + MTEFileReader(os.path.join(self._root, f)).tagged_paras( + tagset, tags + ) + for f in self.__fileids(fileids) + ] + ) + else: + print("Unknown tagset specified.") diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/nkjp.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/nkjp.py new file mode 100644 index 0000000..aea84b0 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/nkjp.py @@ -0,0 +1,489 @@ +# Natural Language Toolkit: NKJP Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Gabriela Kaczka +# URL: +# For license information, see LICENSE.TXT + +import functools +import os +import re +import tempfile + +from six import string_types + +from nltk.corpus.reader.util import concat +from nltk.corpus.reader.xmldocs import XMLCorpusReader, XMLCorpusView + + +def _parse_args(fun): + """ + Wraps function arguments: + if fileids not specified then function set NKJPCorpusReader paths. + """ + + @functools.wraps(fun) + def decorator(self, fileids=None, **kwargs): + if not fileids: + fileids = self._paths + return fun(self, fileids, **kwargs) + + return decorator + + +class NKJPCorpusReader(XMLCorpusReader): + WORDS_MODE = 0 + SENTS_MODE = 1 + HEADER_MODE = 2 + RAW_MODE = 3 + + def __init__(self, root, fileids='.*'): + """ + Corpus reader designed to work with National Corpus of Polish. + See http://nkjp.pl/ for more details about NKJP. + use example: + import nltk + import nkjp + from nkjp import NKJPCorpusReader + x = NKJPCorpusReader(root='/home/USER/nltk_data/corpora/nkjp/', fileids='') # obtain the whole corpus + x.header() + x.raw() + x.words() + x.tagged_words(tags=['subst', 'comp']) #Link to find more tags: nkjp.pl/poliqarp/help/ense2.html + x.sents() + x = NKJPCorpusReader(root='/home/USER/nltk_data/corpora/nkjp/', fileids='Wilk*') # obtain particular file(s) + x.header(fileids=['WilkDom', '/home/USER/nltk_data/corpora/nkjp/WilkWilczy']) + x.tagged_words(fileids=['WilkDom', '/home/USER/nltk_data/corpora/nkjp/WilkWilczy'], tags=['subst', 'comp']) + """ + if isinstance(fileids, string_types): + XMLCorpusReader.__init__(self, root, fileids + '.*/header.xml') + else: + XMLCorpusReader.__init__( + self, root, [fileid + '/header.xml' for fileid in fileids] + ) + self._paths = self.get_paths() + + def get_paths(self): + return [ + os.path.join(str(self._root), f.split("header.xml")[0]) + for f in self._fileids + ] + + def fileids(self): + """ + Returns a list of file identifiers for the fileids that make up + this corpus. + """ + return [f.split("header.xml")[0] for f in self._fileids] + + def _view(self, filename, tags=None, **kwargs): + """ + Returns a view specialised for use with particular corpus file. 
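+
+        The view class is chosen by the ``mode`` keyword (WORDS_MODE,
+        SENTS_MODE, HEADER_MODE or RAW_MODE; WORDS_MODE is the default).
+        For example, ``sents()`` effectively does, for each fileid:
+
+            self._view(self.add_root(fileid),
+                       mode=NKJPCorpusReader.SENTS_MODE).handle_query()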
+ """ + mode = kwargs.pop('mode', NKJPCorpusReader.WORDS_MODE) + if mode is NKJPCorpusReader.WORDS_MODE: + return NKJPCorpus_Morph_View(filename, tags=tags) + elif mode is NKJPCorpusReader.SENTS_MODE: + return NKJPCorpus_Segmentation_View(filename, tags=tags) + elif mode is NKJPCorpusReader.HEADER_MODE: + return NKJPCorpus_Header_View(filename, tags=tags) + elif mode is NKJPCorpusReader.RAW_MODE: + return NKJPCorpus_Text_View( + filename, tags=tags, mode=NKJPCorpus_Text_View.RAW_MODE + ) + + else: + raise NameError('No such mode!') + + def add_root(self, fileid): + """ + Add root if necessary to specified fileid. + """ + if self.root in fileid: + return fileid + return self.root + fileid + + @_parse_args + def header(self, fileids=None, **kwargs): + """ + Returns header(s) of specified fileids. + """ + return concat( + [ + self._view( + self.add_root(fileid), mode=NKJPCorpusReader.HEADER_MODE, **kwargs + ).handle_query() + for fileid in fileids + ] + ) + + @_parse_args + def sents(self, fileids=None, **kwargs): + """ + Returns sentences in specified fileids. + """ + return concat( + [ + self._view( + self.add_root(fileid), mode=NKJPCorpusReader.SENTS_MODE, **kwargs + ).handle_query() + for fileid in fileids + ] + ) + + @_parse_args + def words(self, fileids=None, **kwargs): + """ + Returns words in specified fileids. + """ + + return concat( + [ + self._view( + self.add_root(fileid), mode=NKJPCorpusReader.WORDS_MODE, **kwargs + ).handle_query() + for fileid in fileids + ] + ) + + @_parse_args + def tagged_words(self, fileids=None, **kwargs): + """ + Call with specified tags as a list, e.g. tags=['subst', 'comp']. + Returns tagged words in specified fileids. + """ + tags = kwargs.pop('tags', []) + return concat( + [ + self._view( + self.add_root(fileid), + mode=NKJPCorpusReader.WORDS_MODE, + tags=tags, + **kwargs + ).handle_query() + for fileid in fileids + ] + ) + + @_parse_args + def raw(self, fileids=None, **kwargs): + """ + Returns words in specified fileids. + """ + return concat( + [ + self._view( + self.add_root(fileid), mode=NKJPCorpusReader.RAW_MODE, **kwargs + ).handle_query() + for fileid in fileids + ] + ) + + +class NKJPCorpus_Header_View(XMLCorpusView): + def __init__(self, filename, **kwargs): + """ + HEADER_MODE + A stream backed corpus view specialized for use with + header.xml files in NKJP corpus. 
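+
+        ``handle_query()`` returns one dict per header, with the keys filled
+        in by ``handle_elt`` below: 'title', 'author', 'date', 'publisher',
+        'idno' and 'note'.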
+ """ + self.tagspec = ".*/sourceDesc$" + XMLCorpusView.__init__(self, filename + 'header.xml', self.tagspec) + + def handle_query(self): + self._open() + header = [] + while True: + segm = XMLCorpusView.read_block(self, self._stream) + if len(segm) == 0: + break + header.extend(segm) + self.close() + return header + + def handle_elt(self, elt, context): + titles = elt.findall('bibl/title') + title = [] + if titles: + title = '\n'.join(title.text.strip() for title in titles) + + authors = elt.findall('bibl/author') + author = [] + if authors: + author = '\n'.join(author.text.strip() for author in authors) + + dates = elt.findall('bibl/date') + date = [] + if dates: + date = '\n'.join(date.text.strip() for date in dates) + + publishers = elt.findall('bibl/publisher') + publisher = [] + if publishers: + publisher = '\n'.join(publisher.text.strip() for publisher in publishers) + + idnos = elt.findall('bibl/idno') + idno = [] + if idnos: + idno = '\n'.join(idno.text.strip() for idno in idnos) + + notes = elt.findall('bibl/note') + note = [] + if notes: + note = '\n'.join(note.text.strip() for note in notes) + + return { + 'title': title, + 'author': author, + 'date': date, + 'publisher': publisher, + 'idno': idno, + 'note': note, + } + + +class XML_Tool: + """ + Helper class creating xml file to one without references to nkjp: namespace. + That's needed because the XMLCorpusView assumes that one can find short substrings + of XML that are valid XML, which is not true if a namespace is declared at top level + """ + + def __init__(self, root, filename): + self.read_file = os.path.join(root, filename) + self.write_file = tempfile.NamedTemporaryFile(delete=False) + + def build_preprocessed_file(self): + try: + fr = open(self.read_file, 'r') + fw = self.write_file + line = ' ' + while len(line): + line = fr.readline() + x = re.split(r'nkjp:[^ ]* ', line) # in all files + ret = ' '.join(x) + x = re.split('', ret) # in ann_segmentation.xml + ret = ' '.join(x) + x = re.split('', ret) # in ann_segmentation.xml + ret = ' '.join(x) + x = re.split('', ret) # in ann_segmentation.xml + ret = ' '.join(x) + x = re.split('', ret) # in ann_segmentation.xml + ret = ' '.join(x) + fw.write(ret) + fr.close() + fw.close() + return self.write_file.name + except Exception: + self.remove_preprocessed_file() + raise Exception + + def remove_preprocessed_file(self): + os.remove(self.write_file.name) + + +class NKJPCorpus_Segmentation_View(XMLCorpusView): + """ + A stream backed corpus view specialized for use with + ann_segmentation.xml files in NKJP corpus. 
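+
+    Sentence boundaries in ann_segmentation.xml are given as character spans
+    into text.xml, so this view also builds an NKJPCorpus_Text_View and slices
+    its text segments to reconstruct each sentence (see ``get_sentences``).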
+ """ + + def __init__(self, filename, **kwargs): + self.tagspec = '.*p/.*s' + # intersperse NKJPCorpus_Text_View + self.text_view = NKJPCorpus_Text_View( + filename, mode=NKJPCorpus_Text_View.SENTS_MODE + ) + self.text_view.handle_query() + # xml preprocessing + self.xml_tool = XML_Tool(filename, 'ann_segmentation.xml') + # base class init + XMLCorpusView.__init__( + self, self.xml_tool.build_preprocessed_file(), self.tagspec + ) + + def get_segm_id(self, example_word): + return example_word.split('(')[1].split(',')[0] + + def get_sent_beg(self, beg_word): + # returns index of beginning letter in sentence + return int(beg_word.split(',')[1]) + + def get_sent_end(self, end_word): + # returns index of end letter in sentence + splitted = end_word.split(')')[0].split(',') + return int(splitted[1]) + int(splitted[2]) + + def get_sentences(self, sent_segm): + # returns one sentence + id = self.get_segm_id(sent_segm[0]) + segm = self.text_view.segm_dict[id] # text segment + beg = self.get_sent_beg(sent_segm[0]) + end = self.get_sent_end(sent_segm[len(sent_segm) - 1]) + return segm[beg:end] + + def remove_choice(self, segm): + ret = [] + prev_txt_end = -1 + prev_txt_nr = -1 + for word in segm: + txt_nr = self.get_segm_id(word) + # get increasing sequence of ids: in case of choice get first possibility + if self.get_sent_beg(word) > prev_txt_end - 1 or prev_txt_nr != txt_nr: + ret.append(word) + prev_txt_end = self.get_sent_end(word) + prev_txt_nr = txt_nr + + return ret + + def handle_query(self): + try: + self._open() + sentences = [] + while True: + sent_segm = XMLCorpusView.read_block(self, self._stream) + if len(sent_segm) == 0: + break + for segm in sent_segm: + segm = self.remove_choice(segm) + sentences.append(self.get_sentences(segm)) + self.close() + self.xml_tool.remove_preprocessed_file() + return sentences + except Exception: + self.xml_tool.remove_preprocessed_file() + raise Exception + + def handle_elt(self, elt, context): + ret = [] + for seg in elt: + ret.append(seg.get('corresp')) + return ret + + +class NKJPCorpus_Text_View(XMLCorpusView): + """ + A stream backed corpus view specialized for use with + text.xml files in NKJP corpus. + """ + + SENTS_MODE = 0 + RAW_MODE = 1 + + def __init__(self, filename, **kwargs): + self.mode = kwargs.pop('mode', 0) + self.tagspec = '.*/div/ab' + self.segm_dict = dict() + # xml preprocessing + self.xml_tool = XML_Tool(filename, 'text.xml') + # base class init + XMLCorpusView.__init__( + self, self.xml_tool.build_preprocessed_file(), self.tagspec + ) + + def handle_query(self): + try: + self._open() + x = self.read_block(self._stream) + self.close() + self.xml_tool.remove_preprocessed_file() + return x + except Exception: + self.xml_tool.remove_preprocessed_file() + raise Exception + + def read_block(self, stream, tagspec=None, elt_handler=None): + """ + Returns text as a list of sentences. + """ + txt = [] + while True: + segm = XMLCorpusView.read_block(self, stream) + if len(segm) == 0: + break + for part in segm: + txt.append(part) + + return [' '.join([segm for segm in txt])] + + def get_segm_id(self, elt): + for attr in elt.attrib: + if attr.endswith('id'): + return elt.get(attr) + + def handle_elt(self, elt, context): + # fill dictionary to use later in sents mode + if self.mode is NKJPCorpus_Text_View.SENTS_MODE: + self.segm_dict[self.get_segm_id(elt)] = elt.text + return elt.text + + +class NKJPCorpus_Morph_View(XMLCorpusView): + """ + A stream backed corpus view specialized for use with + ann_morphosyntax.xml files in NKJP corpus. 
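+
+    ``handle_query()`` returns the orthographic form ('orth') of each segment;
+    if a ``tags`` list is given, only segments whose 'ctag' value appears in
+    it are kept, and segments tagged 'interp' (punctuation) are always
+    skipped.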
+ """ + + def __init__(self, filename, **kwargs): + self.tags = kwargs.pop('tags', None) + self.tagspec = '.*/seg/fs' + self.xml_tool = XML_Tool(filename, 'ann_morphosyntax.xml') + XMLCorpusView.__init__( + self, self.xml_tool.build_preprocessed_file(), self.tagspec + ) + + def handle_query(self): + try: + self._open() + words = [] + while True: + segm = XMLCorpusView.read_block(self, self._stream) + if len(segm) == 0: + break + for part in segm: + if part is not None: + words.append(part) + self.close() + self.xml_tool.remove_preprocessed_file() + return words + except Exception: + self.xml_tool.remove_preprocessed_file() + raise Exception + + def handle_elt(self, elt, context): + word = '' + flag = False + is_not_interp = True + # if tags not specified, then always return word + if self.tags is None: + flag = True + + for child in elt: + + # get word + if 'name' in child.keys() and child.attrib['name'] == 'orth': + for symbol in child: + if symbol.tag == 'string': + word = symbol.text + elif 'name' in child.keys() and child.attrib['name'] == 'interps': + for symbol in child: + if 'type' in symbol.keys() and symbol.attrib['type'] == 'lex': + for symbol2 in symbol: + if ( + 'name' in symbol2.keys() + and symbol2.attrib['name'] == 'ctag' + ): + for symbol3 in symbol2: + if ( + 'value' in symbol3.keys() + and self.tags is not None + and symbol3.attrib['value'] in self.tags + ): + flag = True + elif ( + 'value' in symbol3.keys() + and symbol3.attrib['value'] == 'interp' + ): + is_not_interp = False + if flag and is_not_interp: + return word diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/nombank.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/nombank.py new file mode 100644 index 0000000..2bc3eff --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/nombank.py @@ -0,0 +1,485 @@ +# Natural Language Toolkit: NomBank Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Authors: Paul Bedaride +# Edward Loper +# URL: +# For license information, see LICENSE.TXT + +from __future__ import unicode_literals +from xml.etree import ElementTree +from functools import total_ordering + +from six import string_types + +from nltk.tree import Tree +from nltk.internals import raise_unorderable_types +from nltk.compat import python_2_unicode_compatible + +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + + +class NombankCorpusReader(CorpusReader): + """ + Corpus reader for the nombank corpus, which augments the Penn + Treebank with information about the predicate argument structure + of every noun instance. The corpus consists of two parts: the + predicate-argument annotations themselves, and a set of "frameset + files" which define the argument labels used by the annotations, + on a per-noun basis. Each "frameset file" contains one or more + predicates, such as ``'turn'`` or ``'turn_on'``, each of which is + divided into coarse-grained word senses called "rolesets". For + each "roleset", the frameset file provides descriptions of the + argument roles, along with examples. + """ + + def __init__( + self, + root, + nomfile, + framefiles='', + nounsfile=None, + parse_fileid_xform=None, + parse_corpus=None, + encoding='utf8', + ): + """ + :param root: The root directory for this corpus. + :param nomfile: The name of the file containing the predicate- + argument annotations (relative to ``root``). + :param framefiles: A list or regexp specifying the frameset + fileids for this corpus. 
+ :param parse_fileid_xform: A transform that should be applied + to the fileids in this corpus. This should be a function + of one argument (a fileid) that returns a string (the new + fileid). + :param parse_corpus: The corpus containing the parse trees + corresponding to this corpus. These parse trees are + necessary to resolve the tree pointers used by nombank. + """ + + # If framefiles is specified as a regexp, expand it. + if isinstance(framefiles, string_types): + self._fileids = find_corpus_fileids(root, framefiles) + self._fileids = list(framefiles) + # Initialze the corpus reader. + CorpusReader.__init__(self, root, framefiles, encoding) + + # Record our nom file & nouns file. + self._nomfile = nomfile + self._nounsfile = nounsfile + self._parse_fileid_xform = parse_fileid_xform + self._parse_corpus = parse_corpus + + def raw(self, fileids=None): + """ + :return: the text contents of the given fileids, as a single string. + """ + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + def instances(self, baseform=None): + """ + :return: a corpus view that acts as a list of + ``NombankInstance`` objects, one for each noun in the corpus. + """ + kwargs = {} + if baseform is not None: + kwargs['instance_filter'] = lambda inst: inst.baseform == baseform + return StreamBackedCorpusView( + self.abspath(self._nomfile), + lambda stream: self._read_instance_block(stream, **kwargs), + encoding=self.encoding(self._nomfile), + ) + + def lines(self): + """ + :return: a corpus view that acts as a list of strings, one for + each line in the predicate-argument annotation file. + """ + return StreamBackedCorpusView( + self.abspath(self._nomfile), + read_line_block, + encoding=self.encoding(self._nomfile), + ) + + def roleset(self, roleset_id): + """ + :return: the xml description for the given roleset. + """ + baseform = roleset_id.split('.')[0] + baseform = baseform.replace('perc-sign', '%') + baseform = baseform.replace('oneslashonezero', '1/10').replace( + '1/10', '1-slash-10' + ) + framefile = 'frames/%s.xml' % baseform + if framefile not in self.fileids(): + raise ValueError('Frameset file for %s not found' % roleset_id) + + # n.b.: The encoding for XML fileids is specified by the file + # itself; so we ignore self._encoding here. + etree = ElementTree.parse(self.abspath(framefile).open()).getroot() + for roleset in etree.findall('predicate/roleset'): + if roleset.attrib['id'] == roleset_id: + return roleset + raise ValueError('Roleset %s not found in %s' % (roleset_id, framefile)) + + def rolesets(self, baseform=None): + """ + :return: list of xml descriptions for rolesets. + """ + if baseform is not None: + framefile = 'frames/%s.xml' % baseform + if framefile not in self.fileids(): + raise ValueError('Frameset file for %s not found' % baseform) + framefiles = [framefile] + else: + framefiles = self.fileids() + + rsets = [] + for framefile in framefiles: + # n.b.: The encoding for XML fileids is specified by the file + # itself; so we ignore self._encoding here. + etree = ElementTree.parse(self.abspath(framefile).open()).getroot() + rsets.append(etree.findall('predicate/roleset')) + return LazyConcatenation(rsets) + + def nouns(self): + """ + :return: a corpus view that acts as a list of all noun lemmas + in this corpus (from the nombank.1.0.words file). 
+ """ + return StreamBackedCorpusView( + self.abspath(self._nounsfile), + read_line_block, + encoding=self.encoding(self._nounsfile), + ) + + def _read_instance_block(self, stream, instance_filter=lambda inst: True): + block = [] + + # Read 100 at a time. + for i in range(100): + line = stream.readline().strip() + if line: + inst = NombankInstance.parse( + line, self._parse_fileid_xform, self._parse_corpus + ) + if instance_filter(inst): + block.append(inst) + + return block + + +###################################################################### +# { Nombank Instance & related datatypes +###################################################################### + + +@python_2_unicode_compatible +class NombankInstance(object): + def __init__( + self, + fileid, + sentnum, + wordnum, + baseform, + sensenumber, + predicate, + predid, + arguments, + parse_corpus=None, + ): + + self.fileid = fileid + """The name of the file containing the parse tree for this + instance's sentence.""" + + self.sentnum = sentnum + """The sentence number of this sentence within ``fileid``. + Indexing starts from zero.""" + + self.wordnum = wordnum + """The word number of this instance's predicate within its + containing sentence. Word numbers are indexed starting from + zero, and include traces and other empty parse elements.""" + + self.baseform = baseform + """The baseform of the predicate.""" + + self.sensenumber = sensenumber + """The sense number of the predicate.""" + + self.predicate = predicate + """A ``NombankTreePointer`` indicating the position of this + instance's predicate within its containing sentence.""" + + self.predid = predid + """Identifier of the predicate.""" + + self.arguments = tuple(arguments) + """A list of tuples (argloc, argid), specifying the location + and identifier for each of the predicate's argument in the + containing sentence. Argument identifiers are strings such as + ``'ARG0'`` or ``'ARGM-TMP'``. This list does *not* contain + the predicate.""" + + self.parse_corpus = parse_corpus + """A corpus reader for the parse trees corresponding to the + instances in this nombank corpus.""" + + @property + def roleset(self): + """The name of the roleset used by this instance's predicate. + Use ``nombank.roleset() `` to + look up information about the roleset.""" + r = self.baseform.replace('%', 'perc-sign') + r = r.replace('1/10', '1-slash-10').replace('1-slash-10', 'oneslashonezero') + return '%s.%s' % (r, self.sensenumber) + + def __repr__(self): + return '' % ( + self.fileid, + self.sentnum, + self.wordnum, + ) + + def __str__(self): + s = '%s %s %s %s %s' % ( + self.fileid, + self.sentnum, + self.wordnum, + self.baseform, + self.sensenumber, + ) + items = self.arguments + ((self.predicate, 'rel'),) + for (argloc, argid) in sorted(items): + s += ' %s-%s' % (argloc, argid) + return s + + def _get_tree(self): + if self.parse_corpus is None: + return None + if self.fileid not in self.parse_corpus.fileids(): + return None + return self.parse_corpus.parsed_sents(self.fileid)[self.sentnum] + + tree = property( + _get_tree, + doc=""" + The parse tree corresponding to this instance, or None if + the corresponding tree is not available.""", + ) + + @staticmethod + def parse(s, parse_fileid_xform=None, parse_corpus=None): + pieces = s.split() + if len(pieces) < 6: + raise ValueError('Badly formatted nombank line: %r' % s) + + # Divide the line into its basic pieces. 
+ (fileid, sentnum, wordnum, baseform, sensenumber) = pieces[:5] + + args = pieces[5:] + rel = [args.pop(i) for i, p in enumerate(args) if '-rel' in p] + if len(rel) != 1: + raise ValueError('Badly formatted nombank line: %r' % s) + + # Apply the fileid selector, if any. + if parse_fileid_xform is not None: + fileid = parse_fileid_xform(fileid) + + # Convert sentence & word numbers to ints. + sentnum = int(sentnum) + wordnum = int(wordnum) + + # Parse the predicate location. + + predloc, predid = rel[0].split('-', 1) + predicate = NombankTreePointer.parse(predloc) + + # Parse the arguments. + arguments = [] + for arg in args: + argloc, argid = arg.split('-', 1) + arguments.append((NombankTreePointer.parse(argloc), argid)) + + # Put it all together. + return NombankInstance( + fileid, + sentnum, + wordnum, + baseform, + sensenumber, + predicate, + predid, + arguments, + parse_corpus, + ) + + +class NombankPointer(object): + """ + A pointer used by nombank to identify one or more constituents in + a parse tree. ``NombankPointer`` is an abstract base class with + three concrete subclasses: + + - ``NombankTreePointer`` is used to point to single constituents. + - ``NombankSplitTreePointer`` is used to point to 'split' + constituents, which consist of a sequence of two or more + ``NombankTreePointer`` pointers. + - ``NombankChainTreePointer`` is used to point to entire trace + chains in a tree. It consists of a sequence of pieces, which + can be ``NombankTreePointer`` or ``NombankSplitTreePointer`` pointers. + """ + + def __init__(self): + if self.__class__ == NombankPointer: + raise NotImplementedError() + + +@python_2_unicode_compatible +class NombankChainTreePointer(NombankPointer): + def __init__(self, pieces): + self.pieces = pieces + """A list of the pieces that make up this chain. Elements may + be either ``NombankSplitTreePointer`` or + ``NombankTreePointer`` pointers.""" + + def __str__(self): + return '*'.join('%s' % p for p in self.pieces) + + def __repr__(self): + return '' % self + + def select(self, tree): + if tree is None: + raise ValueError('Parse tree not avaialable') + return Tree('*CHAIN*', [p.select(tree) for p in self.pieces]) + + +@python_2_unicode_compatible +class NombankSplitTreePointer(NombankPointer): + def __init__(self, pieces): + self.pieces = pieces + """A list of the pieces that make up this chain. Elements are + all ``NombankTreePointer`` pointers.""" + + def __str__(self): + return ','.join('%s' % p for p in self.pieces) + + def __repr__(self): + return '' % self + + def select(self, tree): + if tree is None: + raise ValueError('Parse tree not avaialable') + return Tree('*SPLIT*', [p.select(tree) for p in self.pieces]) + + +@total_ordering +@python_2_unicode_compatible +class NombankTreePointer(NombankPointer): + """ + wordnum:height*wordnum:height*... + wordnum:height, + + """ + + def __init__(self, wordnum, height): + self.wordnum = wordnum + self.height = height + + @staticmethod + def parse(s): + # Deal with chains (xx*yy*zz) + pieces = s.split('*') + if len(pieces) > 1: + return NombankChainTreePointer( + [NombankTreePointer.parse(elt) for elt in pieces] + ) + + # Deal with split args (xx,yy,zz) + pieces = s.split(',') + if len(pieces) > 1: + return NombankSplitTreePointer( + [NombankTreePointer.parse(elt) for elt in pieces] + ) + + # Deal with normal pointers. 
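+ # A plain pointer has the form 'wordnum:height': the token's index
+ # within the sentence and the number of constituent levels to go up
+ # from that token.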
+ pieces = s.split(':') + if len(pieces) != 2: + raise ValueError('bad nombank pointer %r' % s) + return NombankTreePointer(int(pieces[0]), int(pieces[1])) + + def __str__(self): + return '%s:%s' % (self.wordnum, self.height) + + def __repr__(self): + return 'NombankTreePointer(%d, %d)' % (self.wordnum, self.height) + + def __eq__(self, other): + while isinstance(other, (NombankChainTreePointer, NombankSplitTreePointer)): + other = other.pieces[0] + + if not isinstance(other, NombankTreePointer): + return self is other + + return self.wordnum == other.wordnum and self.height == other.height + + def __ne__(self, other): + return not self == other + + def __lt__(self, other): + while isinstance(other, (NombankChainTreePointer, NombankSplitTreePointer)): + other = other.pieces[0] + + if not isinstance(other, NombankTreePointer): + return id(self) < id(other) + + return (self.wordnum, -self.height) < (other.wordnum, -other.height) + + def select(self, tree): + if tree is None: + raise ValueError('Parse tree not avaialable') + return tree[self.treepos(tree)] + + def treepos(self, tree): + """ + Convert this pointer to a standard 'tree position' pointer, + given that it points to the given tree. + """ + if tree is None: + raise ValueError('Parse tree not avaialable') + stack = [tree] + treepos = [] + + wordnum = 0 + while True: + # print treepos + # print stack[-1] + # tree node: + if isinstance(stack[-1], Tree): + # Select the next child. + if len(treepos) < len(stack): + treepos.append(0) + else: + treepos[-1] += 1 + # Update the stack. + if treepos[-1] < len(stack[-1]): + stack.append(stack[-1][treepos[-1]]) + else: + # End of node's child list: pop up a level. + stack.pop() + treepos.pop() + # word node: + else: + if wordnum == self.wordnum: + return tuple(treepos[: len(treepos) - self.height - 1]) + else: + wordnum += 1 + stack.pop() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/nps_chat.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/nps_chat.py new file mode 100644 index 0000000..391f61d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/nps_chat.py @@ -0,0 +1,92 @@ +# Natural Language Toolkit: NPS Chat Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT +from __future__ import unicode_literals + +import re +import textwrap + +from nltk.util import LazyConcatenation +from nltk.internals import ElementWrapper +from nltk.tag import map_tag + +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * +from nltk.corpus.reader.xmldocs import * + + +class NPSChatCorpusReader(XMLCorpusReader): + def __init__(self, root, fileids, wrap_etree=False, tagset=None): + XMLCorpusReader.__init__(self, root, fileids, wrap_etree) + self._tagset = tagset + + def xml_posts(self, fileids=None): + if self._wrap_etree: + return concat( + [ + XMLCorpusView(fileid, 'Session/Posts/Post', self._wrap_elt) + for fileid in self.abspaths(fileids) + ] + ) + else: + return concat( + [ + XMLCorpusView(fileid, 'Session/Posts/Post') + for fileid in self.abspaths(fileids) + ] + ) + + def posts(self, fileids=None): + return concat( + [ + XMLCorpusView( + fileid, 'Session/Posts/Post/terminals', self._elt_to_words + ) + for fileid in self.abspaths(fileids) + ] + ) + + def tagged_posts(self, fileids=None, tagset=None): + def reader(elt, handler): + return self._elt_to_tagged_words(elt, handler, tagset) + + return concat( + [ + XMLCorpusView(fileid, 
'Session/Posts/Post/terminals', reader) + for fileid in self.abspaths(fileids) + ] + ) + + def words(self, fileids=None): + return LazyConcatenation(self.posts(fileids)) + + def tagged_words(self, fileids=None, tagset=None): + return LazyConcatenation(self.tagged_posts(fileids, tagset)) + + def _wrap_elt(self, elt, handler): + return ElementWrapper(elt) + + def _elt_to_words(self, elt, handler): + return [self._simplify_username(t.attrib['word']) for t in elt.findall('t')] + + def _elt_to_tagged_words(self, elt, handler, tagset=None): + tagged_post = [ + (self._simplify_username(t.attrib['word']), t.attrib['pos']) + for t in elt.findall('t') + ] + if tagset and tagset != self._tagset: + tagged_post = [ + (w, map_tag(self._tagset, tagset, t)) for (w, t) in tagged_post + ] + return tagged_post + + @staticmethod + def _simplify_username(word): + if 'User' in word: + word = 'U' + word.split('User', 1)[1] + elif isinstance(word, bytes): + word = word.decode('ascii') + return word diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/opinion_lexicon.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/opinion_lexicon.py new file mode 100644 index 0000000..cfe7f6e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/opinion_lexicon.py @@ -0,0 +1,123 @@ +# Natural Language Toolkit: Opinion Lexicon Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Pierpaolo Pantone <24alsecondo@gmail.com> +# URL: +# For license information, see LICENSE.TXT + +""" +CorpusReader for the Opinion Lexicon. + +- Opinion Lexicon information - +Authors: Minqing Hu and Bing Liu, 2004. + Department of Computer Sicence + University of Illinois at Chicago + +Contact: Bing Liu, liub@cs.uic.edu + http://www.cs.uic.edu/~liub + +Distributed with permission. + +Related papers: +- Minqing Hu and Bing Liu. "Mining and summarizing customer reviews". + Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery + & Data Mining (KDD-04), Aug 22-25, 2004, Seattle, Washington, USA. + +- Bing Liu, Minqing Hu and Junsheng Cheng. "Opinion Observer: Analyzing and + Comparing Opinions on the Web". Proceedings of the 14th International World + Wide Web conference (WWW-2005), May 10-14, 2005, Chiba, Japan. +""" +from six import string_types + +from nltk.corpus.reader import WordListCorpusReader +from nltk.corpus.reader.api import * + + +class IgnoreReadmeCorpusView(StreamBackedCorpusView): + """ + This CorpusView is used to skip the initial readme block of the corpus. + """ + + def __init__(self, *args, **kwargs): + StreamBackedCorpusView.__init__(self, *args, **kwargs) + # open self._stream + self._open() + # skip the readme block + read_blankline_block(self._stream) + # Set the initial position to the current stream position + self._filepos = [self._stream.tell()] + + +class OpinionLexiconCorpusReader(WordListCorpusReader): + """ + Reader for Liu and Hu opinion lexicon. Blank lines and readme are ignored. + + >>> from nltk.corpus import opinion_lexicon + >>> opinion_lexicon.words() + ['2-faced', '2-faces', 'abnormal', 'abolish', ...] + + The OpinionLexiconCorpusReader provides shortcuts to retrieve positive/negative + words: + + >>> opinion_lexicon.negative() + ['2-faced', '2-faces', 'abnormal', 'abolish', ...] 
+ + Note that words from `words()` method are sorted by file id, not alphabetically: + + >>> opinion_lexicon.words()[0:10] + ['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably', + 'abominate', 'abomination', 'abort', 'aborted'] + >>> sorted(opinion_lexicon.words())[0:10] + ['2-faced', '2-faces', 'a+', 'abnormal', 'abolish', 'abominable', 'abominably', + 'abominate', 'abomination', 'abort'] + """ + + CorpusView = IgnoreReadmeCorpusView + + def words(self, fileids=None): + """ + Return all words in the opinion lexicon. Note that these words are not + sorted in alphabetical order. + + :param fileids: a list or regexp specifying the ids of the files whose + words have to be returned. + :return: the given file(s) as a list of words and punctuation symbols. + :rtype: list(str) + """ + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat( + [ + self.CorpusView(path, self._read_word_block, encoding=enc) + for (path, enc, fileid) in self.abspaths(fileids, True, True) + ] + ) + + def positive(self): + """ + Return all positive words in alphabetical order. + + :return: a list of positive words. + :rtype: list(str) + """ + return self.words('positive-words.txt') + + def negative(self): + """ + Return all negative words in alphabetical order. + + :return: a list of negative words. + :rtype: list(str) + """ + return self.words('negative-words.txt') + + def _read_word_block(self, stream): + words = [] + for i in range(20): # Read 20 lines at a time. + line = stream.readline() + if not line: + continue + words.append(line.strip()) + return words diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/panlex_lite.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/panlex_lite.py new file mode 100644 index 0000000..44bfb96 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/panlex_lite.py @@ -0,0 +1,174 @@ +# Natural Language Toolkit: PanLex Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: David Kamholz +# URL: +# For license information, see LICENSE.TXT + +""" +CorpusReader for PanLex Lite, a stripped down version of PanLex distributed +as an SQLite database. See the README.txt in the panlex_lite corpus directory +for more information on PanLex Lite. +""" + +import os +import sqlite3 + +from nltk.corpus.reader.api import CorpusReader + + +class PanLexLiteCorpusReader(CorpusReader): + MEANING_Q = """ + SELECT dnx2.mn, dnx2.uq, dnx2.ap, dnx2.ui, ex2.tt, ex2.lv + FROM dnx + JOIN ex ON (ex.ex = dnx.ex) + JOIN dnx dnx2 ON (dnx2.mn = dnx.mn) + JOIN ex ex2 ON (ex2.ex = dnx2.ex) + WHERE dnx.ex != dnx2.ex AND ex.tt = ? AND ex.lv = ? + ORDER BY dnx2.uq DESC + """ + + TRANSLATION_Q = """ + SELECT s.tt, sum(s.uq) AS trq FROM ( + SELECT ex2.tt, max(dnx.uq) AS uq + FROM dnx + JOIN ex ON (ex.ex = dnx.ex) + JOIN dnx dnx2 ON (dnx2.mn = dnx.mn) + JOIN ex ex2 ON (ex2.ex = dnx2.ex) + WHERE dnx.ex != dnx2.ex AND ex.lv = ? AND ex.tt = ? AND ex2.lv = ? + GROUP BY ex2.tt, dnx.ui + ) s + GROUP BY s.tt + ORDER BY trq DESC, s.tt + """ + + def __init__(self, root): + self._c = sqlite3.connect(os.path.join(root, 'db.sqlite')).cursor() + + self._uid_lv = {} + self._lv_uid = {} + + for row in self._c.execute('SELECT uid, lv FROM lv'): + self._uid_lv[row[0]] = row[1] + self._lv_uid[row[1]] = row[0] + + def language_varieties(self, lc=None): + """ + Return a list of PanLex language varieties. + + :param lc: ISO 639 alpha-3 code. 
If specified, filters returned varieties + by this code. If unspecified, all varieties are returned. + :return: the specified language varieties as a list of tuples. The first + element is the language variety's seven-character uniform identifier, + and the second element is its default name. + :rtype: list(tuple) + """ + + if lc is None: + return self._c.execute('SELECT uid, tt FROM lv ORDER BY uid').fetchall() + else: + return self._c.execute( + 'SELECT uid, tt FROM lv WHERE lc = ? ORDER BY uid', (lc,) + ).fetchall() + + def meanings(self, expr_uid, expr_tt): + """ + Return a list of meanings for an expression. + + :param expr_uid: the expression's language variety, as a seven-character + uniform identifier. + :param expr_tt: the expression's text. + :return: a list of Meaning objects. + :rtype: list(Meaning) + """ + + expr_lv = self._uid_lv[expr_uid] + + mn_info = {} + + for i in self._c.execute(self.MEANING_Q, (expr_tt, expr_lv)): + mn = i[0] + uid = self._lv_uid[i[5]] + + if not mn in mn_info: + mn_info[mn] = { + 'uq': i[1], + 'ap': i[2], + 'ui': i[3], + 'ex': {expr_uid: [expr_tt]}, + } + + if not uid in mn_info[mn]['ex']: + mn_info[mn]['ex'][uid] = [] + + mn_info[mn]['ex'][uid].append(i[4]) + + return [Meaning(mn, mn_info[mn]) for mn in mn_info] + + def translations(self, from_uid, from_tt, to_uid): + """ + Return a list of translations for an expression into a single language + variety. + + :param from_uid: the source expression's language variety, as a + seven-character uniform identifier. + :param from_tt: the source expression's text. + :param to_uid: the target language variety, as a seven-character + uniform identifier. + :return a list of translation tuples. The first element is the expression + text and the second element is the translation quality. + :rtype: list(tuple) + """ + + from_lv = self._uid_lv[from_uid] + to_lv = self._uid_lv[to_uid] + + return self._c.execute(self.TRANSLATION_Q, (from_lv, from_tt, to_lv)).fetchall() + + +class Meaning(dict): + """ + Represents a single PanLex meaning. A meaning is a translation set derived + from a single source. + """ + + def __init__(self, mn, attr): + super(Meaning, self).__init__(**attr) + self['mn'] = mn + + def id(self): + """ + :return: the meaning's id. + :rtype: int + """ + return self['mn'] + + def quality(self): + """ + :return: the meaning's source's quality (0=worst, 9=best). + :rtype: int + """ + return self['uq'] + + def source(self): + """ + :return: the meaning's source id. + :rtype: int + """ + return self['ap'] + + def source_group(self): + """ + :return: the meaning's source group id. + :rtype: int + """ + return self['ui'] + + def expressions(self): + """ + :return: the meaning's expressions as a dictionary whose keys are language + variety uniform identifiers and whose values are lists of expression + texts. 
+ :rtype: dict + """ + return self['ex'] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/panlex_swadesh.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/panlex_swadesh.py new file mode 100644 index 0000000..221d317 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/panlex_swadesh.py @@ -0,0 +1,94 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Word List Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT + + +from __future__ import print_function +from collections import namedtuple, defaultdict +import re +from six import string_types + + +from nltk.tokenize import line_tokenize + +from nltk.corpus.reader.wordlist import WordListCorpusReader +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + +PanlexLanguage = namedtuple('PanlexLanguage', + ['panlex_uid', # (1) PanLex UID + 'iso639', # (2) ISO 639 language code + 'iso639_type', # (3) ISO 639 language type, see README + 'script', # (4) normal scripts of expressions + 'name', # (5) PanLex default name + 'langvar_uid' # (6) UID of the language variety in which the default name is an expression + ]) + +class PanlexSwadeshCorpusReader(WordListCorpusReader): + """ + This is a class to read the PanLex Swadesh list from + + David Kamholz, Jonathan Pool, and Susan M. Colowick (2014). + PanLex: Building a Resource for Panlingual Lexical Translation. + In LREC. http://www.lrec-conf.org/proceedings/lrec2014/pdf/1029_Paper.pdf + + License: CC0 1.0 Universal + https://creativecommons.org/publicdomain/zero/1.0/legalcode + """ + def __init__(self, *args, **kwargs): + super(PanlexSwadeshCorpusReader, self).__init__(*args, **kwargs) + # Find the swadesh size using the fileids' path. + self.swadesh_size = re.match(r'swadesh([0-9].*)\/', self.fileids()[0]).group(1) + self._languages = {lang.panlex_uid:lang for lang in self.get_languages()} + self._macro_langauges = self.get_macrolanguages() + + def license(self): + print('CC0 1.0 Universal') + + def readme(self): + print(self.raw('README')) + + def language_codes(self): + return self._languages.keys() + + def get_languages(self): + for line in self.raw('langs{}.txt'.format(self.swadesh_size)).split('\n'): + if not line.strip(): # Skip empty lines. + continue + yield PanlexLanguage(*line.strip().split('\t')) + + def get_macrolanguages(self): + macro_langauges = defaultdict(list) + for lang in self._languages.values(): + macro_langauges[lang.iso639].append(lang.panlex_uid) + return macro_langauges + + def words_by_lang(self, lang_code): + """ + :return: a list of list(str) + """ + fileid = 'swadesh{}/{}.txt'.format(self.swadesh_size, lang_code) + return [concept.split('\t') for concept in self.words(fileid)] + + def words_by_iso639(self, iso63_code): + """ + :return: a list of list(str) + """ + fileids = ['swadesh{}/{}.txt'.format(self.swadesh_size, lang_code) + for lang_code in self._macro_langauges[iso63_code]] + return [concept.split('\t') for fileid in fileids for concept in self.words(fileid)] + + def entries(self, fileids=None): + """ + :return: a tuple of words for the specified fileids. 
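+ Each tuple aligns the entries for one Swadesh concept across the
+ requested fileids, one element per fileid, in the same order.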
+ """ + if not fileids: + fileids = self.fileids() + + wordlists = [self.words(f) for f in fileids] + return list(zip(*wordlists)) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/pl196x.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/pl196x.py new file mode 100644 index 0000000..a8a1f6f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/pl196x.py @@ -0,0 +1,383 @@ +# Natural Language Toolkit: +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Piotr Kasprzyk +# URL: +# For license information, see LICENSE.TXT + +from six import string_types + +from nltk.corpus.reader.api import * +from nltk.corpus.reader.xmldocs import XMLCorpusReader + + +PARA = re.compile(r']*){0,1}>(.*?)
</p>') +SENT = re.compile(r'<s(?: [^>]*){0,1}>(.*?)</s>
    ') + +TAGGEDWORD = re.compile(r'<([wc](?: [^>]*){0,1}>)(.*?)') +WORD = re.compile(r'<[wc](?: [^>]*){0,1}>(.*?)') + +TYPE = re.compile(r'type="(.*?)"') +ANA = re.compile(r'ana="(.*?)"') + +TEXTID = re.compile(r'text id="(.*?)"') + + +class TEICorpusView(StreamBackedCorpusView): + def __init__( + self, + corpus_file, + tagged, + group_by_sent, + group_by_para, + tagset=None, + head_len=0, + textids=None, + ): + + self._tagged = tagged + self._textids = textids + + self._group_by_sent = group_by_sent + self._group_by_para = group_by_para + # WARNING -- skip header + StreamBackedCorpusView.__init__(self, corpus_file, startpos=head_len) + + _pagesize = 4096 + + def read_block(self, stream): + block = stream.readlines(self._pagesize) + block = concat(block) + while (block.count(' block.count('')) or block.count( + '') + len('') + block = block[:beg] + block[beg + end :] + + output = [] + for para_str in PARA.findall(block): + para = [] + for sent_str in SENT.findall(para_str): + if not self._tagged: + sent = WORD.findall(sent_str) + else: + sent = list(map(self._parse_tag, TAGGEDWORD.findall(sent_str))) + if self._group_by_sent: + para.append(sent) + else: + para.extend(sent) + if self._group_by_para: + output.append(para) + else: + output.extend(para) + return output + + def _parse_tag(self, tag_word_tuple): + (tag, word) = tag_word_tuple + if tag.startswith('w'): + tag = ANA.search(tag).group(1) + else: # tag.startswith('c') + tag = TYPE.search(tag).group(1) + return word, tag + + +class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader): + head_len = 2770 + + def __init__(self, *args, **kwargs): + if 'textid_file' in kwargs: + self._textids = kwargs['textid_file'] + else: + self._textids = None + + XMLCorpusReader.__init__(self, *args) + CategorizedCorpusReader.__init__(self, kwargs) + + self._init_textids() + + def _init_textids(self): + self._f2t = defaultdict(list) + self._t2f = defaultdict(list) + if self._textids is not None: + with open(self._textids) as fp: + for line in fp: + line = line.strip() + file_id, text_ids = line.split(' ', 1) + if file_id not in self.fileids(): + raise ValueError( + 'In text_id mapping file %s: %s not found' + % (self._textids, file_id) + ) + for text_id in text_ids.split(self._delimiter): + self._add_textids(file_id, text_id) + + def _add_textids(self, file_id, text_id): + self._f2t[file_id].append(text_id) + self._t2f[text_id].append(file_id) + + def _resolve(self, fileids, categories, textids=None): + tmp = None + if ( + len( + filter( + lambda accessor: accessor is None, (fileids, categories, textids) + ) + ) + != 1 + ): + + raise ValueError( + 'Specify exactly one of: fileids, ' 'categories or textids' + ) + + if fileids is not None: + return fileids, None + + if categories is not None: + return self.fileids(categories), None + + if textids is not None: + if isinstance(textids, string_types): + textids = [textids] + files = sum((self._t2f[t] for t in textids), []) + tdict = dict() + for f in files: + tdict[f] = set(self._f2t[f]) & set(textids) + return files, tdict + + def decode_tag(self, tag): + # to be implemented + return tag + + def textids(self, fileids=None, categories=None): + """ + In the pl196x corpus each category is stored in single + file and thus both methods provide identical functionality. In order + to accommodate finer granularity, a non-standard textids() method was + implemented. All the main functions can be supplied with a list + of required chunks---giving much more control to the user. 
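+ Returns the sorted text ids found in the given fileids (or
+ categories); if neither is supplied, all text ids in the corpus
+ are returned.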
+ """ + fileids, _ = self._resolve(fileids, categories) + if fileids is None: + return sorted(self._t2f) + + if isinstance(fileids, string_types): + fileids = [fileids] + return sorted(sum((self._f2t[d] for d in fileids), [])) + + def words(self, fileids=None, categories=None, textids=None): + fileids, textids = self._resolve(fileids, categories, textids) + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + + if textids: + return concat( + [ + TEICorpusView( + self.abspath(fileid), + False, + False, + False, + head_len=self.head_len, + textids=textids[fileid], + ) + for fileid in fileids + ] + ) + else: + return concat( + [ + TEICorpusView( + self.abspath(fileid), + False, + False, + False, + head_len=self.head_len, + ) + for fileid in fileids + ] + ) + + def sents(self, fileids=None, categories=None, textids=None): + fileids, textids = self._resolve(fileids, categories, textids) + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + + if textids: + return concat( + [ + TEICorpusView( + self.abspath(fileid), + False, + True, + False, + head_len=self.head_len, + textids=textids[fileid], + ) + for fileid in fileids + ] + ) + else: + return concat( + [ + TEICorpusView( + self.abspath(fileid), False, True, False, head_len=self.head_len + ) + for fileid in fileids + ] + ) + + def paras(self, fileids=None, categories=None, textids=None): + fileids, textids = self._resolve(fileids, categories, textids) + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + + if textids: + return concat( + [ + TEICorpusView( + self.abspath(fileid), + False, + True, + True, + head_len=self.head_len, + textids=textids[fileid], + ) + for fileid in fileids + ] + ) + else: + return concat( + [ + TEICorpusView( + self.abspath(fileid), False, True, True, head_len=self.head_len + ) + for fileid in fileids + ] + ) + + def tagged_words(self, fileids=None, categories=None, textids=None): + fileids, textids = self._resolve(fileids, categories, textids) + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + + if textids: + return concat( + [ + TEICorpusView( + self.abspath(fileid), + True, + False, + False, + head_len=self.head_len, + textids=textids[fileid], + ) + for fileid in fileids + ] + ) + else: + return concat( + [ + TEICorpusView( + self.abspath(fileid), True, False, False, head_len=self.head_len + ) + for fileid in fileids + ] + ) + + def tagged_sents(self, fileids=None, categories=None, textids=None): + fileids, textids = self._resolve(fileids, categories, textids) + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + + if textids: + return concat( + [ + TEICorpusView( + self.abspath(fileid), + True, + True, + False, + head_len=self.head_len, + textids=textids[fileid], + ) + for fileid in fileids + ] + ) + else: + return concat( + [ + TEICorpusView( + self.abspath(fileid), True, True, False, head_len=self.head_len + ) + for fileid in fileids + ] + ) + + def tagged_paras(self, fileids=None, categories=None, textids=None): + fileids, textids = self._resolve(fileids, categories, textids) + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + + if textids: + return concat( + [ + TEICorpusView( + self.abspath(fileid), + True, + True, + True, + head_len=self.head_len, + 
textids=textids[fileid], + ) + for fileid in fileids + ] + ) + else: + return concat( + [ + TEICorpusView( + self.abspath(fileid), True, True, True, head_len=self.head_len + ) + for fileid in fileids + ] + ) + + def xml(self, fileids=None, categories=None): + fileids, _ = self._resolve(fileids, categories) + if len(fileids) == 1: + return XMLCorpusReader.xml(self, fileids[0]) + else: + raise TypeError('Expected a single file') + + def raw(self, fileids=None, categories=None): + fileids, _ = self._resolve(fileids, categories) + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/plaintext.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/plaintext.py new file mode 100644 index 0000000..4de7787 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/plaintext.py @@ -0,0 +1,263 @@ +# Natural Language Toolkit: Plaintext Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# Nitin Madnani +# URL: +# For license information, see LICENSE.TXT + +""" +A reader for corpora that consist of plaintext documents. +""" + +import nltk.data +from nltk.tokenize import * + +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + + +class PlaintextCorpusReader(CorpusReader): + """ + Reader for corpora that consist of plaintext documents. Paragraphs + are assumed to be split using blank lines. Sentences and words can + be tokenized using the default tokenizers, or by custom tokenizers + specificed as parameters to the constructor. + + This corpus reader can be customized (e.g., to skip preface + sections of specific document formats) by creating a subclass and + overriding the ``CorpusView`` class variable. + """ + + CorpusView = StreamBackedCorpusView + """The corpus view class used by this reader. Subclasses of + ``PlaintextCorpusReader`` may specify alternative corpus view + classes (e.g., to skip the preface sections of documents.)""" + + def __init__( + self, + root, + fileids, + word_tokenizer=WordPunctTokenizer(), + sent_tokenizer=nltk.data.LazyLoader('tokenizers/punkt/english.pickle'), + para_block_reader=read_blankline_block, + encoding='utf8', + ): + """ + Construct a new plaintext corpus reader for a set of documents + located at the given root directory. Example usage: + + >>> root = '/usr/local/share/nltk_data/corpora/webtext/' + >>> reader = PlaintextCorpusReader(root, '.*\.txt') # doctest: +SKIP + + :param root: The root directory for this corpus. + :param fileids: A list or regexp specifying the fileids in this corpus. + :param word_tokenizer: Tokenizer for breaking sentences or + paragraphs into words. + :param sent_tokenizer: Tokenizer for breaking paragraphs + into words. + :param para_block_reader: The block reader used to divide the + corpus into paragraph blocks. + """ + CorpusReader.__init__(self, root, fileids, encoding) + self._word_tokenizer = word_tokenizer + self._sent_tokenizer = sent_tokenizer + self._para_block_reader = para_block_reader + + def raw(self, fileids=None): + """ + :return: the given file(s) as a single string. 
+ :rtype: str + """ + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + raw_texts = [] + for f in fileids: + _fin = self.open(f) + raw_texts.append(_fin.read()) + _fin.close() + return concat(raw_texts) + + def words(self, fileids=None): + """ + :return: the given file(s) as a list of words + and punctuation symbols. + :rtype: list(str) + """ + return concat( + [ + self.CorpusView(path, self._read_word_block, encoding=enc) + for (path, enc, fileid) in self.abspaths(fileids, True, True) + ] + ) + + def sents(self, fileids=None): + """ + :return: the given file(s) as a list of + sentences or utterances, each encoded as a list of word + strings. + :rtype: list(list(str)) + """ + if self._sent_tokenizer is None: + raise ValueError('No sentence tokenizer for this corpus') + + return concat( + [ + self.CorpusView(path, self._read_sent_block, encoding=enc) + for (path, enc, fileid) in self.abspaths(fileids, True, True) + ] + ) + + def paras(self, fileids=None): + """ + :return: the given file(s) as a list of + paragraphs, each encoded as a list of sentences, which are + in turn encoded as lists of word strings. + :rtype: list(list(list(str))) + """ + if self._sent_tokenizer is None: + raise ValueError('No sentence tokenizer for this corpus') + + return concat( + [ + self.CorpusView(path, self._read_para_block, encoding=enc) + for (path, enc, fileid) in self.abspaths(fileids, True, True) + ] + ) + + def _read_word_block(self, stream): + words = [] + for i in range(20): # Read 20 lines at a time. + words.extend(self._word_tokenizer.tokenize(stream.readline())) + return words + + def _read_sent_block(self, stream): + sents = [] + for para in self._para_block_reader(stream): + sents.extend( + [ + self._word_tokenizer.tokenize(sent) + for sent in self._sent_tokenizer.tokenize(para) + ] + ) + return sents + + def _read_para_block(self, stream): + paras = [] + for para in self._para_block_reader(stream): + paras.append( + [ + self._word_tokenizer.tokenize(sent) + for sent in self._sent_tokenizer.tokenize(para) + ] + ) + return paras + + +class CategorizedPlaintextCorpusReader(CategorizedCorpusReader, PlaintextCorpusReader): + """ + A reader for plaintext corpora whose documents are divided into + categories based on their file identifiers. + """ + + def __init__(self, *args, **kwargs): + """ + Initialize the corpus reader. Categorization arguments + (``cat_pattern``, ``cat_map``, and ``cat_file``) are passed to + the ``CategorizedCorpusReader`` constructor. The remaining arguments + are passed to the ``PlaintextCorpusReader`` constructor. + """ + CategorizedCorpusReader.__init__(self, kwargs) + PlaintextCorpusReader.__init__(self, *args, **kwargs) + + def _resolve(self, fileids, categories): + if fileids is not None and categories is not None: + raise ValueError('Specify fileids or categories, not both') + if categories is not None: + return self.fileids(categories) + else: + return fileids + + def raw(self, fileids=None, categories=None): + return PlaintextCorpusReader.raw(self, self._resolve(fileids, categories)) + + def words(self, fileids=None, categories=None): + return PlaintextCorpusReader.words(self, self._resolve(fileids, categories)) + + def sents(self, fileids=None, categories=None): + return PlaintextCorpusReader.sents(self, self._resolve(fileids, categories)) + + def paras(self, fileids=None, categories=None): + return PlaintextCorpusReader.paras(self, self._resolve(fileids, categories)) + + +# FIXME: Is there a better way? 
How to not hardcode this? +# Possibly, add a language kwargs to CategorizedPlaintextCorpusReader to +# override the `sent_tokenizer`. +class PortugueseCategorizedPlaintextCorpusReader(CategorizedPlaintextCorpusReader): + def __init__(self, *args, **kwargs): + CategorizedCorpusReader.__init__(self, kwargs) + kwargs['sent_tokenizer'] = nltk.data.LazyLoader( + 'tokenizers/punkt/portuguese.pickle' + ) + PlaintextCorpusReader.__init__(self, *args, **kwargs) + + +class EuroparlCorpusReader(PlaintextCorpusReader): + + """ + Reader for Europarl corpora that consist of plaintext documents. + Documents are divided into chapters instead of paragraphs as + for regular plaintext documents. Chapters are separated using blank + lines. Everything is inherited from ``PlaintextCorpusReader`` except + that: + - Since the corpus is pre-processed and pre-tokenized, the + word tokenizer should just split the line at whitespaces. + - For the same reason, the sentence tokenizer should just + split the paragraph at line breaks. + - There is a new 'chapters()' method that returns chapters instead + instead of paragraphs. + - The 'paras()' method inherited from PlaintextCorpusReader is + made non-functional to remove any confusion between chapters + and paragraphs for Europarl. + """ + + def _read_word_block(self, stream): + words = [] + for i in range(20): # Read 20 lines at a time. + words.extend(stream.readline().split()) + return words + + def _read_sent_block(self, stream): + sents = [] + for para in self._para_block_reader(stream): + sents.extend([sent.split() for sent in para.splitlines()]) + return sents + + def _read_para_block(self, stream): + paras = [] + for para in self._para_block_reader(stream): + paras.append([sent.split() for sent in para.splitlines()]) + return paras + + def chapters(self, fileids=None): + """ + :return: the given file(s) as a list of + chapters, each encoded as a list of sentences, which are + in turn encoded as lists of word strings. + :rtype: list(list(list(str))) + """ + return concat( + [ + self.CorpusView(fileid, self._read_para_block, encoding=enc) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def paras(self, fileids=None): + raise NotImplementedError( + 'The Europarl corpus reader does not support paragraphs. Please use chapters() instead.' + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/ppattach.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/ppattach.py new file mode 100644 index 0000000..3bc06e4 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/ppattach.py @@ -0,0 +1,107 @@ +# Natural Language Toolkit: PP Attachment Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +Read lines from the Prepositional Phrase Attachment Corpus. + +The PP Attachment Corpus contains several files having the format: + +sentence_id verb noun1 preposition noun2 attachment + +For example: + +42960 gives authority to administration V +46742 gives inventors of microchip N + +The PP attachment is to the verb phrase (V) or noun phrase (N), i.e.: + +(VP gives (NP authority) (PP to administration)) +(VP gives (NP inventors (PP of microchip))) + +The corpus contains the following files: + +training: training set +devset: development test set, used for algorithm development. +test: test set, used to report results +bitstrings: word classes derived from Mutual Information Clustering for the Wall Street Journal. 
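+ A minimal usage sketch (an illustration only; it assumes the corpus
+ data is installed and exposed as ``nltk.corpus.ppattach``):
+
+ from nltk.corpus import ppattach
+ inst = ppattach.attachments('training')[0]
+ print(inst.verb, inst.noun1, inst.prep, inst.noun2, inst.attachment)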
+ +Ratnaparkhi, Adwait (1994). A Maximum Entropy Model for Prepositional +Phrase Attachment. Proceedings of the ARPA Human Language Technology +Conference. [http://www.cis.upenn.edu/~adwait/papers/hlt94.ps] + +The PP Attachment Corpus is distributed with NLTK with the permission +of the author. +""" +from __future__ import unicode_literals + +from six import string_types + +from nltk import compat +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + + +@compat.python_2_unicode_compatible +class PPAttachment(object): + def __init__(self, sent, verb, noun1, prep, noun2, attachment): + self.sent = sent + self.verb = verb + self.noun1 = noun1 + self.prep = prep + self.noun2 = noun2 + self.attachment = attachment + + def __repr__(self): + return ( + 'PPAttachment(sent=%r, verb=%r, noun1=%r, prep=%r, ' + 'noun2=%r, attachment=%r)' + % (self.sent, self.verb, self.noun1, self.prep, self.noun2, self.attachment) + ) + + +class PPAttachmentCorpusReader(CorpusReader): + """ + sentence_id verb noun1 preposition noun2 attachment + """ + + def attachments(self, fileids): + return concat( + [ + StreamBackedCorpusView(fileid, self._read_obj_block, encoding=enc) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def tuples(self, fileids): + return concat( + [ + StreamBackedCorpusView(fileid, self._read_tuple_block, encoding=enc) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def raw(self, fileids=None): + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + def _read_tuple_block(self, stream): + line = stream.readline() + if line: + return [tuple(line.split())] + else: + return [] + + def _read_obj_block(self, stream): + line = stream.readline() + if line: + return [PPAttachment(*line.split())] + else: + return [] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/propbank.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/propbank.py new file mode 100644 index 0000000..5c9bdd9 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/propbank.py @@ -0,0 +1,539 @@ +# Natural Language Toolkit: PropBank Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +from __future__ import unicode_literals +import re +from functools import total_ordering +from xml.etree import ElementTree + +from six import string_types + +from nltk.tree import Tree +from nltk.internals import raise_unorderable_types + +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + + +class PropbankCorpusReader(CorpusReader): + """ + Corpus reader for the propbank corpus, which augments the Penn + Treebank with information about the predicate argument structure + of every verb instance. The corpus consists of two parts: the + predicate-argument annotations themselves, and a set of "frameset + files" which define the argument labels used by the annotations, + on a per-verb basis. Each "frameset file" contains one or more + predicates, such as ``'turn'`` or ``'turn_on'``, each of which is + divided into coarse-grained word senses called "rolesets". For + each "roleset", the frameset file provides descriptions of the + argument roles, along with examples. 
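+ A minimal usage sketch (an illustration only; it assumes the PropBank
+ annotations and the corresponding Treebank parses are installed and
+ exposed as ``nltk.corpus.propbank``):
+
+ from nltk.corpus import propbank
+ inst = propbank.instances()[0]
+ print(inst.roleset, inst.predicate, inst.arguments)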
+ """ + + def __init__( + self, + root, + propfile, + framefiles='', + verbsfile=None, + parse_fileid_xform=None, + parse_corpus=None, + encoding='utf8', + ): + """ + :param root: The root directory for this corpus. + :param propfile: The name of the file containing the predicate- + argument annotations (relative to ``root``). + :param framefiles: A list or regexp specifying the frameset + fileids for this corpus. + :param parse_fileid_xform: A transform that should be applied + to the fileids in this corpus. This should be a function + of one argument (a fileid) that returns a string (the new + fileid). + :param parse_corpus: The corpus containing the parse trees + corresponding to this corpus. These parse trees are + necessary to resolve the tree pointers used by propbank. + """ + # If framefiles is specified as a regexp, expand it. + if isinstance(framefiles, string_types): + framefiles = find_corpus_fileids(root, framefiles) + framefiles = list(framefiles) + # Initialze the corpus reader. + CorpusReader.__init__(self, root, [propfile, verbsfile] + framefiles, encoding) + + # Record our frame fileids & prop file. + self._propfile = propfile + self._framefiles = framefiles + self._verbsfile = verbsfile + self._parse_fileid_xform = parse_fileid_xform + self._parse_corpus = parse_corpus + + def raw(self, fileids=None): + """ + :return: the text contents of the given fileids, as a single string. + """ + if fileids is None: + fileids = self._fileids + elif isinstance(fileids): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + def instances(self, baseform=None): + """ + :return: a corpus view that acts as a list of + ``PropBankInstance`` objects, one for each noun in the corpus. + """ + kwargs = {} + if baseform is not None: + kwargs['instance_filter'] = lambda inst: inst.baseform == baseform + return StreamBackedCorpusView( + self.abspath(self._propfile), + lambda stream: self._read_instance_block(stream, **kwargs), + encoding=self.encoding(self._propfile), + ) + + def lines(self): + """ + :return: a corpus view that acts as a list of strings, one for + each line in the predicate-argument annotation file. + """ + return StreamBackedCorpusView( + self.abspath(self._propfile), + read_line_block, + encoding=self.encoding(self._propfile), + ) + + def roleset(self, roleset_id): + """ + :return: the xml description for the given roleset. + """ + baseform = roleset_id.split('.')[0] + framefile = 'frames/%s.xml' % baseform + if framefile not in self._framefiles: + raise ValueError('Frameset file for %s not found' % roleset_id) + + # n.b.: The encoding for XML fileids is specified by the file + # itself; so we ignore self._encoding here. + etree = ElementTree.parse(self.abspath(framefile).open()).getroot() + for roleset in etree.findall('predicate/roleset'): + if roleset.attrib['id'] == roleset_id: + return roleset + raise ValueError('Roleset %s not found in %s' % (roleset_id, framefile)) + + def rolesets(self, baseform=None): + """ + :return: list of xml descriptions for rolesets. + """ + if baseform is not None: + framefile = 'frames/%s.xml' % baseform + if framefile not in self._framefiles: + raise ValueError('Frameset file for %s not found' % baseform) + framefiles = [framefile] + else: + framefiles = self._framefiles + + rsets = [] + for framefile in framefiles: + # n.b.: The encoding for XML fileids is specified by the file + # itself; so we ignore self._encoding here. 
+ etree = ElementTree.parse(self.abspath(framefile).open()).getroot() + rsets.append(etree.findall('predicate/roleset')) + return LazyConcatenation(rsets) + + def verbs(self): + """ + :return: a corpus view that acts as a list of all verb lemmas + in this corpus (from the verbs.txt file). + """ + return StreamBackedCorpusView( + self.abspath(self._verbsfile), + read_line_block, + encoding=self.encoding(self._verbsfile), + ) + + def _read_instance_block(self, stream, instance_filter=lambda inst: True): + block = [] + + # Read 100 at a time. + for i in range(100): + line = stream.readline().strip() + if line: + inst = PropbankInstance.parse( + line, self._parse_fileid_xform, self._parse_corpus + ) + if instance_filter(inst): + block.append(inst) + + return block + + +###################################################################### +# { Propbank Instance & related datatypes +###################################################################### + + +@compat.python_2_unicode_compatible +class PropbankInstance(object): + def __init__( + self, + fileid, + sentnum, + wordnum, + tagger, + roleset, + inflection, + predicate, + arguments, + parse_corpus=None, + ): + + self.fileid = fileid + """The name of the file containing the parse tree for this + instance's sentence.""" + + self.sentnum = sentnum + """The sentence number of this sentence within ``fileid``. + Indexing starts from zero.""" + + self.wordnum = wordnum + """The word number of this instance's predicate within its + containing sentence. Word numbers are indexed starting from + zero, and include traces and other empty parse elements.""" + + self.tagger = tagger + """An identifier for the tagger who tagged this instance; or + ``'gold'`` if this is an adjuticated instance.""" + + self.roleset = roleset + """The name of the roleset used by this instance's predicate. + Use ``propbank.roleset() `` to + look up information about the roleset.""" + + self.inflection = inflection + """A ``PropbankInflection`` object describing the inflection of + this instance's predicate.""" + + self.predicate = predicate + """A ``PropbankTreePointer`` indicating the position of this + instance's predicate within its containing sentence.""" + + self.arguments = tuple(arguments) + """A list of tuples (argloc, argid), specifying the location + and identifier for each of the predicate's argument in the + containing sentence. Argument identifiers are strings such as + ``'ARG0'`` or ``'ARGM-TMP'``. 
This list does *not* contain + the predicate.""" + + self.parse_corpus = parse_corpus + """A corpus reader for the parse trees corresponding to the + instances in this propbank corpus.""" + + @property + def baseform(self): + """The baseform of the predicate.""" + return self.roleset.split('.')[0] + + @property + def sensenumber(self): + """The sense number of the predicate.""" + return self.roleset.split('.')[1] + + @property + def predid(self): + """Identifier of the predicate.""" + return 'rel' + + def __repr__(self): + return '' % ( + self.fileid, + self.sentnum, + self.wordnum, + ) + + def __str__(self): + s = '%s %s %s %s %s %s' % ( + self.fileid, + self.sentnum, + self.wordnum, + self.tagger, + self.roleset, + self.inflection, + ) + items = self.arguments + ((self.predicate, 'rel'),) + for (argloc, argid) in sorted(items): + s += ' %s-%s' % (argloc, argid) + return s + + def _get_tree(self): + if self.parse_corpus is None: + return None + if self.fileid not in self.parse_corpus.fileids(): + return None + return self.parse_corpus.parsed_sents(self.fileid)[self.sentnum] + + tree = property( + _get_tree, + doc=""" + The parse tree corresponding to this instance, or None if + the corresponding tree is not available.""", + ) + + @staticmethod + def parse(s, parse_fileid_xform=None, parse_corpus=None): + pieces = s.split() + if len(pieces) < 7: + raise ValueError('Badly formatted propbank line: %r' % s) + + # Divide the line into its basic pieces. + (fileid, sentnum, wordnum, tagger, roleset, inflection) = pieces[:6] + rel = [p for p in pieces[6:] if p.endswith('-rel')] + args = [p for p in pieces[6:] if not p.endswith('-rel')] + if len(rel) != 1: + raise ValueError('Badly formatted propbank line: %r' % s) + + # Apply the fileid selector, if any. + if parse_fileid_xform is not None: + fileid = parse_fileid_xform(fileid) + + # Convert sentence & word numbers to ints. + sentnum = int(sentnum) + wordnum = int(wordnum) + + # Parse the inflection + inflection = PropbankInflection.parse(inflection) + + # Parse the predicate location. + predicate = PropbankTreePointer.parse(rel[0][:-4]) + + # Parse the arguments. + arguments = [] + for arg in args: + argloc, argid = arg.split('-', 1) + arguments.append((PropbankTreePointer.parse(argloc), argid)) + + # Put it all together. + return PropbankInstance( + fileid, + sentnum, + wordnum, + tagger, + roleset, + inflection, + predicate, + arguments, + parse_corpus, + ) + + +class PropbankPointer(object): + """ + A pointer used by propbank to identify one or more constituents in + a parse tree. ``PropbankPointer`` is an abstract base class with + three concrete subclasses: + + - ``PropbankTreePointer`` is used to point to single constituents. + - ``PropbankSplitTreePointer`` is used to point to 'split' + constituents, which consist of a sequence of two or more + ``PropbankTreePointer`` pointers. + - ``PropbankChainTreePointer`` is used to point to entire trace + chains in a tree. It consists of a sequence of pieces, which + can be ``PropbankTreePointer`` or ``PropbankSplitTreePointer`` pointers. + """ + + def __init__(self): + if self.__class__ == PropbankPointer: + raise NotImplementedError() + + +@compat.python_2_unicode_compatible +class PropbankChainTreePointer(PropbankPointer): + def __init__(self, pieces): + self.pieces = pieces + """A list of the pieces that make up this chain. 
Elements may + be either ``PropbankSplitTreePointer`` or + ``PropbankTreePointer`` pointers.""" + + def __str__(self): + return '*'.join('%s' % p for p in self.pieces) + + def __repr__(self): + return '' % self + + def select(self, tree): + if tree is None: + raise ValueError('Parse tree not avaialable') + return Tree('*CHAIN*', [p.select(tree) for p in self.pieces]) + + +@compat.python_2_unicode_compatible +class PropbankSplitTreePointer(PropbankPointer): + def __init__(self, pieces): + self.pieces = pieces + """A list of the pieces that make up this chain. Elements are + all ``PropbankTreePointer`` pointers.""" + + def __str__(self): + return ','.join('%s' % p for p in self.pieces) + + def __repr__(self): + return '' % self + + def select(self, tree): + if tree is None: + raise ValueError('Parse tree not avaialable') + return Tree('*SPLIT*', [p.select(tree) for p in self.pieces]) + + +@total_ordering +@compat.python_2_unicode_compatible +class PropbankTreePointer(PropbankPointer): + """ + wordnum:height*wordnum:height*... + wordnum:height, + + """ + + def __init__(self, wordnum, height): + self.wordnum = wordnum + self.height = height + + @staticmethod + def parse(s): + # Deal with chains (xx*yy*zz) + pieces = s.split('*') + if len(pieces) > 1: + return PropbankChainTreePointer( + [PropbankTreePointer.parse(elt) for elt in pieces] + ) + + # Deal with split args (xx,yy,zz) + pieces = s.split(',') + if len(pieces) > 1: + return PropbankSplitTreePointer( + [PropbankTreePointer.parse(elt) for elt in pieces] + ) + + # Deal with normal pointers. + pieces = s.split(':') + if len(pieces) != 2: + raise ValueError('bad propbank pointer %r' % s) + return PropbankTreePointer(int(pieces[0]), int(pieces[1])) + + def __str__(self): + return '%s:%s' % (self.wordnum, self.height) + + def __repr__(self): + return 'PropbankTreePointer(%d, %d)' % (self.wordnum, self.height) + + def __eq__(self, other): + while isinstance(other, (PropbankChainTreePointer, PropbankSplitTreePointer)): + other = other.pieces[0] + + if not isinstance(other, PropbankTreePointer): + return self is other + + return self.wordnum == other.wordnum and self.height == other.height + + def __ne__(self, other): + return not self == other + + def __lt__(self, other): + while isinstance(other, (PropbankChainTreePointer, PropbankSplitTreePointer)): + other = other.pieces[0] + + if not isinstance(other, PropbankTreePointer): + return id(self) < id(other) + + return (self.wordnum, -self.height) < (other.wordnum, -other.height) + + def select(self, tree): + if tree is None: + raise ValueError('Parse tree not avaialable') + return tree[self.treepos(tree)] + + def treepos(self, tree): + """ + Convert this pointer to a standard 'tree position' pointer, + given that it points to the given tree. + """ + if tree is None: + raise ValueError('Parse tree not avaialable') + stack = [tree] + treepos = [] + + wordnum = 0 + while True: + # print treepos + # print stack[-1] + # tree node: + if isinstance(stack[-1], Tree): + # Select the next child. + if len(treepos) < len(stack): + treepos.append(0) + else: + treepos[-1] += 1 + # Update the stack. + if treepos[-1] < len(stack[-1]): + stack.append(stack[-1][treepos[-1]]) + else: + # End of node's child list: pop up a level. 
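+ # (stack and treepos are popped together here so that they stay in sync
+ # as the traversal backs out of an exhausted subtree.)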
+ stack.pop() + treepos.pop() + # word node: + else: + if wordnum == self.wordnum: + return tuple(treepos[: len(treepos) - self.height - 1]) + else: + wordnum += 1 + stack.pop() + + +@compat.python_2_unicode_compatible +class PropbankInflection(object): + # { Inflection Form + INFINITIVE = 'i' + GERUND = 'g' + PARTICIPLE = 'p' + FINITE = 'v' + # { Inflection Tense + FUTURE = 'f' + PAST = 'p' + PRESENT = 'n' + # { Inflection Aspect + PERFECT = 'p' + PROGRESSIVE = 'o' + PERFECT_AND_PROGRESSIVE = 'b' + # { Inflection Person + THIRD_PERSON = '3' + # { Inflection Voice + ACTIVE = 'a' + PASSIVE = 'p' + # { Inflection + NONE = '-' + # } + + def __init__(self, form='-', tense='-', aspect='-', person='-', voice='-'): + self.form = form + self.tense = tense + self.aspect = aspect + self.person = person + self.voice = voice + + def __str__(self): + return self.form + self.tense + self.aspect + self.person + self.voice + + def __repr__(self): + return '' % self + + _VALIDATE = re.compile(r'[igpv\-][fpn\-][pob\-][3\-][ap\-]$') + + @staticmethod + def parse(s): + if not isinstance(s, string_types): + raise TypeError('expected a string') + if len(s) != 5 or not PropbankInflection._VALIDATE.match(s): + raise ValueError('Bad propbank inflection string %r' % s) + return PropbankInflection(*s) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/pros_cons.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/pros_cons.py new file mode 100644 index 0000000..8117918 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/pros_cons.py @@ -0,0 +1,143 @@ +# Natural Language Toolkit: Pros and Cons Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Pierpaolo Pantone <24alsecondo@gmail.com> +# URL: +# For license information, see LICENSE.TXT + +""" +CorpusReader for the Pros and Cons dataset. + +- Pros and Cons dataset information - + +Contact: Bing Liu, liub@cs.uic.edu + http://www.cs.uic.edu/~liub + +Distributed with permission. + +Related papers: + +- Murthy Ganapathibhotla and Bing Liu. "Mining Opinions in Comparative Sentences". + Proceedings of the 22nd International Conference on Computational Linguistics + (Coling-2008), Manchester, 18-22 August, 2008. + +- Bing Liu, Minqing Hu and Junsheng Cheng. "Opinion Observer: Analyzing and Comparing + Opinions on the Web". Proceedings of the 14th international World Wide Web + conference (WWW-2005), May 10-14, 2005, in Chiba, Japan. +""" +import re + +from six import string_types + +from nltk.corpus.reader.api import * +from nltk.tokenize import * + + +class ProsConsCorpusReader(CategorizedCorpusReader, CorpusReader): + """ + Reader for the Pros and Cons sentence dataset. + + >>> from nltk.corpus import pros_cons + >>> pros_cons.sents(categories='Cons') + [['East', 'batteries', '!', 'On', '-', 'off', 'switch', 'too', 'easy', + 'to', 'maneuver', '.'], ['Eats', '...', 'no', ',', 'GULPS', 'batteries'], + ...] + >>> pros_cons.words('IntegratedPros.txt') + ['Easy', 'to', 'use', ',', 'economical', '!', ...] + """ + + CorpusView = StreamBackedCorpusView + + def __init__( + self, + root, + fileids, + word_tokenizer=WordPunctTokenizer(), + encoding='utf8', + **kwargs + ): + """ + :param root: The root directory for the corpus. + :param fileids: a list or regexp specifying the fileids in the corpus. + :param word_tokenizer: a tokenizer for breaking sentences or paragraphs + into words. Default: `WhitespaceTokenizer` + :param encoding: the encoding that should be used to read the corpus. 
+ :param kwargs: additional parameters passed to CategorizedCorpusReader. + """ + + CorpusReader.__init__(self, root, fileids, encoding) + CategorizedCorpusReader.__init__(self, kwargs) + self._word_tokenizer = word_tokenizer + + def sents(self, fileids=None, categories=None): + """ + Return all sentences in the corpus or in the specified files/categories. + + :param fileids: a list or regexp specifying the ids of the files whose + sentences have to be returned. + :param categories: a list specifying the categories whose sentences + have to be returned. + :return: the given file(s) as a list of sentences. Each sentence is + tokenized using the specified word_tokenizer. + :rtype: list(list(str)) + """ + fileids = self._resolve(fileids, categories) + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat( + [ + self.CorpusView(path, self._read_sent_block, encoding=enc) + for (path, enc, fileid) in self.abspaths(fileids, True, True) + ] + ) + + def words(self, fileids=None, categories=None): + """ + Return all words and punctuation symbols in the corpus or in the specified + files/categories. + + :param fileids: a list or regexp specifying the ids of the files whose + words have to be returned. + :param categories: a list specifying the categories whose words have + to be returned. + :return: the given file(s) as a list of words and punctuation symbols. + :rtype: list(str) + """ + fileids = self._resolve(fileids, categories) + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat( + [ + self.CorpusView(path, self._read_word_block, encoding=enc) + for (path, enc, fileid) in self.abspaths(fileids, True, True) + ] + ) + + def _read_sent_block(self, stream): + sents = [] + for i in range(20): # Read 20 lines at a time. + line = stream.readline() + if not line: + continue + sent = re.match(r"^(?!\n)\s*<(Pros|Cons)>(.*)", line) + if sent: + sents.append(self._word_tokenizer.tokenize(sent.group(2).strip())) + return sents + + def _read_word_block(self, stream): + words = [] + for sent in self._read_sent_block(stream): + words.extend(sent) + return words + + def _resolve(self, fileids, categories): + if fileids is not None and categories is not None: + raise ValueError('Specify fileids or categories, not both') + if categories is not None: + return self.fileids(categories) + else: + return fileids diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/reviews.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/reviews.py new file mode 100644 index 0000000..9a1f173 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/reviews.py @@ -0,0 +1,355 @@ +# Natural Language Toolkit: Product Reviews Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Pierpaolo Pantone <24alsecondo@gmail.com> +# URL: +# For license information, see LICENSE.TXT + +""" +CorpusReader for reviews corpora (syntax based on Customer Review Corpus). + +- Customer Review Corpus information - +Annotated by: Minqing Hu and Bing Liu, 2004. + Department of Computer Sicence + University of Illinois at Chicago + +Contact: Bing Liu, liub@cs.uic.edu + http://www.cs.uic.edu/~liub + +Distributed with permission. + +The "product_reviews_1" and "product_reviews_2" datasets respectively contain +annotated customer reviews of 5 and 9 products from amazon.com. + +Related papers: + +- Minqing Hu and Bing Liu. "Mining and summarizing customer reviews". 
+ Proceedings of the ACM SIGKDD International Conference on Knowledge + Discovery & Data Mining (KDD-04), 2004. + +- Minqing Hu and Bing Liu. "Mining Opinion Features in Customer Reviews". + Proceedings of Nineteeth National Conference on Artificial Intelligence + (AAAI-2004), 2004. + +- Xiaowen Ding, Bing Liu and Philip S. Yu. "A Holistic Lexicon-Based Appraoch to + Opinion Mining." Proceedings of First ACM International Conference on Web + Search and Data Mining (WSDM-2008), Feb 11-12, 2008, Stanford University, + Stanford, California, USA. + +Symbols used in the annotated reviews: + + [t] : the title of the review: Each [t] tag starts a review. + xxxx[+|-n]: xxxx is a product feature. + [+n]: Positive opinion, n is the opinion strength: 3 strongest, and 1 weakest. + Note that the strength is quite subjective. + You may want ignore it, but only considering + and - + [-n]: Negative opinion + ## : start of each sentence. Each line is a sentence. + [u] : feature not appeared in the sentence. + [p] : feature not appeared in the sentence. Pronoun resolution is needed. + [s] : suggestion or recommendation. + [cc]: comparison with a competing product from a different brand. + [cs]: comparison with a competing product from the same brand. + +Note: Some of the files (e.g. "ipod.txt", "Canon PowerShot SD500.txt") do not + provide separation between different reviews. This is due to the fact that + the dataset was specifically designed for aspect/feature-based sentiment + analysis, for which sentence-level annotation is sufficient. For document- + level classification and analysis, this peculiarity should be taken into + consideration. +""" + +from __future__ import division + +import re + +from six import string_types + +from nltk.corpus.reader.api import * +from nltk.tokenize import * + +TITLE = re.compile(r'^\[t\](.*)$') # [t] Title +FEATURES = re.compile( + r'((?:(?:\w+\s)+)?\w+)\[((?:\+|\-)\d)\]' +) # find 'feature' in feature[+3] +NOTES = re.compile(r'\[(?!t)(p|u|s|cc|cs)\]') # find 'p' in camera[+2][p] +SENT = re.compile(r'##(.*)$') # find tokenized sentence + + +@compat.python_2_unicode_compatible +class Review(object): + """ + A Review is the main block of a ReviewsCorpusReader. + """ + + def __init__(self, title=None, review_lines=None): + """ + :param title: the title of the review. + :param review_lines: the list of the ReviewLines that belong to the Review. + """ + self.title = title + if review_lines is None: + self.review_lines = [] + else: + self.review_lines = review_lines + + def add_line(self, review_line): + """ + Add a line (ReviewLine) to the review. + + :param review_line: a ReviewLine instance that belongs to the Review. + """ + assert isinstance(review_line, ReviewLine) + self.review_lines.append(review_line) + + def features(self): + """ + Return a list of features in the review. Each feature is a tuple made of + the specific item feature and the opinion strength about that feature. + + :return: all features of the review as a list of tuples (feat, score). + :rtype: list(tuple) + """ + features = [] + for review_line in self.review_lines: + features.extend(review_line.features) + return features + + def sents(self): + """ + Return all tokenized sentences in the review. + + :return: all sentences of the review as lists of tokens. 
+ :rtype: list(list(str)) + """ + return [review_line.sent for review_line in self.review_lines] + + def __repr__(self): + return 'Review(title=\"{}\", review_lines={})'.format( + self.title, self.review_lines + ) + + +@compat.python_2_unicode_compatible +class ReviewLine(object): + """ + A ReviewLine represents a sentence of the review, together with (optional) + annotations of its features and notes about the reviewed item. + """ + + def __init__(self, sent, features=None, notes=None): + self.sent = sent + if features is None: + self.features = [] + else: + self.features = features + + if notes is None: + self.notes = [] + else: + self.notes = notes + + def __repr__(self): + return 'ReviewLine(features={}, notes={}, sent={})'.format( + self.features, self.notes, self.sent + ) + + +class ReviewsCorpusReader(CorpusReader): + """ + Reader for the Customer Review Data dataset by Hu, Liu (2004). + Note: we are not applying any sentence tokenization at the moment, just word + tokenization. + + >>> from nltk.corpus import product_reviews_1 + >>> camera_reviews = product_reviews_1.reviews('Canon_G3.txt') + >>> review = camera_reviews[0] + >>> review.sents()[0] + ['i', 'recently', 'purchased', 'the', 'canon', 'powershot', 'g3', 'and', 'am', + 'extremely', 'satisfied', 'with', 'the', 'purchase', '.'] + >>> review.features() + [('canon powershot g3', '+3'), ('use', '+2'), ('picture', '+2'), + ('picture quality', '+1'), ('picture quality', '+1'), ('camera', '+2'), + ('use', '+2'), ('feature', '+1'), ('picture quality', '+3'), ('use', '+1'), + ('option', '+1')] + + We can also reach the same information directly from the stream: + + >>> product_reviews_1.features('Canon_G3.txt') + [('canon powershot g3', '+3'), ('use', '+2'), ...] + + We can compute stats for specific product features: + + >>> from __future__ import division + >>> n_reviews = len([(feat,score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture']) + >>> tot = sum([int(score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture']) + >>> # We use float for backward compatibility with division in Python2.7 + >>> mean = tot / n_reviews + >>> print(n_reviews, tot, mean) + 15 24 1.6 + """ + + CorpusView = StreamBackedCorpusView + + def __init__( + self, root, fileids, word_tokenizer=WordPunctTokenizer(), encoding='utf8' + ): + """ + :param root: The root directory for the corpus. + :param fileids: a list or regexp specifying the fileids in the corpus. + :param word_tokenizer: a tokenizer for breaking sentences or paragraphs + into words. Default: `WordPunctTokenizer` + :param encoding: the encoding that should be used to read the corpus. + """ + + CorpusReader.__init__(self, root, fileids, encoding) + self._word_tokenizer = word_tokenizer + + def features(self, fileids=None): + """ + Return a list of features. Each feature is a tuple made of the specific + item feature and the opinion strength about that feature. + + :param fileids: a list or regexp specifying the ids of the files whose + features have to be returned. + :return: all features for the item(s) in the given file(s). 
+ :rtype: list(tuple) + """ + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat( + [ + self.CorpusView(fileid, self._read_features, encoding=enc) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def raw(self, fileids=None): + """ + :param fileids: a list or regexp specifying the fileids of the files that + have to be returned as a raw string. + :return: the given file(s) as a single string. + :rtype: str + """ + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + def readme(self): + """ + Return the contents of the corpus README.txt file. + """ + return self.open("README.txt").read() + + def reviews(self, fileids=None): + """ + Return all the reviews as a list of Review objects. If `fileids` is + specified, return all the reviews from each of the specified files. + + :param fileids: a list or regexp specifying the ids of the files whose + reviews have to be returned. + :return: the given file(s) as a list of reviews. + """ + if fileids is None: + fileids = self._fileids + return concat( + [ + self.CorpusView(fileid, self._read_review_block, encoding=enc) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def sents(self, fileids=None): + """ + Return all sentences in the corpus or in the specified files. + + :param fileids: a list or regexp specifying the ids of the files whose + sentences have to be returned. + :return: the given file(s) as a list of sentences, each encoded as a + list of word strings. + :rtype: list(list(str)) + """ + return concat( + [ + self.CorpusView(path, self._read_sent_block, encoding=enc) + for (path, enc, fileid) in self.abspaths(fileids, True, True) + ] + ) + + def words(self, fileids=None): + """ + Return all words and punctuation symbols in the corpus or in the specified + files. + + :param fileids: a list or regexp specifying the ids of the files whose + words have to be returned. + :return: the given file(s) as a list of words and punctuation symbols. + :rtype: list(str) + """ + return concat( + [ + self.CorpusView(path, self._read_word_block, encoding=enc) + for (path, enc, fileid) in self.abspaths(fileids, True, True) + ] + ) + + def _read_features(self, stream): + features = [] + for i in range(20): + line = stream.readline() + if not line: + return features + features.extend(re.findall(FEATURES, line)) + return features + + def _read_review_block(self, stream): + while True: + line = stream.readline() + if not line: + return [] # end of file. + title_match = re.match(TITLE, line) + if title_match: + review = Review( + title=title_match.group(1).strip() + ) # We create a new review + break + + # Scan until we find another line matching the regexp, or EOF. + while True: + oldpos = stream.tell() + line = stream.readline() + # End of file: + if not line: + return [review] + # Start of a new review: backup to just before it starts, and + # return the review we've already collected. + if re.match(TITLE, line): + stream.seek(oldpos) + return [review] + # Anything else is part of the review line. 
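+ # (FEATURES captures the 'feature[+n]' opinion annotations, NOTES the
+ # [u]/[p]/[s]/[cc]/[cs] markers, and SENT the '##'-prefixed tokenized
+ # sentence described in the module docstring.)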
+ feats = re.findall(FEATURES, line) + notes = re.findall(NOTES, line) + sent = re.findall(SENT, line) + if sent: + sent = self._word_tokenizer.tokenize(sent[0]) + review_line = ReviewLine(sent=sent, features=feats, notes=notes) + review.add_line(review_line) + + def _read_sent_block(self, stream): + sents = [] + for review in self._read_review_block(stream): + sents.extend([sent for sent in review.sents()]) + return sents + + def _read_word_block(self, stream): + words = [] + for i in range(20): # Read 20 lines at a time. + line = stream.readline() + sent = re.findall(SENT, line) + if sent: + words.extend(self._word_tokenizer.tokenize(sent[0])) + return words diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/rte.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/rte.py new file mode 100644 index 0000000..0b0cd44 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/rte.py @@ -0,0 +1,151 @@ +# Natural Language Toolkit: RTE Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ewan Klein +# URL: +# For license information, see LICENSE.TXT + +""" +Corpus reader for the Recognizing Textual Entailment (RTE) Challenge Corpora. + +The files were taken from the RTE1, RTE2 and RTE3 datasets and the files +were regularized. + +Filenames are of the form rte*_dev.xml and rte*_test.xml. The latter are the +gold standard annotated files. + +Each entailment corpus is a list of 'text'/'hypothesis' pairs. The following +example is taken from RTE3:: + + + + The sale was made to pay Yukos' US$ 27.5 billion tax bill, + Yuganskneftegaz was originally sold for US$ 9.4 billion to a little known + company Baikalfinansgroup which was later bought by the Russian + state-owned oil company Rosneft . + + Baikalfinansgroup was sold to Rosneft. + + +In order to provide globally unique IDs for each pair, a new attribute +``challenge`` has been added to the root element ``entailment-corpus`` of each +file, taking values 1, 2 or 3. The GID is formatted 'm-n', where 'm' is the +challenge number and 'n' is the pair ID. +""" +from __future__ import unicode_literals + +from six import string_types + +from nltk import compat +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * +from nltk.corpus.reader.xmldocs import * + + +def norm(value_string): + """ + Normalize the string value in an RTE pair's ``value`` or ``entailment`` + attribute as an integer (1, 0). + + :param value_string: the label used to classify a text/hypothesis pair + :type value_string: str + :rtype: int + """ + + valdict = {"TRUE": 1, "FALSE": 0, "YES": 1, "NO": 0} + return valdict[value_string.upper()] + + +@compat.python_2_unicode_compatible +class RTEPair(object): + """ + Container for RTE text-hypothesis pairs. + + The entailment relation is signalled by the ``value`` attribute in RTE1, and by + ``entailment`` in RTE2 and RTE3. These both get mapped on to the ``entailment`` + attribute of this class. 
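+
+     A minimal access sketch (hedged: the fileid follows the rte*_dev.xml
+     naming described in the module docstring, and no outputs are asserted):
+
+         >>> from nltk.corpus import rte  # doctest: +SKIP
+         >>> pair = rte.pairs('rte3_dev.xml')[0]  # doctest: +SKIP
+         >>> (pair.gid, pair.value, pair.hyp)  # doctest: +SKIP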
+ """ + + def __init__( + self, + pair, + challenge=None, + id=None, + text=None, + hyp=None, + value=None, + task=None, + length=None, + ): + """ + :param challenge: version of the RTE challenge (i.e., RTE1, RTE2 or RTE3) + :param id: identifier for the pair + :param text: the text component of the pair + :param hyp: the hypothesis component of the pair + :param value: classification label for the pair + :param task: attribute for the particular NLP task that the data was drawn from + :param length: attribute for the length of the text of the pair + """ + self.challenge = challenge + self.id = pair.attrib["id"] + self.gid = "%s-%s" % (self.challenge, self.id) + self.text = pair[0].text + self.hyp = pair[1].text + + if "value" in pair.attrib: + self.value = norm(pair.attrib["value"]) + elif "entailment" in pair.attrib: + self.value = norm(pair.attrib["entailment"]) + else: + self.value = value + if "task" in pair.attrib: + self.task = pair.attrib["task"] + else: + self.task = task + if "length" in pair.attrib: + self.length = pair.attrib["length"] + else: + self.length = length + + def __repr__(self): + if self.challenge: + return '' % (self.challenge, self.id) + else: + return '' % self.id + + +class RTECorpusReader(XMLCorpusReader): + """ + Corpus reader for corpora in RTE challenges. + + This is just a wrapper around the XMLCorpusReader. See module docstring above for the expected + structure of input documents. + """ + + def _read_etree(self, doc): + """ + Map the XML input into an RTEPair. + + This uses the ``getiterator()`` method from the ElementTree package to + find all the ```` elements. + + :param doc: a parsed XML document + :rtype: list(RTEPair) + """ + try: + challenge = doc.attrib['challenge'] + except KeyError: + challenge = None + return [RTEPair(pair, challenge=challenge) for pair in doc.getiterator("pair")] + + def pairs(self, fileids): + """ + Build a list of RTEPairs from a RTE corpus. + + :param fileids: a list of RTE corpus fileids + :type: list + :rtype: list(RTEPair) + """ + if isinstance(fileids, string_types): + fileids = [fileids] + return concat([self._read_etree(self.xml(fileid)) for fileid in fileids]) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/semcor.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/semcor.py new file mode 100644 index 0000000..1b6f515 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/semcor.py @@ -0,0 +1,297 @@ +# Natural Language Toolkit: SemCor Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Nathan Schneider +# URL: +# For license information, see LICENSE.TXT + +""" +Corpus reader for the SemCor Corpus. +""" +from __future__ import absolute_import, unicode_literals + +__docformat__ = 'epytext en' + +from nltk.corpus.reader.api import * +from nltk.corpus.reader.xmldocs import XMLCorpusReader, XMLCorpusView +from nltk.tree import Tree + + +class SemcorCorpusReader(XMLCorpusReader): + """ + Corpus reader for the SemCor Corpus. + For access to the complete XML data structure, use the ``xml()`` + method. For access to simple word lists and tagged word lists, use + ``words()``, ``sents()``, ``tagged_words()``, and ``tagged_sents()``. + """ + + def __init__(self, root, fileids, wordnet, lazy=True): + XMLCorpusReader.__init__(self, root, fileids) + self._lazy = lazy + self._wordnet = wordnet + + def words(self, fileids=None): + """ + :return: the given file(s) as a list of words and punctuation symbols. 
+ :rtype: list(str) + """ + return self._items(fileids, 'word', False, False, False) + + def chunks(self, fileids=None): + """ + :return: the given file(s) as a list of chunks, + each of which is a list of words and punctuation symbols + that form a unit. + :rtype: list(list(str)) + """ + return self._items(fileids, 'chunk', False, False, False) + + def tagged_chunks(self, fileids=None, tag=('pos' or 'sem' or 'both')): + """ + :return: the given file(s) as a list of tagged chunks, represented + in tree form. + :rtype: list(Tree) + + :param tag: `'pos'` (part of speech), `'sem'` (semantic), or `'both'` + to indicate the kind of tags to include. Semantic tags consist of + WordNet lemma IDs, plus an `'NE'` node if the chunk is a named entity + without a specific entry in WordNet. (Named entities of type 'other' + have no lemma. Other chunks not in WordNet have no semantic tag. + Punctuation tokens have `None` for their part of speech tag.) + """ + return self._items(fileids, 'chunk', False, tag != 'sem', tag != 'pos') + + def sents(self, fileids=None): + """ + :return: the given file(s) as a list of sentences, each encoded + as a list of word strings. + :rtype: list(list(str)) + """ + return self._items(fileids, 'word', True, False, False) + + def chunk_sents(self, fileids=None): + """ + :return: the given file(s) as a list of sentences, each encoded + as a list of chunks. + :rtype: list(list(list(str))) + """ + return self._items(fileids, 'chunk', True, False, False) + + def tagged_sents(self, fileids=None, tag=('pos' or 'sem' or 'both')): + """ + :return: the given file(s) as a list of sentences. Each sentence + is represented as a list of tagged chunks (in tree form). + :rtype: list(list(Tree)) + + :param tag: `'pos'` (part of speech), `'sem'` (semantic), or `'both'` + to indicate the kind of tags to include. Semantic tags consist of + WordNet lemma IDs, plus an `'NE'` node if the chunk is a named entity + without a specific entry in WordNet. (Named entities of type 'other' + have no lemma. Other chunks not in WordNet have no semantic tag. + Punctuation tokens have `None` for their part of speech tag.) + """ + return self._items(fileids, 'chunk', True, tag != 'sem', tag != 'pos') + + def _items(self, fileids, unit, bracket_sent, pos_tag, sem_tag): + if unit == 'word' and not bracket_sent: + # the result of the SemcorWordView may be a multiword unit, so the + # LazyConcatenation will make sure the sentence is flattened + _ = lambda *args: LazyConcatenation( + (SemcorWordView if self._lazy else self._words)(*args) + ) + else: + _ = SemcorWordView if self._lazy else self._words + return concat( + [ + _(fileid, unit, bracket_sent, pos_tag, sem_tag, self._wordnet) + for fileid in self.abspaths(fileids) + ] + ) + + def _words(self, fileid, unit, bracket_sent, pos_tag, sem_tag): + """ + Helper used to implement the view methods -- returns a list of + tokens, (segmented) words, chunks, or sentences. The tokens + and chunks may optionally be tagged (with POS and sense + information). + + :param fileid: The name of the underlying file. + :param unit: One of `'token'`, `'word'`, or `'chunk'`. + :param bracket_sent: If true, include sentence bracketing. + :param pos_tag: Whether to include part-of-speech tags. + :param sem_tag: Whether to include semantic tags, namely WordNet lemma + and OOV named entity status. 
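+
+         A hedged sketch of the public views built on this helper (assumes
+         the standard ``semcor`` corpus is installed; no outputs asserted):
+
+             >>> from nltk.corpus import semcor  # doctest: +SKIP
+             >>> semcor.tagged_sents(tag='both')[0]  # doctest: +SKIP
+             >>> semcor.chunk_sents()[0]  # doctest: +SKIP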
+ """ + assert unit in ('token', 'word', 'chunk') + result = [] + + xmldoc = ElementTree.parse(fileid).getroot() + for xmlsent in xmldoc.findall('.//s'): + sent = [] + for xmlword in _all_xmlwords_in(xmlsent): + itm = SemcorCorpusReader._word( + xmlword, unit, pos_tag, sem_tag, self._wordnet + ) + if unit == 'word': + sent.extend(itm) + else: + sent.append(itm) + + if bracket_sent: + result.append(SemcorSentence(xmlsent.attrib['snum'], sent)) + else: + result.extend(sent) + + assert None not in result + return result + + @staticmethod + def _word(xmlword, unit, pos_tag, sem_tag, wordnet): + tkn = xmlword.text + if not tkn: + tkn = "" # fixes issue 337? + + lemma = xmlword.get('lemma', tkn) # lemma or NE class + lexsn = xmlword.get('lexsn') # lex_sense (locator for the lemma's sense) + if lexsn is not None: + sense_key = lemma + '%' + lexsn + wnpos = ('n', 'v', 'a', 'r', 's')[ + int(lexsn.split(':')[0]) - 1 + ] # see http://wordnet.princeton.edu/man/senseidx.5WN.html + else: + sense_key = wnpos = None + redef = xmlword.get( + 'rdf', tkn + ) # redefinition--this indicates the lookup string + # does not exactly match the enclosed string, e.g. due to typographical adjustments + # or discontinuity of a multiword expression. If a redefinition has occurred, + # the "rdf" attribute holds its inflected form and "lemma" holds its lemma. + # For NEs, "rdf", "lemma", and "pn" all hold the same value (the NE class). + sensenum = xmlword.get('wnsn') # WordNet sense number + isOOVEntity = 'pn' in xmlword.keys() # a "personal name" (NE) not in WordNet + pos = xmlword.get( + 'pos' + ) # part of speech for the whole chunk (None for punctuation) + + if unit == 'token': + if not pos_tag and not sem_tag: + itm = tkn + else: + itm = ( + (tkn,) + + ((pos,) if pos_tag else ()) + + ((lemma, wnpos, sensenum, isOOVEntity) if sem_tag else ()) + ) + return itm + else: + ww = tkn.split('_') # TODO: case where punctuation intervenes in MWE + if unit == 'word': + return ww + else: + if sensenum is not None: + try: + sense = wordnet.lemma_from_key(sense_key) # Lemma object + except Exception: + # cannot retrieve the wordnet.Lemma object. possible reasons: + # (a) the wordnet corpus is not downloaded; + # (b) a nonexistant sense is annotated: e.g., such.s.00 triggers: + # nltk.corpus.reader.wordnet.WordNetError: No synset found for key u'such%5:00:01:specified:00' + # solution: just use the lemma name as a string + try: + sense = '%s.%s.%02d' % ( + lemma, + wnpos, + int(sensenum), + ) # e.g.: reach.v.02 + except ValueError: + sense = ( + lemma + '.' + wnpos + '.' + sensenum + ) # e.g. the sense number may be "2;1" + + bottom = [Tree(pos, ww)] if pos_tag else ww + + if sem_tag and isOOVEntity: + if sensenum is not None: + return Tree(sense, [Tree('NE', bottom)]) + else: # 'other' NE + return Tree('NE', bottom) + elif sem_tag and sensenum is not None: + return Tree(sense, bottom) + elif pos_tag: + return bottom[0] + else: + return bottom # chunk as a list + + +def _all_xmlwords_in(elt, result=None): + if result is None: + result = [] + for child in elt: + if child.tag in ('wf', 'punc'): + result.append(child) + else: + _all_xmlwords_in(child, result) + return result + + +class SemcorSentence(list): + """ + A list of words, augmented by an attribute ``num`` used to record + the sentence identifier (the ``n`` attribute from the XML). 
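+
+     A minimal illustration (hypothetical values, not drawn from the corpus):
+
+         >>> s = SemcorSentence('1', ['The', 'dog', 'barked'])  # doctest: +SKIP
+         >>> s.num  # doctest: +SKIP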
+ """ + + def __init__(self, num, items): + self.num = num + list.__init__(self, items) + + +class SemcorWordView(XMLCorpusView): + """ + A stream backed corpus view specialized for use with the BNC corpus. + """ + + def __init__(self, fileid, unit, bracket_sent, pos_tag, sem_tag, wordnet): + """ + :param fileid: The name of the underlying file. + :param unit: One of `'token'`, `'word'`, or `'chunk'`. + :param bracket_sent: If true, include sentence bracketing. + :param pos_tag: Whether to include part-of-speech tags. + :param sem_tag: Whether to include semantic tags, namely WordNet lemma + and OOV named entity status. + """ + if bracket_sent: + tagspec = '.*/s' + else: + tagspec = '.*/s/(punc|wf)' + + self._unit = unit + self._sent = bracket_sent + self._pos_tag = pos_tag + self._sem_tag = sem_tag + self._wordnet = wordnet + + XMLCorpusView.__init__(self, fileid, tagspec) + + def handle_elt(self, elt, context): + if self._sent: + return self.handle_sent(elt) + else: + return self.handle_word(elt) + + def handle_word(self, elt): + return SemcorCorpusReader._word( + elt, self._unit, self._pos_tag, self._sem_tag, self._wordnet + ) + + def handle_sent(self, elt): + sent = [] + for child in elt: + if child.tag in ('wf', 'punc'): + itm = self.handle_word(child) + if self._unit == 'word': + sent.extend(itm) + else: + sent.append(itm) + else: + raise ValueError('Unexpected element %s' % child.tag) + return SemcorSentence(elt.attrib['snum'], sent) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/senseval.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/senseval.py new file mode 100644 index 0000000..66a5386 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/senseval.py @@ -0,0 +1,212 @@ +# Natural Language Toolkit: Senseval 2 Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Trevor Cohn +# Steven Bird (modifications) +# URL: +# For license information, see LICENSE.TXT + +""" +Read from the Senseval 2 Corpus. + +SENSEVAL [http://www.senseval.org/] +Evaluation exercises for Word Sense Disambiguation. +Organized by ACL-SIGLEX [http://www.siglex.org/] + +Prepared by Ted Pedersen , University of Minnesota, +http://www.d.umn.edu/~tpederse/data.html +Distributed with permission. + +The NLTK version of the Senseval 2 files uses well-formed XML. +Each instance of the ambiguous words "hard", "interest", "line", and "serve" +is tagged with a sense identifier, and supplied with context. +""" +from __future__ import print_function, unicode_literals + +import re +from xml.etree import ElementTree + +from six import string_types + +from nltk import compat +from nltk.tokenize import * + +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + + +@compat.python_2_unicode_compatible +class SensevalInstance(object): + def __init__(self, word, position, context, senses): + self.word = word + self.senses = tuple(senses) + self.position = position + self.context = context + + def __repr__(self): + return 'SensevalInstance(word=%r, position=%r, ' 'context=%r, senses=%r)' % ( + self.word, + self.position, + self.context, + self.senses, + ) + + +class SensevalCorpusReader(CorpusReader): + def instances(self, fileids=None): + return concat( + [ + SensevalCorpusView(fileid, enc) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def raw(self, fileids=None): + """ + :return: the text contents of the given fileids, as a single string. 
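+
+         A hedged example of the reader's typical use (assumes the standard
+         ``senseval`` corpus with fileids such as 'hard.pos'; no outputs
+         asserted):
+
+             >>> from nltk.corpus import senseval  # doctest: +SKIP
+             >>> inst = senseval.instances('hard.pos')[0]  # doctest: +SKIP
+             >>> (inst.word, inst.position, inst.senses)  # doctest: +SKIP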
+ """ + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + def _entry(self, tree): + elts = [] + for lexelt in tree.findall('lexelt'): + for inst in lexelt.findall('instance'): + sense = inst[0].attrib['senseid'] + context = [(w.text, w.attrib['pos']) for w in inst[1]] + elts.append((sense, context)) + return elts + + +class SensevalCorpusView(StreamBackedCorpusView): + def __init__(self, fileid, encoding): + StreamBackedCorpusView.__init__(self, fileid, encoding=encoding) + + self._word_tokenizer = WhitespaceTokenizer() + self._lexelt_starts = [0] # list of streampos + self._lexelts = [None] # list of lexelt names + + def read_block(self, stream): + # Decide which lexical element we're in. + lexelt_num = bisect.bisect_right(self._lexelt_starts, stream.tell()) - 1 + lexelt = self._lexelts[lexelt_num] + + instance_lines = [] + in_instance = False + while True: + line = stream.readline() + if line == '': + assert instance_lines == [] + return [] + + # Start of a lexical element? + if line.lstrip().startswith(' has no 'item=...' + lexelt = m.group(1)[1:-1] + if lexelt_num < len(self._lexelts): + assert lexelt == self._lexelts[lexelt_num] + else: + self._lexelts.append(lexelt) + self._lexelt_starts.append(stream.tell()) + + # Start of an instance? + if line.lstrip().startswith('' + elif cword.tag == 'wf': + context.append((cword.text, cword.attrib['pos'])) + elif cword.tag == 's': + pass # Sentence boundary marker. + + else: + print('ACK', cword.tag) + assert False, 'expected CDATA or or ' + if cword.tail: + context += self._word_tokenizer.tokenize(cword.tail) + else: + assert False, 'unexpected tag %s' % child.tag + return SensevalInstance(lexelt, position, context, senses) + + +def _fixXML(text): + """ + Fix the various issues with Senseval pseudo-XML. + """ + # <~> or <^> => ~ or ^ + text = re.sub(r'<([~\^])>', r'\1', text) + # fix lone & + text = re.sub(r'(\s+)\&(\s+)', r'\1&\2', text) + # fix """ + text = re.sub(r'"""', '\'"\'', text) + # fix => + text = re.sub(r'(<[^<]*snum=)([^">]+)>', r'\1"\2"/>', text) + # fix foreign word tag + text = re.sub(r'<\&frasl>\s*]*>', 'FRASL', text) + # remove <&I .> + text = re.sub(r'<\&I[^>]*>', '', text) + # fix <{word}> + text = re.sub(r'<{([^}]+)}>', r'\1', text) + # remove <@>,
<p>, </p>
    + text = re.sub(r'<(@|/?p)>', r'', text) + # remove <&M .> and <&T .> and <&Ms .> + text = re.sub(r'<&\w+ \.>', r'', text) + # remove lines + text = re.sub(r']*>', r'', text) + # remove <[hi]> and <[/p]> etc + text = re.sub(r'<\[\/?[^>]+\]*>', r'', text) + # take the thing out of the brackets: <…> + text = re.sub(r'<(\&\w+;)>', r'\1', text) + # and remove the & for those patterns that aren't regular XML + text = re.sub(r'&(?!amp|gt|lt|apos|quot)', r'', text) + # fix 'abc ' style tags - now abc + text = re.sub( + r'[ \t]*([^<>\s]+?)[ \t]*', r' \1', text + ) + text = re.sub(r'\s*"\s*', " \"", text) + return text diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/sentiwordnet.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/sentiwordnet.py new file mode 100644 index 0000000..bbe4fc9 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/sentiwordnet.py @@ -0,0 +1,139 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: SentiWordNet +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Christopher Potts +# URL: +# For license information, see LICENSE.TXT + +""" +An NLTK interface for SentiWordNet + +SentiWordNet is a lexical resource for opinion mining. +SentiWordNet assigns to each synset of WordNet three +sentiment scores: positivity, negativity, and objectivity. + +For details about SentiWordNet see: +http://sentiwordnet.isti.cnr.it/ + + >>> from nltk.corpus import sentiwordnet as swn + >>> print(swn.senti_synset('breakdown.n.03')) + + >>> list(swn.senti_synsets('slow')) + [SentiSynset('decelerate.v.01'), SentiSynset('slow.v.02'), + SentiSynset('slow.v.03'), SentiSynset('slow.a.01'), + SentiSynset('slow.a.02'), SentiSynset('dense.s.04'), + SentiSynset('slow.a.04'), SentiSynset('boring.s.01'), + SentiSynset('dull.s.08'), SentiSynset('slowly.r.01'), + SentiSynset('behind.r.03')] + >>> happy = swn.senti_synsets('happy', 'a') + >>> happy0 = list(happy)[0] + >>> happy0.pos_score() + 0.875 + >>> happy0.neg_score() + 0.0 + >>> happy0.obj_score() + 0.125 +""" + +import re +from nltk.compat import python_2_unicode_compatible +from nltk.corpus.reader import CorpusReader + + +@python_2_unicode_compatible +class SentiWordNetCorpusReader(CorpusReader): + def __init__(self, root, fileids, encoding='utf-8'): + """ + Construct a new SentiWordNet Corpus Reader, using data from + the specified file. 
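+
+         The file is expected to follow the layout that ``_parse_src_file``
+         below assumes: '#'-prefixed comment lines, then tab-separated records
+         of POS, offset, PosScore, NegScore, SynsetTerms and Gloss. A hedged
+         construction sketch (``root`` and the filename are placeholders, not
+         taken from this module):
+
+             >>> reader = SentiWordNetCorpusReader(root, ['SentiWordNet_3.0.0.txt'])  # doctest: +SKIP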
+ """ + super(SentiWordNetCorpusReader, self).__init__(root, fileids, encoding=encoding) + if len(self._fileids) != 1: + raise ValueError('Exactly one file must be specified') + self._db = {} + self._parse_src_file() + + def _parse_src_file(self): + lines = self.open(self._fileids[0]).read().splitlines() + lines = filter((lambda x: not re.search(r"^\s*#", x)), lines) + for i, line in enumerate(lines): + fields = [field.strip() for field in re.split(r"\t+", line)] + try: + pos, offset, pos_score, neg_score, synset_terms, gloss = fields + except: + raise ValueError('Line %s formatted incorrectly: %s\n' % (i, line)) + if pos and offset: + offset = int(offset) + self._db[(pos, offset)] = (float(pos_score), float(neg_score)) + + def senti_synset(self, *vals): + from nltk.corpus import wordnet as wn + + if tuple(vals) in self._db: + pos_score, neg_score = self._db[tuple(vals)] + pos, offset = vals + if pos == 's': + pos = 'a' + synset = wn.synset_from_pos_and_offset(pos, offset) + return SentiSynset(pos_score, neg_score, synset) + else: + synset = wn.synset(vals[0]) + pos = synset.pos() + if pos == 's': + pos = 'a' + offset = synset.offset() + if (pos, offset) in self._db: + pos_score, neg_score = self._db[(pos, offset)] + return SentiSynset(pos_score, neg_score, synset) + else: + return None + + def senti_synsets(self, string, pos=None): + from nltk.corpus import wordnet as wn + + sentis = [] + synset_list = wn.synsets(string, pos) + for synset in synset_list: + sentis.append(self.senti_synset(synset.name())) + sentis = filter(lambda x: x, sentis) + return sentis + + def all_senti_synsets(self): + from nltk.corpus import wordnet as wn + + for key, fields in self._db.items(): + pos, offset = key + pos_score, neg_score = fields + synset = wn.synset_from_pos_and_offset(pos, offset) + yield SentiSynset(pos_score, neg_score, synset) + + +@python_2_unicode_compatible +class SentiSynset(object): + def __init__(self, pos_score, neg_score, synset): + self._pos_score = pos_score + self._neg_score = neg_score + self._obj_score = 1.0 - (self._pos_score + self._neg_score) + self.synset = synset + + def pos_score(self): + return self._pos_score + + def neg_score(self): + return self._neg_score + + def obj_score(self): + return self._obj_score + + def __str__(self): + """Prints just the Pos/Neg scores for now.""" + s = "<" + s += self.synset.name() + ": " + s += "PosScore=%s " % self._pos_score + s += "NegScore=%s" % self._neg_score + s += ">" + return s + + def __repr__(self): + return "Senti" + repr(self.synset) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/sinica_treebank.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/sinica_treebank.py new file mode 100644 index 0000000..fbbc92d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/sinica_treebank.py @@ -0,0 +1,76 @@ +# Natural Language Toolkit: Sinica Treebank Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# URL: +# For license information, see LICENSE.TXT + +""" +Sinica Treebank Corpus Sample + +http://rocling.iis.sinica.edu.tw/CKIP/engversion/treebank.htm + +10,000 parsed sentences, drawn from the Academia Sinica Balanced +Corpus of Modern Chinese. Parse tree notation is based on +Information-based Case Grammar. 
Tagset documentation is available +at http://www.sinica.edu.tw/SinicaCorpus/modern_e_wordtype.html + +Language and Knowledge Processing Group, Institute of Information +Science, Academia Sinica + +The data is distributed with the Natural Language Toolkit under the terms of +the Creative Commons Attribution-NonCommercial-ShareAlike License +[http://creativecommons.org/licenses/by-nc-sa/2.5/]. + +References: + +Feng-Yi Chen, Pi-Fang Tsai, Keh-Jiann Chen, and Chu-Ren Huang (1999) +The Construction of Sinica Treebank. Computational Linguistics and +Chinese Language Processing, 4, pp 87-104. + +Huang Chu-Ren, Keh-Jiann Chen, Feng-Yi Chen, Keh-Jiann Chen, Zhao-Ming +Gao, and Kuang-Yu Chen. 2000. Sinica Treebank: Design Criteria, +Annotation Guidelines, and On-line Interface. Proceedings of 2nd +Chinese Language Processing Workshop, Association for Computational +Linguistics. + +Chen Keh-Jiann and Yu-Ming Hsieh (2004) Chinese Treebanks and Grammar +Extraction, Proceedings of IJCNLP-04, pp560-565. +""" + +from nltk.tree import sinica_parse +from nltk.tag import map_tag + +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + +IDENTIFIER = re.compile(r'^#\S+\s') +APPENDIX = re.compile(r'(?<=\))#.*$') +TAGWORD = re.compile(r':([^:()|]+):([^:()|]+)') +WORD = re.compile(r':[^:()|]+:([^:()|]+)') + + +class SinicaTreebankCorpusReader(SyntaxCorpusReader): + """ + Reader for the sinica treebank. + """ + + def _read_block(self, stream): + sent = stream.readline() + sent = IDENTIFIER.sub('', sent) + sent = APPENDIX.sub('', sent) + return [sent] + + def _parse(self, sent): + return sinica_parse(sent) + + def _tag(self, sent, tagset=None): + tagged_sent = [(w, t) for (t, w) in TAGWORD.findall(sent)] + if tagset and tagset != self._tagset: + tagged_sent = [ + (w, map_tag(self._tagset, tagset, t)) for (w, t) in tagged_sent + ] + return tagged_sent + + def _word(self, sent): + return WORD.findall(sent) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/string_category.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/string_category.py new file mode 100644 index 0000000..eaf5bf4 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/string_category.py @@ -0,0 +1,67 @@ +# Natural Language Toolkit: String Category Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +Read tuples from a corpus consisting of categorized strings. +For example, from the question classification corpus: + +NUM:dist How far is it from Denver to Aspen ? +LOC:city What county is Modesto , California in ? +HUM:desc Who was Galileo ? +DESC:def What is an atom ? +NUM:date When did Hawaii become a state ? +""" + +# based on PPAttachmentCorpusReader +from six import string_types + +from nltk import compat +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + +# [xx] Should the order of the tuple be reversed -- in most other places +# in nltk, we use the form (data, tag) -- e.g., tagged words and +# labeled texts for classifiers. +class StringCategoryCorpusReader(CorpusReader): + def __init__(self, root, fileids, delimiter=' ', encoding='utf8'): + """ + :param root: The root directory for this corpus. + :param fileids: A list or regexp specifying the fileids in this corpus. 
+ :param delimiter: Field delimiter + """ + CorpusReader.__init__(self, root, fileids, encoding) + self._delimiter = delimiter + + def tuples(self, fileids=None): + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat( + [ + StreamBackedCorpusView(fileid, self._read_tuple_block, encoding=enc) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def raw(self, fileids=None): + """ + :return: the text contents of the given fileids, as a single string. + """ + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + def _read_tuple_block(self, stream): + line = stream.readline().strip() + if line: + return [tuple(line.split(self._delimiter, 1))] + else: + return [] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/switchboard.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/switchboard.py new file mode 100644 index 0000000..ed65c42 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/switchboard.py @@ -0,0 +1,129 @@ +# Natural Language Toolkit: Switchboard Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT +from __future__ import unicode_literals +import re + +from nltk.tag import str2tuple, map_tag +from nltk import compat + +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + + +@compat.python_2_unicode_compatible +class SwitchboardTurn(list): + """ + A specialized list object used to encode switchboard utterances. + The elements of the list are the words in the utterance; and two + attributes, ``speaker`` and ``id``, are provided to retrieve the + spearker identifier and utterance id. Note that utterance ids + are only unique within a given discourse. + """ + + def __init__(self, words, speaker, id): + list.__init__(self, words) + self.speaker = speaker + self.id = int(id) + + def __repr__(self): + if len(self) == 0: + text = '' + elif isinstance(self[0], tuple): + text = ' '.join('%s/%s' % w for w in self) + else: + text = ' '.join(self) + return '<%s.%s: %r>' % (self.speaker, self.id, text) + + +class SwitchboardCorpusReader(CorpusReader): + _FILES = ['tagged'] + # Use the "tagged" file even for non-tagged data methods, since + # it's tokenized. 
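+
+     # A hedged usage sketch (assumes the standard ``switchboard`` corpus;
+     # no outputs asserted):
+     #
+     #     >>> from nltk.corpus import switchboard  # doctest: +SKIP
+     #     >>> switchboard.turns()[0].speaker  # doctest: +SKIP
+     #     >>> switchboard.tagged_words()[:5]  # doctest: +SKIP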
+ + def __init__(self, root, tagset=None): + CorpusReader.__init__(self, root, self._FILES) + self._tagset = tagset + + def words(self): + return StreamBackedCorpusView(self.abspath('tagged'), self._words_block_reader) + + def tagged_words(self, tagset=None): + def tagged_words_block_reader(stream): + return self._tagged_words_block_reader(stream, tagset) + + return StreamBackedCorpusView(self.abspath('tagged'), tagged_words_block_reader) + + def turns(self): + return StreamBackedCorpusView(self.abspath('tagged'), self._turns_block_reader) + + def tagged_turns(self, tagset=None): + def tagged_turns_block_reader(stream): + return self._tagged_turns_block_reader(stream, tagset) + + return StreamBackedCorpusView(self.abspath('tagged'), tagged_turns_block_reader) + + def discourses(self): + return StreamBackedCorpusView( + self.abspath('tagged'), self._discourses_block_reader + ) + + def tagged_discourses(self, tagset=False): + def tagged_discourses_block_reader(stream): + return self._tagged_discourses_block_reader(stream, tagset) + + return StreamBackedCorpusView( + self.abspath('tagged'), tagged_discourses_block_reader + ) + + def _discourses_block_reader(self, stream): + # returns at most 1 discourse. (The other methods depend on this.) + return [ + [ + self._parse_utterance(u, include_tag=False) + for b in read_blankline_block(stream) + for u in b.split('\n') + if u.strip() + ] + ] + + def _tagged_discourses_block_reader(self, stream, tagset=None): + # returns at most 1 discourse. (The other methods depend on this.) + return [ + [ + self._parse_utterance(u, include_tag=True, tagset=tagset) + for b in read_blankline_block(stream) + for u in b.split('\n') + if u.strip() + ] + ] + + def _turns_block_reader(self, stream): + return self._discourses_block_reader(stream)[0] + + def _tagged_turns_block_reader(self, stream, tagset=None): + return self._tagged_discourses_block_reader(stream, tagset)[0] + + def _words_block_reader(self, stream): + return sum(self._discourses_block_reader(stream)[0], []) + + def _tagged_words_block_reader(self, stream, tagset=None): + return sum(self._tagged_discourses_block_reader(stream, tagset)[0], []) + + _UTTERANCE_RE = re.compile('(\w+)\.(\d+)\:\s*(.*)') + _SEP = '/' + + def _parse_utterance(self, utterance, include_tag, tagset=None): + m = self._UTTERANCE_RE.match(utterance) + if m is None: + raise ValueError('Bad utterance %r' % utterance) + speaker, id, text = m.groups() + words = [str2tuple(s, self._SEP) for s in text.split()] + if not include_tag: + words = [w for (w, t) in words] + elif tagset and tagset != self._tagset: + words = [(w, map_tag(self._tagset, tagset, t)) for (w, t) in words] + return SwitchboardTurn(words, speaker, id) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/tagged.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/tagged.py new file mode 100644 index 0000000..3af1653 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/tagged.py @@ -0,0 +1,394 @@ +# Natural Language Toolkit: Tagged Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird +# Jacob Perkins +# URL: +# For license information, see LICENSE.TXT + +""" +A reader for corpora whose documents contain part-of-speech-tagged words. 
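+
+ A hedged usage sketch (the path and filename pattern are placeholders;
+ no outputs asserted):
+
+     >>> from nltk.corpus.reader import TaggedCorpusReader  # doctest: +SKIP
+     >>> reader = TaggedCorpusReader('/path/to/corpus', r'.*\.txt')  # doctest: +SKIP
+     >>> reader.tagged_sents()[0]  # doctest: +SKIP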
+""" + +import os + +from six import string_types + +from nltk.tag import str2tuple, map_tag +from nltk.tokenize import * + +from nltk.corpus.reader.api import * +from nltk.corpus.reader.util import * +from nltk.corpus.reader.timit import read_timit_block + + +class TaggedCorpusReader(CorpusReader): + """ + Reader for simple part-of-speech tagged corpora. Paragraphs are + assumed to be split using blank lines. Sentences and words can be + tokenized using the default tokenizers, or by custom tokenizers + specified as parameters to the constructor. Words are parsed + using ``nltk.tag.str2tuple``. By default, ``'/'`` is used as the + separator. I.e., words should have the form:: + + word1/tag1 word2/tag2 word3/tag3 ... + + But custom separators may be specified as parameters to the + constructor. Part of speech tags are case-normalized to upper + case. + """ + + def __init__( + self, + root, + fileids, + sep='/', + word_tokenizer=WhitespaceTokenizer(), + sent_tokenizer=RegexpTokenizer('\n', gaps=True), + para_block_reader=read_blankline_block, + encoding='utf8', + tagset=None, + ): + """ + Construct a new Tagged Corpus reader for a set of documents + located at the given root directory. Example usage: + + >>> root = '/...path to corpus.../' + >>> reader = TaggedCorpusReader(root, '.*', '.txt') # doctest: +SKIP + + :param root: The root directory for this corpus. + :param fileids: A list or regexp specifying the fileids in this corpus. + """ + CorpusReader.__init__(self, root, fileids, encoding) + self._sep = sep + self._word_tokenizer = word_tokenizer + self._sent_tokenizer = sent_tokenizer + self._para_block_reader = para_block_reader + self._tagset = tagset + + def raw(self, fileids=None): + """ + :return: the given file(s) as a single string. + :rtype: str + """ + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + def words(self, fileids=None): + """ + :return: the given file(s) as a list of words + and punctuation symbols. + :rtype: list(str) + """ + return concat( + [ + TaggedCorpusView( + fileid, + enc, + False, + False, + False, + self._sep, + self._word_tokenizer, + self._sent_tokenizer, + self._para_block_reader, + None, + ) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def sents(self, fileids=None): + """ + :return: the given file(s) as a list of + sentences or utterances, each encoded as a list of word + strings. + :rtype: list(list(str)) + """ + return concat( + [ + TaggedCorpusView( + fileid, + enc, + False, + True, + False, + self._sep, + self._word_tokenizer, + self._sent_tokenizer, + self._para_block_reader, + None, + ) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def paras(self, fileids=None): + """ + :return: the given file(s) as a list of + paragraphs, each encoded as a list of sentences, which are + in turn encoded as lists of word strings. + :rtype: list(list(list(str))) + """ + return concat( + [ + TaggedCorpusView( + fileid, + enc, + False, + True, + True, + self._sep, + self._word_tokenizer, + self._sent_tokenizer, + self._para_block_reader, + None, + ) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def tagged_words(self, fileids=None, tagset=None): + """ + :return: the given file(s) as a list of tagged + words and punctuation symbols, encoded as tuples + ``(word,tag)``. 
+ :rtype: list(tuple(str,str)) + """ + if tagset and tagset != self._tagset: + tag_mapping_function = lambda t: map_tag(self._tagset, tagset, t) + else: + tag_mapping_function = None + return concat( + [ + TaggedCorpusView( + fileid, + enc, + True, + False, + False, + self._sep, + self._word_tokenizer, + self._sent_tokenizer, + self._para_block_reader, + tag_mapping_function, + ) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def tagged_sents(self, fileids=None, tagset=None): + """ + :return: the given file(s) as a list of + sentences, each encoded as a list of ``(word,tag)`` tuples. + + :rtype: list(list(tuple(str,str))) + """ + if tagset and tagset != self._tagset: + tag_mapping_function = lambda t: map_tag(self._tagset, tagset, t) + else: + tag_mapping_function = None + return concat( + [ + TaggedCorpusView( + fileid, + enc, + True, + True, + False, + self._sep, + self._word_tokenizer, + self._sent_tokenizer, + self._para_block_reader, + tag_mapping_function, + ) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + def tagged_paras(self, fileids=None, tagset=None): + """ + :return: the given file(s) as a list of + paragraphs, each encoded as a list of sentences, which are + in turn encoded as lists of ``(word,tag)`` tuples. + :rtype: list(list(list(tuple(str,str)))) + """ + if tagset and tagset != self._tagset: + tag_mapping_function = lambda t: map_tag(self._tagset, tagset, t) + else: + tag_mapping_function = None + return concat( + [ + TaggedCorpusView( + fileid, + enc, + True, + True, + True, + self._sep, + self._word_tokenizer, + self._sent_tokenizer, + self._para_block_reader, + tag_mapping_function, + ) + for (fileid, enc) in self.abspaths(fileids, True) + ] + ) + + +class CategorizedTaggedCorpusReader(CategorizedCorpusReader, TaggedCorpusReader): + """ + A reader for part-of-speech tagged corpora whose documents are + divided into categories based on their file identifiers. + """ + + def __init__(self, *args, **kwargs): + """ + Initialize the corpus reader. Categorization arguments + (``cat_pattern``, ``cat_map``, and ``cat_file``) are passed to + the ``CategorizedCorpusReader`` constructor. The remaining arguments + are passed to the ``TaggedCorpusReader``. 
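        A minimal sketch of the usual construction (the root path, fileid
        pattern, category pattern, and the 'news' category below are
        hypothetical, shown only to illustrate how ``cat_pattern`` derives a
        category from each fileid)::

            >>> reader = CategorizedTaggedCorpusReader(
            ...     '/path/to/corpus', r'.*\.pos',
            ...     cat_pattern=r'(\w+)/.*')            # doctest: +SKIP
            >>> reader.categories()                     # doctest: +SKIP
            >>> reader.tagged_words(categories='news')  # doctest: +SKIP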
+ """ + CategorizedCorpusReader.__init__(self, kwargs) + TaggedCorpusReader.__init__(self, *args, **kwargs) + + def _resolve(self, fileids, categories): + if fileids is not None and categories is not None: + raise ValueError('Specify fileids or categories, not both') + if categories is not None: + return self.fileids(categories) + else: + return fileids + + def raw(self, fileids=None, categories=None): + return TaggedCorpusReader.raw(self, self._resolve(fileids, categories)) + + def words(self, fileids=None, categories=None): + return TaggedCorpusReader.words(self, self._resolve(fileids, categories)) + + def sents(self, fileids=None, categories=None): + return TaggedCorpusReader.sents(self, self._resolve(fileids, categories)) + + def paras(self, fileids=None, categories=None): + return TaggedCorpusReader.paras(self, self._resolve(fileids, categories)) + + def tagged_words(self, fileids=None, categories=None, tagset=None): + return TaggedCorpusReader.tagged_words( + self, self._resolve(fileids, categories), tagset + ) + + def tagged_sents(self, fileids=None, categories=None, tagset=None): + return TaggedCorpusReader.tagged_sents( + self, self._resolve(fileids, categories), tagset + ) + + def tagged_paras(self, fileids=None, categories=None, tagset=None): + return TaggedCorpusReader.tagged_paras( + self, self._resolve(fileids, categories), tagset + ) + + +class TaggedCorpusView(StreamBackedCorpusView): + """ + A specialized corpus view for tagged documents. It can be + customized via flags to divide the tagged corpus documents up by + sentence or paragraph, and to include or omit part of speech tags. + ``TaggedCorpusView`` objects are typically created by + ``TaggedCorpusReader`` (not directly by nltk users). + """ + + def __init__( + self, + corpus_file, + encoding, + tagged, + group_by_sent, + group_by_para, + sep, + word_tokenizer, + sent_tokenizer, + para_block_reader, + tag_mapping_function=None, + ): + self._tagged = tagged + self._group_by_sent = group_by_sent + self._group_by_para = group_by_para + self._sep = sep + self._word_tokenizer = word_tokenizer + self._sent_tokenizer = sent_tokenizer + self._para_block_reader = para_block_reader + self._tag_mapping_function = tag_mapping_function + StreamBackedCorpusView.__init__(self, corpus_file, encoding=encoding) + + def read_block(self, stream): + """Reads one paragraph at a time.""" + block = [] + for para_str in self._para_block_reader(stream): + para = [] + for sent_str in self._sent_tokenizer.tokenize(para_str): + sent = [ + str2tuple(s, self._sep) + for s in self._word_tokenizer.tokenize(sent_str) + ] + if self._tag_mapping_function: + sent = [(w, self._tag_mapping_function(t)) for (w, t) in sent] + if not self._tagged: + sent = [w for (w, t) in sent] + if self._group_by_sent: + para.append(sent) + else: + para.extend(sent) + if self._group_by_para: + block.append(para) + else: + block.extend(para) + return block + + +# needs to implement simplified tags +class MacMorphoCorpusReader(TaggedCorpusReader): + """ + A corpus reader for the MAC_MORPHO corpus. Each line contains a + single tagged word, using '_' as a separator. Sentence boundaries + are based on the end-sentence tag ('_.'). Paragraph information + is not included in the corpus, so each paragraph returned by + ``self.paras()`` and ``self.tagged_paras()`` contains a single + sentence. 
+ """ + + def __init__(self, root, fileids, encoding='utf8', tagset=None): + TaggedCorpusReader.__init__( + self, + root, + fileids, + sep='_', + word_tokenizer=LineTokenizer(), + sent_tokenizer=RegexpTokenizer('.*\n'), + para_block_reader=self._read_block, + encoding=encoding, + tagset=tagset, + ) + + def _read_block(self, stream): + return read_regexp_block(stream, r'.*', r'.*_\.') + + +class TimitTaggedCorpusReader(TaggedCorpusReader): + """ + A corpus reader for tagged sentences that are included in the TIMIT corpus. + """ + + def __init__(self, *args, **kwargs): + TaggedCorpusReader.__init__( + self, para_block_reader=read_timit_block, *args, **kwargs + ) + + def paras(self): + raise NotImplementedError('use sents() instead') + + def tagged_paras(self): + raise NotImplementedError('use tagged_sents() instead') diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/timit.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/timit.py new file mode 100644 index 0000000..bbd57c6 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/timit.py @@ -0,0 +1,499 @@ +# Natural Language Toolkit: TIMIT Corpus Reader +# +# Copyright (C) 2001-2007 NLTK Project +# Author: Haejoong Lee +# Steven Bird +# Jacob Perkins +# URL: +# For license information, see LICENSE.TXT + +# [xx] this docstring is out-of-date: +""" +Read tokens, phonemes and audio data from the NLTK TIMIT Corpus. + +This corpus contains selected portion of the TIMIT corpus. + + - 16 speakers from 8 dialect regions + - 1 male and 1 female from each dialect region + - total 130 sentences (10 sentences per speaker. Note that some + sentences are shared among other speakers, especially sa1 and sa2 + are spoken by all speakers.) + - total 160 recording of sentences (10 recordings per speaker) + - audio format: NIST Sphere, single channel, 16kHz sampling, + 16 bit sample, PCM encoding + + +Module contents +=============== + +The timit corpus reader provides 4 functions and 4 data items. + + - utterances + + List of utterances in the corpus. There are total 160 utterances, + each of which corresponds to a unique utterance of a speaker. + Here's an example of an utterance identifier in the list:: + + dr1-fvmh0/sx206 + - _---- _--- + | | | | | + | | | | | + | | | | `--- sentence number + | | | `----- sentence type (a:all, i:shared, x:exclusive) + | | `--------- speaker ID + | `------------ sex (m:male, f:female) + `-------------- dialect region (1..8) + + - speakers + + List of speaker IDs. An example of speaker ID:: + + dr1-fvmh0 + + Note that if you split an item ID with colon and take the first element of + the result, you will get a speaker ID. + + >>> itemid = 'dr1-fvmh0/sx206' + >>> spkrid , sentid = itemid.split('/') + >>> spkrid + 'dr1-fvmh0' + + The second element of the result is a sentence ID. + + - dictionary() + + Phonetic dictionary of words contained in this corpus. This is a Python + dictionary from words to phoneme lists. + + - spkrinfo() + + Speaker information table. It's a Python dictionary from speaker IDs to + records of 10 fields. Speaker IDs the same as the ones in timie.speakers. 
+ Each record is a dictionary from field names to values, and the fields are + as follows:: + + id speaker ID as defined in the original TIMIT speaker info table + sex speaker gender (M:male, F:female) + dr speaker dialect region (1:new england, 2:northern, + 3:north midland, 4:south midland, 5:southern, 6:new york city, + 7:western, 8:army brat (moved around)) + use corpus type (TRN:training, TST:test) + in this sample corpus only TRN is available + recdate recording date + birthdate speaker birth date + ht speaker height + race speaker race (WHT:white, BLK:black, AMR:american indian, + SPN:spanish-american, ORN:oriental,???:unknown) + edu speaker education level (HS:high school, AS:associate degree, + BS:bachelor's degree (BS or BA), MS:master's degree (MS or MA), + PHD:doctorate degree (PhD,JD,MD), ??:unknown) + comments comments by the recorder + +The 4 functions are as follows. + + - tokenized(sentences=items, offset=False) + + Given a list of items, returns an iterator of a list of word lists, + each of which corresponds to an item (sentence). If offset is set to True, + each element of the word list is a tuple of word(string), start offset and + end offset, where offset is represented as a number of 16kHz samples. + + - phonetic(sentences=items, offset=False) + + Given a list of items, returns an iterator of a list of phoneme lists, + each of which corresponds to an item (sentence). If offset is set to True, + each element of the phoneme list is a tuple of word(string), start offset + and end offset, where offset is represented as a number of 16kHz samples. + + - audiodata(item, start=0, end=None) + + Given an item, returns a chunk of audio samples formatted into a string. + When the fuction is called, if start and end are omitted, the entire + samples of the recording will be returned. If only end is omitted, + samples from the start offset to the end of the recording will be returned. + + - play(data) + + Play the given audio samples. The audio samples can be obtained from the + timit.audiodata function. + +""" +from __future__ import print_function, unicode_literals + +import sys +import os +import re +import tempfile +import time + +from six import string_types + +from nltk import compat +from nltk.tree import Tree +from nltk.internals import import_from_stdlib + +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + + +class TimitCorpusReader(CorpusReader): + """ + Reader for the TIMIT corpus (or any other corpus with the same + file layout and use of file formats). The corpus root directory + should contain the following files: + + - timitdic.txt: dictionary of standard transcriptions + - spkrinfo.txt: table of speaker information + + In addition, the root directory should contain one subdirectory + for each speaker, containing three files for each utterance: + + - .txt: text content of utterances + - .wrd: tokenized text content of utterances + - .phn: phonetic transcription of utterances + - .wav: utterance sound file + """ + + _FILE_RE = r'(\w+-\w+/\w+\.(phn|txt|wav|wrd))|' + r'timitdic\.txt|spkrinfo\.txt' + """A regexp matching fileids that are used by this corpus reader.""" + _UTTERANCE_RE = r'\w+-\w+/\w+\.txt' + + def __init__(self, root, encoding='utf8'): + """ + Construct a new TIMIT corpus reader in the given directory. + :param root: The root directory for this corpus. 
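        A minimal usage sketch (this assumes the TIMIT sample corpus shipped
        with the NLTK data distribution, which exposes this reader as
        ``nltk.corpus.timit``; the utterance id is the one used in the module
        docstring above)::

            >>> from nltk.corpus import timit       # doctest: +SKIP
            >>> timit.utteranceids()[:2]            # doctest: +SKIP
            >>> timit.words('dr1-fvmh0/sx206')      # doctest: +SKIP
            >>> timit.phones('dr1-fvmh0/sx206')     # doctest: +SKIP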
+ """ + # Ensure that wave files don't get treated as unicode data: + if isinstance(encoding, string_types): + encoding = [('.*\.wav', None), ('.*', encoding)] + + CorpusReader.__init__( + self, root, find_corpus_fileids(root, self._FILE_RE), encoding=encoding + ) + + self._utterances = [ + name[:-4] for name in find_corpus_fileids(root, self._UTTERANCE_RE) + ] + """A list of the utterance identifiers for all utterances in + this corpus.""" + + self._speakerinfo = None + self._root = root + self.speakers = sorted(set(u.split('/')[0] for u in self._utterances)) + + def fileids(self, filetype=None): + """ + Return a list of file identifiers for the files that make up + this corpus. + + :param filetype: If specified, then ``filetype`` indicates that + only the files that have the given type should be + returned. Accepted values are: ``txt``, ``wrd``, ``phn``, + ``wav``, or ``metadata``, + """ + if filetype is None: + return CorpusReader.fileids(self) + elif filetype in ('txt', 'wrd', 'phn', 'wav'): + return ['%s.%s' % (u, filetype) for u in self._utterances] + elif filetype == 'metadata': + return ['timitdic.txt', 'spkrinfo.txt'] + else: + raise ValueError('Bad value for filetype: %r' % filetype) + + def utteranceids( + self, dialect=None, sex=None, spkrid=None, sent_type=None, sentid=None + ): + """ + :return: A list of the utterance identifiers for all + utterances in this corpus, or for the given speaker, dialect + region, gender, sentence type, or sentence number, if + specified. + """ + if isinstance(dialect, string_types): + dialect = [dialect] + if isinstance(sex, string_types): + sex = [sex] + if isinstance(spkrid, string_types): + spkrid = [spkrid] + if isinstance(sent_type, string_types): + sent_type = [sent_type] + if isinstance(sentid, string_types): + sentid = [sentid] + + utterances = self._utterances[:] + if dialect is not None: + utterances = [u for u in utterances if u[2] in dialect] + if sex is not None: + utterances = [u for u in utterances if u[4] in sex] + if spkrid is not None: + utterances = [u for u in utterances if u[:9] in spkrid] + if sent_type is not None: + utterances = [u for u in utterances if u[11] in sent_type] + if sentid is not None: + utterances = [u for u in utterances if u[10:] in spkrid] + return utterances + + def transcription_dict(self): + """ + :return: A dictionary giving the 'standard' transcription for + each word. + """ + _transcriptions = {} + for line in self.open('timitdic.txt'): + if not line.strip() or line[0] == ';': + continue + m = re.match(r'\s*(\S+)\s+/(.*)/\s*$', line) + if not m: + raise ValueError('Bad line: %r' % line) + _transcriptions[m.group(1)] = m.group(2).split() + return _transcriptions + + def spkrid(self, utterance): + return utterance.split('/')[0] + + def sentid(self, utterance): + return utterance.split('/')[1] + + def utterance(self, spkrid, sentid): + return '%s/%s' % (spkrid, sentid) + + def spkrutteranceids(self, speaker): + """ + :return: A list of all utterances associated with a given + speaker. + """ + return [ + utterance + for utterance in self._utterances + if utterance.startswith(speaker + '/') + ] + + def spkrinfo(self, speaker): + """ + :return: A dictionary mapping .. something. 
+ """ + if speaker in self._utterances: + speaker = self.spkrid(speaker) + + if self._speakerinfo is None: + self._speakerinfo = {} + for line in self.open('spkrinfo.txt'): + if not line.strip() or line[0] == ';': + continue + rec = line.strip().split(None, 9) + key = "dr%s-%s%s" % (rec[2], rec[1].lower(), rec[0].lower()) + self._speakerinfo[key] = SpeakerInfo(*rec) + + return self._speakerinfo[speaker] + + def phones(self, utterances=None): + return [ + line.split()[-1] + for fileid in self._utterance_fileids(utterances, '.phn') + for line in self.open(fileid) + if line.strip() + ] + + def phone_times(self, utterances=None): + """ + offset is represented as a number of 16kHz samples! + """ + return [ + (line.split()[2], int(line.split()[0]), int(line.split()[1])) + for fileid in self._utterance_fileids(utterances, '.phn') + for line in self.open(fileid) + if line.strip() + ] + + def words(self, utterances=None): + return [ + line.split()[-1] + for fileid in self._utterance_fileids(utterances, '.wrd') + for line in self.open(fileid) + if line.strip() + ] + + def word_times(self, utterances=None): + return [ + (line.split()[2], int(line.split()[0]), int(line.split()[1])) + for fileid in self._utterance_fileids(utterances, '.wrd') + for line in self.open(fileid) + if line.strip() + ] + + def sents(self, utterances=None): + return [ + [line.split()[-1] for line in self.open(fileid) if line.strip()] + for fileid in self._utterance_fileids(utterances, '.wrd') + ] + + def sent_times(self, utterances=None): + return [ + ( + line.split(None, 2)[-1].strip(), + int(line.split()[0]), + int(line.split()[1]), + ) + for fileid in self._utterance_fileids(utterances, '.txt') + for line in self.open(fileid) + if line.strip() + ] + + def phone_trees(self, utterances=None): + if utterances is None: + utterances = self._utterances + if isinstance(utterances, string_types): + utterances = [utterances] + + trees = [] + for utterance in utterances: + word_times = self.word_times(utterance) + phone_times = self.phone_times(utterance) + sent_times = self.sent_times(utterance) + + while sent_times: + (sent, sent_start, sent_end) = sent_times.pop(0) + trees.append(Tree('S', [])) + while ( + word_times and phone_times and phone_times[0][2] <= word_times[0][1] + ): + trees[-1].append(phone_times.pop(0)[0]) + while word_times and word_times[0][2] <= sent_end: + (word, word_start, word_end) = word_times.pop(0) + trees[-1].append(Tree(word, [])) + while phone_times and phone_times[0][2] <= word_end: + trees[-1][-1].append(phone_times.pop(0)[0]) + while phone_times and phone_times[0][2] <= sent_end: + trees[-1].append(phone_times.pop(0)[0]) + return trees + + # [xx] NOTE: This is currently broken -- we're assuming that the + # fileids are WAV fileids (aka RIFF), but they're actually NIST SPHERE + # fileids. + def wav(self, utterance, start=0, end=None): + # nltk.chunk conflicts with the stdlib module 'chunk' + wave = import_from_stdlib('wave') + + w = wave.open(self.open(utterance + '.wav'), 'rb') + + if end is None: + end = w.getnframes() + + # Skip past frames before start, then read the frames we want + w.readframes(start) + frames = w.readframes(end - start) + + # Open a new temporary file -- the wave module requires + # an actual file, and won't work w/ stringio. :( + tf = tempfile.TemporaryFile() + out = wave.open(tf, 'w') + + # Write the parameters & data to the new file. + out.setparams(w.getparams()) + out.writeframes(frames) + out.close() + + # Read the data back from the file, and return it. 
The + # file will automatically be deleted when we return. + tf.seek(0) + return tf.read() + + def audiodata(self, utterance, start=0, end=None): + assert end is None or end > start + headersize = 44 + if end is None: + data = self.open(utterance + '.wav').read() + else: + data = self.open(utterance + '.wav').read(headersize + end * 2) + return data[headersize + start * 2 :] + + def _utterance_fileids(self, utterances, extension): + if utterances is None: + utterances = self._utterances + if isinstance(utterances, string_types): + utterances = [utterances] + return ['%s%s' % (u, extension) for u in utterances] + + def play(self, utterance, start=0, end=None): + """ + Play the given audio sample. + + :param utterance: The utterance id of the sample to play + """ + # Method 1: os audio dev. + try: + import ossaudiodev + + try: + dsp = ossaudiodev.open('w') + dsp.setfmt(ossaudiodev.AFMT_S16_LE) + dsp.channels(1) + dsp.speed(16000) + dsp.write(self.audiodata(utterance, start, end)) + dsp.close() + except IOError as e: + print( + ( + "can't acquire the audio device; please " + "activate your audio device." + ), + file=sys.stderr, + ) + print("system error message:", str(e), file=sys.stderr) + return + except ImportError: + pass + + # Method 2: pygame + try: + # FIXME: this won't work under python 3 + import pygame.mixer, StringIO + + pygame.mixer.init(16000) + f = StringIO.StringIO(self.wav(utterance, start, end)) + pygame.mixer.Sound(f).play() + while pygame.mixer.get_busy(): + time.sleep(0.01) + return + except ImportError: + pass + + # Method 3: complain. :) + print( + ("you must install pygame or ossaudiodev " "for audio playback."), + file=sys.stderr, + ) + + +@compat.python_2_unicode_compatible +class SpeakerInfo(object): + def __init__( + self, id, sex, dr, use, recdate, birthdate, ht, race, edu, comments=None + ): + self.id = id + self.sex = sex + self.dr = dr + self.use = use + self.recdate = recdate + self.birthdate = birthdate + self.ht = ht + self.race = race + self.edu = edu + self.comments = comments + + def __repr__(self): + attribs = 'id sex dr use recdate birthdate ht race edu comments' + args = ['%s=%r' % (attr, getattr(self, attr)) for attr in attribs.split()] + return 'SpeakerInfo(%s)' % (', '.join(args)) + + +def read_timit_block(stream): + """ + Block reader for timit tagged sentences, which are preceded by a sentence + number that will be ignored. + """ + line = stream.readline() + if not line: + return [] + n, sent = line.split(' ', 1) + return [sent] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/toolbox.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/toolbox.py new file mode 100644 index 0000000..32acc01 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/toolbox.py @@ -0,0 +1,83 @@ +# Natural Language Toolkit: Toolbox Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Greg Aumann +# Stuart Robinson +# Steven Bird +# URL: +# For license information, see LICENSE.TXT + +""" +Module for reading, writing and manipulating +Toolbox databases and settings fileids. 
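A minimal usage sketch (this assumes the ``toolbox`` sample corpus from the
NLTK data distribution, which exposes this reader as ``nltk.corpus.toolbox``;
``rotokas.dic`` is one of its files)::

    >>> from nltk.corpus import toolbox          # doctest: +SKIP
    >>> toolbox.words('rotokas.dic')[:5]         # doctest: +SKIP
    >>> toolbox.entries('rotokas.dic')[:1]       # doctest: +SKIP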
+""" + +from nltk.toolbox import ToolboxData +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + + +class ToolboxCorpusReader(CorpusReader): + def xml(self, fileids, key=None): + return concat( + [ + ToolboxData(path, enc).parse(key=key) + for (path, enc) in self.abspaths(fileids, True) + ] + ) + + def fields( + self, + fileids, + strip=True, + unwrap=True, + encoding='utf8', + errors='strict', + unicode_fields=None, + ): + return concat( + [ + list( + ToolboxData(fileid, enc).fields( + strip, unwrap, encoding, errors, unicode_fields + ) + ) + for (fileid, enc) in self.abspaths(fileids, include_encoding=True) + ] + ) + + # should probably be done lazily: + def entries(self, fileids, **kwargs): + if 'key' in kwargs: + key = kwargs['key'] + del kwargs['key'] + else: + key = 'lx' # the default key in MDF + entries = [] + for marker, contents in self.fields(fileids, **kwargs): + if marker == key: + entries.append((contents, [])) + else: + try: + entries[-1][-1].append((marker, contents)) + except IndexError: + pass + return entries + + def words(self, fileids, key='lx'): + return [contents for marker, contents in self.fields(fileids) if marker == key] + + def raw(self, fileids): + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + +def demo(): + pass + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/twitter.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/twitter.py new file mode 100644 index 0000000..78b9de3 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/twitter.py @@ -0,0 +1,153 @@ +# Natural Language Toolkit: Twitter Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ewan Klein +# URL: +# For license information, see LICENSE.TXT + +""" +A reader for corpora that consist of Tweets. It is assumed that the Tweets +have been serialised into line-delimited JSON. +""" + +import json +import os + +from six import string_types + +from nltk.tokenize import TweetTokenizer + +from nltk.corpus.reader.util import StreamBackedCorpusView, concat, ZipFilePathPointer +from nltk.corpus.reader.api import CorpusReader + + +class TwitterCorpusReader(CorpusReader): + """ + Reader for corpora that consist of Tweets represented as a list of line-delimited JSON. + + Individual Tweets can be tokenized using the default tokenizer, or by a + custom tokenizer specified as a parameter to the constructor. + + Construct a new Tweet corpus reader for a set of documents + located at the given root directory. + + If you made your own tweet collection in a directory called + `twitter-files`, then you can initialise the reader as:: + + from nltk.corpus import TwitterCorpusReader + reader = TwitterCorpusReader(root='/path/to/twitter-files', '.*\.json') + + However, the recommended approach is to set the relevant directory as the + value of the environmental variable `TWITTER`, and then invoke the reader + as follows:: + + root = os.environ['TWITTER'] + reader = TwitterCorpusReader(root, '.*\.json') + + If you want to work directly with the raw Tweets, the `json` library can + be used:: + + import json + for tweet in reader.docs(): + print(json.dumps(tweet, indent=1, sort_keys=True)) + + """ + + CorpusView = StreamBackedCorpusView + """ + The corpus view class used by this reader. 
+ """ + + def __init__( + self, root, fileids=None, word_tokenizer=TweetTokenizer(), encoding='utf8' + ): + """ + + :param root: The root directory for this corpus. + + :param fileids: A list or regexp specifying the fileids in this corpus. + + :param word_tokenizer: Tokenizer for breaking the text of Tweets into + smaller units, including but not limited to words. + + """ + CorpusReader.__init__(self, root, fileids, encoding) + + for path in self.abspaths(self._fileids): + if isinstance(path, ZipFilePathPointer): + pass + elif os.path.getsize(path) == 0: + raise ValueError("File {} is empty".format(path)) + """Check that all user-created corpus files are non-empty.""" + + self._word_tokenizer = word_tokenizer + + def docs(self, fileids=None): + """ + Returns the full Tweet objects, as specified by `Twitter + documentation on Tweets + `_ + + :return: the given file(s) as a list of dictionaries deserialised + from JSON. + :rtype: list(dict) + """ + return concat( + [ + self.CorpusView(path, self._read_tweets, encoding=enc) + for (path, enc, fileid) in self.abspaths(fileids, True, True) + ] + ) + + def strings(self, fileids=None): + """ + Returns only the text content of Tweets in the file(s) + + :return: the given file(s) as a list of Tweets. + :rtype: list(str) + """ + fulltweets = self.docs(fileids) + tweets = [] + for jsono in fulltweets: + try: + text = jsono['text'] + if isinstance(text, bytes): + text = text.decode(self.encoding) + tweets.append(text) + except KeyError: + pass + return tweets + + def tokenized(self, fileids=None): + """ + :return: the given file(s) as a list of the text content of Tweets as + as a list of words, screenanames, hashtags, URLs and punctuation symbols. + + :rtype: list(list(str)) + """ + tweets = self.strings(fileids) + tokenizer = self._word_tokenizer + return [tokenizer.tokenize(t) for t in tweets] + + def raw(self, fileids=None): + """ + Return the corpora in their raw form. + """ + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + def _read_tweets(self, stream): + """ + Assumes that each line in ``stream`` is a JSON-serialised object. + """ + tweets = [] + for i in range(10): + line = stream.readline() + if not line: + return tweets + tweet = json.loads(line) + tweets.append(tweet) + return tweets diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/udhr.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/udhr.py new file mode 100644 index 0000000..934a5b5 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/udhr.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +""" +UDHR corpus reader. It mostly deals with encodings. 
+""" +from __future__ import absolute_import, unicode_literals + +from nltk.corpus.reader.util import find_corpus_fileids +from nltk.corpus.reader.plaintext import PlaintextCorpusReader + + +class UdhrCorpusReader(PlaintextCorpusReader): + + ENCODINGS = [ + ('.*-Latin1$', 'latin-1'), + ('.*-Hebrew$', 'hebrew'), + ('.*-Arabic$', 'cp1256'), + ('Czech_Cesky-UTF8', 'cp1250'), # yeah + ('.*-Cyrillic$', 'cyrillic'), + ('.*-SJIS$', 'SJIS'), + ('.*-GB2312$', 'GB2312'), + ('.*-Latin2$', 'ISO-8859-2'), + ('.*-Greek$', 'greek'), + ('.*-UTF8$', 'utf-8'), + ('Hungarian_Magyar-Unicode', 'utf-16-le'), + ('Amahuaca', 'latin1'), + ('Turkish_Turkce-Turkish', 'latin5'), + ('Lithuanian_Lietuviskai-Baltic', 'latin4'), + ('Japanese_Nihongo-EUC', 'EUC-JP'), + ('Japanese_Nihongo-JIS', 'iso2022_jp'), + ('Chinese_Mandarin-HZ', 'hz'), + ('Abkhaz\-Cyrillic\+Abkh', 'cp1251'), + ] + + SKIP = set( + [ + # The following files are not fully decodable because they + # were truncated at wrong bytes: + 'Burmese_Myanmar-UTF8', + 'Japanese_Nihongo-JIS', + 'Chinese_Mandarin-HZ', + 'Chinese_Mandarin-UTF8', + 'Gujarati-UTF8', + 'Hungarian_Magyar-Unicode', + 'Lao-UTF8', + 'Magahi-UTF8', + 'Marathi-UTF8', + 'Tamil-UTF8', + # Unfortunately, encodings required for reading + # the following files are not supported by Python: + 'Vietnamese-VPS', + 'Vietnamese-VIQR', + 'Vietnamese-TCVN', + 'Magahi-Agra', + 'Bhojpuri-Agra', + 'Esperanto-T61', # latin3 raises an exception + # The following files are encoded for specific fonts: + 'Burmese_Myanmar-WinResearcher', + 'Armenian-DallakHelv', + 'Tigrinya_Tigrigna-VG2Main', + 'Amharic-Afenegus6..60375', # ? + 'Navaho_Dine-Navajo-Navaho-font', + # What are these? + 'Azeri_Azerbaijani_Cyrillic-Az.Times.Cyr.Normal0117', + 'Azeri_Azerbaijani_Latin-Az.Times.Lat0117', + # The following files are unintended: + 'Czech-Latin2-err', + 'Russian_Russky-UTF8~', + ] + ) + + def __init__(self, root='udhr'): + fileids = find_corpus_fileids(root, r'(?!README|\.).*') + super(UdhrCorpusReader, self).__init__( + root, + [fileid for fileid in fileids if fileid not in self.SKIP], + encoding=self.ENCODINGS, + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/util.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/util.py new file mode 100644 index 0000000..b60f7ab --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/util.py @@ -0,0 +1,870 @@ +# Natural Language Toolkit: Corpus Reader Utilities +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT + +import os +import bisect +import re +import tempfile +from functools import reduce + +try: + import cPickle as pickle +except ImportError: + import pickle + +try: # Use the c version of ElementTree, which is faster, if possible. 
+ from xml.etree import cElementTree as ElementTree +except ImportError: + from xml.etree import ElementTree + +from six import string_types, text_type + +from nltk.tokenize import wordpunct_tokenize +from nltk.internals import slice_bounds +from nltk.data import PathPointer, FileSystemPathPointer, ZipFilePathPointer +from nltk.data import SeekableUnicodeStreamReader +from nltk.util import AbstractLazySequence, LazySubsequence, LazyConcatenation, py25 + +###################################################################### +# { Corpus View +###################################################################### + + +class StreamBackedCorpusView(AbstractLazySequence): + """ + A 'view' of a corpus file, which acts like a sequence of tokens: + it can be accessed by index, iterated over, etc. However, the + tokens are only constructed as-needed -- the entire corpus is + never stored in memory at once. + + The constructor to ``StreamBackedCorpusView`` takes two arguments: + a corpus fileid (specified as a string or as a ``PathPointer``); + and a block reader. A "block reader" is a function that reads + zero or more tokens from a stream, and returns them as a list. A + very simple example of a block reader is: + + >>> def simple_block_reader(stream): + ... return stream.readline().split() + + This simple block reader reads a single line at a time, and + returns a single token (consisting of a string) for each + whitespace-separated substring on the line. + + When deciding how to define the block reader for a given + corpus, careful consideration should be given to the size of + blocks handled by the block reader. Smaller block sizes will + increase the memory requirements of the corpus view's internal + data structures (by 2 integers per block). On the other hand, + larger block sizes may decrease performance for random access to + the corpus. (But note that larger block sizes will *not* + decrease performance for iteration.) + + Internally, ``CorpusView`` maintains a partial mapping from token + index to file position, with one entry per block. When a token + with a given index *i* is requested, the ``CorpusView`` constructs + it as follows: + + 1. First, it searches the toknum/filepos mapping for the token + index closest to (but less than or equal to) *i*. + + 2. Then, starting at the file position corresponding to that + index, it reads one block at a time using the block reader + until it reaches the requested token. + + The toknum/filepos mapping is created lazily: it is initially + empty, but every time a new block is read, the block's + initial token is added to the mapping. (Thus, the toknum/filepos + map has one entry per block.) + + In order to increase efficiency for random access patterns that + have high degrees of locality, the corpus view may cache one or + more blocks. + + :note: Each ``CorpusView`` object internally maintains an open file + object for its underlying corpus file. This file should be + automatically closed when the ``CorpusView`` is garbage collected, + but if you wish to close it manually, use the ``close()`` + method. If you access a ``CorpusView``'s items after it has been + closed, the file object will be automatically re-opened. + + :warning: If the contents of the file are modified during the + lifetime of the ``CorpusView``, then the ``CorpusView``'s behavior + is undefined. 
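    For example (the file name here is hypothetical), a view whose tokens are
    the lines of a file can be built from the ``read_line_block`` reader
    defined later in this module::

        >>> view = StreamBackedCorpusView('corpus.txt', read_line_block)  # doctest: +SKIP
        >>> view[0]  # doctest: +SKIP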
+ + :warning: If a unicode encoding is specified when constructing a + ``CorpusView``, then the block reader may only call + ``stream.seek()`` with offsets that have been returned by + ``stream.tell()``; in particular, calling ``stream.seek()`` with + relative offsets, or with offsets based on string lengths, may + lead to incorrect behavior. + + :ivar _block_reader: The function used to read + a single block from the underlying file stream. + :ivar _toknum: A list containing the token index of each block + that has been processed. In particular, ``_toknum[i]`` is the + token index of the first token in block ``i``. Together + with ``_filepos``, this forms a partial mapping between token + indices and file positions. + :ivar _filepos: A list containing the file position of each block + that has been processed. In particular, ``_toknum[i]`` is the + file position of the first character in block ``i``. Together + with ``_toknum``, this forms a partial mapping between token + indices and file positions. + :ivar _stream: The stream used to access the underlying corpus file. + :ivar _len: The total number of tokens in the corpus, if known; + or None, if the number of tokens is not yet known. + :ivar _eofpos: The character position of the last character in the + file. This is calculated when the corpus view is initialized, + and is used to decide when the end of file has been reached. + :ivar _cache: A cache of the most recently read block. It + is encoded as a tuple (start_toknum, end_toknum, tokens), where + start_toknum is the token index of the first token in the block; + end_toknum is the token index of the first token not in the + block; and tokens is a list of the tokens in the block. + """ + + def __init__(self, fileid, block_reader=None, startpos=0, encoding='utf8'): + """ + Create a new corpus view, based on the file ``fileid``, and + read with ``block_reader``. See the class documentation + for more information. + + :param fileid: The path to the file that is read by this + corpus view. ``fileid`` can either be a string or a + ``PathPointer``. + + :param startpos: The file position at which the view will + start reading. This can be used to skip over preface + sections. + + :param encoding: The unicode encoding that should be used to + read the file's contents. If no encoding is specified, + then the file's contents will be read as a non-unicode + string (i.e., a str). + """ + if block_reader: + self.read_block = block_reader + # Initialize our toknum/filepos mapping. + self._toknum = [0] + self._filepos = [startpos] + self._encoding = encoding + # We don't know our length (number of tokens) yet. + self._len = None + + self._fileid = fileid + self._stream = None + + self._current_toknum = None + """This variable is set to the index of the next token that + will be read, immediately before ``self.read_block()`` is + called. This is provided for the benefit of the block + reader, which under rare circumstances may need to know + the current token number.""" + + self._current_blocknum = None + """This variable is set to the index of the next block that + will be read, immediately before ``self.read_block()`` is + called. This is provided for the benefit of the block + reader, which under rare circumstances may need to know + the current block number.""" + + # Find the length of the file. 
+ try: + if isinstance(self._fileid, PathPointer): + self._eofpos = self._fileid.file_size() + else: + self._eofpos = os.stat(self._fileid).st_size + except Exception as exc: + raise ValueError('Unable to open or access %r -- %s' % (fileid, exc)) + + # Maintain a cache of the most recently read block, to + # increase efficiency of random access. + self._cache = (-1, -1, None) + + fileid = property( + lambda self: self._fileid, + doc=""" + The fileid of the file that is accessed by this view. + + :type: str or PathPointer""", + ) + + def read_block(self, stream): + """ + Read a block from the input stream. + + :return: a block of tokens from the input stream + :rtype: list(any) + :param stream: an input stream + :type stream: stream + """ + raise NotImplementedError('Abstract Method') + + def _open(self): + """ + Open the file stream associated with this corpus view. This + will be called performed if any value is read from the view + while its file stream is closed. + """ + if isinstance(self._fileid, PathPointer): + self._stream = self._fileid.open(self._encoding) + elif self._encoding: + self._stream = SeekableUnicodeStreamReader( + open(self._fileid, 'rb'), self._encoding + ) + else: + self._stream = open(self._fileid, 'rb') + + def close(self): + """ + Close the file stream associated with this corpus view. This + can be useful if you are worried about running out of file + handles (although the stream should automatically be closed + upon garbage collection of the corpus view). If the corpus + view is accessed after it is closed, it will be automatically + re-opened. + """ + if self._stream is not None: + self._stream.close() + self._stream = None + + def __len__(self): + if self._len is None: + # iterate_from() sets self._len when it reaches the end + # of the file: + for tok in self.iterate_from(self._toknum[-1]): + pass + return self._len + + def __getitem__(self, i): + if isinstance(i, slice): + start, stop = slice_bounds(self, i) + # Check if it's in the cache. + offset = self._cache[0] + if offset <= start and stop <= self._cache[1]: + return self._cache[2][start - offset : stop - offset] + # Construct & return the result. + return LazySubsequence(self, start, stop) + else: + # Handle negative indices + if i < 0: + i += len(self) + if i < 0: + raise IndexError('index out of range') + # Check if it's in the cache. + offset = self._cache[0] + if offset <= i < self._cache[1]: + return self._cache[2][i - offset] + # Use iterate_from to extract it. + try: + return next(self.iterate_from(i)) + except StopIteration: + raise IndexError('index out of range') + + # If we wanted to be thread-safe, then this method would need to + # do some locking. + def iterate_from(self, start_tok): + # Start by feeding from the cache, if possible. + if self._cache[0] <= start_tok < self._cache[1]: + for tok in self._cache[2][start_tok - self._cache[0] :]: + yield tok + start_tok += 1 + + # Decide where in the file we should start. If `start` is in + # our mapping, then we can jump straight to the correct block; + # otherwise, start at the last block we've processed. + if start_tok < self._toknum[-1]: + block_index = bisect.bisect_right(self._toknum, start_tok) - 1 + toknum = self._toknum[block_index] + filepos = self._filepos[block_index] + else: + block_index = len(self._toknum) - 1 + toknum = self._toknum[-1] + filepos = self._filepos[-1] + + # Open the stream, if it's not open already. + if self._stream is None: + self._open() + + # If the file is empty, the while loop will never run. 
+ # This *seems* to be all the state we need to set: + if self._eofpos == 0: + self._len = 0 + + # Each iteration through this loop, we read a single block + # from the stream. + while filepos < self._eofpos: + # Read the next block. + self._stream.seek(filepos) + self._current_toknum = toknum + self._current_blocknum = block_index + tokens = self.read_block(self._stream) + assert isinstance(tokens, (tuple, list, AbstractLazySequence)), ( + 'block reader %s() should return list or tuple.' + % self.read_block.__name__ + ) + num_toks = len(tokens) + new_filepos = self._stream.tell() + assert new_filepos > filepos, ( + 'block reader %s() should consume at least 1 byte (filepos=%d)' + % (self.read_block.__name__, filepos) + ) + + # Update our cache. + self._cache = (toknum, toknum + num_toks, list(tokens)) + + # Update our mapping. + assert toknum <= self._toknum[-1] + if num_toks > 0: + block_index += 1 + if toknum == self._toknum[-1]: + assert new_filepos > self._filepos[-1] # monotonic! + self._filepos.append(new_filepos) + self._toknum.append(toknum + num_toks) + else: + # Check for consistency: + assert ( + new_filepos == self._filepos[block_index] + ), 'inconsistent block reader (num chars read)' + assert ( + toknum + num_toks == self._toknum[block_index] + ), 'inconsistent block reader (num tokens returned)' + + # If we reached the end of the file, then update self._len + if new_filepos == self._eofpos: + self._len = toknum + num_toks + # Generate the tokens in this block (but skip any tokens + # before start_tok). Note that between yields, our state + # may be modified. + for tok in tokens[max(0, start_tok - toknum) :]: + yield tok + # If we're at the end of the file, then we're done. + assert new_filepos <= self._eofpos + if new_filepos == self._eofpos: + break + # Update our indices + toknum += num_toks + filepos = new_filepos + + # If we reach this point, then we should know our length. + assert self._len is not None + # Enforce closing of stream once we reached end of file + # We should have reached EOF once we're out of the while loop. + self.close() + + # Use concat for these, so we can use a ConcatenatedCorpusView + # when possible. + def __add__(self, other): + return concat([self, other]) + + def __radd__(self, other): + return concat([other, self]) + + def __mul__(self, count): + return concat([self] * count) + + def __rmul__(self, count): + return concat([self] * count) + + +class ConcatenatedCorpusView(AbstractLazySequence): + """ + A 'view' of a corpus file that joins together one or more + ``StreamBackedCorpusViews``. At most + one file handle is left open at any time. + """ + + def __init__(self, corpus_views): + self._pieces = corpus_views + """A list of the corpus subviews that make up this + concatenation.""" + + self._offsets = [0] + """A list of offsets, indicating the index at which each + subview begins. In particular:: + offsets[i] = sum([len(p) for p in pieces[:i]])""" + + self._open_piece = None + """The most recently accessed corpus subview (or None). + Before a new subview is accessed, this subview will be closed.""" + + def __len__(self): + if len(self._offsets) <= len(self._pieces): + # Iterate to the end of the corpus. 
+ for tok in self.iterate_from(self._offsets[-1]): + pass + + return self._offsets[-1] + + def close(self): + for piece in self._pieces: + piece.close() + + def iterate_from(self, start_tok): + piecenum = bisect.bisect_right(self._offsets, start_tok) - 1 + + while piecenum < len(self._pieces): + offset = self._offsets[piecenum] + piece = self._pieces[piecenum] + + # If we've got another piece open, close it first. + if self._open_piece is not piece: + if self._open_piece is not None: + self._open_piece.close() + self._open_piece = piece + + # Get everything we can from this piece. + for tok in piece.iterate_from(max(0, start_tok - offset)): + yield tok + + # Update the offset table. + if piecenum + 1 == len(self._offsets): + self._offsets.append(self._offsets[-1] + len(piece)) + + # Move on to the next piece. + piecenum += 1 + + +def concat(docs): + """ + Concatenate together the contents of multiple documents from a + single corpus, using an appropriate concatenation function. This + utility function is used by corpus readers when the user requests + more than one document at a time. + """ + if len(docs) == 1: + return docs[0] + if len(docs) == 0: + raise ValueError('concat() expects at least one object!') + + types = set(d.__class__ for d in docs) + + # If they're all strings, use string concatenation. + if all(isinstance(doc, string_types) for doc in docs): + return ''.join(docs) + + # If they're all corpus views, then use ConcatenatedCorpusView. + for typ in types: + if not issubclass(typ, (StreamBackedCorpusView, ConcatenatedCorpusView)): + break + else: + return ConcatenatedCorpusView(docs) + + # If they're all lazy sequences, use a lazy concatenation + for typ in types: + if not issubclass(typ, AbstractLazySequence): + break + else: + return LazyConcatenation(docs) + + # Otherwise, see what we can do: + if len(types) == 1: + typ = list(types)[0] + + if issubclass(typ, list): + return reduce((lambda a, b: a + b), docs, []) + + if issubclass(typ, tuple): + return reduce((lambda a, b: a + b), docs, ()) + + if ElementTree.iselement(typ): + xmltree = ElementTree.Element('documents') + for doc in docs: + xmltree.append(doc) + return xmltree + + # No method found! + raise ValueError("Don't know how to concatenate types: %r" % types) + + +###################################################################### +# { Corpus View for Pickled Sequences +###################################################################### + + +class PickleCorpusView(StreamBackedCorpusView): + """ + A stream backed corpus view for corpus files that consist of + sequences of serialized Python objects (serialized using + ``pickle.dump``). One use case for this class is to store the + result of running feature detection on a corpus to disk. This can + be useful when performing feature detection is expensive (so we + don't want to repeat it); but the corpus is too large to store in + memory. The following example illustrates this technique: + + >>> from nltk.corpus.reader.util import PickleCorpusView + >>> from nltk.util import LazyMap + >>> feature_corpus = LazyMap(detect_features, corpus) # doctest: +SKIP + >>> PickleCorpusView.write(feature_corpus, some_fileid) # doctest: +SKIP + >>> pcv = PickleCorpusView(some_fileid) # doctest: +SKIP + """ + + BLOCK_SIZE = 100 + PROTOCOL = -1 + + def __init__(self, fileid, delete_on_gc=False): + """ + Create a new corpus view that reads the pickle corpus + ``fileid``. + + :param delete_on_gc: If true, then ``fileid`` will be deleted + whenever this object gets garbage-collected. 
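        A minimal sketch of the typical round trip, using the classmethods
        defined below (illustrative only; the token strings are invented)::

            >>> view = PickleCorpusView.cache_to_tempfile(['tok1', 'tok2'])  # doctest: +SKIP
            >>> list(view)                                                   # doctest: +SKIP
            ['tok1', 'tok2']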
+ """ + self._delete_on_gc = delete_on_gc + StreamBackedCorpusView.__init__(self, fileid) + + def read_block(self, stream): + result = [] + for i in range(self.BLOCK_SIZE): + try: + result.append(pickle.load(stream)) + except EOFError: + break + return result + + def __del__(self): + """ + If ``delete_on_gc`` was set to true when this + ``PickleCorpusView`` was created, then delete the corpus view's + fileid. (This method is called whenever a + ``PickledCorpusView`` is garbage-collected. + """ + if getattr(self, '_delete_on_gc'): + if os.path.exists(self._fileid): + try: + os.remove(self._fileid) + except (OSError, IOError): + pass + self.__dict__.clear() # make the garbage collector's job easier + + @classmethod + def write(cls, sequence, output_file): + if isinstance(output_file, string_types): + output_file = open(output_file, 'wb') + for item in sequence: + pickle.dump(item, output_file, cls.PROTOCOL) + + @classmethod + def cache_to_tempfile(cls, sequence, delete_on_gc=True): + """ + Write the given sequence to a temporary file as a pickle + corpus; and then return a ``PickleCorpusView`` view for that + temporary corpus file. + + :param delete_on_gc: If true, then the temporary file will be + deleted whenever this object gets garbage-collected. + """ + try: + fd, output_file_name = tempfile.mkstemp('.pcv', 'nltk-') + output_file = os.fdopen(fd, 'wb') + cls.write(sequence, output_file) + output_file.close() + return PickleCorpusView(output_file_name, delete_on_gc) + except (OSError, IOError) as e: + raise ValueError('Error while creating temp file: %s' % e) + + +###################################################################### +# { Block Readers +###################################################################### + + +def read_whitespace_block(stream): + toks = [] + for i in range(20): # Read 20 lines at a time. + toks.extend(stream.readline().split()) + return toks + + +def read_wordpunct_block(stream): + toks = [] + for i in range(20): # Read 20 lines at a time. + toks.extend(wordpunct_tokenize(stream.readline())) + return toks + + +def read_line_block(stream): + toks = [] + for i in range(20): + line = stream.readline() + if not line: + return toks + toks.append(line.rstrip('\n')) + return toks + + +def read_blankline_block(stream): + s = '' + while True: + line = stream.readline() + # End of file: + if not line: + if s: + return [s] + else: + return [] + # Blank line: + elif line and not line.strip(): + if s: + return [s] + # Other line: + else: + s += line + + +def read_alignedsent_block(stream): + s = '' + while True: + line = stream.readline() + if line[0] == '=' or line[0] == '\n' or line[:2] == '\r\n': + continue + # End of file: + if not line: + if s: + return [s] + else: + return [] + # Other line: + else: + s += line + if re.match('^\d+-\d+', line) is not None: + return [s] + + +def read_regexp_block(stream, start_re, end_re=None): + """ + Read a sequence of tokens from a stream, where tokens begin with + lines that match ``start_re``. If ``end_re`` is specified, then + tokens end with lines that match ``end_re``; otherwise, tokens end + whenever the next line matching ``start_re`` or EOF is found. + """ + # Scan until we find a line matching the start regexp. + while True: + line = stream.readline() + if not line: + return [] # end of file. + if re.match(start_re, line): + break + + # Scan until we find another line matching the regexp, or EOF. 
+ lines = [line] + while True: + oldpos = stream.tell() + line = stream.readline() + # End of file: + if not line: + return [''.join(lines)] + # End of token: + if end_re is not None and re.match(end_re, line): + return [''.join(lines)] + # Start of new token: backup to just before it starts, and + # return the token we've already collected. + if end_re is None and re.match(start_re, line): + stream.seek(oldpos) + return [''.join(lines)] + # Anything else is part of the token. + lines.append(line) + + +def read_sexpr_block(stream, block_size=16384, comment_char=None): + """ + Read a sequence of s-expressions from the stream, and leave the + stream's file position at the end the last complete s-expression + read. This function will always return at least one s-expression, + unless there are no more s-expressions in the file. + + If the file ends in in the middle of an s-expression, then that + incomplete s-expression is returned when the end of the file is + reached. + + :param block_size: The default block size for reading. If an + s-expression is longer than one block, then more than one + block will be read. + :param comment_char: A character that marks comments. Any lines + that begin with this character will be stripped out. + (If spaces or tabs precede the comment character, then the + line will not be stripped.) + """ + start = stream.tell() + block = stream.read(block_size) + encoding = getattr(stream, 'encoding', None) + assert encoding is not None or isinstance(block, text_type) + if encoding not in (None, 'utf-8'): + import warnings + + warnings.warn( + 'Parsing may fail, depending on the properties ' + 'of the %s encoding!' % encoding + ) + # (e.g., the utf-16 encoding does not work because it insists + # on adding BOMs to the beginning of encoded strings.) + + if comment_char: + COMMENT = re.compile('(?m)^%s.*$' % re.escape(comment_char)) + while True: + try: + # If we're stripping comments, then make sure our block ends + # on a line boundary; and then replace any comments with + # space characters. (We can't just strip them out -- that + # would make our offset wrong.) + if comment_char: + block += stream.readline() + block = re.sub(COMMENT, _sub_space, block) + # Read the block. + tokens, offset = _parse_sexpr_block(block) + # Skip whitespace + offset = re.compile(r'\s*').search(block, offset).end() + + # Move to the end position. + if encoding is None: + stream.seek(start + offset) + else: + stream.seek(start + len(block[:offset].encode(encoding))) + + # Return the list of tokens we processed + return tokens + except ValueError as e: + if e.args[0] == 'Block too small': + next_block = stream.read(block_size) + if next_block: + block += next_block + continue + else: + # The file ended mid-sexpr -- return what we got. + return [block.strip()] + else: + raise + + +def _sub_space(m): + """Helper function: given a regexp match, return a string of + spaces that's the same length as the matched string.""" + return ' ' * (m.end() - m.start()) + + +def _parse_sexpr_block(block): + tokens = [] + start = end = 0 + + while end < len(block): + m = re.compile(r'\S').search(block, end) + if not m: + return tokens, end + + start = m.start() + + # Case 1: sexpr is not parenthesized. + if m.group() != '(': + m2 = re.compile(r'[\s(]').search(block, start) + if m2: + end = m2.start() + else: + if tokens: + return tokens, end + raise ValueError('Block too small') + + # Case 2: parenthesized sexpr. 
+ else: + nesting = 0 + for m in re.compile(r'[()]').finditer(block, start): + if m.group() == '(': + nesting += 1 + else: + nesting -= 1 + if nesting == 0: + end = m.end() + break + else: + if tokens: + return tokens, end + raise ValueError('Block too small') + + tokens.append(block[start:end]) + + return tokens, end + + +###################################################################### +# { Finding Corpus Items +###################################################################### + + +def find_corpus_fileids(root, regexp): + if not isinstance(root, PathPointer): + raise TypeError('find_corpus_fileids: expected a PathPointer') + regexp += '$' + + # Find fileids in a zipfile: scan the zipfile's namelist. Filter + # out entries that end in '/' -- they're directories. + if isinstance(root, ZipFilePathPointer): + fileids = [ + name[len(root.entry) :] + for name in root.zipfile.namelist() + if not name.endswith('/') + ] + items = [name for name in fileids if re.match(regexp, name)] + return sorted(items) + + # Find fileids in a directory: use os.walk to search all (proper + # or symlinked) subdirectories, and match paths against the regexp. + elif isinstance(root, FileSystemPathPointer): + items = [] + # workaround for py25 which doesn't support followlinks + kwargs = {} + if not py25(): + kwargs = {'followlinks': True} + for dirname, subdirs, fileids in os.walk(root.path, **kwargs): + prefix = ''.join('%s/' % p for p in _path_from(root.path, dirname)) + items += [ + prefix + fileid + for fileid in fileids + if re.match(regexp, prefix + fileid) + ] + # Don't visit svn directories: + if '.svn' in subdirs: + subdirs.remove('.svn') + return sorted(items) + + else: + raise AssertionError("Don't know how to handle %r" % root) + + +def _path_from(parent, child): + if os.path.split(parent)[1] == '': + parent = os.path.split(parent)[0] + path = [] + while parent != child: + child, dirname = os.path.split(child) + path.insert(0, dirname) + assert os.path.split(child)[0] != child + return path + + +###################################################################### +# { Paragraph structure in Treebank files +###################################################################### + + +def tagged_treebank_para_block_reader(stream): + # Read the next paragraph. + para = '' + while True: + line = stream.readline() + # End of paragraph: + if re.match('======+\s*$', line): + if para.strip(): + return [para] + # End of file: + elif line == '': + if para.strip(): + return [para] + else: + return [] + # Content line: + else: + para += line diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/verbnet.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/verbnet.py new file mode 100644 index 0000000..d0492f5 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/verbnet.py @@ -0,0 +1,627 @@ +# Natural Language Toolkit: Verbnet Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +An NLTK interface to the VerbNet verb lexicon + +For details about VerbNet see: +https://verbs.colorado.edu/~mpalmer/projects/verbnet.html +""" +from __future__ import unicode_literals + +import re +import textwrap +from collections import defaultdict + +from six import string_types + +from nltk.corpus.reader.xmldocs import XMLCorpusReader + + +class VerbnetCorpusReader(XMLCorpusReader): + """ + An NLTK interface to the VerbNet verb lexicon. 
+ + From the VerbNet site: "VerbNet (VN) (Kipper-Schuler 2006) is the largest + on-line verb lexicon currently available for English. It is a hierarchical + domain-independent, broad-coverage verb lexicon with mappings to other + lexical resources such as WordNet (Miller, 1990; Fellbaum, 1998), XTAG + (XTAG Research Group, 2001), and FrameNet (Baker et al., 1998)." + + For details about VerbNet see: + https://verbs.colorado.edu/~mpalmer/projects/verbnet.html + """ + + # No unicode encoding param, since the data files are all XML. + def __init__(self, root, fileids, wrap_etree=False): + XMLCorpusReader.__init__(self, root, fileids, wrap_etree) + + self._lemma_to_class = defaultdict(list) + """A dictionary mapping from verb lemma strings to lists of + VerbNet class identifiers.""" + + self._wordnet_to_class = defaultdict(list) + """A dictionary mapping from wordnet identifier strings to + lists of VerbNet class identifiers.""" + + self._class_to_fileid = {} + """A dictionary mapping from class identifiers to + corresponding file identifiers. The keys of this dictionary + provide a complete list of all classes and subclasses.""" + + self._shortid_to_longid = {} + + # Initialize the dictionaries. Use the quick (regexp-based) + # method instead of the slow (xml-based) method, because it + # runs 2-30 times faster. + self._quick_index() + + _LONGID_RE = re.compile(r'([^\-\.]*)-([\d+.\-]+)$') + """Regular expression that matches (and decomposes) longids""" + + _SHORTID_RE = re.compile(r'[\d+.\-]+$') + """Regular expression that matches shortids""" + + _INDEX_RE = re.compile( + r']+>|' r'' + ) + """Regular expression used by ``_index()`` to quickly scan the corpus + for basic information.""" + + def lemmas(self, vnclass=None): + """ + Return a list of all verb lemmas that appear in any class, or + in the ``classid`` if specified. + """ + if vnclass is None: + return sorted(self._lemma_to_class.keys()) + else: + # [xx] should this include subclass members? + if isinstance(vnclass, string_types): + vnclass = self.vnclass(vnclass) + return [member.get('name') for member in vnclass.findall('MEMBERS/MEMBER')] + + def wordnetids(self, vnclass=None): + """ + Return a list of all wordnet identifiers that appear in any + class, or in ``classid`` if specified. + """ + if vnclass is None: + return sorted(self._wordnet_to_class.keys()) + else: + # [xx] should this include subclass members? + if isinstance(vnclass, string_types): + vnclass = self.vnclass(vnclass) + return sum( + [ + member.get('wn', '').split() + for member in vnclass.findall('MEMBERS/MEMBER') + ], + [], + ) + + def classids(self, lemma=None, wordnetid=None, fileid=None, classid=None): + """ + Return a list of the VerbNet class identifiers. If a file + identifier is specified, then return only the VerbNet class + identifiers for classes (and subclasses) defined by that file. + If a lemma is specified, then return only VerbNet class + identifiers for classes that contain that lemma as a member. + If a wordnetid is specified, then return only identifiers for + classes that contain that wordnetid as a member. If a classid + is specified, then return only identifiers for subclasses of + the specified VerbNet class. 
+ If nothing is specified, return all classids within VerbNet + """ + if fileid is not None: + return [c for (c, f) in self._class_to_fileid.items() if f == fileid] + elif lemma is not None: + return self._lemma_to_class[lemma] + elif wordnetid is not None: + return self._wordnet_to_class[wordnetid] + elif classid is not None: + xmltree = self.vnclass(classid) + return [ + subclass.get('ID') + for subclass in xmltree.findall('SUBCLASSES/VNSUBCLASS') + ] + else: + return sorted(self._class_to_fileid.keys()) + + def vnclass(self, fileid_or_classid): + """Returns VerbNet class ElementTree + + Return an ElementTree containing the xml for the specified + VerbNet class. + + :param fileid_or_classid: An identifier specifying which class + should be returned. Can be a file identifier (such as + ``'put-9.1.xml'``), or a VerbNet class identifier (such as + ``'put-9.1'``) or a short VerbNet class identifier (such as + ``'9.1'``). + """ + # File identifier: just return the xml. + if fileid_or_classid in self._fileids: + return self.xml(fileid_or_classid) + + # Class identifier: get the xml, and find the right elt. + classid = self.longid(fileid_or_classid) + if classid in self._class_to_fileid: + fileid = self._class_to_fileid[self.longid(classid)] + tree = self.xml(fileid) + if classid == tree.get('ID'): + return tree + else: + for subclass in tree.findall('.//VNSUBCLASS'): + if classid == subclass.get('ID'): + return subclass + else: + assert False # we saw it during _index()! + + else: + raise ValueError('Unknown identifier {}'.format(fileid_or_classid)) + + def fileids(self, vnclass_ids=None): + """ + Return a list of fileids that make up this corpus. If + ``vnclass_ids`` is specified, then return the fileids that make + up the specified VerbNet class(es). + """ + if vnclass_ids is None: + return self._fileids + elif isinstance(vnclass_ids, string_types): + return [self._class_to_fileid[self.longid(vnclass_ids)]] + else: + return [ + self._class_to_fileid[self.longid(vnclass_id)] + for vnclass_id in vnclass_ids + ] + + def frames(self, vnclass): + """Given a VerbNet class, this method returns VerbNet frames + + The members returned are: + 1) Example + 2) Description + 3) Syntax + 4) Semantics + + :param vnclass: A VerbNet class identifier; or an ElementTree + containing the xml contents of a VerbNet class. + :return: frames - a list of frame dictionaries + """ + if isinstance(vnclass, string_types): + vnclass = self.vnclass(vnclass) + frames = [] + vnframes = vnclass.findall('FRAMES/FRAME') + for vnframe in vnframes: + frames.append( + { + 'example': self._get_example_within_frame(vnframe), + 'description': self._get_description_within_frame(vnframe), + 'syntax': self._get_syntactic_list_within_frame(vnframe), + 'semantics': self._get_semantics_within_frame(vnframe), + } + ) + return frames + + def subclasses(self, vnclass): + """Returns subclass ids, if any exist + + Given a VerbNet class, this method returns subclass ids (if they exist) + in a list of strings. + + :param vnclass: A VerbNet class identifier; or an ElementTree + containing the xml contents of a VerbNet class. 
+ :return: list of subclasses + """ + if isinstance(vnclass, string_types): + vnclass = self.vnclass(vnclass) + + subclasses = [ + subclass.get('ID') for subclass in vnclass.findall('SUBCLASSES/VNSUBCLASS') + ] + return subclasses + + def themroles(self, vnclass): + """Returns thematic roles participating in a VerbNet class + + Members returned as part of roles are- + 1) Type + 2) Modifiers + + :param vnclass: A VerbNet class identifier; or an ElementTree + containing the xml contents of a VerbNet class. + :return: themroles: A list of thematic roles in the VerbNet class + """ + if isinstance(vnclass, string_types): + vnclass = self.vnclass(vnclass) + + themroles = [] + for trole in vnclass.findall('THEMROLES/THEMROLE'): + themroles.append( + { + 'type': trole.get('type'), + 'modifiers': [ + {'value': restr.get('Value'), 'type': restr.get('type')} + for restr in trole.findall('SELRESTRS/SELRESTR') + ], + } + ) + return themroles + + ###################################################################### + # { Index Initialization + ###################################################################### + + def _index(self): + """ + Initialize the indexes ``_lemma_to_class``, + ``_wordnet_to_class``, and ``_class_to_fileid`` by scanning + through the corpus fileids. This is fast with cElementTree + (<0.1 secs), but quite slow (>10 secs) with the python + implementation of ElementTree. + """ + for fileid in self._fileids: + self._index_helper(self.xml(fileid), fileid) + + def _index_helper(self, xmltree, fileid): + """Helper for ``_index()``""" + vnclass = xmltree.get('ID') + self._class_to_fileid[vnclass] = fileid + self._shortid_to_longid[self.shortid(vnclass)] = vnclass + for member in xmltree.findall('MEMBERS/MEMBER'): + self._lemma_to_class[member.get('name')].append(vnclass) + for wn in member.get('wn', '').split(): + self._wordnet_to_class[wn].append(vnclass) + for subclass in xmltree.findall('SUBCLASSES/VNSUBCLASS'): + self._index_helper(subclass, fileid) + + def _quick_index(self): + """ + Initialize the indexes ``_lemma_to_class``, + ``_wordnet_to_class``, and ``_class_to_fileid`` by scanning + through the corpus fileids. This doesn't do proper xml parsing, + but is good enough to find everything in the standard VerbNet + corpus -- and it runs about 30 times faster than xml parsing + (with the python ElementTree; only 2-3 times faster with + cElementTree). + """ + # nb: if we got rid of wordnet_to_class, this would run 2-3 + # times faster. + for fileid in self._fileids: + vnclass = fileid[:-4] # strip the '.xml' + self._class_to_fileid[vnclass] = fileid + self._shortid_to_longid[self.shortid(vnclass)] = vnclass + for m in self._INDEX_RE.finditer(self.open(fileid).read()): + groups = m.groups() + if groups[0] is not None: + self._lemma_to_class[groups[0]].append(vnclass) + for wn in groups[1].split(): + self._wordnet_to_class[wn].append(vnclass) + elif groups[2] is not None: + self._class_to_fileid[groups[2]] = fileid + vnclass = groups[2] # for elts. + self._shortid_to_longid[self.shortid(vnclass)] = vnclass + else: + assert False, 'unexpected match condition' + + ###################################################################### + # { Identifier conversion + ###################################################################### + + def longid(self, shortid): + """Returns longid of a VerbNet class + + Given a short VerbNet class identifier (eg '37.10'), map it + to a long id (eg 'confess-37.10'). 
If ``shortid`` is already a + long id, then return it as-is""" + if self._LONGID_RE.match(shortid): + return shortid # it's already a longid. + elif not self._SHORTID_RE.match(shortid): + raise ValueError('vnclass identifier %r not found' % shortid) + try: + return self._shortid_to_longid[shortid] + except KeyError: + raise ValueError('vnclass identifier %r not found' % shortid) + + def shortid(self, longid): + """Returns shortid of a VerbNet class + + Given a long VerbNet class identifier (eg 'confess-37.10'), + map it to a short id (eg '37.10'). If ``longid`` is already a + short id, then return it as-is.""" + if self._SHORTID_RE.match(longid): + return longid # it's already a shortid. + m = self._LONGID_RE.match(longid) + if m: + return m.group(2) + else: + raise ValueError('vnclass identifier %r not found' % longid) + + ###################################################################### + # { Frame access utility functions + ###################################################################### + + def _get_semantics_within_frame(self, vnframe): + """Returns semantics within a single frame + + A utility function to retrieve semantics within a frame in VerbNet + Members of the semantics dictionary: + 1) Predicate value + 2) Arguments + + :param vnframe: An ElementTree containing the xml contents of + a VerbNet frame. + :return: semantics: semantics dictionary + """ + semantics_within_single_frame = [] + for pred in vnframe.findall('SEMANTICS/PRED'): + arguments = [ + {'type': arg.get('type'), 'value': arg.get('value')} + for arg in pred.findall('ARGS/ARG') + ] + semantics_within_single_frame.append( + {'predicate_value': pred.get('value'), 'arguments': arguments} + ) + return semantics_within_single_frame + + def _get_example_within_frame(self, vnframe): + """Returns example within a frame + + A utility function to retrieve an example within a frame in VerbNet. + + :param vnframe: An ElementTree containing the xml contents of + a VerbNet frame. + :return: example_text: The example sentence for this particular frame + """ + example_element = vnframe.find('EXAMPLES/EXAMPLE') + if example_element is not None: + example_text = example_element.text + else: + example_text = "" + return example_text + + def _get_description_within_frame(self, vnframe): + """Returns member description within frame + + A utility function to retrieve a description of participating members + within a frame in VerbNet. + + :param vnframe: An ElementTree containing the xml contents of + a VerbNet frame. + :return: description: a description dictionary with members - primary and secondary + """ + description_element = vnframe.find('DESCRIPTION') + return { + 'primary': description_element.attrib['primary'], + 'secondary': description_element.get('secondary', ''), + } + + def _get_syntactic_list_within_frame(self, vnframe): + """Returns semantics within a frame + + A utility function to retrieve semantics within a frame in VerbNet. + Members of the syntactic dictionary: + 1) POS Tag + 2) Modifiers + + :param vnframe: An ElementTree containing the xml contents of + a VerbNet frame. 
+ :return: syntax_within_single_frame + """ + syntax_within_single_frame = [] + for elt in vnframe.find('SYNTAX'): + pos_tag = elt.tag + modifiers = dict() + modifiers['value'] = elt.get('value') if 'value' in elt.attrib else "" + modifiers['selrestrs'] = [ + {'value': restr.get('Value'), 'type': restr.get('type')} + for restr in elt.findall('SELRESTRS/SELRESTR') + ] + modifiers['synrestrs'] = [ + {'value': restr.get('Value'), 'type': restr.get('type')} + for restr in elt.findall('SYNRESTRS/SYNRESTR') + ] + syntax_within_single_frame.append( + {'pos_tag': pos_tag, 'modifiers': modifiers} + ) + return syntax_within_single_frame + + ###################################################################### + # { Pretty Printing + ###################################################################### + + def pprint(self, vnclass): + """Returns pretty printed version of a VerbNet class + + Return a string containing a pretty-printed representation of + the given VerbNet class. + + :param vnclass: A VerbNet class identifier; or an ElementTree + containing the xml contents of a VerbNet class. + """ + if isinstance(vnclass, string_types): + vnclass = self.vnclass(vnclass) + + s = vnclass.get('ID') + '\n' + s += self.pprint_subclasses(vnclass, indent=' ') + '\n' + s += self.pprint_members(vnclass, indent=' ') + '\n' + s += ' Thematic roles:\n' + s += self.pprint_themroles(vnclass, indent=' ') + '\n' + s += ' Frames:\n' + s += self.pprint_frames(vnclass, indent=' ') + return s + + def pprint_subclasses(self, vnclass, indent=''): + """Returns pretty printed version of subclasses of VerbNet class + + Return a string containing a pretty-printed representation of + the given VerbNet class's subclasses. + + :param vnclass: A VerbNet class identifier; or an ElementTree + containing the xml contents of a VerbNet class. + """ + if isinstance(vnclass, string_types): + vnclass = self.vnclass(vnclass) + + subclasses = self.subclasses(vnclass) + if not subclasses: + subclasses = ['(none)'] + s = 'Subclasses: ' + ' '.join(subclasses) + return textwrap.fill( + s, 70, initial_indent=indent, subsequent_indent=indent + ' ' + ) + + def pprint_members(self, vnclass, indent=''): + """Returns pretty printed version of members in a VerbNet class + + Return a string containing a pretty-printed representation of + the given VerbNet class's member verbs. + + :param vnclass: A VerbNet class identifier; or an ElementTree + containing the xml contents of a VerbNet class. + """ + if isinstance(vnclass, string_types): + vnclass = self.vnclass(vnclass) + + members = self.lemmas(vnclass) + if not members: + members = ['(none)'] + s = 'Members: ' + ' '.join(members) + return textwrap.fill( + s, 70, initial_indent=indent, subsequent_indent=indent + ' ' + ) + + def pprint_themroles(self, vnclass, indent=''): + """Returns pretty printed version of thematic roles in a VerbNet class + + Return a string containing a pretty-printed representation of + the given VerbNet class's thematic roles. + + :param vnclass: A VerbNet class identifier; or an ElementTree + containing the xml contents of a VerbNet class. 
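+
+        A doctest-style usage sketch (editor-added illustration, not part of
+        the original NLTK source; it reuses the ``'put-9.1'`` class id shown
+        in the ``vnclass`` docstring above):
+
+        >>> from nltk.corpus import verbnet as vn        # doctest: +SKIP
+        >>> print(vn.pprint_themroles('put-9.1'))        # doctest: +SKIP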
+ """ + if isinstance(vnclass, string_types): + vnclass = self.vnclass(vnclass) + + pieces = [] + for themrole in self.themroles(vnclass): + piece = indent + '* ' + themrole.get('type') + modifiers = [ + modifier['value'] + modifier['type'] + for modifier in themrole['modifiers'] + ] + if modifiers: + piece += '[{}]'.format(' '.join(modifiers)) + pieces.append(piece) + return '\n'.join(pieces) + + def pprint_frames(self, vnclass, indent=''): + """Returns pretty version of all frames in a VerbNet class + + Return a string containing a pretty-printed representation of + the list of frames within the VerbNet class. + + :param vnclass: A VerbNet class identifier; or an ElementTree + containing the xml contents of a VerbNet class. + """ + if isinstance(vnclass, string_types): + vnclass = self.vnclass(vnclass) + pieces = [] + for vnframe in self.frames(vnclass): + pieces.append(self._pprint_single_frame(vnframe, indent)) + return '\n'.join(pieces) + + def _pprint_single_frame(self, vnframe, indent=''): + """Returns pretty printed version of a single frame in a VerbNet class + + Returns a string containing a pretty-printed representation of + the given frame. + + :param vnframe: An ElementTree containing the xml contents of + a VerbNet frame. + """ + frame_string = self._pprint_description_within_frame(vnframe, indent) + '\n' + frame_string += self._pprint_example_within_frame(vnframe, indent + ' ') + '\n' + frame_string += ( + self._pprint_syntax_within_frame(vnframe, indent + ' Syntax: ') + '\n' + ) + frame_string += indent + ' Semantics:\n' + frame_string += self._pprint_semantics_within_frame(vnframe, indent + ' ') + return frame_string + + def _pprint_example_within_frame(self, vnframe, indent=''): + """Returns pretty printed version of example within frame in a VerbNet class + + Return a string containing a pretty-printed representation of + the given VerbNet frame example. + + :param vnframe: An ElementTree containing the xml contents of + a Verbnet frame. + """ + if vnframe['example']: + return indent + ' Example: ' + vnframe['example'] + + def _pprint_description_within_frame(self, vnframe, indent=''): + """Returns pretty printed version of a VerbNet frame description + + Return a string containing a pretty-printed representation of + the given VerbNet frame description. + + :param vnframe: An ElementTree containing the xml contents of + a VerbNet frame. + """ + description = indent + vnframe['description']['primary'] + if vnframe['description']['secondary']: + description += ' ({})'.format(vnframe['description']['secondary']) + return description + + def _pprint_syntax_within_frame(self, vnframe, indent=''): + """Returns pretty printed version of syntax within a frame in a VerbNet class + + Return a string containing a pretty-printed representation of + the given VerbNet frame syntax. + + :param vnframe: An ElementTree containing the xml contents of + a VerbNet frame. 
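+
+        A usage sketch (editor-added illustration, not part of the original
+        NLTK source). In the current implementation ``vnframe`` is one of the
+        frame dictionaries produced by ``frames()``, for example:
+
+        >>> from nltk.corpus import verbnet as vn                       # doctest: +SKIP
+        >>> frame = vn.frames('put-9.1')[0]                             # doctest: +SKIP
+        >>> print(vn._pprint_syntax_within_frame(frame, 'Syntax: '))    # doctest: +SKIP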
+ """ + pieces = [] + for element in vnframe['syntax']: + piece = element['pos_tag'] + modifier_list = [] + if 'value' in element['modifiers'] and element['modifiers']['value']: + modifier_list.append(element['modifiers']['value']) + modifier_list += [ + '{}{}'.format(restr['value'], restr['type']) + for restr in ( + element['modifiers']['selrestrs'] + + element['modifiers']['synrestrs'] + ) + ] + if modifier_list: + piece += '[{}]'.format(' '.join(modifier_list)) + pieces.append(piece) + + return indent + ' '.join(pieces) + + def _pprint_semantics_within_frame(self, vnframe, indent=''): + """Returns a pretty printed version of semantics within frame in a VerbNet class + + Return a string containing a pretty-printed representation of + the given VerbNet frame semantics. + + :param vnframe: An ElementTree containing the xml contents of + a VerbNet frame. + """ + pieces = [] + for predicate in vnframe['semantics']: + arguments = [argument['value'] for argument in predicate['arguments']] + pieces.append( + '{}({})'.format(predicate['predicate_value'], ', '.join(arguments)) + ) + return '\n'.join('{}* {}'.format(indent, piece) for piece in pieces) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/wordlist.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/wordlist.py new file mode 100644 index 0000000..31332d7 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/wordlist.py @@ -0,0 +1,176 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Word List Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT +from six import string_types + +from nltk.tokenize import line_tokenize + +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + + +class WordListCorpusReader(CorpusReader): + """ + List of words, one per line. Blank lines are ignored. + """ + + def words(self, fileids=None, ignore_lines_startswith='\n'): + return [ + line + for line in line_tokenize(self.raw(fileids)) + if not line.startswith(ignore_lines_startswith) + ] + + def raw(self, fileids=None): + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + +class SwadeshCorpusReader(WordListCorpusReader): + def entries(self, fileids=None): + """ + :return: a tuple of words for the specified fileids. + """ + if not fileids: + fileids = self.fileids() + + wordlists = [self.words(f) for f in fileids] + return list(zip(*wordlists)) + + +class NonbreakingPrefixesCorpusReader(WordListCorpusReader): + """ + This is a class to read the nonbreaking prefixes textfiles from the + Moses Machine Translation toolkit. These lists are used in the Python port + of the Moses' word tokenizer. + """ + + available_langs = { + 'catalan': 'ca', + 'czech': 'cs', + 'german': 'de', + 'greek': 'el', + 'english': 'en', + 'spanish': 'es', + 'finnish': 'fi', + 'french': 'fr', + 'hungarian': 'hu', + 'icelandic': 'is', + 'italian': 'it', + 'latvian': 'lv', + 'dutch': 'nl', + 'polish': 'pl', + 'portuguese': 'pt', + 'romanian': 'ro', + 'russian': 'ru', + 'slovak': 'sk', + 'slovenian': 'sl', + 'swedish': 'sv', + 'tamil': 'ta', + } + # Also, add the lang IDs as the keys. 
+ available_langs.update({v: v for v in available_langs.values()}) + + def words(self, lang=None, fileids=None, ignore_lines_startswith='#'): + """ + This module returns a list of nonbreaking prefixes for the specified + language(s). + + >>> from nltk.corpus import nonbreaking_prefixes as nbp + >>> nbp.words('en')[:10] == [u'A', u'B', u'C', u'D', u'E', u'F', u'G', u'H', u'I', u'J'] + True + >>> nbp.words('ta')[:5] == [u'\u0b85', u'\u0b86', u'\u0b87', u'\u0b88', u'\u0b89'] + True + + :return: a list words for the specified language(s). + """ + # If *lang* in list of languages available, allocate apt fileid. + # Otherwise, the function returns non-breaking prefixes for + # all languages when fileids==None. + if lang in self.available_langs: + lang = self.available_langs[lang] + fileids = ['nonbreaking_prefix.' + lang] + return [ + line + for line in line_tokenize(self.raw(fileids)) + if not line.startswith(ignore_lines_startswith) + ] + + +class UnicharsCorpusReader(WordListCorpusReader): + """ + This class is used to read lists of characters from the Perl Unicode + Properties (see http://perldoc.perl.org/perluniprops.html). + The files in the perluniprop.zip are extracted using the Unicode::Tussle + module from http://search.cpan.org/~bdfoy/Unicode-Tussle-1.11/lib/Unicode/Tussle.pm + """ + + # These are categories similar to the Perl Unicode Properties + available_categories = [ + 'Close_Punctuation', + 'Currency_Symbol', + 'IsAlnum', + 'IsAlpha', + 'IsLower', + 'IsN', + 'IsSc', + 'IsSo', + 'IsUpper', + 'Line_Separator', + 'Number', + 'Open_Punctuation', + 'Punctuation', + 'Separator', + 'Symbol', + ] + + def chars(self, category=None, fileids=None): + """ + This module returns a list of characters from the Perl Unicode Properties. + They are very useful when porting Perl tokenizers to Python. + + >>> from nltk.corpus import perluniprops as pup + >>> pup.chars('Open_Punctuation')[:5] == [u'(', u'[', u'{', u'\u0f3a', u'\u0f3c'] + True + >>> pup.chars('Currency_Symbol')[:5] == [u'$', u'\xa2', u'\xa3', u'\xa4', u'\xa5'] + True + >>> pup.available_categories + ['Close_Punctuation', 'Currency_Symbol', 'IsAlnum', 'IsAlpha', 'IsLower', 'IsN', 'IsSc', 'IsSo', 'IsUpper', 'Line_Separator', 'Number', 'Open_Punctuation', 'Punctuation', 'Separator', 'Symbol'] + + :return: a list of characters given the specific unicode character category + """ + if category in self.available_categories: + fileids = [category + '.txt'] + return list(self.raw(fileids).strip()) + + +class MWAPPDBCorpusReader(WordListCorpusReader): + """ + This class is used to read the list of word pairs from the subset of lexical + pairs of The Paraphrase Database (PPDB) XXXL used in the Monolingual Word + Alignment (MWA) algorithm described in Sultan et al. (2014a, 2014b, 2015): + - http://acl2014.org/acl2014/Q14/pdf/Q14-1017 + - http://www.aclweb.org/anthology/S14-2039 + - http://www.aclweb.org/anthology/S15-2027 + + The original source of the full PPDB corpus can be found on + http://www.cis.upenn.edu/~ccb/ppdb/ + + :return: a list of tuples of similar lexical terms. + """ + + mwa_ppdb_xxxl_file = 'ppdb-1.0-xxxl-lexical.extended.synonyms.uniquepairs' + + def entries(self, fileids=mwa_ppdb_xxxl_file): + """ + :return: a tuple of synonym word pairs. 
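+
+        A usage sketch (editor-added illustration, not part of the original
+        NLTK source; it assumes the ``mwa_ppdb`` corpus data has already been
+        installed, e.g. via ``nltk.download()``):
+
+        >>> from nltk.corpus import mwa_ppdb     # doctest: +SKIP
+        >>> mwa_ppdb.entries()[:2]               # doctest: +SKIP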
+ """ + return [tuple(line.split('\t')) for line in line_tokenize(self.raw(fileids))] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/wordnet.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/wordnet.py new file mode 100644 index 0000000..df2098c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/wordnet.py @@ -0,0 +1,2178 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: WordNet +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bethard +# Steven Bird +# Edward Loper +# Nitin Madnani +# Nasruddin A’aidil Shari +# Sim Wei Ying Geraldine +# Soe Lynn +# Francis Bond +# URL: +# For license information, see LICENSE.TXT + +""" +An NLTK interface for WordNet + +WordNet is a lexical database of English. +Using synsets, helps find conceptual relationships between words +such as hypernyms, hyponyms, synonyms, antonyms etc. + +For details about WordNet see: +http://wordnet.princeton.edu/ + +This module also allows you to find lemmas in languages +other than English from the Open Multilingual Wordnet +http://compling.hss.ntu.edu.sg/omw/ + +""" + +from __future__ import print_function, unicode_literals + +import math +import re +from itertools import islice, chain +from functools import total_ordering +from operator import itemgetter +from collections import defaultdict, deque + +from six import iteritems +from six.moves import range + +from nltk.corpus.reader import CorpusReader +from nltk.util import binary_search_file as _binary_search_file +from nltk.probability import FreqDist +from nltk.compat import python_2_unicode_compatible +from nltk.internals import deprecated + +###################################################################### +# Table of Contents +###################################################################### +# - Constants +# - Data Classes +# - WordNetError +# - Lemma +# - Synset +# - WordNet Corpus Reader +# - WordNet Information Content Corpus Reader +# - Similarity Metrics +# - Demo + +###################################################################### +# Constants +###################################################################### + +#: Positive infinity (for similarity functions) +_INF = 1e300 + +# { Part-of-speech constants +ADJ, ADJ_SAT, ADV, NOUN, VERB = 'a', 's', 'r', 'n', 'v' +# } + +POS_LIST = [NOUN, VERB, ADJ, ADV] + +# A table of strings that are used to express verb frames. 
+VERB_FRAME_STRINGS = ( + None, + "Something %s", + "Somebody %s", + "It is %sing", + "Something is %sing PP", + "Something %s something Adjective/Noun", + "Something %s Adjective/Noun", + "Somebody %s Adjective", + "Somebody %s something", + "Somebody %s somebody", + "Something %s somebody", + "Something %s something", + "Something %s to somebody", + "Somebody %s on something", + "Somebody %s somebody something", + "Somebody %s something to somebody", + "Somebody %s something from somebody", + "Somebody %s somebody with something", + "Somebody %s somebody of something", + "Somebody %s something on somebody", + "Somebody %s somebody PP", + "Somebody %s something PP", + "Somebody %s PP", + "Somebody's (body part) %s", + "Somebody %s somebody to INFINITIVE", + "Somebody %s somebody INFINITIVE", + "Somebody %s that CLAUSE", + "Somebody %s to somebody", + "Somebody %s to INFINITIVE", + "Somebody %s whether INFINITIVE", + "Somebody %s somebody into V-ing something", + "Somebody %s something with something", + "Somebody %s INFINITIVE", + "Somebody %s VERB-ing", + "It %s that CLAUSE", + "Something %s INFINITIVE", +) + +SENSENUM_RE = re.compile(r'\.[\d]+\.') + + +###################################################################### +# Data Classes +###################################################################### + + +class WordNetError(Exception): + """An exception class for wordnet-related errors.""" + + +@total_ordering +class _WordNetObject(object): + """A common base class for lemmas and synsets.""" + + def hypernyms(self): + return self._related('@') + + def _hypernyms(self): + return self._related('@') + + def instance_hypernyms(self): + return self._related('@i') + + def _instance_hypernyms(self): + return self._related('@i') + + def hyponyms(self): + return self._related('~') + + def instance_hyponyms(self): + return self._related('~i') + + def member_holonyms(self): + return self._related('#m') + + def substance_holonyms(self): + return self._related('#s') + + def part_holonyms(self): + return self._related('#p') + + def member_meronyms(self): + return self._related('%m') + + def substance_meronyms(self): + return self._related('%s') + + def part_meronyms(self): + return self._related('%p') + + def topic_domains(self): + return self._related(';c') + + def in_topic_domains(self): + return self._related('-c') + + def region_domains(self): + return self._related(';r') + + def in_region_domains(self): + return self._related('-r') + + def usage_domains(self): + return self._related(';u') + + def in_usage_domains(self): + return self._related('-u') + + def attributes(self): + return self._related('=') + + def entailments(self): + return self._related('*') + + def causes(self): + return self._related('>') + + def also_sees(self): + return self._related('^') + + def verb_groups(self): + return self._related('$') + + def similar_tos(self): + return self._related('&') + + def __hash__(self): + return hash(self._name) + + def __eq__(self, other): + return self._name == other._name + + def __ne__(self, other): + return self._name != other._name + + def __lt__(self, other): + return self._name < other._name + + +@python_2_unicode_compatible +class Lemma(_WordNetObject): + """ + The lexical entry for a single morphological form of a + sense-disambiguated word. + + Create a Lemma from a "..." string where: + is the morphological stem identifying the synset + is one of the module attributes ADJ, ADJ_SAT, ADV, NOUN or VERB + is the sense number, counting from 0. 
+ is the morphological form of interest + + Note that and can be different, e.g. the Synset + 'salt.n.03' has the Lemmas 'salt.n.03.salt', 'salt.n.03.saltiness' and + 'salt.n.03.salinity'. + + Lemma attributes, accessible via methods with the same name: + + - name: The canonical name of this lemma. + - synset: The synset that this lemma belongs to. + - syntactic_marker: For adjectives, the WordNet string identifying the + syntactic position relative modified noun. See: + https://wordnet.princeton.edu/documentation/wninput5wn + For all other parts of speech, this attribute is None. + - count: The frequency of this lemma in wordnet. + + Lemma methods: + + Lemmas have the following methods for retrieving related Lemmas. They + correspond to the names for the pointer symbols defined here: + https://wordnet.princeton.edu/documentation/wninput5wn + These methods all return lists of Lemmas: + + - antonyms + - hypernyms, instance_hypernyms + - hyponyms, instance_hyponyms + - member_holonyms, substance_holonyms, part_holonyms + - member_meronyms, substance_meronyms, part_meronyms + - topic_domains, region_domains, usage_domains + - attributes + - derivationally_related_forms + - entailments + - causes + - also_sees + - verb_groups + - similar_tos + - pertainyms + """ + + __slots__ = [ + '_wordnet_corpus_reader', + '_name', + '_syntactic_marker', + '_synset', + '_frame_strings', + '_frame_ids', + '_lexname_index', + '_lex_id', + '_lang', + '_key', + ] + + def __init__( + self, + wordnet_corpus_reader, + synset, + name, + lexname_index, + lex_id, + syntactic_marker, + ): + self._wordnet_corpus_reader = wordnet_corpus_reader + self._name = name + self._syntactic_marker = syntactic_marker + self._synset = synset + self._frame_strings = [] + self._frame_ids = [] + self._lexname_index = lexname_index + self._lex_id = lex_id + self._lang = 'eng' + + self._key = None # gets set later. + + def name(self): + return self._name + + def syntactic_marker(self): + return self._syntactic_marker + + def synset(self): + return self._synset + + def frame_strings(self): + return self._frame_strings + + def frame_ids(self): + return self._frame_ids + + def lang(self): + return self._lang + + def key(self): + return self._key + + def __repr__(self): + tup = type(self).__name__, self._synset._name, self._name + return "%s('%s.%s')" % tup + + def _related(self, relation_symbol): + get_synset = self._wordnet_corpus_reader.synset_from_pos_and_offset + if (self._name, relation_symbol) not in self._synset._lemma_pointers: + return [] + return [ + get_synset(pos, offset)._lemmas[lemma_index] + for pos, offset, lemma_index in self._synset._lemma_pointers[ + self._name, relation_symbol + ] + ] + + def count(self): + """Return the frequency count for this Lemma""" + return self._wordnet_corpus_reader.lemma_count(self) + + def antonyms(self): + return self._related('!') + + def derivationally_related_forms(self): + return self._related('+') + + def pertainyms(self): + return self._related('\\') + + +@python_2_unicode_compatible +class Synset(_WordNetObject): + """Create a Synset from a ".." string where: + is the word's morphological stem + is one of the module attributes ADJ, ADJ_SAT, ADV, NOUN or VERB + is the sense number, counting from 0. + + Synset attributes, accessible via methods with the same name: + + - name: The canonical name of this synset, formed using the first lemma + of this synset. 
Note that this may be different from the name + passed to the constructor if that string used a different lemma to + identify the synset. + - pos: The synset's part of speech, matching one of the module level + attributes ADJ, ADJ_SAT, ADV, NOUN or VERB. + - lemmas: A list of the Lemma objects for this synset. + - definition: The definition for this synset. + - examples: A list of example strings for this synset. + - offset: The offset in the WordNet dict file of this synset. + - lexname: The name of the lexicographer file containing this synset. + + Synset methods: + + Synsets have the following methods for retrieving related Synsets. + They correspond to the names for the pointer symbols defined here: + https://wordnet.princeton.edu/documentation/wninput5wn + These methods all return lists of Synsets. + + - hypernyms, instance_hypernyms + - hyponyms, instance_hyponyms + - member_holonyms, substance_holonyms, part_holonyms + - member_meronyms, substance_meronyms, part_meronyms + - attributes + - entailments + - causes + - also_sees + - verb_groups + - similar_tos + + Additionally, Synsets support the following methods specific to the + hypernym relation: + + - root_hypernyms + - common_hypernyms + - lowest_common_hypernyms + + Note that Synsets do not support the following relations because + these are defined by WordNet as lexical relations: + + - antonyms + - derivationally_related_forms + - pertainyms + """ + + __slots__ = [ + '_pos', + '_offset', + '_name', + '_frame_ids', + '_lemmas', + '_lemma_names', + '_definition', + '_examples', + '_lexname', + '_pointers', + '_lemma_pointers', + '_max_depth', + '_min_depth', + ] + + def __init__(self, wordnet_corpus_reader): + self._wordnet_corpus_reader = wordnet_corpus_reader + # All of these attributes get initialized by + # WordNetCorpusReader._synset_from_pos_and_line() + + self._pos = None + self._offset = None + self._name = None + self._frame_ids = [] + self._lemmas = [] + self._lemma_names = [] + self._definition = None + self._examples = [] + self._lexname = None # lexicographer name + self._all_hypernyms = None + + self._pointers = defaultdict(set) + self._lemma_pointers = defaultdict(list) + + def pos(self): + return self._pos + + def offset(self): + return self._offset + + def name(self): + return self._name + + def frame_ids(self): + return self._frame_ids + + def definition(self): + return self._definition + + def examples(self): + return self._examples + + def lexname(self): + return self._lexname + + def _needs_root(self): + if self._pos == NOUN: + if self._wordnet_corpus_reader.get_version() == '1.6': + return True + else: + return False + elif self._pos == VERB: + return True + + def lemma_names(self, lang='eng'): + '''Return all the lemma_names associated with the synset''' + if lang == 'eng': + return self._lemma_names + else: + self._wordnet_corpus_reader._load_lang_data(lang) + + i = self._wordnet_corpus_reader.ss2of(self, lang) + if i in self._wordnet_corpus_reader._lang_data[lang][0]: + return self._wordnet_corpus_reader._lang_data[lang][0][i] + else: + return [] + + def lemmas(self, lang='eng'): + '''Return all the lemma objects associated with the synset''' + if lang == 'eng': + return self._lemmas + else: + self._wordnet_corpus_reader._load_lang_data(lang) + lemmark = [] + lemmy = self.lemma_names(lang) + for lem in lemmy: + temp = Lemma( + self._wordnet_corpus_reader, + self, + lem, + self._wordnet_corpus_reader._lexnames.index(self.lexname()), + 0, + None, + ) + temp._lang = lang + lemmark.append(temp) + return 
lemmark + + def root_hypernyms(self): + """Get the topmost hypernyms of this synset in WordNet.""" + + result = [] + seen = set() + todo = [self] + while todo: + next_synset = todo.pop() + if next_synset not in seen: + seen.add(next_synset) + next_hypernyms = ( + next_synset.hypernyms() + next_synset.instance_hypernyms() + ) + if not next_hypernyms: + result.append(next_synset) + else: + todo.extend(next_hypernyms) + return result + + # Simpler implementation which makes incorrect assumption that + # hypernym hierarchy is acyclic: + # + # if not self.hypernyms(): + # return [self] + # else: + # return list(set(root for h in self.hypernyms() + # for root in h.root_hypernyms())) + def max_depth(self): + """ + :return: The length of the longest hypernym path from this + synset to the root. + """ + + if "_max_depth" not in self.__dict__: + hypernyms = self.hypernyms() + self.instance_hypernyms() + if not hypernyms: + self._max_depth = 0 + else: + self._max_depth = 1 + max(h.max_depth() for h in hypernyms) + return self._max_depth + + def min_depth(self): + """ + :return: The length of the shortest hypernym path from this + synset to the root. + """ + + if "_min_depth" not in self.__dict__: + hypernyms = self.hypernyms() + self.instance_hypernyms() + if not hypernyms: + self._min_depth = 0 + else: + self._min_depth = 1 + min(h.min_depth() for h in hypernyms) + return self._min_depth + + def closure(self, rel, depth=-1): + """Return the transitive closure of source under the rel + relationship, breadth-first + + >>> from nltk.corpus import wordnet as wn + >>> dog = wn.synset('dog.n.01') + >>> hyp = lambda s:s.hypernyms() + >>> list(dog.closure(hyp)) + [Synset('canine.n.02'), Synset('domestic_animal.n.01'), + Synset('carnivore.n.01'), Synset('animal.n.01'), + Synset('placental.n.01'), Synset('organism.n.01'), + Synset('mammal.n.01'), Synset('living_thing.n.01'), + Synset('vertebrate.n.01'), Synset('whole.n.02'), + Synset('chordate.n.01'), Synset('object.n.01'), + Synset('physical_entity.n.01'), Synset('entity.n.01')] + + """ + from nltk.util import breadth_first + + synset_offsets = [] + for synset in breadth_first(self, rel, depth): + if synset._offset != self._offset: + if synset._offset not in synset_offsets: + synset_offsets.append(synset._offset) + yield synset + + def hypernym_paths(self): + """ + Get the path(s) from this synset to the root, where each path is a + list of the synset nodes traversed on the way to the root. + + :return: A list of lists, where each list gives the node sequence + connecting the initial ``Synset`` node and a root node. + """ + paths = [] + + hypernyms = self.hypernyms() + self.instance_hypernyms() + if len(hypernyms) == 0: + paths = [[self]] + + for hypernym in hypernyms: + for ancestor_list in hypernym.hypernym_paths(): + ancestor_list.append(self) + paths.append(ancestor_list) + return paths + + def common_hypernyms(self, other): + """ + Find all synsets that are hypernyms of this synset and the + other synset. + + :type other: Synset + :param other: other input synset. + :return: The synsets that are hypernyms of both synsets. 
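+
+        A doctest-style sketch (editor-added illustration, not part of the
+        original NLTK source; it reuses the ``'dog.n.01'`` synset from the
+        ``closure()`` docstring above, and ``'cat.n.01'`` is assumed to be
+        present in the installed WordNet data):
+
+        >>> from nltk.corpus import wordnet as wn                        # doctest: +SKIP
+        >>> dog, cat = wn.synset('dog.n.01'), wn.synset('cat.n.01')      # doctest: +SKIP
+        >>> sorted(dog.common_hypernyms(cat))[:3]                        # doctest: +SKIP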
+ """ + if not self._all_hypernyms: + self._all_hypernyms = set( + self_synset + for self_synsets in self._iter_hypernym_lists() + for self_synset in self_synsets + ) + if not other._all_hypernyms: + other._all_hypernyms = set( + other_synset + for other_synsets in other._iter_hypernym_lists() + for other_synset in other_synsets + ) + return list(self._all_hypernyms.intersection(other._all_hypernyms)) + + def lowest_common_hypernyms(self, other, simulate_root=False, use_min_depth=False): + """ + Get a list of lowest synset(s) that both synsets have as a hypernym. + When `use_min_depth == False` this means that the synset which appears + as a hypernym of both `self` and `other` with the lowest maximum depth + is returned or if there are multiple such synsets at the same depth + they are all returned + + However, if `use_min_depth == True` then the synset(s) which has/have + the lowest minimum depth and appear(s) in both paths is/are returned. + + By setting the use_min_depth flag to True, the behavior of NLTK2 can be + preserved. This was changed in NLTK3 to give more accurate results in a + small set of cases, generally with synsets concerning people. (eg: + 'chef.n.01', 'fireman.n.01', etc.) + + This method is an implementation of Ted Pedersen's "Lowest Common + Subsumer" method from the Perl Wordnet module. It can return either + "self" or "other" if they are a hypernym of the other. + + :type other: Synset + :param other: other input synset + :type simulate_root: bool + :param simulate_root: The various verb taxonomies do not + share a single root which disallows this metric from working for + synsets that are not connected. This flag (False by default) + creates a fake root that connects all the taxonomies. Set it + to True to enable this behavior. For the noun taxonomy, + there is usually a default root except for WordNet version 1.6. + If you are using wordnet 1.6, a fake root will need to be added + for nouns as well. + :type use_min_depth: bool + :param use_min_depth: This setting mimics older (v2) behavior of NLTK + wordnet If True, will use the min_depth function to calculate the + lowest common hypernyms. This is known to give strange results for + some synset pairs (eg: 'chef.n.01', 'fireman.n.01') but is retained + for backwards compatibility + :return: The synsets that are the lowest common hypernyms of both + synsets + """ + synsets = self.common_hypernyms(other) + if simulate_root: + fake_synset = Synset(None) + fake_synset._name = '*ROOT*' + fake_synset.hypernyms = lambda: [] + fake_synset.instance_hypernyms = lambda: [] + synsets.append(fake_synset) + + try: + if use_min_depth: + max_depth = max(s.min_depth() for s in synsets) + unsorted_lch = [s for s in synsets if s.min_depth() == max_depth] + else: + max_depth = max(s.max_depth() for s in synsets) + unsorted_lch = [s for s in synsets if s.max_depth() == max_depth] + return sorted(unsorted_lch) + except ValueError: + return [] + + def hypernym_distances(self, distance=0, simulate_root=False): + """ + Get the path(s) from this synset to the root, counting the distance + of each node from the initial node on the way. A set of + (synset, distance) tuples is returned. + + :type distance: int + :param distance: the distance (number of edges) from this hypernym to + the original hypernym ``Synset`` on which this method was called. + :return: A set of ``(Synset, int)`` tuples where each ``Synset`` is + a hypernym of the first ``Synset``. 
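+
+        A minimal sketch (editor-added illustration, not part of the original
+        NLTK source):
+
+        >>> from nltk.corpus import wordnet as wn                   # doctest: +SKIP
+        >>> dists = wn.synset('dog.n.01').hypernym_distances()      # doctest: +SKIP
+        >>> sorted(dists, key=lambda pair: pair[1])[0]               # doctest: +SKIP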
+ """ + distances = set([(self, distance)]) + for hypernym in self._hypernyms() + self._instance_hypernyms(): + distances |= hypernym.hypernym_distances(distance + 1, simulate_root=False) + if simulate_root: + fake_synset = Synset(None) + fake_synset._name = '*ROOT*' + fake_synset_distance = max(distances, key=itemgetter(1))[1] + distances.add((fake_synset, fake_synset_distance + 1)) + return distances + + def _shortest_hypernym_paths(self, simulate_root): + if self._name == '*ROOT*': + return {self: 0} + + queue = deque([(self, 0)]) + path = {} + + while queue: + s, depth = queue.popleft() + if s in path: + continue + path[s] = depth + + depth += 1 + queue.extend((hyp, depth) for hyp in s._hypernyms()) + queue.extend((hyp, depth) for hyp in s._instance_hypernyms()) + + if simulate_root: + fake_synset = Synset(None) + fake_synset._name = '*ROOT*' + path[fake_synset] = max(path.values()) + 1 + + return path + + def shortest_path_distance(self, other, simulate_root=False): + """ + Returns the distance of the shortest path linking the two synsets (if + one exists). For each synset, all the ancestor nodes and their + distances are recorded and compared. The ancestor node common to both + synsets that can be reached with the minimum number of traversals is + used. If no ancestor nodes are common, None is returned. If a node is + compared with itself 0 is returned. + + :type other: Synset + :param other: The Synset to which the shortest path will be found. + :return: The number of edges in the shortest path connecting the two + nodes, or None if no path exists. + """ + + if self == other: + return 0 + + dist_dict1 = self._shortest_hypernym_paths(simulate_root) + dist_dict2 = other._shortest_hypernym_paths(simulate_root) + + # For each ancestor synset common to both subject synsets, find the + # connecting path length. Return the shortest of these. + + inf = float('inf') + path_distance = inf + for synset, d1 in iteritems(dist_dict1): + d2 = dist_dict2.get(synset, inf) + path_distance = min(path_distance, d1 + d2) + + return None if math.isinf(path_distance) else path_distance + + def tree(self, rel, depth=-1, cut_mark=None): + """ + >>> from nltk.corpus import wordnet as wn + >>> dog = wn.synset('dog.n.01') + >>> hyp = lambda s:s.hypernyms() + >>> from pprint import pprint + >>> pprint(dog.tree(hyp)) + [Synset('dog.n.01'), + [Synset('canine.n.02'), + [Synset('carnivore.n.01'), + [Synset('placental.n.01'), + [Synset('mammal.n.01'), + [Synset('vertebrate.n.01'), + [Synset('chordate.n.01'), + [Synset('animal.n.01'), + [Synset('organism.n.01'), + [Synset('living_thing.n.01'), + [Synset('whole.n.02'), + [Synset('object.n.01'), + [Synset('physical_entity.n.01'), + [Synset('entity.n.01')]]]]]]]]]]]]], + [Synset('domestic_animal.n.01'), + [Synset('animal.n.01'), + [Synset('organism.n.01'), + [Synset('living_thing.n.01'), + [Synset('whole.n.02'), + [Synset('object.n.01'), + [Synset('physical_entity.n.01'), [Synset('entity.n.01')]]]]]]]]] + """ + + tree = [self] + if depth != 0: + tree += [x.tree(rel, depth - 1, cut_mark) for x in rel(self)] + elif cut_mark: + tree += [cut_mark] + return tree + + # interface to similarity methods + def path_similarity(self, other, verbose=False, simulate_root=True): + """ + Path Distance Similarity: + Return a score denoting how similar two word senses are, based on the + shortest path that connects the senses in the is-a (hypernym/hypnoym) + taxonomy. 
The score is in the range 0 to 1, except in those cases where + a path cannot be found (will only be true for verbs as there are many + distinct verb taxonomies), in which case None is returned. A score of + 1 represents identity i.e. comparing a sense with itself will return 1. + + :type other: Synset + :param other: The ``Synset`` that this ``Synset`` is being compared to. + :type simulate_root: bool + :param simulate_root: The various verb taxonomies do not + share a single root which disallows this metric from working for + synsets that are not connected. This flag (True by default) + creates a fake root that connects all the taxonomies. Set it + to false to disable this behavior. For the noun taxonomy, + there is usually a default root except for WordNet version 1.6. + If you are using wordnet 1.6, a fake root will be added for nouns + as well. + :return: A score denoting the similarity of the two ``Synset`` objects, + normally between 0 and 1. None is returned if no connecting path + could be found. 1 is returned if a ``Synset`` is compared with + itself. + """ + + distance = self.shortest_path_distance( + other, simulate_root=simulate_root and self._needs_root() + ) + if distance is None or distance < 0: + return None + return 1.0 / (distance + 1) + + def lch_similarity(self, other, verbose=False, simulate_root=True): + """ + Leacock Chodorow Similarity: + Return a score denoting how similar two word senses are, based on the + shortest path that connects the senses (as above) and the maximum depth + of the taxonomy in which the senses occur. The relationship is given as + -log(p/2d) where p is the shortest path length and d is the taxonomy + depth. + + :type other: Synset + :param other: The ``Synset`` that this ``Synset`` is being compared to. + :type simulate_root: bool + :param simulate_root: The various verb taxonomies do not + share a single root which disallows this metric from working for + synsets that are not connected. This flag (True by default) + creates a fake root that connects all the taxonomies. Set it + to false to disable this behavior. For the noun taxonomy, + there is usually a default root except for WordNet version 1.6. + If you are using wordnet 1.6, a fake root will be added for nouns + as well. + :return: A score denoting the similarity of the two ``Synset`` objects, + normally greater than 0. None is returned if no connecting path + could be found. If a ``Synset`` is compared with itself, the + maximum score is returned, which varies depending on the taxonomy + depth. + """ + + if self._pos != other._pos: + raise WordNetError( + 'Computing the lch similarity requires ' + '%s and %s to have the same part of speech.' % (self, other) + ) + + need_root = self._needs_root() + + if self._pos not in self._wordnet_corpus_reader._max_depth: + self._wordnet_corpus_reader._compute_max_depth(self._pos, need_root) + + depth = self._wordnet_corpus_reader._max_depth[self._pos] + + distance = self.shortest_path_distance( + other, simulate_root=simulate_root and need_root + ) + + if distance is None or distance < 0 or depth == 0: + return None + return -math.log((distance + 1) / (2.0 * depth)) + + def wup_similarity(self, other, verbose=False, simulate_root=True): + """ + Wu-Palmer Similarity: + Return a score denoting how similar two word senses are, based on the + depth of the two senses in the taxonomy and that of their Least Common + Subsumer (most specific ancestor node). 
Previously, the scores computed + by this implementation did _not_ always agree with those given by + Pedersen's Perl implementation of WordNet Similarity. However, with + the addition of the simulate_root flag (see below), the score for + verbs now almost always agree but not always for nouns. + + The LCS does not necessarily feature in the shortest path connecting + the two senses, as it is by definition the common ancestor deepest in + the taxonomy, not closest to the two senses. Typically, however, it + will so feature. Where multiple candidates for the LCS exist, that + whose shortest path to the root node is the longest will be selected. + Where the LCS has multiple paths to the root, the longer path is used + for the purposes of the calculation. + + :type other: Synset + :param other: The ``Synset`` that this ``Synset`` is being compared to. + :type simulate_root: bool + :param simulate_root: The various verb taxonomies do not + share a single root which disallows this metric from working for + synsets that are not connected. This flag (True by default) + creates a fake root that connects all the taxonomies. Set it + to false to disable this behavior. For the noun taxonomy, + there is usually a default root except for WordNet version 1.6. + If you are using wordnet 1.6, a fake root will be added for nouns + as well. + :return: A float score denoting the similarity of the two ``Synset`` + objects, normally greater than zero. If no connecting path between + the two senses can be found, None is returned. + + """ + + need_root = self._needs_root() + # Note that to preserve behavior from NLTK2 we set use_min_depth=True + # It is possible that more accurate results could be obtained by + # removing this setting and it should be tested later on + subsumers = self.lowest_common_hypernyms( + other, simulate_root=simulate_root and need_root, use_min_depth=True + ) + + # If no LCS was found return None + if len(subsumers) == 0: + return None + + subsumer = self if self in subsumers else subsumers[0] + + # Get the longest path from the LCS to the root, + # including a correction: + # - add one because the calculations include both the start and end + # nodes + depth = subsumer.max_depth() + 1 + + # Note: No need for an additional add-one correction for non-nouns + # to account for an imaginary root node because that is now + # automatically handled by simulate_root + # if subsumer._pos != NOUN: + # depth += 1 + + # Get the shortest path from the LCS to each of the synsets it is + # subsuming. Add this to the LCS path length to get the path + # length from each synset to the root. + len1 = self.shortest_path_distance( + subsumer, simulate_root=simulate_root and need_root + ) + len2 = other.shortest_path_distance( + subsumer, simulate_root=simulate_root and need_root + ) + if len1 is None or len2 is None: + return None + len1 += depth + len2 += depth + return (2.0 * depth) / (len1 + len2) + + def res_similarity(self, other, ic, verbose=False): + """ + Resnik Similarity: + Return a score denoting how similar two word senses are, based on the + Information Content (IC) of the Least Common Subsumer (most specific + ancestor node). + + :type other: Synset + :param other: The ``Synset`` that this ``Synset`` is being compared to. + :type ic: dict + :param ic: an information content object (as returned by + ``nltk.corpus.wordnet_ic.ic()``). + :return: A float score denoting the similarity of the two ``Synset`` + objects. Synsets whose LCS is the root node of the taxonomy will + have a score of 0 (e.g. 
N['dog'][0] and N['table'][0]). + """ + + ic1, ic2, lcs_ic = _lcs_ic(self, other, ic) + return lcs_ic + + def jcn_similarity(self, other, ic, verbose=False): + """ + Jiang-Conrath Similarity: + Return a score denoting how similar two word senses are, based on the + Information Content (IC) of the Least Common Subsumer (most specific + ancestor node) and that of the two input Synsets. The relationship is + given by the equation 1 / (IC(s1) + IC(s2) - 2 * IC(lcs)). + + :type other: Synset + :param other: The ``Synset`` that this ``Synset`` is being compared to. + :type ic: dict + :param ic: an information content object (as returned by + ``nltk.corpus.wordnet_ic.ic()``). + :return: A float score denoting the similarity of the two ``Synset`` + objects. + """ + + if self == other: + return _INF + + ic1, ic2, lcs_ic = _lcs_ic(self, other, ic) + + # If either of the input synsets are the root synset, or have a + # frequency of 0 (sparse data problem), return 0. + if ic1 == 0 or ic2 == 0: + return 0 + + ic_difference = ic1 + ic2 - 2 * lcs_ic + + if ic_difference == 0: + return _INF + + return 1 / ic_difference + + def lin_similarity(self, other, ic, verbose=False): + """ + Lin Similarity: + Return a score denoting how similar two word senses are, based on the + Information Content (IC) of the Least Common Subsumer (most specific + ancestor node) and that of the two input Synsets. The relationship is + given by the equation 2 * IC(lcs) / (IC(s1) + IC(s2)). + + :type other: Synset + :param other: The ``Synset`` that this ``Synset`` is being compared to. + :type ic: dict + :param ic: an information content object (as returned by + ``nltk.corpus.wordnet_ic.ic()``). + :return: A float score denoting the similarity of the two ``Synset`` + objects, in the range 0 to 1. + """ + + ic1, ic2, lcs_ic = _lcs_ic(self, other, ic) + return (2.0 * lcs_ic) / (ic1 + ic2) + + def _iter_hypernym_lists(self): + """ + :return: An iterator over ``Synset`` objects that are either proper + hypernyms or instance of hypernyms of the synset. + """ + todo = [self] + seen = set() + while todo: + for synset in todo: + seen.add(synset) + yield todo + todo = [ + hypernym + for synset in todo + for hypernym in (synset.hypernyms() + synset.instance_hypernyms()) + if hypernym not in seen + ] + + def __repr__(self): + return "%s('%s')" % (type(self).__name__, self._name) + + def _related(self, relation_symbol, sort=True): + get_synset = self._wordnet_corpus_reader.synset_from_pos_and_offset + if relation_symbol not in self._pointers: + return [] + pointer_tuples = self._pointers[relation_symbol] + r = [get_synset(pos, offset) for pos, offset in pointer_tuples] + if sort: + r.sort() + return r + + +###################################################################### +# WordNet Corpus Reader +###################################################################### + + +class WordNetCorpusReader(CorpusReader): + """ + A corpus reader used to access wordnet or its variants. + """ + + _ENCODING = 'utf8' + + # { Part-of-speech constants + ADJ, ADJ_SAT, ADV, NOUN, VERB = 'a', 's', 'r', 'n', 'v' + # } + + # { Filename constants + _FILEMAP = {ADJ: 'adj', ADV: 'adv', NOUN: 'noun', VERB: 'verb'} + # } + + # { Part of speech constants + _pos_numbers = {NOUN: 1, VERB: 2, ADJ: 3, ADV: 4, ADJ_SAT: 5} + _pos_names = dict(tup[::-1] for tup in _pos_numbers.items()) + # } + + #: A list of file identifiers for all the fileids used by this + #: corpus reader. 
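Before turning to the corpus reader itself, a hedged sketch of the remaining Synset-level metrics: Wu-Palmer, plus the three information-content measures, which additionally require an IC dictionary (this assumes the 'wordnet' and 'wordnet_ic' data packages are installed; scores depend on the WordNet version and IC counts used).

    from nltk.corpus import wordnet as wn
    from nltk.corpus import wordnet_ic

    brown_ic = wordnet_ic.ic('ic-brown.dat')    # precomputed counts from the Brown corpus
    dog, cat = wn.synset('dog.n.01'), wn.synset('cat.n.01')

    print(dog.wup_similarity(cat))              # 2 * depth(lcs) / (len1 + len2), about 0.86
    print(dog.res_similarity(cat, brown_ic))    # IC of the most informative common subsumer
    print(dog.jcn_similarity(cat, brown_ic))    # 1 / (IC(s1) + IC(s2) - 2 * IC(lcs))
    print(dog.lin_similarity(cat, brown_ic))    # 2 * IC(lcs) / (IC(s1) + IC(s2))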
+ _FILES = ( + 'cntlist.rev', + 'lexnames', + 'index.sense', + 'index.adj', + 'index.adv', + 'index.noun', + 'index.verb', + 'data.adj', + 'data.adv', + 'data.noun', + 'data.verb', + 'adj.exc', + 'adv.exc', + 'noun.exc', + 'verb.exc', + ) + + def __init__(self, root, omw_reader): + """ + Construct a new wordnet corpus reader, with the given root + directory. + """ + super(WordNetCorpusReader, self).__init__( + root, self._FILES, encoding=self._ENCODING + ) + + # A index that provides the file offset + # Map from lemma -> pos -> synset_index -> offset + self._lemma_pos_offset_map = defaultdict(dict) + + # A cache so we don't have to reconstuct synsets + # Map from pos -> offset -> synset + self._synset_offset_cache = defaultdict(dict) + + # A lookup for the maximum depth of each part of speech. Useful for + # the lch similarity metric. + self._max_depth = defaultdict(dict) + + # Corpus reader containing omw data. + self._omw_reader = omw_reader + + # A cache to store the wordnet data of multiple languages + self._lang_data = defaultdict(list) + + self._data_file_map = {} + self._exception_map = {} + self._lexnames = [] + self._key_count_file = None + self._key_synset_file = None + + # Load the lexnames + for i, line in enumerate(self.open('lexnames')): + index, lexname, _ = line.split() + assert int(index) == i + self._lexnames.append(lexname) + + # Load the indices for lemmas and synset offsets + self._load_lemma_pos_offset_map() + + # load the exception file data into memory + self._load_exception_map() + + # Open Multilingual WordNet functions, contributed by + # Nasruddin A’aidil Shari, Sim Wei Ying Geraldine, and Soe Lynn + + def of2ss(self, of): + ''' take an id and return the synsets ''' + return self.synset_from_pos_and_offset(of[-1], int(of[:8])) + + def ss2of(self, ss, lang=None): + ''' return the ID of the synset ''' + pos = ss.pos() + # Only these 3 WordNets retain the satellite pos tag + if lang not in ["nld", "lit", "slk"] and pos == 's': + pos = 'a' + return "{:08d}-{}".format(ss.offset(), pos) + + def _load_lang_data(self, lang): + ''' load the wordnet data of the requested language from the file to + the cache, _lang_data ''' + + if lang in self._lang_data.keys(): + return + + if lang not in self.langs(): + raise WordNetError("Language is not supported.") + + f = self._omw_reader.open('{0:}/wn-data-{0:}.tab'.format(lang)) + self.custom_lemmas(f, lang) + f.close() + + def langs(self): + ''' return a list of languages supported by Multilingual Wordnet ''' + import os + + langs = ['eng'] + fileids = self._omw_reader.fileids() + for fileid in fileids: + file_name, file_extension = os.path.splitext(fileid) + if file_extension == '.tab': + langs.append(file_name.split('-')[-1]) + + return langs + + def _load_lemma_pos_offset_map(self): + for suffix in self._FILEMAP.values(): + + # parse each line of the file (ignoring comment lines) + for i, line in enumerate(self.open('index.%s' % suffix)): + if line.startswith(' '): + continue + + _iter = iter(line.split()) + + def _next_token(): + return next(_iter) + + try: + + # get the lemma and part-of-speech + lemma = _next_token() + pos = _next_token() + + # get the number of synsets for this lemma + n_synsets = int(_next_token()) + assert n_synsets > 0 + + # get and ignore the pointer symbols for all synsets of + # this lemma + n_pointers = int(_next_token()) + [_next_token() for _ in range(n_pointers)] + + # same as number of synsets + n_senses = int(_next_token()) + assert n_synsets == n_senses + + # get and ignore number of senses 
ranked according to + # frequency + _next_token() + + # get synset offsets + synset_offsets = [int(_next_token()) for _ in range(n_synsets)] + + # raise more informative error with file name and line number + except (AssertionError, ValueError) as e: + tup = ('index.%s' % suffix), (i + 1), e + raise WordNetError('file %s, line %i: %s' % tup) + + # map lemmas and parts of speech to synsets + self._lemma_pos_offset_map[lemma][pos] = synset_offsets + if pos == ADJ: + self._lemma_pos_offset_map[lemma][ADJ_SAT] = synset_offsets + + def _load_exception_map(self): + # load the exception file data into memory + for pos, suffix in self._FILEMAP.items(): + self._exception_map[pos] = {} + for line in self.open('%s.exc' % suffix): + terms = line.split() + self._exception_map[pos][terms[0]] = terms[1:] + self._exception_map[ADJ_SAT] = self._exception_map[ADJ] + + def _compute_max_depth(self, pos, simulate_root): + """ + Compute the max depth for the given part of speech. This is + used by the lch similarity metric. + """ + depth = 0 + for ii in self.all_synsets(pos): + try: + depth = max(depth, ii.max_depth()) + except RuntimeError: + print(ii) + if simulate_root: + depth += 1 + self._max_depth[pos] = depth + + def get_version(self): + fh = self._data_file(ADJ) + for line in fh: + match = re.search(r'WordNet (\d+\.\d+) Copyright', line) + if match is not None: + version = match.group(1) + fh.seek(0) + return version + + ############################################################# + # Loading Lemmas + ############################################################# + + def lemma(self, name, lang='eng'): + '''Return lemma object that matches the name''' + # cannot simply split on first '.', + # e.g.: '.45_caliber.a.01..45_caliber' + separator = SENSENUM_RE.search(name).end() + + synset_name, lemma_name = name[: separator - 1], name[separator:] + + synset = self.synset(synset_name) + for lemma in synset.lemmas(lang): + if lemma._name == lemma_name: + return lemma + raise WordNetError('no lemma %r in %r' % (lemma_name, synset_name)) + + def lemma_from_key(self, key): + # Keys are case sensitive and always lower-case + key = key.lower() + + lemma_name, lex_sense = key.split('%') + pos_number, lexname_index, lex_id, _, _ = lex_sense.split(':') + pos = self._pos_names[int(pos_number)] + + # open the key -> synset file if necessary + if self._key_synset_file is None: + self._key_synset_file = self.open('index.sense') + + # Find the synset for the lemma. 
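The lemma() and lemma_from_key() lookups above can be exercised as follows; a minimal sketch, and the sense key shown is the WordNet 3.0 value, so it may differ in other releases.

    from nltk.corpus import wordnet as wn

    lem = wn.lemma('dog.n.01.dog')        # '<synset name>.<lemma name>'
    key = lem.key()                       # e.g. 'dog%1:05:00::'
    print(wn.lemma_from_key(key))         # Lemma('dog.n.01.dog')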
+ synset_line = _binary_search_file(self._key_synset_file, key) + if not synset_line: + raise WordNetError("No synset found for key %r" % key) + offset = int(synset_line.split()[1]) + synset = self.synset_from_pos_and_offset(pos, offset) + + # return the corresponding lemma + for lemma in synset._lemmas: + if lemma._key == key: + return lemma + raise WordNetError("No lemma found for for key %r" % key) + + ############################################################# + # Loading Synsets + ############################################################# + def synset(self, name): + # split name into lemma, part of speech and synset number + lemma, pos, synset_index_str = name.lower().rsplit('.', 2) + synset_index = int(synset_index_str) - 1 + + # get the offset for this synset + try: + offset = self._lemma_pos_offset_map[lemma][pos][synset_index] + except KeyError: + message = 'no lemma %r with part of speech %r' + raise WordNetError(message % (lemma, pos)) + except IndexError: + n_senses = len(self._lemma_pos_offset_map[lemma][pos]) + message = "lemma %r with part of speech %r has only %i %s" + if n_senses == 1: + tup = lemma, pos, n_senses, "sense" + else: + tup = lemma, pos, n_senses, "senses" + raise WordNetError(message % tup) + + # load synset information from the appropriate file + synset = self.synset_from_pos_and_offset(pos, offset) + + # some basic sanity checks on loaded attributes + if pos == 's' and synset._pos == 'a': + message = ( + 'adjective satellite requested but only plain ' + 'adjective found for lemma %r' + ) + raise WordNetError(message % lemma) + assert synset._pos == pos or (pos == 'a' and synset._pos == 's') + + # Return the synset object. + return synset + + def _data_file(self, pos): + """ + Return an open file pointer for the data file for the given + part of speech. + """ + if pos == ADJ_SAT: + pos = ADJ + if self._data_file_map.get(pos) is None: + fileid = 'data.%s' % self._FILEMAP[pos] + self._data_file_map[pos] = self.open(fileid) + return self._data_file_map[pos] + + def synset_from_pos_and_offset(self, pos, offset): + # Check to see if the synset is in the cache + if offset in self._synset_offset_cache[pos]: + return self._synset_offset_cache[pos][offset] + + data_file = self._data_file(pos) + data_file.seek(offset) + data_file_line = data_file.readline() + synset = self._synset_from_pos_and_line(pos, data_file_line) + assert synset._offset == offset + self._synset_offset_cache[pos][offset] = synset + return synset + + @deprecated('Use public method synset_from_pos_and_offset() instead') + def _synset_from_pos_and_offset(self, *args, **kwargs): + """ + Hack to help people like the readers of + http://stackoverflow.com/a/27145655/1709587 + who were using this function before it was officially a public method + """ + return self.synset_from_pos_and_offset(*args, **kwargs) + + def _synset_from_pos_and_line(self, pos, data_file_line): + # Construct a new (empty) synset. 
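A short illustration of the two synset lookups defined above, assuming the English WordNet data is installed locally.

    from nltk.corpus import wordnet as wn

    s = wn.synset('dog.n.01')             # '<lemma>.<pos>.<two-digit sense number>'
    t = wn.synset_from_pos_and_offset('n', s.offset())
    assert s == t                         # same synset; the reader caches it by offset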
+ synset = Synset(self) + + # parse the entry for this synset + try: + + # parse out the definitions and examples from the gloss + columns_str, gloss = data_file_line.strip().split('|') + definition = re.sub(r"[\"].*?[\"]", "", gloss).strip() + examples = re.findall(r'"([^"]*)"', gloss) + for example in examples: + synset._examples.append(example) + + synset._definition = definition.strip('; ') + + # split the other info into fields + _iter = iter(columns_str.split()) + + def _next_token(): + return next(_iter) + + # get the offset + synset._offset = int(_next_token()) + + # determine the lexicographer file name + lexname_index = int(_next_token()) + synset._lexname = self._lexnames[lexname_index] + + # get the part of speech + synset._pos = _next_token() + + # create Lemma objects for each lemma + n_lemmas = int(_next_token(), 16) + for _ in range(n_lemmas): + # get the lemma name + lemma_name = _next_token() + # get the lex_id (used for sense_keys) + lex_id = int(_next_token(), 16) + # If the lemma has a syntactic marker, extract it. + m = re.match(r'(.*?)(\(.*\))?$', lemma_name) + lemma_name, syn_mark = m.groups() + # create the lemma object + lemma = Lemma(self, synset, lemma_name, lexname_index, lex_id, syn_mark) + synset._lemmas.append(lemma) + synset._lemma_names.append(lemma._name) + + # collect the pointer tuples + n_pointers = int(_next_token()) + for _ in range(n_pointers): + symbol = _next_token() + offset = int(_next_token()) + pos = _next_token() + lemma_ids_str = _next_token() + if lemma_ids_str == '0000': + synset._pointers[symbol].add((pos, offset)) + else: + source_index = int(lemma_ids_str[:2], 16) - 1 + target_index = int(lemma_ids_str[2:], 16) - 1 + source_lemma_name = synset._lemmas[source_index]._name + lemma_pointers = synset._lemma_pointers + tups = lemma_pointers[source_lemma_name, symbol] + tups.append((pos, offset, target_index)) + + # read the verb frames + try: + frame_count = int(_next_token()) + except StopIteration: + pass + else: + for _ in range(frame_count): + # read the plus sign + plus = _next_token() + assert plus == '+' + # read the frame and lemma number + frame_number = int(_next_token()) + frame_string_fmt = VERB_FRAME_STRINGS[frame_number] + lemma_number = int(_next_token(), 16) + # lemma number of 00 means all words in the synset + if lemma_number == 0: + synset._frame_ids.append(frame_number) + for lemma in synset._lemmas: + lemma._frame_ids.append(frame_number) + lemma._frame_strings.append(frame_string_fmt % lemma._name) + # only a specific word in the synset + else: + lemma = synset._lemmas[lemma_number - 1] + lemma._frame_ids.append(frame_number) + lemma._frame_strings.append(frame_string_fmt % lemma._name) + + # raise a more informative error with line text + except ValueError as e: + raise WordNetError('line %r: %s' % (data_file_line, e)) + + # set sense keys for Lemma objects - note that this has to be + # done afterwards so that the relations are available + for lemma in synset._lemmas: + if synset._pos == ADJ_SAT: + head_lemma = synset.similar_tos()[0]._lemmas[0] + head_name = head_lemma._name + head_id = '%02d' % head_lemma._lex_id + else: + head_name = head_id = '' + tup = ( + lemma._name, + WordNetCorpusReader._pos_numbers[synset._pos], + lemma._lexname_index, + lemma._lex_id, + head_name, + head_id, + ) + lemma._key = ('%s%%%d:%02d:%02d:%s:%s' % tup).lower() + + # the canonical name is based on the first lemma + lemma_name = synset._lemmas[0]._name.lower() + offsets = self._lemma_pos_offset_map[lemma_name][synset._pos] + 
sense_index = offsets.index(synset._offset) + tup = lemma_name, synset._pos, sense_index + 1 + synset._name = '%s.%s.%02i' % tup + + return synset + + def synset_from_sense_key(self, sense_key): + """ + Retrieves synset based on a given sense_key. Sense keys can be + obtained from lemma.key() + + From https://wordnet.princeton.edu/documentation/senseidx5wn: + A sense_key is represented as: + lemma % lex_sense (e.g. 'dog%1:18:01::') + where lex_sense is encoded as: + ss_type:lex_filenum:lex_id:head_word:head_id + + lemma: ASCII text of word/collocation, in lower case + ss_type: synset type for the sense (1 digit int) + The synset type is encoded as follows: + 1 NOUN + 2 VERB + 3 ADJECTIVE + 4 ADVERB + 5 ADJECTIVE SATELLITE + lex_filenum: name of lexicographer file containing the synset for the sense (2 digit int) + lex_id: when paired with lemma, uniquely identifies a sense in the lexicographer file (2 digit int) + head_word: lemma of the first word in satellite's head synset + Only used if sense is in an adjective satellite synset + head_id: uniquely identifies sense in a lexicographer file when paired with head_word + Only used if head_word is present (2 digit int) + """ + sense_key_regex = re.compile(r"(.*)\%(.*):(.*):(.*):(.*):(.*)") + synset_types = {1: NOUN, 2: VERB, 3: ADJ, 4: ADV, 5: ADJ_SAT} + lemma, ss_type, _, lex_id, _, _ = sense_key_regex.match(sense_key).groups() + + # check that information extracted from sense_key is valid + error = None + if not lemma: + error = "lemma" + elif int(ss_type) not in synset_types: + error = "ss_type" + elif int(lex_id) < 0 or int(lex_id) > 99: + error = "lex_id" + if error: + raise WordNetError( + "valid {} could not be extracted from the sense key".format(error) + ) + + synset_id = '.'.join([lemma, synset_types[int(ss_type)], lex_id]) + return self.synset(synset_id) + + ############################################################# + # Retrieve synsets and lemmas. + ############################################################# + + def synsets(self, lemma, pos=None, lang='eng', check_exceptions=True): + """Load all synsets with a given lemma and part of speech tag. + If no pos is specified, all synsets for all parts of speech + will be loaded. + If lang is specified, all the synsets associated with the lemma name + of that language will be returned. + """ + lemma = lemma.lower() + + if lang == 'eng': + get_synset = self.synset_from_pos_and_offset + index = self._lemma_pos_offset_map + if pos is None: + pos = POS_LIST + return [ + get_synset(p, offset) + for p in pos + for form in self._morphy(lemma, p, check_exceptions) + for offset in index[form].get(p, []) + ] + + else: + self._load_lang_data(lang) + synset_list = [] + if lemma in self._lang_data[lang][1]: + for l in self._lang_data[lang][1][lemma]: + if pos is not None and l[-1] != pos: + continue + synset_list.append(self.of2ss(l)) + return synset_list + + def lemmas(self, lemma, pos=None, lang='eng'): + """Return all Lemma objects with a name matching the specified lemma + name and part of speech tag. 
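Typical calls to synsets(), including a multilingual lookup through the Open Multilingual Wordnet; the second line assumes the 'omw' data package has been downloaded and that Japanese ('jpn') is among the installed languages.

    from nltk.corpus import wordnet as wn

    print(wn.synsets('dog', pos=wn.VERB))   # [Synset('chase.v.01')] in WordNet 3.0
    print(wn.synsets('犬', lang='jpn'))      # e.g. [Synset('dog.n.01'), Synset('spy.n.01')]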
Matches any part of speech tag if none is + specified.""" + + lemma = lemma.lower() + if lang == 'eng': + return [ + lemma_obj + for synset in self.synsets(lemma, pos) + for lemma_obj in synset.lemmas() + if lemma_obj.name().lower() == lemma + ] + + else: + self._load_lang_data(lang) + lemmas = [] + syn = self.synsets(lemma, lang=lang) + for s in syn: + if pos is not None and s.pos() != pos: + continue + for lemma_obj in s.lemmas(lang=lang): + if lemma_obj.name().lower() == lemma: + lemmas.append(lemma_obj) + return lemmas + + def all_lemma_names(self, pos=None, lang='eng'): + """Return all lemma names for all synsets for the given + part of speech tag and language or languages. If pos is + not specified, all synsets for all parts of speech will + be used.""" + + if lang == 'eng': + if pos is None: + return iter(self._lemma_pos_offset_map) + else: + return ( + lemma + for lemma in self._lemma_pos_offset_map + if pos in self._lemma_pos_offset_map[lemma] + ) + else: + self._load_lang_data(lang) + lemma = [] + for i in self._lang_data[lang][0]: + if pos is not None and i[-1] != pos: + continue + lemma.extend(self._lang_data[lang][0][i]) + + lemma = iter(set(lemma)) + return lemma + + def all_synsets(self, pos=None): + """Iterate over all synsets with a given part of speech tag. + If no pos is specified, all synsets for all parts of speech + will be loaded. + """ + if pos is None: + pos_tags = self._FILEMAP.keys() + else: + pos_tags = [pos] + + cache = self._synset_offset_cache + from_pos_and_line = self._synset_from_pos_and_line + + # generate all synsets for each part of speech + for pos_tag in pos_tags: + # Open the file for reading. Note that we can not re-use + # the file poitners from self._data_file_map here, because + # we're defining an iterator, and those file pointers might + # be moved while we're not looking. 
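Both of these iterators are lazy, which keeps memory use modest even when walking the whole database; the count below is the WordNet 3.0 figure and will vary with the data version.

    from nltk.corpus import wordnet as wn

    print(sum(1 for _ in wn.all_synsets('n')))      # 82115 noun synsets in WordNet 3.0
    print(len(set(wn.all_lemma_names(pos='v'))))    # number of distinct verb lemma names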
+ if pos_tag == ADJ_SAT: + pos_tag = ADJ + fileid = 'data.%s' % self._FILEMAP[pos_tag] + data_file = self.open(fileid) + + try: + # generate synsets for each line in the POS file + offset = data_file.tell() + line = data_file.readline() + while line: + if not line[0].isspace(): + if offset in cache[pos_tag]: + # See if the synset is cached + synset = cache[pos_tag][offset] + else: + # Otherwise, parse the line + synset = from_pos_and_line(pos_tag, line) + cache[pos_tag][offset] = synset + + # adjective satellites are in the same file as + # adjectives so only yield the synset if it's actually + # a satellite + if synset._pos == ADJ_SAT: + yield synset + + # for all other POS tags, yield all synsets (this means + # that adjectives also include adjective satellites) + else: + yield synset + offset = data_file.tell() + line = data_file.readline() + + # close the extra file handle we opened + except: + data_file.close() + raise + else: + data_file.close() + + def words(self, lang='eng'): + """return lemmas of the given language as list of words""" + return self.all_lemma_names(lang=lang) + + def license(self, lang='eng'): + """Return the contents of LICENSE (for omw) + use lang=lang to get the license for an individual language""" + if lang == 'eng': + return self.open("LICENSE").read() + elif lang in self.langs(): + return self._omw_reader.open("{}/LICENSE".format(lang)).read() + elif lang == 'omw': + # under the assumption you don't mean Omwunra-Toqura + return self._omw_reader.open("LICENSE").read() + elif lang in self._lang_data: + raise WordNetError("Cannot determine license for user-provided tab file") + else: + raise WordNetError("Language is not supported.") + + def readme(self, lang='omw'): + """Return the contents of README (for omw) + use lang=lang to get the readme for an individual language""" + if lang == 'eng': + return self.open("README").read() + elif lang in self.langs(): + return self._omw_reader.open("{}/README".format(lang)).read() + elif lang == 'omw': + # under the assumption you don't mean Omwunra-Toqura + return self._omw_reader.open("README").read() + elif lang in self._lang_data: + raise WordNetError("No README for user-provided tab file") + else: + raise WordNetError("Language is not supported.") + + def citation(self, lang='omw'): + """Return the contents of citation.bib file (for omw) + use lang=lang to get the citation for an individual language""" + if lang == 'eng': + return self.open("citation.bib").read() + elif lang in self.langs(): + return self._omw_reader.open("{}/citation.bib".format(lang)).read() + elif lang == 'omw': + # under the assumption you don't mean Omwunra-Toqura + return self._omw_reader.open("citation.bib").read() + elif lang in self._lang_data: + raise WordNetError("citation not known for user-provided tab file") + else: + raise WordNetError("Language is not supported.") + + ############################################################# + # Misc + ############################################################# + def lemma_count(self, lemma): + """Return the frequency count for this Lemma""" + # Currently, count is only work for English + if lemma._lang != 'eng': + return 0 + # open the count file if we haven't already + if self._key_count_file is None: + self._key_count_file = self.open('cntlist.rev') + # find the key in the counts file and return the count + line = _binary_search_file(self._key_count_file, lemma._key) + if line: + return int(line.rsplit(' ', 1)[-1]) + else: + return 0 + + def path_similarity(self, synset1, synset2, 
verbose=False, simulate_root=True): + return synset1.path_similarity(synset2, verbose, simulate_root) + + path_similarity.__doc__ = Synset.path_similarity.__doc__ + + def lch_similarity(self, synset1, synset2, verbose=False, simulate_root=True): + return synset1.lch_similarity(synset2, verbose, simulate_root) + + lch_similarity.__doc__ = Synset.lch_similarity.__doc__ + + def wup_similarity(self, synset1, synset2, verbose=False, simulate_root=True): + return synset1.wup_similarity(synset2, verbose, simulate_root) + + wup_similarity.__doc__ = Synset.wup_similarity.__doc__ + + def res_similarity(self, synset1, synset2, ic, verbose=False): + return synset1.res_similarity(synset2, ic, verbose) + + res_similarity.__doc__ = Synset.res_similarity.__doc__ + + def jcn_similarity(self, synset1, synset2, ic, verbose=False): + return synset1.jcn_similarity(synset2, ic, verbose) + + jcn_similarity.__doc__ = Synset.jcn_similarity.__doc__ + + def lin_similarity(self, synset1, synset2, ic, verbose=False): + return synset1.lin_similarity(synset2, ic, verbose) + + lin_similarity.__doc__ = Synset.lin_similarity.__doc__ + + ############################################################# + # Morphy + ############################################################# + # Morphy, adapted from Oliver Steele's pywordnet + def morphy(self, form, pos=None, check_exceptions=True): + """ + Find a possible base form for the given form, with the given + part of speech, by checking WordNet's list of exceptional + forms, and by recursively stripping affixes for this part of + speech until a form in WordNet is found. + + >>> from nltk.corpus import wordnet as wn + >>> print(wn.morphy('dogs')) + dog + >>> print(wn.morphy('churches')) + church + >>> print(wn.morphy('aardwolves')) + aardwolf + >>> print(wn.morphy('abaci')) + abacus + >>> wn.morphy('hardrock', wn.ADV) + >>> print(wn.morphy('book', wn.NOUN)) + book + >>> wn.morphy('book', wn.ADJ) + """ + + if pos is None: + morphy = self._morphy + analyses = chain(a for p in POS_LIST for a in morphy(form, p)) + else: + analyses = self._morphy(form, pos, check_exceptions) + + # get the first one we find + first = list(islice(analyses, 1)) + if len(first) == 1: + return first[0] + else: + return None + + MORPHOLOGICAL_SUBSTITUTIONS = { + NOUN: [ + ('s', ''), + ('ses', 's'), + ('ves', 'f'), + ('xes', 'x'), + ('zes', 'z'), + ('ches', 'ch'), + ('shes', 'sh'), + ('men', 'man'), + ('ies', 'y'), + ], + VERB: [ + ('s', ''), + ('ies', 'y'), + ('es', 'e'), + ('es', ''), + ('ed', 'e'), + ('ed', ''), + ('ing', 'e'), + ('ing', ''), + ], + ADJ: [('er', ''), ('est', ''), ('er', 'e'), ('est', 'e')], + ADV: [], + } + + MORPHOLOGICAL_SUBSTITUTIONS[ADJ_SAT] = MORPHOLOGICAL_SUBSTITUTIONS[ADJ] + + def _morphy(self, form, pos, check_exceptions=True): + # from jordanbg: + # Given an original string x + # 1. Apply rules once to the input to get y1, y2, y3, etc. + # 2. Return all that are in the database + # 3. If there are no matches, keep applying rules until you either + # find a match or you can't go any further + + exceptions = self._exception_map[pos] + substitutions = self.MORPHOLOGICAL_SUBSTITUTIONS[pos] + + def apply_rules(forms): + return [ + form[: -len(old)] + new + for form in forms + for old, new in substitutions + if form.endswith(old) + ] + + def filter_forms(forms): + result = [] + seen = set() + for form in forms: + if form in self._lemma_pos_offset_map: + if pos in self._lemma_pos_offset_map[form]: + if form not in seen: + result.append(form) + seen.add(form) + return result + + # 0. 
Check the exception lists + if check_exceptions: + if form in exceptions: + return filter_forms([form] + exceptions[form]) + + # 1. Apply rules once to the input to get y1, y2, y3, etc. + forms = apply_rules([form]) + + # 2. Return all that are in the database (and check the original too) + results = filter_forms([form] + forms) + if results: + return results + + # 3. If there are no matches, keep applying rules until we find a match + while forms: + forms = apply_rules(forms) + results = filter_forms(forms) + if results: + return results + + # Return an empty list if we can't find anything + return [] + + ############################################################# + # Create information content from corpus + ############################################################# + def ic(self, corpus, weight_senses_equally=False, smoothing=1.0): + """ + Creates an information content lookup dictionary from a corpus. + + :type corpus: CorpusReader + :param corpus: The corpus from which we create an information + content dictionary. + :type weight_senses_equally: bool + :param weight_senses_equally: If this is True, gives all + possible senses equal weight rather than dividing by the + number of possible senses. (If a word has 3 synses, each + sense gets 0.3333 per appearance when this is False, 1.0 when + it is true.) + :param smoothing: How much do we smooth synset counts (default is 1.0) + :type smoothing: float + :return: An information content dictionary + """ + counts = FreqDist() + for ww in corpus.words(): + counts[ww] += 1 + + ic = {} + for pp in POS_LIST: + ic[pp] = defaultdict(float) + + # Initialize the counts with the smoothing value + if smoothing > 0.0: + for ss in self.all_synsets(): + pos = ss._pos + if pos == ADJ_SAT: + pos = ADJ + ic[pos][ss._offset] = smoothing + + for ww in counts: + possible_synsets = self.synsets(ww) + if len(possible_synsets) == 0: + continue + + # Distribute weight among possible synsets + weight = float(counts[ww]) + if not weight_senses_equally: + weight /= float(len(possible_synsets)) + + for ss in possible_synsets: + pos = ss._pos + if pos == ADJ_SAT: + pos = ADJ + for level in ss._iter_hypernym_lists(): + for hh in level: + ic[pos][hh._offset] += weight + # Add the weight to the root + ic[pos][0] += weight + return ic + + def custom_lemmas(self, tab_file, lang): + """ + Reads a custom tab file containing mappings of lemmas in the given + language to Princeton WordNet 3.0 synset offsets, allowing NLTK's + WordNet functions to then be used with that language. + + See the "Tab files" section at http://compling.hss.ntu.edu.sg/omw/ for + documentation on the Multilingual WordNet tab file format. + + :param tab_file: Tab file as a file or file-like object + :type lang str + :param lang ISO 639-3 code of the language of the tab file + """ + if len(lang) != 3: + raise ValueError('lang should be a (3 character) ISO 639-3 code') + self._lang_data[lang] = [defaultdict(list), defaultdict(list)] + for line in tab_file.readlines(): + if isinstance(line, bytes): + # Support byte-stream files (e.g. 
as returned by Python 2's + # open() function) as well as text-stream ones + line = line.decode('utf-8') + if not line.startswith('#'): + offset_pos, lemma_type, lemma = line.strip().split('\t') + lemma = lemma.strip().replace(' ', '_') + self._lang_data[lang][0][offset_pos].append(lemma) + self._lang_data[lang][1][lemma.lower()].append(offset_pos) + # Make sure no more entries are accidentally added subsequently + self._lang_data[lang][0].default_factory = None + self._lang_data[lang][1].default_factory = None + + +###################################################################### +# WordNet Information Content Corpus Reader +###################################################################### + + +class WordNetICCorpusReader(CorpusReader): + """ + A corpus reader for the WordNet information content corpus. + """ + + def __init__(self, root, fileids): + CorpusReader.__init__(self, root, fileids, encoding='utf8') + + # this load function would be more efficient if the data was pickled + # Note that we can't use NLTK's frequency distributions because + # synsets are overlapping (each instance of a synset also counts + # as an instance of its hypernyms) + def ic(self, icfile): + """ + Load an information content file from the wordnet_ic corpus + and return a dictionary. This dictionary has just two keys, + NOUN and VERB, whose values are dictionaries that map from + synsets to information content values. + + :type icfile: str + :param icfile: The name of the wordnet_ic file (e.g. "ic-brown.dat") + :return: An information content dictionary + """ + ic = {} + ic[NOUN] = defaultdict(float) + ic[VERB] = defaultdict(float) + for num, line in enumerate(self.open(icfile)): + if num == 0: # skip the header + continue + fields = line.split() + offset = int(fields[0][:-1]) + value = float(fields[1]) + pos = _get_pos(fields[0]) + if len(fields) == 3 and fields[2] == "ROOT": + # Store root count. + ic[pos][0] += value + if value != 0: + ic[pos][offset] = value + return ic + + +###################################################################### +# Similarity metrics +###################################################################### + +# TODO: Add in the option to manually add a new root node; this will be +# useful for verb similarity as there exist multiple verb taxonomies. 
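The two ways of obtaining an information-content dictionary, loading a precomputed file from the wordnet_ic corpus or deriving counts from an arbitrary corpus with ic(), can be compared as follows; a sketch that assumes the 'wordnet_ic' and 'genesis' data packages are installed.

    from nltk.corpus import wordnet as wn
    from nltk.corpus import wordnet_ic, genesis

    brown_ic = wordnet_ic.ic('ic-brown.dat')    # precomputed counts (Brown corpus)
    genesis_ic = wn.ic(genesis, False, 0.0)     # counts built from genesis.words()

    dog, cat = wn.synset('dog.n.01'), wn.synset('cat.n.01')
    print(dog.res_similarity(cat, brown_ic))
    print(dog.res_similarity(cat, genesis_ic))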
+ +# More information about the metrics is available at +# http://marimba.d.umn.edu/similarity/measures.html + + +def path_similarity(synset1, synset2, verbose=False, simulate_root=True): + return synset1.path_similarity(synset2, verbose, simulate_root) + + +def lch_similarity(synset1, synset2, verbose=False, simulate_root=True): + return synset1.lch_similarity(synset2, verbose, simulate_root) + + +def wup_similarity(synset1, synset2, verbose=False, simulate_root=True): + return synset1.wup_similarity(synset2, verbose, simulate_root) + + +def res_similarity(synset1, synset2, ic, verbose=False): + return synset1.res_similarity(synset2, ic, verbose) + + +def jcn_similarity(synset1, synset2, ic, verbose=False): + return synset1.jcn_similarity(synset2, ic, verbose) + + +def lin_similarity(synset1, synset2, ic, verbose=False): + return synset1.lin_similarity(synset2, ic, verbose) + + +path_similarity.__doc__ = Synset.path_similarity.__doc__ +lch_similarity.__doc__ = Synset.lch_similarity.__doc__ +wup_similarity.__doc__ = Synset.wup_similarity.__doc__ +res_similarity.__doc__ = Synset.res_similarity.__doc__ +jcn_similarity.__doc__ = Synset.jcn_similarity.__doc__ +lin_similarity.__doc__ = Synset.lin_similarity.__doc__ + + +def _lcs_ic(synset1, synset2, ic, verbose=False): + """ + Get the information content of the least common subsumer that has + the highest information content value. If two nodes have no + explicit common subsumer, assume that they share an artificial + root node that is the hypernym of all explicit roots. + + :type synset1: Synset + :param synset1: First input synset. + :type synset2: Synset + :param synset2: Second input synset. Must be the same part of + speech as the first synset. + :type ic: dict + :param ic: an information content object (as returned by ``nltk.corpus.wordnet_ic.ic()``). + :return: The information content of the two synsets and their most + informative subsumer + """ + if synset1._pos != synset2._pos: + raise WordNetError( + 'Computing the least common subsumer requires ' + '%s and %s to have the same part of speech.'
% (synset1, synset2) + ) + + ic1 = information_content(synset1, ic) + ic2 = information_content(synset2, ic) + subsumers = synset1.common_hypernyms(synset2) + if len(subsumers) == 0: + subsumer_ic = 0 + else: + subsumer_ic = max(information_content(s, ic) for s in subsumers) + + if verbose: + print("> LCS Subsumer by content:", subsumer_ic) + + return ic1, ic2, subsumer_ic + + +# Utility functions + + +def information_content(synset, ic): + try: + icpos = ic[synset._pos] + except KeyError: + msg = 'Information content file has no entries for part-of-speech: %s' + raise WordNetError(msg % synset._pos) + + counts = icpos[synset._offset] + if counts == 0: + return _INF + else: + return -math.log(counts / icpos[0]) + + +# get the part of speech (NOUN or VERB) from the information content record +# (each identifier has a 'n' or 'v' suffix) + + +def _get_pos(field): + if field[-1] == 'n': + return NOUN + elif field[-1] == 'v': + return VERB + else: + msg = ( + "Unidentified part of speech in WordNet Information Content file " + "for field %s" % field + ) + raise ValueError(msg) + + +# unload corpus after tests +def teardown_module(module=None): + from nltk.corpus import wordnet + + wordnet._unload() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/xmldocs.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/xmldocs.py new file mode 100644 index 0000000..8a66720 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/xmldocs.py @@ -0,0 +1,411 @@ +# Natural Language Toolkit: XML Corpus Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# URL: +# For license information, see LICENSE.TXT + +""" +Corpus reader for corpora whose documents are xml files. + +(note -- not named 'xml' to avoid conflicting w/ standard xml package) +""" +from __future__ import print_function, unicode_literals + +import codecs + +# Use the c version of ElementTree, which is faster, if possible: +try: + from xml.etree import cElementTree as ElementTree +except ImportError: + from xml.etree import ElementTree + +from six import string_types + +from nltk.data import SeekableUnicodeStreamReader +from nltk.tokenize import WordPunctTokenizer +from nltk.internals import ElementWrapper + +from nltk.corpus.reader.api import CorpusReader +from nltk.corpus.reader.util import * + + +class XMLCorpusReader(CorpusReader): + """ + Corpus reader for corpora whose documents are xml files. + + Note that the ``XMLCorpusReader`` constructor does not take an + ``encoding`` argument, because the unicode encoding is specified by + the XML files themselves. See the XML specs for more info. + """ + + def __init__(self, root, fileids, wrap_etree=False): + self._wrap_etree = wrap_etree + CorpusReader.__init__(self, root, fileids) + + def xml(self, fileid=None): + # Make sure we have exactly one file -- no concatenating XML. + if fileid is None and len(self._fileids) == 1: + fileid = self._fileids[0] + if not isinstance(fileid, string_types): + raise TypeError('Expected a single file identifier string') + # Read the XML in using ElementTree. + elt = ElementTree.parse(self.abspath(fileid).open()).getroot() + # If requested, wrap it. + if self._wrap_etree: + elt = ElementWrapper(elt) + # Return the ElementTree element. + return elt + + def words(self, fileid=None): + """ + Returns all of the words and punctuation symbols in the specified file + that were in text nodes -- ie, tags are ignored. Like the xml() method, + fileid can only specify one file. 
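As a usage sketch for XMLCorpusReader, NLTK's bundled 'shakespeare' sample corpus (plays in Jon Bosak's XML markup) is read with exactly this class; the example assumes nltk.download('shakespeare') has been run.

    from nltk.corpus import shakespeare    # an XMLCorpusReader instance

    play = shakespeare.xml('dream.xml')         # ElementTree root element for one file
    print(play[0].text)                         # first child, typically the <TITLE> element
    print(shakespeare.words('dream.xml')[:8])   # text-node tokens; the tags are ignored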
+ + :return: the given file's text nodes as a list of words and punctuation symbols + :rtype: list(str) + """ + + elt = self.xml(fileid) + encoding = self.encoding(fileid) + word_tokenizer = WordPunctTokenizer() + iterator = elt.getiterator() + out = [] + + for node in iterator: + text = node.text + if text is not None: + if isinstance(text, bytes): + text = text.decode(encoding) + toks = word_tokenizer.tokenize(text) + out.extend(toks) + return out + + def raw(self, fileids=None): + if fileids is None: + fileids = self._fileids + elif isinstance(fileids, string_types): + fileids = [fileids] + return concat([self.open(f).read() for f in fileids]) + + +class XMLCorpusView(StreamBackedCorpusView): + """ + A corpus view that selects out specified elements from an XML + file, and provides a flat list-like interface for accessing them. + (Note: ``XMLCorpusView`` is not used by ``XMLCorpusReader`` itself, + but may be used by subclasses of ``XMLCorpusReader``.) + + Every XML corpus view has a "tag specification", indicating what + XML elements should be included in the view; and each (non-nested) + element that matches this specification corresponds to one item in + the view. Tag specifications are regular expressions over tag + paths, where a tag path is a list of element tag names, separated + by '/', indicating the ancestry of the element. Some examples: + + - ``'foo'``: A top-level element whose tag is ``foo``. + - ``'foo/bar'``: An element whose tag is ``bar`` and whose parent + is a top-level element whose tag is ``foo``. + - ``'.*/foo'``: An element whose tag is ``foo``, appearing anywhere + in the xml tree. + - ``'.*/(foo|bar)'``: An wlement whose tag is ``foo`` or ``bar``, + appearing anywhere in the xml tree. + + The view items are generated from the selected XML elements via + the method ``handle_elt()``. By default, this method returns the + element as-is (i.e., as an ElementTree object); but it can be + overridden, either via subclassing or via the ``elt_handler`` + constructor parameter. + """ + + #: If true, then display debugging output to stdout when reading + #: blocks. + _DEBUG = False + + #: The number of characters read at a time by this corpus reader. + _BLOCK_SIZE = 1024 + + def __init__(self, fileid, tagspec, elt_handler=None): + """ + Create a new corpus view based on a specified XML file. + + Note that the ``XMLCorpusView`` constructor does not take an + ``encoding`` argument, because the unicode encoding is + specified by the XML files themselves. + + :type tagspec: str + :param tagspec: A tag specification, indicating what XML + elements should be included in the view. Each non-nested + element that matches this specification corresponds to one + item in the view. + + :param elt_handler: A function used to transform each element + to a value for the view. If no handler is specified, then + ``self.handle_elt()`` is called, which returns the element + as an ElementTree object. The signature of elt_handler is:: + + elt_handler(elt, tagspec) -> value + """ + if elt_handler: + self.handle_elt = elt_handler + + self._tagspec = re.compile(tagspec + r'\Z') + """The tag specification for this corpus view.""" + + self._tag_context = {0: ()} + """A dictionary mapping from file positions (as returned by + ``stream.seek()`` to XML contexts. 
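A hedged sketch of building a view over one of the Shakespeare files using a tag specification; the '.*/LINE' pattern and the data path are assumptions about that corpus's markup and the local nltk_data layout, not part of this module.

    import nltk.data
    from nltk.corpus.reader.xmldocs import XMLCorpusView

    fileid = nltk.data.find('corpora/shakespeare/dream.xml')
    lines = XMLCorpusView(fileid, '.*/LINE')   # every <LINE> element, anywhere in the tree
    print(lines[0])                            # an ElementTree element (default handle_elt)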
An XML context is a + tuple of XML tag names, indicating which tags have not yet + been closed.""" + + encoding = self._detect_encoding(fileid) + StreamBackedCorpusView.__init__(self, fileid, encoding=encoding) + + def _detect_encoding(self, fileid): + if isinstance(fileid, PathPointer): + try: + infile = fileid.open() + s = infile.readline() + finally: + infile.close() + else: + with open(fileid, 'rb') as infile: + s = infile.readline() + if s.startswith(codecs.BOM_UTF16_BE): + return 'utf-16-be' + if s.startswith(codecs.BOM_UTF16_LE): + return 'utf-16-le' + if s.startswith(codecs.BOM_UTF32_BE): + return 'utf-32-be' + if s.startswith(codecs.BOM_UTF32_LE): + return 'utf-32-le' + if s.startswith(codecs.BOM_UTF8): + return 'utf-8' + m = re.match(br'\s*<\?xml\b.*\bencoding="([^"]+)"', s) + if m: + return m.group(1).decode() + m = re.match(br"\s*<\?xml\b.*\bencoding='([^']+)'", s) + if m: + return m.group(1).decode() + # No encoding found -- what should the default be? + return 'utf-8' + + def handle_elt(self, elt, context): + """ + Convert an element into an appropriate value for inclusion in + the view. Unless overridden by a subclass or by the + ``elt_handler`` constructor argument, this method simply + returns ``elt``. + + :return: The view value corresponding to ``elt``. + + :type elt: ElementTree + :param elt: The element that should be converted. + + :type context: str + :param context: A string composed of element tags separated by + forward slashes, indicating the XML context of the given + element. For example, the string ``'foo/bar/baz'`` + indicates that the element is a ``baz`` element whose + parent is a ``bar`` element and whose grandparent is a + top-level ``foo`` element. + """ + return elt + + #: A regular expression that matches XML fragments that do not + #: contain any un-closed tags. + _VALID_XML_RE = re.compile( + r""" + [^<]* + ( + (() | # comment + () | # doctype decl + (<[^!>][^>]*>)) # tag or PI + [^<]*)* + \Z""", + re.DOTALL | re.VERBOSE, + ) + + #: A regular expression used to extract the tag name from a start tag, + #: end tag, or empty-elt tag string. + _XML_TAG_NAME = re.compile('<\s*/?\s*([^\s>]+)') + + #: A regular expression used to find all start-tags, end-tags, and + #: emtpy-elt tags in an XML file. This regexp is more lenient than + #: the XML spec -- e.g., it allows spaces in some places where the + #: spec does not. + _XML_PIECE = re.compile( + r""" + # Include these so we can skip them: + (?P )| + (?P )| + (?P <\?.*?\?> )| + (?P ]*(\[[^\]]*])?\s*>)| + # These are the ones we actually care about: + (?P <\s*[^>/\?!\s][^>]*/\s*> )| + (?P <\s*[^>/\?!\s][^>]*> )| + (?P <\s*/[^>/\?!\s][^>]*> )""", + re.DOTALL | re.VERBOSE, + ) + + def _read_xml_fragment(self, stream): + """ + Read a string from the given stream that does not contain any + un-closed tags. In particular, this function first reads a + block from the stream of size ``self._BLOCK_SIZE``. It then + checks if that block contains an un-closed tag. If it does, + then this function either backtracks to the last '<', or reads + another block. + """ + fragment = '' + + if isinstance(stream, SeekableUnicodeStreamReader): + startpos = stream.tell() + while True: + # Read a block and add it to the fragment. + xml_block = stream.read(self._BLOCK_SIZE) + fragment += xml_block + + # Do we have a well-formed xml fragment? + if self._VALID_XML_RE.match(fragment): + return fragment + + # Do we have a fragment that will never be well-formed? 
+ if re.search('[<>]', fragment).group(0) == '>': + pos = stream.tell() - ( + len(fragment) - re.search('[<>]', fragment).end() + ) + raise ValueError('Unexpected ">" near char %s' % pos) + + # End of file? + if not xml_block: + raise ValueError('Unexpected end of file: tag not closed') + + # If not, then we must be in the middle of a <..tag..>. + # If appropriate, backtrack to the most recent '<' + # character. + last_open_bracket = fragment.rfind('<') + if last_open_bracket > 0: + if self._VALID_XML_RE.match(fragment[:last_open_bracket]): + if isinstance(stream, SeekableUnicodeStreamReader): + stream.seek(startpos) + stream.char_seek_forward(last_open_bracket) + else: + stream.seek(-(len(fragment) - last_open_bracket), 1) + return fragment[:last_open_bracket] + + # Otherwise, read another block. (i.e., return to the + # top of the loop.) + + def read_block(self, stream, tagspec=None, elt_handler=None): + """ + Read from ``stream`` until we find at least one element that + matches ``tagspec``, and return the result of applying + ``elt_handler`` to each element found. + """ + if tagspec is None: + tagspec = self._tagspec + if elt_handler is None: + elt_handler = self.handle_elt + + # Use a stack of strings to keep track of our context: + context = list(self._tag_context.get(stream.tell())) + assert context is not None # check this -- could it ever happen? + + elts = [] + + elt_start = None # where does the elt start + elt_depth = None # what context depth + elt_text = '' + + while elts == [] or elt_start is not None: + if isinstance(stream, SeekableUnicodeStreamReader): + startpos = stream.tell() + xml_fragment = self._read_xml_fragment(stream) + + # End of file. + if not xml_fragment: + if elt_start is None: + break + else: + raise ValueError('Unexpected end of file') + + # Process each in the xml fragment. + for piece in self._XML_PIECE.finditer(xml_fragment): + if self._DEBUG: + print('%25s %s' % ('/'.join(context)[-20:], piece.group())) + + if piece.group('START_TAG'): + name = self._XML_TAG_NAME.match(piece.group()).group(1) + # Keep context up-to-date. + context.append(name) + # Is this one of the elts we're looking for? + if elt_start is None: + if re.match(tagspec, '/'.join(context)): + elt_start = piece.start() + elt_depth = len(context) + + elif piece.group('END_TAG'): + name = self._XML_TAG_NAME.match(piece.group()).group(1) + # sanity checks: + if not context: + raise ValueError('Unmatched tag ' % name) + if name != context[-1]: + raise ValueError( + 'Unmatched tag <%s>...' % (context[-1], name) + ) + # Is this the end of an element? + if elt_start is not None and elt_depth == len(context): + elt_text += xml_fragment[elt_start : piece.end()] + elts.append((elt_text, '/'.join(context))) + elt_start = elt_depth = None + elt_text = '' + # Keep context up-to-date + context.pop() + + elif piece.group('EMPTY_ELT_TAG'): + name = self._XML_TAG_NAME.match(piece.group()).group(1) + if elt_start is None: + if re.match(tagspec, '/'.join(context) + '/' + name): + elts.append((piece.group(), '/'.join(context) + '/' + name)) + + if elt_start is not None: + # If we haven't found any elements yet, then keep + # looping until we do. + if elts == []: + elt_text += xml_fragment[elt_start:] + elt_start = 0 + + # If we've found at least one element, then try + # backtracking to the start of the element that we're + # inside of. + else: + # take back the last start-tag, and return what + # we've gotten so far (elts is non-empty). 
+ if self._DEBUG: + print(' ' * 36 + '(backtrack)') + if isinstance(stream, SeekableUnicodeStreamReader): + stream.seek(startpos) + stream.char_seek_forward(elt_start) + else: + stream.seek(-(len(xml_fragment) - elt_start), 1) + context = context[: elt_depth - 1] + elt_start = elt_depth = None + elt_text = '' + + # Update the _tag_context dict. + pos = stream.tell() + if pos in self._tag_context: + assert tuple(context) == self._tag_context[pos] + else: + self._tag_context[pos] = tuple(context) + + return [ + elt_handler( + ElementTree.fromstring(elt.encode('ascii', 'xmlcharrefreplace')), + context, + ) + for (elt, context) in elts + ] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/ycoe.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/ycoe.py new file mode 100644 index 0000000..49a6685 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/reader/ycoe.py @@ -0,0 +1,261 @@ +# -*- coding: iso-8859-1 -*- + +# Natural Language Toolkit: York-Toronto-Helsinki Parsed Corpus of Old English Prose (YCOE) +# +# Copyright (C) 2001-2015 NLTK Project +# Author: Selina Dennis +# URL: +# For license information, see LICENSE.TXT + +""" +Corpus reader for the York-Toronto-Helsinki Parsed Corpus of Old +English Prose (YCOE), a 1.5 million word syntactically-annotated +corpus of Old English prose texts. The corpus is distributed by the +Oxford Text Archive: http://www.ota.ahds.ac.uk/ It is not included +with NLTK. + +The YCOE corpus is divided into 100 files, each representing +an Old English prose text. Tags used within each text complies +to the YCOE standard: http://www-users.york.ac.uk/~lang22/YCOE/YcoeHome.htm +""" + +import os +import re + +from six import string_types + +from nltk.tokenize import RegexpTokenizer +from nltk.corpus.reader.bracket_parse import BracketParseCorpusReader +from nltk.corpus.reader.tagged import TaggedCorpusReader + +from nltk.corpus.reader.util import * +from nltk.corpus.reader.api import * + + +class YCOECorpusReader(CorpusReader): + """ + Corpus reader for the York-Toronto-Helsinki Parsed Corpus of Old + English Prose (YCOE), a 1.5 million word syntactically-annotated + corpus of Old English prose texts. + """ + + def __init__(self, root, encoding='utf8'): + CorpusReader.__init__(self, root, [], encoding) + + self._psd_reader = YCOEParseCorpusReader( + self.root.join('psd'), '.*', '.psd', encoding=encoding + ) + self._pos_reader = YCOETaggedCorpusReader(self.root.join('pos'), '.*', '.pos') + + # Make sure we have a consistent set of items: + documents = set(f[:-4] for f in self._psd_reader.fileids()) + if set(f[:-4] for f in self._pos_reader.fileids()) != documents: + raise ValueError('Items in "psd" and "pos" ' 'subdirectories do not match.') + + fileids = sorted( + ['%s.psd' % doc for doc in documents] + + ['%s.pos' % doc for doc in documents] + ) + CorpusReader.__init__(self, root, fileids, encoding) + self._documents = sorted(documents) + + def documents(self, fileids=None): + """ + Return a list of document identifiers for all documents in + this corpus, or for the documents with the given file(s) if + specified. + """ + if fileids is None: + return self._documents + if isinstance(fileids, string_types): + fileids = [fileids] + for f in fileids: + if f not in self._fileids: + raise KeyError('File id %s not found' % fileids) + # Strip off the '.pos' and '.psd' extensions. 
+ return sorted(set(f[:-4] for f in fileids)) + + def fileids(self, documents=None): + """ + Return a list of file identifiers for the files that make up + this corpus, or that store the given document(s) if specified. + """ + if documents is None: + return self._fileids + elif isinstance(documents, string_types): + documents = [documents] + return sorted( + set( + ['%s.pos' % doc for doc in documents] + + ['%s.psd' % doc for doc in documents] + ) + ) + + def _getfileids(self, documents, subcorpus): + """ + Helper that selects the appropriate fileids for a given set of + documents from a given subcorpus (pos or psd). + """ + if documents is None: + documents = self._documents + else: + if isinstance(documents, string_types): + documents = [documents] + for document in documents: + if document not in self._documents: + if document[-4:] in ('.pos', '.psd'): + raise ValueError( + 'Expected a document identifier, not a file ' + 'identifier. (Use corpus.documents() to get ' + 'a list of document identifiers.' + ) + else: + raise ValueError('Document identifier %s not found' % document) + return ['%s.%s' % (d, subcorpus) for d in documents] + + # Delegate to one of our two sub-readers: + def words(self, documents=None): + return self._pos_reader.words(self._getfileids(documents, 'pos')) + + def sents(self, documents=None): + return self._pos_reader.sents(self._getfileids(documents, 'pos')) + + def paras(self, documents=None): + return self._pos_reader.paras(self._getfileids(documents, 'pos')) + + def tagged_words(self, documents=None): + return self._pos_reader.tagged_words(self._getfileids(documents, 'pos')) + + def tagged_sents(self, documents=None): + return self._pos_reader.tagged_sents(self._getfileids(documents, 'pos')) + + def tagged_paras(self, documents=None): + return self._pos_reader.tagged_paras(self._getfileids(documents, 'pos')) + + def parsed_sents(self, documents=None): + return self._psd_reader.parsed_sents(self._getfileids(documents, 'psd')) + + +class YCOEParseCorpusReader(BracketParseCorpusReader): + """Specialized version of the standard bracket parse corpus reader + that strips out (CODE ...) and (ID ...) nodes.""" + + def _parse(self, t): + t = re.sub(r'(?u)\((CODE|ID)[^\)]*\)', '', t) + if re.match(r'\s*\(\s*\)\s*$', t): + return None + return BracketParseCorpusReader._parse(self, t) + + +class YCOETaggedCorpusReader(TaggedCorpusReader): + def __init__(self, root, items, encoding='utf8'): + gaps_re = r'(?u)(?<=/\.)\s+|\s*\S*_CODE\s*|\s*\S*_ID\s*' + sent_tokenizer = RegexpTokenizer(gaps_re, gaps=True) + TaggedCorpusReader.__init__( + self, root, items, sep='_', sent_tokenizer=sent_tokenizer + ) + + +#: A list of all documents and their titles in ycoe. 
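Since YCOE is distributed separately by the Oxford Text Archive, the reader can only be exercised once the corpus has been obtained and unpacked (typically under nltk_data/corpora/ycoe, with its pos/ and psd/ subdirectories); a hedged sketch using document identifiers from the table that follows.

    from nltk.corpus import ycoe    # a LazyCorpusLoader wrapping YCOECorpusReader

    print(ycoe.documents()[:3])                     # document ids, e.g. 'coadrian.o34'
    print(ycoe.tagged_words('coadrian.o34')[:5])    # delegated to the 'pos' sub-reader
    print(ycoe.parsed_sents('coadrian.o34')[0])     # delegated to the 'psd' sub-reader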
+documents = { + 'coadrian.o34': 'Adrian and Ritheus', + 'coaelhom.o3': 'Ælfric, Supplemental Homilies', + 'coaelive.o3': 'Ælfric\'s Lives of Saints', + 'coalcuin': 'Alcuin De virtutibus et vitiis', + 'coalex.o23': 'Alexander\'s Letter to Aristotle', + 'coapollo.o3': 'Apollonius of Tyre', + 'coaugust': 'Augustine', + 'cobede.o2': 'Bede\'s History of the English Church', + 'cobenrul.o3': 'Benedictine Rule', + 'coblick.o23': 'Blickling Homilies', + 'coboeth.o2': 'Boethius\' Consolation of Philosophy', + 'cobyrhtf.o3': 'Byrhtferth\'s Manual', + 'cocanedgD': 'Canons of Edgar (D)', + 'cocanedgX': 'Canons of Edgar (X)', + 'cocathom1.o3': 'Ælfric\'s Catholic Homilies I', + 'cocathom2.o3': 'Ælfric\'s Catholic Homilies II', + 'cochad.o24': 'Saint Chad', + 'cochdrul': 'Chrodegang of Metz, Rule', + 'cochristoph': 'Saint Christopher', + 'cochronA.o23': 'Anglo-Saxon Chronicle A', + 'cochronC': 'Anglo-Saxon Chronicle C', + 'cochronD': 'Anglo-Saxon Chronicle D', + 'cochronE.o34': 'Anglo-Saxon Chronicle E', + 'cocura.o2': 'Cura Pastoralis', + 'cocuraC': 'Cura Pastoralis (Cotton)', + 'codicts.o34': 'Dicts of Cato', + 'codocu1.o1': 'Documents 1 (O1)', + 'codocu2.o12': 'Documents 2 (O1/O2)', + 'codocu2.o2': 'Documents 2 (O2)', + 'codocu3.o23': 'Documents 3 (O2/O3)', + 'codocu3.o3': 'Documents 3 (O3)', + 'codocu4.o24': 'Documents 4 (O2/O4)', + 'coeluc1': 'Honorius of Autun, Elucidarium 1', + 'coeluc2': 'Honorius of Autun, Elucidarium 1', + 'coepigen.o3': 'Ælfric\'s Epilogue to Genesis', + 'coeuphr': 'Saint Euphrosyne', + 'coeust': 'Saint Eustace and his companions', + 'coexodusP': 'Exodus (P)', + 'cogenesiC': 'Genesis (C)', + 'cogregdC.o24': 'Gregory\'s Dialogues (C)', + 'cogregdH.o23': 'Gregory\'s Dialogues (H)', + 'coherbar': 'Pseudo-Apuleius, Herbarium', + 'coinspolD.o34': 'Wulfstan\'s Institute of Polity (D)', + 'coinspolX': 'Wulfstan\'s Institute of Polity (X)', + 'cojames': 'Saint James', + 'colacnu.o23': 'Lacnunga', + 'colaece.o2': 'Leechdoms', + 'colaw1cn.o3': 'Laws, Cnut I', + 'colaw2cn.o3': 'Laws, Cnut II', + 'colaw5atr.o3': 'Laws, Æthelred V', + 'colaw6atr.o3': 'Laws, Æthelred VI', + 'colawaf.o2': 'Laws, Alfred', + 'colawafint.o2': 'Alfred\'s Introduction to Laws', + 'colawger.o34': 'Laws, Gerefa', + 'colawine.ox2': 'Laws, Ine', + 'colawnorthu.o3': 'Northumbra Preosta Lagu', + 'colawwllad.o4': 'Laws, William I, Lad', + 'coleofri.o4': 'Leofric', + 'colsigef.o3': 'Ælfric\'s Letter to Sigefyrth', + 'colsigewB': 'Ælfric\'s Letter to Sigeweard (B)', + 'colsigewZ.o34': 'Ælfric\'s Letter to Sigeweard (Z)', + 'colwgeat': 'Ælfric\'s Letter to Wulfgeat', + 'colwsigeT': 'Ælfric\'s Letter to Wulfsige (T)', + 'colwsigeXa.o34': 'Ælfric\'s Letter to Wulfsige (Xa)', + 'colwstan1.o3': 'Ælfric\'s Letter to Wulfstan I', + 'colwstan2.o3': 'Ælfric\'s Letter to Wulfstan II', + 'comargaC.o34': 'Saint Margaret (C)', + 'comargaT': 'Saint Margaret (T)', + 'comart1': 'Martyrology, I', + 'comart2': 'Martyrology, II', + 'comart3.o23': 'Martyrology, III', + 'comarvel.o23': 'Marvels of the East', + 'comary': 'Mary of Egypt', + 'coneot': 'Saint Neot', + 'conicodA': 'Gospel of Nicodemus (A)', + 'conicodC': 'Gospel of Nicodemus (C)', + 'conicodD': 'Gospel of Nicodemus (D)', + 'conicodE': 'Gospel of Nicodemus (E)', + 'coorosiu.o2': 'Orosius', + 'cootest.o3': 'Heptateuch', + 'coprefcath1.o3': 'Ælfric\'s Preface to Catholic Homilies I', + 'coprefcath2.o3': 'Ælfric\'s Preface to Catholic Homilies II', + 'coprefcura.o2': 'Preface to the Cura Pastoralis', + 'coprefgen.o3': 'Ælfric\'s Preface to Genesis', + 'copreflives.o3': 'Ælfric\'s 
Preface to Lives of Saints', + 'coprefsolilo': 'Preface to Augustine\'s Soliloquies', + 'coquadru.o23': 'Pseudo-Apuleius, Medicina de quadrupedibus', + 'corood': 'History of the Holy Rood-Tree', + 'cosevensl': 'Seven Sleepers', + 'cosolilo': 'St. Augustine\'s Soliloquies', + 'cosolsat1.o4': 'Solomon and Saturn I', + 'cosolsat2': 'Solomon and Saturn II', + 'cotempo.o3': 'Ælfric\'s De Temporibus Anni', + 'coverhom': 'Vercelli Homilies', + 'coverhomE': 'Vercelli Homilies (E)', + 'coverhomL': 'Vercelli Homilies (L)', + 'covinceB': 'Saint Vincent (Bodley 343)', + 'covinsal': 'Vindicta Salvatoris', + 'cowsgosp.o3': 'West-Saxon Gospels', + 'cowulf.o34': 'Wulfstan\'s Homilies', +} diff --git a/venv.bak/lib/python3.7/site-packages/nltk/corpus/util.py b/venv.bak/lib/python3.7/site-packages/nltk/corpus/util.py new file mode 100644 index 0000000..382edc1 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/corpus/util.py @@ -0,0 +1,156 @@ +# Natural Language Toolkit: Corpus Reader Utility Functions +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +###################################################################### +# { Lazy Corpus Loader +###################################################################### + +from __future__ import unicode_literals +import re +import gc +import nltk +from nltk.compat import python_2_unicode_compatible + +TRY_ZIPFILE_FIRST = False + + +@python_2_unicode_compatible +class LazyCorpusLoader(object): + """ + To see the API documentation for this lazily loaded corpus, first + run corpus.ensure_loaded(), and then run help(this_corpus). + + LazyCorpusLoader is a proxy object which is used to stand in for a + corpus object before the corpus is loaded. This allows NLTK to + create an object for each corpus, but defer the costs associated + with loading those corpora until the first time that they're + actually accessed. + + The first time this object is accessed in any way, it will load + the corresponding corpus, and transform itself into that corpus + (by modifying its own ``__class__`` and ``__dict__`` attributes). + + If the corpus can not be found, then accessing this object will + raise an exception, displaying installation instructions for the + NLTK data package. Once they've properly installed the data + package (or modified ``nltk.data.path`` to point to its location), + they can then use the corpus object without restarting python. + + :param name: The name of the corpus + :type name: str + :param reader_cls: The specific CorpusReader class, e.g. PlaintextCorpusReader, WordListCorpusReader + :type reader: nltk.corpus.reader.api.CorpusReader + :param nltk_data_subdir: The subdirectory where the corpus is stored. + :type nltk_data_subdir: str + :param *args: Any other non-keywords arguments that `reader_cls` might need. + :param *kargs: Any other keywords arguments that `reader_cls` might need. + """ + + def __init__(self, name, reader_cls, *args, **kwargs): + from nltk.corpus.reader.api import CorpusReader + + assert issubclass(reader_cls, CorpusReader) + self.__name = self.__name__ = name + self.__reader_cls = reader_cls + # If nltk_data_subdir is set explicitly + if 'nltk_data_subdir' in kwargs: + # Use the specified subdirectory path + self.subdir = kwargs['nltk_data_subdir'] + # Pops the `nltk_data_subdir` argument, we don't need it anymore. 
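+ # Popping is necessary because self.__kwargs is forwarded verbatim to
+ # reader_cls in __load(); leaving the key in place would hand the corpus
+ # reader an unexpected 'nltk_data_subdir' argument.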
+ kwargs.pop('nltk_data_subdir', None) + else: # Otherwise use 'nltk_data/corpora' + self.subdir = 'corpora' + self.__args = args + self.__kwargs = kwargs + + def __load(self): + # Find the corpus root directory. + zip_name = re.sub(r'(([^/]+)(/.*)?)', r'\2.zip/\1/', self.__name) + if TRY_ZIPFILE_FIRST: + try: + root = nltk.data.find('{}/{}'.format(self.subdir, zip_name)) + except LookupError as e: + try: + root = nltk.data.find('{}/{}'.format(self.subdir, self.__name)) + except LookupError: + raise e + else: + try: + root = nltk.data.find('{}/{}'.format(self.subdir, self.__name)) + except LookupError as e: + try: + root = nltk.data.find('{}/{}'.format(self.subdir, zip_name)) + except LookupError: + raise e + + # Load the corpus. + corpus = self.__reader_cls(root, *self.__args, **self.__kwargs) + + # This is where the magic happens! Transform ourselves into + # the corpus by modifying our own __dict__ and __class__ to + # match that of the corpus. + + args, kwargs = self.__args, self.__kwargs + name, reader_cls = self.__name, self.__reader_cls + + self.__dict__ = corpus.__dict__ + self.__class__ = corpus.__class__ + + # _unload support: assign __dict__ and __class__ back, then do GC. + # after reassigning __dict__ there shouldn't be any references to + # corpus data so the memory should be deallocated after gc.collect() + def _unload(self): + lazy_reader = LazyCorpusLoader(name, reader_cls, *args, **kwargs) + self.__dict__ = lazy_reader.__dict__ + self.__class__ = lazy_reader.__class__ + gc.collect() + + self._unload = _make_bound_method(_unload, self) + + def __getattr__(self, attr): + + # Fix for inspect.isclass under Python 2.6 + # (see http://bugs.python.org/issue1225107). + # Without this fix tests may take extra 1.5GB RAM + # because all corpora gets loaded during test collection. + if attr == '__bases__': + raise AttributeError("LazyCorpusLoader object has no attribute '__bases__'") + + self.__load() + # This looks circular, but its not, since __load() changes our + # __class__ to something new: + return getattr(self, attr) + + def __repr__(self): + return '<%s in %r (not loaded yet)>' % ( + self.__reader_cls.__name__, + '.../corpora/' + self.__name, + ) + + def _unload(self): + # If an exception occures during corpus loading then + # '_unload' method may be unattached, so __getattr__ can be called; + # we shouldn't trigger corpus loading again in this case. + pass + + +def _make_bound_method(func, self): + """ + Magic for creating bound methods (used for _unload). + """ + + class Foo(object): + def meth(self): + pass + + f = Foo() + bound_method = type(f.meth) + + try: + return bound_method(func, self, self.__class__) + except TypeError: # python3 + return bound_method(func, self) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/data.py b/venv.bak/lib/python3.7/site-packages/nltk/data.py new file mode 100644 index 0000000..67ac9b5 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/data.py @@ -0,0 +1,1555 @@ +# Natural Language Toolkit: Utility functions +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +Functions to find and load NLTK resource files, such as corpora, +grammars, and saved processing objects. Resource files are identified +using URLs, such as ``nltk:corpora/abc/rural.txt`` or +``http://nltk.org/sample/toy.cfg``. The following URL protocols are +supported: + + - ``file:path``: Specifies the file whose path is *path*. + Both relative and absolute paths may be used. 
+ + - ``http://host/path``: Specifies the file stored on the web + server *host* at path *path*. + + - ``nltk:path``: Specifies the file stored in the NLTK data + package at *path*. NLTK will search for these files in the + directories specified by ``nltk.data.path``. + +If no protocol is specified, then the default protocol ``nltk:`` will +be used. + +This module provides to functions that can be used to access a +resource file, given its URL: ``load()`` loads a given resource, and +adds it to a resource cache; and ``retrieve()`` copies a given resource +to a local file. +""" +from __future__ import print_function, unicode_literals, division + +import functools +import textwrap +import io +import os +import re +import sys +import zipfile +import codecs + +from abc import ABCMeta, abstractmethod +from gzip import GzipFile, WRITE as GZ_WRITE + +from six import add_metaclass +from six import string_types, text_type +from six.moves.urllib.request import urlopen, url2pathname + +try: + import cPickle as pickle +except ImportError: + import pickle + +try: # Python 3. + textwrap_indent = functools.partial(textwrap.indent, prefix=' ') +except AttributeError: # Python 2; indent() not available for Python2. + textwrap_fill = functools.partial( + textwrap.fill, + initial_indent=' ', + subsequent_indent=' ', + replace_whitespace=False, + ) + + def textwrap_indent(text): + return '\n'.join(textwrap_fill(line) for line in text.splitlines()) + + +try: + from zlib import Z_SYNC_FLUSH as FLUSH +except ImportError: + from zlib import Z_FINISH as FLUSH + +# this import should be more specific: +import nltk +from nltk.compat import py3_data, add_py3_data, BytesIO + +###################################################################### +# Search Path +###################################################################### + +path = [] +"""A list of directories where the NLTK data package might reside. + These directories will be checked in order when looking for a + resource in the data package. 
Note that this allows users to + substitute in their own versions of resources, if they have them + (e.g., in their home directory under ~/nltk_data).""" + +# User-specified locations: +_paths_from_env = os.environ.get('NLTK_DATA', str('')).split(os.pathsep) +path += [d for d in _paths_from_env if d] +if 'APPENGINE_RUNTIME' not in os.environ and os.path.expanduser('~/') != '~/': + path.append(os.path.expanduser(str('~/nltk_data'))) + +if sys.platform.startswith('win'): + # Common locations on Windows: + path += [ + os.path.join(sys.prefix, str('nltk_data')), + os.path.join(sys.prefix, str('share'), str('nltk_data')), + os.path.join(sys.prefix, str('lib'), str('nltk_data')), + os.path.join(os.environ.get(str('APPDATA'), str('C:\\')), str('nltk_data')), + str(r'C:\nltk_data'), + str(r'D:\nltk_data'), + str(r'E:\nltk_data'), + ] +else: + # Common locations on UNIX & OS X: + path += [ + os.path.join(sys.prefix, str('nltk_data')), + os.path.join(sys.prefix, str('share'), str('nltk_data')), + os.path.join(sys.prefix, str('lib'), str('nltk_data')), + str('/usr/share/nltk_data'), + str('/usr/local/share/nltk_data'), + str('/usr/lib/nltk_data'), + str('/usr/local/lib/nltk_data'), + ] + + +###################################################################### +# Util Functions +###################################################################### + + +def gzip_open_unicode( + filename, + mode="rb", + compresslevel=9, + encoding='utf-8', + fileobj=None, + errors=None, + newline=None, +): + if fileobj is None: + fileobj = GzipFile(filename, mode, compresslevel, fileobj) + return io.TextIOWrapper(fileobj, encoding, errors, newline) + + +def split_resource_url(resource_url): + """ + Splits a resource url into ":". + + >>> windows = sys.platform.startswith('win') + >>> split_resource_url('nltk:home/nltk') + ('nltk', 'home/nltk') + >>> split_resource_url('nltk:/home/nltk') + ('nltk', '/home/nltk') + >>> split_resource_url('file:/home/nltk') + ('file', '/home/nltk') + >>> split_resource_url('file:///home/nltk') + ('file', '/home/nltk') + >>> split_resource_url('file:///C:/home/nltk') + ('file', '/C:/home/nltk') + """ + protocol, path_ = resource_url.split(':', 1) + if protocol == 'nltk': + pass + elif protocol == 'file': + if path_.startswith('/'): + path_ = '/' + path_.lstrip('/') + else: + path_ = re.sub(r'^/{0,2}', '', path_) + return protocol, path_ + + +def normalize_resource_url(resource_url): + r""" + Normalizes a resource url + + >>> windows = sys.platform.startswith('win') + >>> os.path.normpath(split_resource_url(normalize_resource_url('file:grammar.fcfg'))[1]) == \ + ... 
('\\' if windows else '') + os.path.abspath(os.path.join(os.curdir, 'grammar.fcfg')) + True + >>> not windows or normalize_resource_url('file:C:/dir/file') == 'file:///C:/dir/file' + True + >>> not windows or normalize_resource_url('file:C:\\dir\\file') == 'file:///C:/dir/file' + True + >>> not windows or normalize_resource_url('file:C:\\dir/file') == 'file:///C:/dir/file' + True + >>> not windows or normalize_resource_url('file://C:/dir/file') == 'file:///C:/dir/file' + True + >>> not windows or normalize_resource_url('file:////C:/dir/file') == 'file:///C:/dir/file' + True + >>> not windows or normalize_resource_url('nltk:C:/dir/file') == 'file:///C:/dir/file' + True + >>> not windows or normalize_resource_url('nltk:C:\\dir\\file') == 'file:///C:/dir/file' + True + >>> windows or normalize_resource_url('file:/dir/file/toy.cfg') == 'file:///dir/file/toy.cfg' + True + >>> normalize_resource_url('nltk:home/nltk') + 'nltk:home/nltk' + >>> windows or normalize_resource_url('nltk:/home/nltk') == 'file:///home/nltk' + True + >>> normalize_resource_url('http://example.com/dir/file') + 'http://example.com/dir/file' + >>> normalize_resource_url('dir/file') + 'nltk:dir/file' + """ + try: + protocol, name = split_resource_url(resource_url) + except ValueError: + # the resource url has no protocol, use the nltk protocol by default + protocol = 'nltk' + name = resource_url + # use file protocol if the path is an absolute path + if protocol == 'nltk' and os.path.isabs(name): + protocol = 'file://' + name = normalize_resource_name(name, False, None) + elif protocol == 'file': + protocol = 'file://' + # name is absolute + name = normalize_resource_name(name, False, None) + elif protocol == 'nltk': + protocol = 'nltk:' + name = normalize_resource_name(name, True) + else: + # handled by urllib + protocol += '://' + return ''.join([protocol, name]) + + +def normalize_resource_name(resource_name, allow_relative=True, relative_path=None): + """ + :type resource_name: str or unicode + :param resource_name: The name of the resource to search for. + Resource names are posix-style relative path names, such as + ``corpora/brown``. Directory names will automatically + be converted to a platform-appropriate path separator. 
+ Directory trailing slashes are preserved + + >>> windows = sys.platform.startswith('win') + >>> normalize_resource_name('.', True) + './' + >>> normalize_resource_name('./', True) + './' + >>> windows or normalize_resource_name('dir/file', False, '/') == '/dir/file' + True + >>> not windows or normalize_resource_name('C:/file', False, '/') == '/C:/file' + True + >>> windows or normalize_resource_name('/dir/file', False, '/') == '/dir/file' + True + >>> windows or normalize_resource_name('../dir/file', False, '/') == '/dir/file' + True + >>> not windows or normalize_resource_name('/dir/file', True, '/') == 'dir/file' + True + >>> windows or normalize_resource_name('/dir/file', True, '/') == '/dir/file' + True + """ + is_dir = bool(re.search(r'[\\/.]$', resource_name)) or resource_name.endswith( + os.path.sep + ) + if sys.platform.startswith('win'): + resource_name = resource_name.lstrip('/') + else: + resource_name = re.sub(r'^/+', '/', resource_name) + if allow_relative: + resource_name = os.path.normpath(resource_name) + else: + if relative_path is None: + relative_path = os.curdir + resource_name = os.path.abspath(os.path.join(relative_path, resource_name)) + resource_name = resource_name.replace('\\', '/').replace(os.path.sep, '/') + if sys.platform.startswith('win') and os.path.isabs(resource_name): + resource_name = '/' + resource_name + if is_dir and not resource_name.endswith('/'): + resource_name += '/' + return resource_name + + +###################################################################### +# Path Pointers +###################################################################### + + +@add_metaclass(ABCMeta) +class PathPointer(object): + """ + An abstract base class for 'path pointers,' used by NLTK's data + package to identify specific paths. Two subclasses exist: + ``FileSystemPathPointer`` identifies a file that can be accessed + directly via a given absolute path. ``ZipFilePathPointer`` + identifies a file contained within a zipfile, that can be accessed + by reading that zipfile. + """ + + @abstractmethod + def open(self, encoding=None): + """ + Return a seekable read-only stream that can be used to read + the contents of the file identified by this path pointer. + + :raise IOError: If the path specified by this pointer does + not contain a readable file. + """ + + @abstractmethod + def file_size(self): + """ + Return the size of the file pointed to by this path pointer, + in bytes. + + :raise IOError: If the path specified by this pointer does + not contain a readable file. + """ + + @abstractmethod + def join(self, fileid): + """ + Return a new path pointer formed by starting at the path + identified by this pointer, and then following the relative + path given by ``fileid``. The path components of ``fileid`` + should be separated by forward slashes, regardless of + the underlying file system's path seperator character. + """ + + +class FileSystemPathPointer(PathPointer, text_type): + """ + A path pointer that identifies a file which can be accessed + directly via a given absolute path. + """ + + @py3_data + def __init__(self, _path): + """ + Create a new path pointer for the given absolute path. + + :raise IOError: If the given path does not exist. + """ + + _path = os.path.abspath(_path) + if not os.path.exists(_path): + raise IOError('No such file or directory: %r' % _path) + self._path = _path + + # There's no need to call str.__init__(), since it's a no-op; + # str does all of its setup work in __new__. 
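+ # Illustrative usage (a sketch only; '/usr/share/nltk_data' is a
+ # hypothetical location, and __init__ raises IOError unless the given
+ # path actually exists on disk):
+ #
+ #   root = FileSystemPathPointer('/usr/share/nltk_data')
+ #   txt = root.join('corpora/abc/rural.txt')   # another FileSystemPathPointer
+ #   stream = txt.open(encoding='utf8')         # a SeekableUnicodeStreamReader
+ #   nbytes = txt.file_size()                   # size in bytes, via os.stat()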
+ + @property + def path(self): + """The absolute path identified by this path pointer.""" + return self._path + + def open(self, encoding=None): + stream = open(self._path, 'rb') + if encoding is not None: + stream = SeekableUnicodeStreamReader(stream, encoding) + return stream + + def file_size(self): + return os.stat(self._path).st_size + + def join(self, fileid): + _path = os.path.join(self._path, fileid) + return FileSystemPathPointer(_path) + + def __repr__(self): + # This should be a byte string under Python 2.x; + # we don't want transliteration here so + # @python_2_unicode_compatible is not used. + return str('FileSystemPathPointer(%r)' % self._path) + + def __str__(self): + return self._path + + +class BufferedGzipFile(GzipFile): + """ + A ``GzipFile`` subclass that buffers calls to ``read()`` and ``write()``. + This allows faster reads and writes of data to and from gzip-compressed + files at the cost of using more memory. + + The default buffer size is 2MB. + + ``BufferedGzipFile`` is useful for loading large gzipped pickle objects + as well as writing large encoded feature files for classifier training. + """ + + MB = 2 ** 20 + SIZE = 2 * MB + + @py3_data + def __init__( + self, filename=None, mode=None, compresslevel=9, fileobj=None, **kwargs + ): + """ + Return a buffered gzip file object. + + :param filename: a filesystem path + :type filename: str + :param mode: a file mode which can be any of 'r', 'rb', 'a', 'ab', + 'w', or 'wb' + :type mode: str + :param compresslevel: The compresslevel argument is an integer from 1 + to 9 controlling the level of compression; 1 is fastest and + produces the least compression, and 9 is slowest and produces the + most compression. The default is 9. + :type compresslevel: int + :param fileobj: a BytesIO stream to read from instead of a file. + :type fileobj: BytesIO + :param size: number of bytes to buffer during calls to read() and write() + :type size: int + :rtype: BufferedGzipFile + """ + GzipFile.__init__(self, filename, mode, compresslevel, fileobj) + self._size = kwargs.get('size', self.SIZE) + self._nltk_buffer = BytesIO() + # cStringIO does not support len. + self._len = 0 + + def _reset_buffer(self): + # For some reason calling BytesIO.truncate() here will lead to + # inconsistent writes so just set _buffer to a new BytesIO object. + self._nltk_buffer = BytesIO() + self._len = 0 + + def _write_buffer(self, data): + # Simply write to the buffer and increment the buffer size. + if data is not None: + self._nltk_buffer.write(data) + self._len += len(data) + + def _write_gzip(self, data): + # Write the current buffer to the GzipFile. + GzipFile.write(self, self._nltk_buffer.getvalue()) + # Then reset the buffer and write the new data to the buffer. + self._reset_buffer() + self._write_buffer(data) + + def close(self): + # GzipFile.close() doesn't actuallly close anything. 
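+ # In write mode the internal buffer may still hold unwritten data, so it
+ # is flushed through _write_gzip(None) below before delegating to
+ # GzipFile.close().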
+ if self.mode == GZ_WRITE: + self._write_gzip(None) + self._reset_buffer() + return GzipFile.close(self) + + def flush(self, lib_mode=FLUSH): + self._nltk_buffer.flush() + GzipFile.flush(self, lib_mode) + + def read(self, size=None): + if not size: + size = self._size + contents = BytesIO() + while True: + blocks = GzipFile.read(self, size) + if not blocks: + contents.flush() + break + contents.write(blocks) + return contents.getvalue() + else: + return GzipFile.read(self, size) + + def write(self, data, size=-1): + """ + :param data: bytes to write to file or buffer + :type data: bytes + :param size: buffer at least size bytes before writing to file + :type size: int + """ + if not size: + size = self._size + if self._len + len(data) <= size: + self._write_buffer(data) + else: + self._write_gzip(data) + + +class GzipFileSystemPathPointer(FileSystemPathPointer): + """ + A subclass of ``FileSystemPathPointer`` that identifies a gzip-compressed + file located at a given absolute path. ``GzipFileSystemPathPointer`` is + appropriate for loading large gzip-compressed pickle objects efficiently. + """ + + def open(self, encoding=None): + # Note: In >= Python3.5, GzipFile is already using a + # buffered reader in the backend which has a variable self._buffer + # See https://github.com/nltk/nltk/issues/1308 + if sys.version.startswith('2.7') or sys.version.startswith('3.4'): + stream = BufferedGzipFile(self._path, 'rb') + else: + stream = GzipFile(self._path, 'rb') + if encoding: + stream = SeekableUnicodeStreamReader(stream, encoding) + return stream + + +class ZipFilePathPointer(PathPointer): + """ + A path pointer that identifies a file contained within a zipfile, + which can be accessed by reading that zipfile. + """ + + @py3_data + def __init__(self, zipfile, entry=''): + """ + Create a new path pointer pointing at the specified entry + in the given zipfile. + + :raise IOError: If the given zipfile does not exist, or if it + does not contain the specified entry. + """ + if isinstance(zipfile, string_types): + zipfile = OpenOnDemandZipFile(os.path.abspath(zipfile)) + + # Check that the entry exists: + if entry: + + # Normalize the entry string, it should be relative: + entry = normalize_resource_name(entry, True, '/').lstrip('/') + + try: + zipfile.getinfo(entry) + except Exception: + # Sometimes directories aren't explicitly listed in + # the zip file. So if `entry` is a directory name, + # then check if the zipfile contains any files that + # are under the given directory. + if entry.endswith('/') and [ + n for n in zipfile.namelist() if n.startswith(entry) + ]: + pass # zipfile contains a file in that directory. + else: + # Otherwise, complain. + raise IOError( + 'Zipfile %r does not contain %r' % (zipfile.filename, entry) + ) + self._zipfile = zipfile + self._entry = entry + + @property + def zipfile(self): + """ + The zipfile.ZipFile object used to access the zip file + containing the entry identified by this path pointer. + """ + return self._zipfile + + @property + def entry(self): + """ + The name of the file within zipfile that this path + pointer points to. 
+ """ + return self._entry + + def open(self, encoding=None): + data = self._zipfile.read(self._entry) + stream = BytesIO(data) + if self._entry.endswith('.gz'): + # Note: In >= Python3.5, GzipFile is already using a + # buffered reader in the backend which has a variable self._buffer + # See https://github.com/nltk/nltk/issues/1308 + if sys.version.startswith('2.7') or sys.version.startswith('3.4'): + stream = BufferedGzipFile(self._entry, fileobj=stream) + else: + stream = GzipFile(self._entry, fileobj=stream) + elif encoding is not None: + stream = SeekableUnicodeStreamReader(stream, encoding) + return stream + + def file_size(self): + return self._zipfile.getinfo(self._entry).file_size + + def join(self, fileid): + entry = '%s/%s' % (self._entry, fileid) + return ZipFilePathPointer(self._zipfile, entry) + + def __repr__(self): + return str('ZipFilePathPointer(%r, %r)') % (self._zipfile.filename, self._entry) + + def __str__(self): + return os.path.normpath(os.path.join(self._zipfile.filename, self._entry)) + + +###################################################################### +# Access Functions +###################################################################### + +# Don't use a weak dictionary, because in the common case this +# causes a lot more reloading that necessary. +_resource_cache = {} +"""A dictionary used to cache resources so that they won't + need to be loaded more than once.""" + + +def find(resource_name, paths=None): + """ + Find the given resource by searching through the directories and + zip files in paths, where a None or empty string specifies an absolute path. + Returns a corresponding path name. If the given resource is not + found, raise a ``LookupError``, whose message gives a pointer to + the installation instructions for the NLTK downloader. + + Zip File Handling: + + - If ``resource_name`` contains a component with a ``.zip`` + extension, then it is assumed to be a zipfile; and the + remaining path components are used to look inside the zipfile. + + - If any element of ``nltk.data.path`` has a ``.zip`` extension, + then it is assumed to be a zipfile. + + - If a given resource name that does not contain any zipfile + component is not found initially, then ``find()`` will make a + second attempt to find that resource, by replacing each + component *p* in the path with *p.zip/p*. For example, this + allows ``find()`` to map the resource name + ``corpora/chat80/cities.pl`` to a zip file path pointer to + ``corpora/chat80.zip/chat80/cities.pl``. + + - When using ``find()`` to locate a directory contained in a + zipfile, the resource name must end with the forward slash + character. Otherwise, ``find()`` will not locate the + directory. + + :type resource_name: str or unicode + :param resource_name: The name of the resource to search for. + Resource names are posix-style relative path names, such as + ``corpora/brown``. Directory names will be + automatically converted to a platform-appropriate path separator. + :rtype: str + """ + resource_name = normalize_resource_name(resource_name, True) + + # Resolve default paths at runtime in-case the user overrides + # nltk.data.path + if paths is None: + paths = path + + # Check if the resource name includes a zipfile name + m = re.match(r'(.*\.zip)/?(.*)$|', resource_name) + zipfile, zipentry = m.groups() + + # Check each item in our path + for path_ in paths: + # Is the path item a zipfile? 
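+ # Two cases follow: a path entry that is itself a .zip file (the resource
+ # is looked up inside it with ZipFilePathPointer), and a directory entry
+ # (the resource, or the zipfile component named inside resource_name, is
+ # resolved relative to that directory).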
+ if path_ and (os.path.isfile(path_) and path_.endswith('.zip')): + try: + return ZipFilePathPointer(path_, resource_name) + except IOError: + # resource not in zipfile + continue + + # Is the path item a directory or is resource_name an absolute path? + elif not path_ or os.path.isdir(path_): + if zipfile is None: + p = os.path.join(path_, url2pathname(resource_name)) + if os.path.exists(p): + if p.endswith('.gz'): + return GzipFileSystemPathPointer(p) + else: + return FileSystemPathPointer(p) + else: + p = os.path.join(path_, url2pathname(zipfile)) + if os.path.exists(p): + try: + return ZipFilePathPointer(p, zipentry) + except IOError: + # resource not in zipfile + continue + + # Fallback: if the path doesn't include a zip file, then try + # again, assuming that one of the path components is inside a + # zipfile of the same name. + if zipfile is None: + pieces = resource_name.split('/') + for i in range(len(pieces)): + modified_name = '/'.join(pieces[:i] + [pieces[i] + '.zip'] + pieces[i:]) + try: + return find(modified_name, paths) + except LookupError: + pass + + # Identify the package (i.e. the .zip file) to download. + resource_zipname = resource_name.split('/')[1] + if resource_zipname.endswith('.zip'): + resource_zipname = resource_zipname.rpartition('.')[0] + # Display a friendly error message if the resource wasn't found: + msg = str( + "Resource \33[93m{resource}\033[0m not found.\n" + "Please use the NLTK Downloader to obtain the resource:\n\n" + "\33[31m" # To display red text in terminal. + ">>> import nltk\n" + ">>> nltk.download(\'{resource}\')\n" + "\033[0m" + ).format(resource=resource_zipname) + msg = textwrap_indent(msg) + + msg += '\n For more information see: https://www.nltk.org/data.html\n' + + msg += '\n Attempted to load \33[93m{resource_name}\033[0m\n'.format( + resource_name=resource_name + ) + + msg += '\n Searched in:' + ''.join('\n - %r' % d for d in paths) + sep = '*' * 70 + resource_not_found = '\n%s\n%s\n%s\n' % (sep, msg, sep) + raise LookupError(resource_not_found) + + +def retrieve(resource_url, filename=None, verbose=True): + """ + Copy the given resource to a local file. If no filename is + specified, then use the URL's filename. If there is already a + file named ``filename``, then raise a ``ValueError``. + + :type resource_url: str + :param resource_url: A URL specifying where the resource should be + loaded from. The default protocol is "nltk:", which searches + for the file in the the NLTK data package. + """ + resource_url = normalize_resource_url(resource_url) + if filename is None: + if resource_url.startswith('file:'): + filename = os.path.split(resource_url)[-1] + else: + filename = re.sub(r'(^\w+:)?.*/', '', resource_url) + if os.path.exists(filename): + filename = os.path.abspath(filename) + raise ValueError("File %r already exists!" % filename) + + if verbose: + print('Retrieving %r, saving to %r' % (resource_url, filename)) + + # Open the input & output streams. + infile = _open(resource_url) + + # Copy infile -> outfile, using 64k blocks. + with open(filename, "wb") as outfile: + while True: + s = infile.read(1024 * 64) # 64k blocks. + outfile.write(s) + if not s: + break + + infile.close() + + +#: A dictionary describing the formats that are supported by NLTK's +#: load() method. Keys are format names, and values are format +#: descriptions. 
+FORMATS = { + 'pickle': "A serialized python object, stored using the pickle module.", + 'json': "A serialized python object, stored using the json module.", + 'yaml': "A serialized python object, stored using the yaml module.", + 'cfg': "A context free grammar.", + 'pcfg': "A probabilistic CFG.", + 'fcfg': "A feature CFG.", + 'fol': "A list of first order logic expressions, parsed with " + "nltk.sem.logic.Expression.fromstring.", + 'logic': "A list of first order logic expressions, parsed with " + "nltk.sem.logic.LogicParser. Requires an additional logic_parser " + "parameter", + 'val': "A semantic valuation, parsed by nltk.sem.Valuation.fromstring.", + 'raw': "The raw (byte string) contents of a file.", + 'text': "The raw (unicode string) contents of a file. ", +} + +#: A dictionary mapping from file extensions to format names, used +#: by load() when format="auto" to decide the format for a +#: given resource url. +AUTO_FORMATS = { + 'pickle': 'pickle', + 'json': 'json', + 'yaml': 'yaml', + 'cfg': 'cfg', + 'pcfg': 'pcfg', + 'fcfg': 'fcfg', + 'fol': 'fol', + 'logic': 'logic', + 'val': 'val', + 'txt': 'text', + 'text': 'text', +} + + +def load( + resource_url, + format='auto', + cache=True, + verbose=False, + logic_parser=None, + fstruct_reader=None, + encoding=None, +): + """ + Load a given resource from the NLTK data package. The following + resource formats are currently supported: + + - ``pickle`` + - ``json`` + - ``yaml`` + - ``cfg`` (context free grammars) + - ``pcfg`` (probabilistic CFGs) + - ``fcfg`` (feature-based CFGs) + - ``fol`` (formulas of First Order Logic) + - ``logic`` (Logical formulas to be parsed by the given logic_parser) + - ``val`` (valuation of First Order Logic model) + - ``text`` (the file contents as a unicode string) + - ``raw`` (the raw file contents as a byte string) + + If no format is specified, ``load()`` will attempt to determine a + format based on the resource name's file extension. If that + fails, ``load()`` will raise a ``ValueError`` exception. + + For all text formats (everything except ``pickle``, ``json``, ``yaml`` and ``raw``), + it tries to decode the raw contents using UTF-8, and if that doesn't + work, it tries with ISO-8859-1 (Latin-1), unless the ``encoding`` + is specified. + + :type resource_url: str + :param resource_url: A URL specifying where the resource should be + loaded from. The default protocol is "nltk:", which searches + for the file in the the NLTK data package. + :type cache: bool + :param cache: If true, add this resource to a cache. If load() + finds a resource in its cache, then it will return it from the + cache rather than loading it. + :type verbose: bool + :param verbose: If true, print a message when loading a resource. + Messages are not displayed when a resource is retrieved from + the cache. + :type logic_parser: LogicParser + :param logic_parser: The parser that will be used to parse logical + expressions. + :type fstruct_reader: FeatStructReader + :param fstruct_reader: The parser that will be used to parse the + feature structure of an fcfg. + :type encoding: str + :param encoding: the encoding of the input; only used for text formats. + """ + resource_url = normalize_resource_url(resource_url) + resource_url = add_py3_data(resource_url) + + # Determine the format of the resource. 
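+ # With format='auto', the format is inferred from the file extension via
+ # the AUTO_FORMATS table above; a trailing '.gz' is skipped first, so a
+ # name like 'grammar.cfg.gz' is still recognised as a CFG.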
+ if format == 'auto': + resource_url_parts = resource_url.split('.') + ext = resource_url_parts[-1] + if ext == 'gz': + ext = resource_url_parts[-2] + format = AUTO_FORMATS.get(ext) + if format is None: + raise ValueError( + 'Could not determine format for %s based ' + 'on its file\nextension; use the "format" ' + 'argument to specify the format explicitly.' % resource_url + ) + + if format not in FORMATS: + raise ValueError('Unknown format type: %s!' % (format,)) + + # If we've cached the resource, then just return it. + if cache: + resource_val = _resource_cache.get((resource_url, format)) + if resource_val is not None: + if verbose: + print('<>' % (resource_url,)) + return resource_val + + # Let the user know what's going on. + if verbose: + print('<>' % (resource_url,)) + + # Load the resource. + opened_resource = _open(resource_url) + + if format == 'raw': + resource_val = opened_resource.read() + elif format == 'pickle': + resource_val = pickle.load(opened_resource) + elif format == 'json': + import json + from nltk.jsontags import json_tags + + resource_val = json.load(opened_resource) + tag = None + if len(resource_val) != 1: + tag = next(resource_val.keys()) + if tag not in json_tags: + raise ValueError('Unknown json tag.') + elif format == 'yaml': + import yaml + + resource_val = yaml.load(opened_resource) + else: + # The resource is a text format. + binary_data = opened_resource.read() + if encoding is not None: + string_data = binary_data.decode(encoding) + else: + try: + string_data = binary_data.decode('utf-8') + except UnicodeDecodeError: + string_data = binary_data.decode('latin-1') + if format == 'text': + resource_val = string_data + elif format == 'cfg': + resource_val = nltk.grammar.CFG.fromstring(string_data, encoding=encoding) + elif format == 'pcfg': + resource_val = nltk.grammar.PCFG.fromstring(string_data, encoding=encoding) + elif format == 'fcfg': + resource_val = nltk.grammar.FeatureGrammar.fromstring( + string_data, + logic_parser=logic_parser, + fstruct_reader=fstruct_reader, + encoding=encoding, + ) + elif format == 'fol': + resource_val = nltk.sem.read_logic( + string_data, + logic_parser=nltk.sem.logic.LogicParser(), + encoding=encoding, + ) + elif format == 'logic': + resource_val = nltk.sem.read_logic( + string_data, logic_parser=logic_parser, encoding=encoding + ) + elif format == 'val': + resource_val = nltk.sem.read_valuation(string_data, encoding=encoding) + else: + raise AssertionError( + "Internal NLTK error: Format %s isn't " + "handled by nltk.data.load()" % (format,) + ) + + opened_resource.close() + + # If requested, add it to the cache. + if cache: + try: + _resource_cache[(resource_url, format)] = resource_val + # TODO: add this line + # print('<>' % (resource_url,)) + except TypeError: + # We can't create weak references to some object types, like + # strings and tuples. For now, just don't cache them. + pass + + return resource_val + + +def show_cfg(resource_url, escape='##'): + """ + Write out a grammar file, ignoring escaped and empty lines. + + :type resource_url: str + :param resource_url: A URL specifying where the resource should be + loaded from. The default protocol is "nltk:", which searches + for the file in the the NLTK data package. 
+ :type escape: str + :param escape: Prepended string that signals lines to be ignored + """ + resource_url = normalize_resource_url(resource_url) + resource_val = load(resource_url, format='text', cache=False) + lines = resource_val.splitlines() + for l in lines: + if l.startswith(escape): + continue + if re.match('^$', l): + continue + print(l) + + +def clear_cache(): + """ + Remove all objects from the resource cache. + :see: load() + """ + _resource_cache.clear() + + +def _open(resource_url): + """ + Helper function that returns an open file object for a resource, + given its resource URL. If the given resource URL uses the "nltk:" + protocol, or uses no protocol, then use ``nltk.data.find`` to find + its path, and open it with the given mode; if the resource URL + uses the 'file' protocol, then open the file with the given mode; + otherwise, delegate to ``urllib2.urlopen``. + + :type resource_url: str + :param resource_url: A URL specifying where the resource should be + loaded from. The default protocol is "nltk:", which searches + for the file in the the NLTK data package. + """ + resource_url = normalize_resource_url(resource_url) + protocol, path_ = split_resource_url(resource_url) + + if protocol is None or protocol.lower() == 'nltk': + return find(path_, path + ['']).open() + elif protocol.lower() == 'file': + # urllib might not use mode='rb', so handle this one ourselves: + return find(path_, ['']).open() + else: + return urlopen(resource_url) + + +###################################################################### +# Lazy Resource Loader +###################################################################### + +# We shouldn't apply @python_2_unicode_compatible +# decorator to LazyLoader, this is resource.__class__ responsibility. + + +class LazyLoader(object): + @py3_data + def __init__(self, _path): + self._path = _path + + def __load(self): + resource = load(self._path) + # This is where the magic happens! Transform ourselves into + # the object by modifying our own __dict__ and __class__ to + # match that of `resource`. + self.__dict__ = resource.__dict__ + self.__class__ = resource.__class__ + + def __getattr__(self, attr): + self.__load() + # This looks circular, but its not, since __load() changes our + # __class__ to something new: + return getattr(self, attr) + + def __repr__(self): + self.__load() + # This looks circular, but its not, since __load() changes our + # __class__ to something new: + return repr(self) + + +###################################################################### +# Open-On-Demand ZipFile +###################################################################### + + +class OpenOnDemandZipFile(zipfile.ZipFile): + """ + A subclass of ``zipfile.ZipFile`` that closes its file pointer + whenever it is not using it; and re-opens it when it needs to read + data from the zipfile. This is useful for reducing the number of + open file handles when many zip files are being accessed at once. + ``OpenOnDemandZipFile`` must be constructed from a filename, not a + file-like object (to allow re-opening). ``OpenOnDemandZipFile`` is + read-only (i.e. ``write()`` and ``writestr()`` are disabled. + """ + + @py3_data + def __init__(self, filename): + if not isinstance(filename, string_types): + raise TypeError('ReopenableZipFile filename must be a string') + zipfile.ZipFile.__init__(self, filename) + assert self.filename == filename + self.close() + # After closing a ZipFile object, the _fileRefCnt needs to be cleared + # for Python2and3 compatible code. 
+ self._fileRefCnt = 0 + + def read(self, name): + assert self.fp is None + self.fp = open(self.filename, 'rb') + value = zipfile.ZipFile.read(self, name) + # Ensure that _fileRefCnt needs to be set for Python2and3 compatible code. + # Since we only opened one file here, we add 1. + self._fileRefCnt += 1 + self.close() + return value + + def write(self, *args, **kwargs): + """:raise NotImplementedError: OpenOnDemandZipfile is read-only""" + raise NotImplementedError('OpenOnDemandZipfile is read-only') + + def writestr(self, *args, **kwargs): + """:raise NotImplementedError: OpenOnDemandZipfile is read-only""" + raise NotImplementedError('OpenOnDemandZipfile is read-only') + + def __repr__(self): + return repr(str('OpenOnDemandZipFile(%r)') % self.filename) + + +###################################################################### +# { Seekable Unicode Stream Reader +###################################################################### + + +class SeekableUnicodeStreamReader(object): + """ + A stream reader that automatically encodes the source byte stream + into unicode (like ``codecs.StreamReader``); but still supports the + ``seek()`` and ``tell()`` operations correctly. This is in contrast + to ``codecs.StreamReader``, which provide *broken* ``seek()`` and + ``tell()`` methods. + + This class was motivated by ``StreamBackedCorpusView``, which + makes extensive use of ``seek()`` and ``tell()``, and needs to be + able to handle unicode-encoded files. + + Note: this class requires stateless decoders. To my knowledge, + this shouldn't cause a problem with any of python's builtin + unicode encodings. + """ + + DEBUG = True # : If true, then perform extra sanity checks. + + @py3_data + def __init__(self, stream, encoding, errors='strict'): + # Rewind the stream to its beginning. + stream.seek(0) + + self.stream = stream + """The underlying stream.""" + + self.encoding = encoding + """The name of the encoding that should be used to encode the + underlying stream.""" + + self.errors = errors + """The error mode that should be used when decoding data from + the underlying stream. Can be 'strict', 'ignore', or + 'replace'.""" + + self.decode = codecs.getdecoder(encoding) + """The function that is used to decode byte strings into + unicode strings.""" + + self.bytebuffer = b'' + """A buffer to use bytes that have been read but have not yet + been decoded. This is only used when the final bytes from + a read do not form a complete encoding for a character.""" + + self.linebuffer = None + """A buffer used by ``readline()`` to hold characters that have + been read, but have not yet been returned by ``read()`` or + ``readline()``. This buffer consists of a list of unicode + strings, where each string corresponds to a single line. + The final element of the list may or may not be a complete + line. Note that the existence of a linebuffer makes the + ``tell()`` operation more complex, because it must backtrack + to the beginning of the buffer to determine the correct + file position in the underlying byte stream.""" + + self._rewind_checkpoint = 0 + """The file position at which the most recent read on the + underlying stream began. This is used, together with + ``_rewind_numchars``, to backtrack to the beginning of + ``linebuffer`` (which is required by ``tell()``).""" + + self._rewind_numchars = None + """The number of characters that have been returned since the + read that started at ``_rewind_checkpoint``. 
This is used, + together with ``_rewind_checkpoint``, to backtrack to the + beginning of ``linebuffer`` (which is required by ``tell()``).""" + + self._bom = self._check_bom() + """The length of the byte order marker at the beginning of + the stream (or None for no byte order marker).""" + + # ///////////////////////////////////////////////////////////////// + # Read methods + # ///////////////////////////////////////////////////////////////// + + def read(self, size=None): + """ + Read up to ``size`` bytes, decode them using this reader's + encoding, and return the resulting unicode string. + + :param size: The maximum number of bytes to read. If not + specified, then read as many bytes as possible. + :type size: int + :rtype: unicode + """ + chars = self._read(size) + + # If linebuffer is not empty, then include it in the result + if self.linebuffer: + chars = ''.join(self.linebuffer) + chars + self.linebuffer = None + self._rewind_numchars = None + + return chars + + def discard_line(self): + if self.linebuffer and len(self.linebuffer) > 1: + line = self.linebuffer.pop(0) + self._rewind_numchars += len(line) + else: + self.stream.readline() + + def readline(self, size=None): + """ + Read a line of text, decode it using this reader's encoding, + and return the resulting unicode string. + + :param size: The maximum number of bytes to read. If no + newline is encountered before ``size`` bytes have been read, + then the returned value may not be a complete line of text. + :type size: int + """ + # If we have a non-empty linebuffer, then return the first + # line from it. (Note that the last element of linebuffer may + # not be a complete line; so let _read() deal with it.) + if self.linebuffer and len(self.linebuffer) > 1: + line = self.linebuffer.pop(0) + self._rewind_numchars += len(line) + return line + + readsize = size or 72 + chars = '' + + # If there's a remaining incomplete line in the buffer, add it. + if self.linebuffer: + chars += self.linebuffer.pop() + self.linebuffer = None + + while True: + startpos = self.stream.tell() - len(self.bytebuffer) + new_chars = self._read(readsize) + + # If we're at a '\r', then read one extra character, since + # it might be a '\n', to get the proper line ending. + if new_chars and new_chars.endswith('\r'): + new_chars += self._read(1) + + chars += new_chars + lines = chars.splitlines(True) + if len(lines) > 1: + line = lines[0] + self.linebuffer = lines[1:] + self._rewind_numchars = len(new_chars) - (len(chars) - len(line)) + self._rewind_checkpoint = startpos + break + elif len(lines) == 1: + line0withend = lines[0] + line0withoutend = lines[0].splitlines(False)[0] + if line0withend != line0withoutend: # complete line + line = line0withend + break + + if not new_chars or size is not None: + line = chars + break + + # Read successively larger blocks of text. + if readsize < 8000: + readsize *= 2 + + return line + + def readlines(self, sizehint=None, keepends=True): + """ + Read this file's contents, decode them using this reader's + encoding, and return it as a list of unicode lines. + + :rtype: list(unicode) + :param sizehint: Ignored. + :param keepends: If false, then strip newlines. 
+ """ + return self.read().splitlines(keepends) + + def next(self): + """Return the next decoded line from the underlying stream.""" + line = self.readline() + if line: + return line + else: + raise StopIteration + + def __next__(self): + return self.next() + + def __iter__(self): + """Return self""" + return self + + def __del__(self): + # let garbage collector deal with still opened streams + if not self.closed: + self.close() + + def xreadlines(self): + """Return self""" + return self + + # ///////////////////////////////////////////////////////////////// + # Pass-through methods & properties + # ///////////////////////////////////////////////////////////////// + + @property + def closed(self): + """True if the underlying stream is closed.""" + return self.stream.closed + + @property + def name(self): + """The name of the underlying stream.""" + return self.stream.name + + @property + def mode(self): + """The mode of the underlying stream.""" + return self.stream.mode + + def close(self): + """ + Close the underlying stream. + """ + self.stream.close() + + # ///////////////////////////////////////////////////////////////// + # Seek and tell + # ///////////////////////////////////////////////////////////////// + + def seek(self, offset, whence=0): + """ + Move the stream to a new file position. If the reader is + maintaining any buffers, then they will be cleared. + + :param offset: A byte count offset. + :param whence: If 0, then the offset is from the start of the file + (offset should be positive), if 1, then the offset is from the + current position (offset may be positive or negative); and if 2, + then the offset is from the end of the file (offset should + typically be negative). + """ + if whence == 1: + raise ValueError( + 'Relative seek is not supported for ' + 'SeekableUnicodeStreamReader -- consider ' + 'using char_seek_forward() instead.' + ) + self.stream.seek(offset, whence) + self.linebuffer = None + self.bytebuffer = b'' + self._rewind_numchars = None + self._rewind_checkpoint = self.stream.tell() + + def char_seek_forward(self, offset): + """ + Move the read pointer forward by ``offset`` characters. + """ + if offset < 0: + raise ValueError('Negative offsets are not supported') + # Clear all buffers. + self.seek(self.tell()) + # Perform the seek operation. + self._char_seek_forward(offset) + + def _char_seek_forward(self, offset, est_bytes=None): + """ + Move the file position forward by ``offset`` characters, + ignoring all buffers. + + :param est_bytes: A hint, giving an estimate of the number of + bytes that will be needed to move forward by ``offset`` chars. + Defaults to ``offset``. + """ + if est_bytes is None: + est_bytes = offset + bytes = b'' + + while True: + # Read in a block of bytes. + newbytes = self.stream.read(est_bytes - len(bytes)) + bytes += newbytes + + # Decode the bytes to characters. + chars, bytes_decoded = self._incr_decode(bytes) + + # If we got the right number of characters, then seek + # backwards over any truncated characters, and return. + if len(chars) == offset: + self.stream.seek(-len(bytes) + bytes_decoded, 1) + return + + # If we went too far, then we can back-up until we get it + # right, using the bytes we've already read. + if len(chars) > offset: + while len(chars) > offset: + # Assume at least one byte/char. + est_bytes += offset - len(chars) + chars, bytes_decoded = self._incr_decode(bytes[:est_bytes]) + self.stream.seek(-len(bytes) + bytes_decoded, 1) + return + + # Otherwise, we haven't read enough bytes yet; loop again. 
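+ # Grow the byte estimate by the number of characters still missing
+ # (assuming at least one byte per character) and read more on the next
+ # pass through the loop.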
+ est_bytes += offset - len(chars) + + def tell(self): + """ + Return the current file position on the underlying byte + stream. If this reader is maintaining any buffers, then the + returned file position will be the position of the beginning + of those buffers. + """ + # If nothing's buffered, then just return our current filepos: + if self.linebuffer is None: + return self.stream.tell() - len(self.bytebuffer) + + # Otherwise, we'll need to backtrack the filepos until we + # reach the beginning of the buffer. + + # Store our original file position, so we can return here. + orig_filepos = self.stream.tell() + + # Calculate an estimate of where we think the newline is. + bytes_read = (orig_filepos - len(self.bytebuffer)) - self._rewind_checkpoint + buf_size = sum(len(line) for line in self.linebuffer) + est_bytes = int( + (bytes_read * self._rewind_numchars / (self._rewind_numchars + buf_size)) + ) + + self.stream.seek(self._rewind_checkpoint) + self._char_seek_forward(self._rewind_numchars, est_bytes) + filepos = self.stream.tell() + + # Sanity check + if self.DEBUG: + self.stream.seek(filepos) + check1 = self._incr_decode(self.stream.read(50))[0] + check2 = ''.join(self.linebuffer) + assert check1.startswith(check2) or check2.startswith(check1) + + # Return to our original filepos (so we don't have to throw + # out our buffer.) + self.stream.seek(orig_filepos) + + # Return the calculated filepos + return filepos + + # ///////////////////////////////////////////////////////////////// + # Helper methods + # ///////////////////////////////////////////////////////////////// + + def _read(self, size=None): + """ + Read up to ``size`` bytes from the underlying stream, decode + them using this reader's encoding, and return the resulting + unicode string. ``linebuffer`` is not included in the result. + """ + if size == 0: + return '' + + # Skip past the byte order marker, if present. + if self._bom and self.stream.tell() == 0: + self.stream.read(self._bom) + + # Read the requested number of bytes. + if size is None: + new_bytes = self.stream.read() + else: + new_bytes = self.stream.read(size) + bytes = self.bytebuffer + new_bytes + + # Decode the bytes into unicode characters + chars, bytes_decoded = self._incr_decode(bytes) + + # If we got bytes but couldn't decode any, then read further. + if (size is not None) and (not chars) and (len(new_bytes) > 0): + while not chars: + new_bytes = self.stream.read(1) + if not new_bytes: + break # end of file. + bytes += new_bytes + chars, bytes_decoded = self._incr_decode(bytes) + + # Record any bytes we didn't consume. + self.bytebuffer = bytes[bytes_decoded:] + + # Return the result + return chars + + def _incr_decode(self, bytes): + """ + Decode the given byte string into a unicode string, using this + reader's encoding. If an exception is encountered that + appears to be caused by a truncation error, then just decode + the byte string without the bytes that cause the trunctaion + error. + + Return a tuple ``(chars, num_consumed)``, where ``chars`` is + the decoded unicode string, and ``num_consumed`` is the + number of bytes that were consumed. + """ + while True: + try: + return self.decode(bytes, 'strict') + except UnicodeDecodeError as exc: + # If the exception occurs at the end of the string, + # then assume that it's a truncation error. + if exc.end == len(bytes): + return self.decode(bytes[: exc.start], self.errors) + + # Otherwise, if we're being strict, then raise it. 
+ elif self.errors == 'strict': + raise + + # If we're not strict, then re-process it with our + # errors setting. This *may* raise an exception. + else: + return self.decode(bytes, self.errors) + + _BOM_TABLE = { + 'utf8': [(codecs.BOM_UTF8, None)], + 'utf16': [(codecs.BOM_UTF16_LE, 'utf16-le'), (codecs.BOM_UTF16_BE, 'utf16-be')], + 'utf16le': [(codecs.BOM_UTF16_LE, None)], + 'utf16be': [(codecs.BOM_UTF16_BE, None)], + 'utf32': [(codecs.BOM_UTF32_LE, 'utf32-le'), (codecs.BOM_UTF32_BE, 'utf32-be')], + 'utf32le': [(codecs.BOM_UTF32_LE, None)], + 'utf32be': [(codecs.BOM_UTF32_BE, None)], + } + + def _check_bom(self): + # Normalize our encoding name + enc = re.sub('[ -]', '', self.encoding.lower()) + + # Look up our encoding in the BOM table. + bom_info = self._BOM_TABLE.get(enc) + + if bom_info: + # Read a prefix, to check against the BOM(s) + bytes = self.stream.read(16) + self.stream.seek(0) + + # Check for each possible BOM. + for (bom, new_encoding) in bom_info: + if bytes.startswith(bom): + if new_encoding: + self.encoding = new_encoding + return len(bom) + + return None + + +__all__ = [ + 'path', + 'PathPointer', + 'FileSystemPathPointer', + 'BufferedGzipFile', + 'GzipFileSystemPathPointer', + 'GzipFileSystemPathPointer', + 'find', + 'retrieve', + 'FORMATS', + 'AUTO_FORMATS', + 'load', + 'show_cfg', + 'clear_cache', + 'LazyLoader', + 'OpenOnDemandZipFile', + 'GzipFileSystemPathPointer', + 'SeekableUnicodeStreamReader', +] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/decorators.py b/venv.bak/lib/python3.7/site-packages/nltk/decorators.py new file mode 100644 index 0000000..b61db66 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/decorators.py @@ -0,0 +1,236 @@ +""" +Decorator module by Michele Simionato +Copyright Michele Simionato, distributed under the terms of the BSD License (see below). +http://www.phyast.pitt.edu/~micheles/python/documentation.html + +Included in NLTK for its support of a nice memoization decorator. +""" +from __future__ import print_function + +__docformat__ = 'restructuredtext en' + +## The basic trick is to generate the source code for the decorated function +## with the right signature and to evaluate it. +## Uncomment the statement 'print >> sys.stderr, func_src' in _decorator +## to understand what is going on. + +__all__ = ["decorator", "new_wrapper", "getinfo"] + +import sys + +# Hack to keep NLTK's "tokenize" module from colliding with the "tokenize" in +# the Python standard library. 
+old_sys_path = sys.path[:] +sys.path = [p for p in sys.path if p and "nltk" not in p] +import inspect + +sys.path = old_sys_path + + +def getinfo(func): + """ + Returns an info dictionary containing: + - name (the name of the function : str) + - argnames (the names of the arguments : list) + - defaults (the values of the default arguments : tuple) + - signature (the signature : str) + - doc (the docstring : str) + - module (the module name : str) + - dict (the function __dict__ : str) + + >>> def f(self, x=1, y=2, *args, **kw): pass + + >>> info = getinfo(f) + + >>> info["name"] + 'f' + >>> info["argnames"] + ['self', 'x', 'y', 'args', 'kw'] + + >>> info["defaults"] + (1, 2) + + >>> info["signature"] + 'self, x, y, *args, **kw' + """ + assert inspect.ismethod(func) or inspect.isfunction(func) + if sys.version_info[0] >= 3: + argspec = inspect.getfullargspec(func) + else: + argspec = inspect.getargspec(func) + regargs, varargs, varkwargs, defaults = argspec[:4] + argnames = list(regargs) + if varargs: + argnames.append(varargs) + if varkwargs: + argnames.append(varkwargs) + signature = inspect.formatargspec( + regargs, varargs, varkwargs, defaults, formatvalue=lambda value: "" + )[1:-1] + + # pypy compatibility + if hasattr(func, '__closure__'): + _closure = func.__closure__ + _globals = func.__globals__ + else: + _closure = func.func_closure + _globals = func.func_globals + + return dict( + name=func.__name__, + argnames=argnames, + signature=signature, + defaults=func.__defaults__, + doc=func.__doc__, + module=func.__module__, + dict=func.__dict__, + globals=_globals, + closure=_closure, + ) + + +# akin to functools.update_wrapper +def update_wrapper(wrapper, model, infodict=None): + infodict = infodict or getinfo(model) + wrapper.__name__ = infodict['name'] + wrapper.__doc__ = infodict['doc'] + wrapper.__module__ = infodict['module'] + wrapper.__dict__.update(infodict['dict']) + wrapper.__defaults__ = infodict['defaults'] + wrapper.undecorated = model + return wrapper + + +def new_wrapper(wrapper, model): + """ + An improvement over functools.update_wrapper. The wrapper is a generic + callable object. It works by generating a copy of the wrapper with the + right signature and by updating the copy, not the original. + Moreovoer, 'model' can be a dictionary with keys 'name', 'doc', 'module', + 'dict', 'defaults'. + """ + if isinstance(model, dict): + infodict = model + else: # assume model is a function + infodict = getinfo(model) + assert ( + not '_wrapper_' in infodict["argnames"] + ), '"_wrapper_" is a reserved argument name!' + src = "lambda %(signature)s: _wrapper_(%(signature)s)" % infodict + funcopy = eval(src, dict(_wrapper_=wrapper)) + return update_wrapper(funcopy, model, infodict) + + +# helper used in decorator_factory +def __call__(self, func): + return new_wrapper(lambda *a, **k: self.call(func, *a, **k), func) + + +def decorator_factory(cls): + """ + Take a class with a ``.caller`` method and return a callable decorator + object. It works by adding a suitable __call__ method to the class; + it raises a TypeError if the class already has a nontrivial __call__ + method. 
+ """ + attrs = set(dir(cls)) + if '__call__' in attrs: + raise TypeError( + 'You cannot decorate a class with a nontrivial ' '__call__ method' + ) + if 'call' not in attrs: + raise TypeError('You cannot decorate a class without a ' '.call method') + cls.__call__ = __call__ + return cls + + +def decorator(caller): + """ + General purpose decorator factory: takes a caller function as + input and returns a decorator with the same attributes. + A caller function is any function like this:: + + def caller(func, *args, **kw): + # do something + return func(*args, **kw) + + Here is an example of usage: + + >>> @decorator + ... def chatty(f, *args, **kw): + ... print("Calling %r" % f.__name__) + ... return f(*args, **kw) + + >>> chatty.__name__ + 'chatty' + + >>> @chatty + ... def f(): pass + ... + >>> f() + Calling 'f' + + decorator can also take in input a class with a .caller method; in this + case it converts the class into a factory of callable decorator objects. + See the documentation for an example. + """ + if inspect.isclass(caller): + return decorator_factory(caller) + + def _decorator(func): # the real meat is here + infodict = getinfo(func) + argnames = infodict['argnames'] + assert not ( + '_call_' in argnames or '_func_' in argnames + ), 'You cannot use _call_ or _func_ as argument names!' + src = "lambda %(signature)s: _call_(_func_, %(signature)s)" % infodict + # import sys; print >> sys.stderr, src # for debugging purposes + dec_func = eval(src, dict(_func_=func, _call_=caller)) + return update_wrapper(dec_func, func, infodict) + + return update_wrapper(_decorator, caller) + + +def getattr_(obj, name, default_thunk): + "Similar to .setdefault in dictionaries." + try: + return getattr(obj, name) + except AttributeError: + default = default_thunk() + setattr(obj, name, default) + return default + + +@decorator +def memoize(func, *args): + dic = getattr_(func, "memoize_dic", dict) + # memoize_dic is created at the first call + if args in dic: + return dic[args] + else: + result = func(*args) + dic[args] = result + return result + + +########################## LEGALESE ############################### + +## Redistributions of source code must retain the above copyright +## notice, this list of conditions and the following disclaimer. +## Redistributions in bytecode form must reproduce the above copyright +## notice, this list of conditions and the following disclaimer in +## the documentation and/or other materials provided with the +## distribution. + +## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +## "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +## LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +## A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +## HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +## INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +## BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +## OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +## ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +## TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +## USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +## DAMAGE. 
diff --git a/venv.bak/lib/python3.7/site-packages/nltk/downloader.py b/venv.bak/lib/python3.7/site-packages/nltk/downloader.py new file mode 100644 index 0000000..e6831f9 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/downloader.py @@ -0,0 +1,2559 @@ +# Natural Language Toolkit: Corpus & Model Downloader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +The NLTK corpus and module downloader. This module defines several +interfaces which can be used to download corpora, models, and other +data packages that can be used with NLTK. + +Downloading Packages +==================== +If called with no arguments, ``download()`` will display an interactive +interface which can be used to download and install new packages. +If Tkinter is available, then a graphical interface will be shown, +otherwise a simple text interface will be provided. + +Individual packages can be downloaded by calling the ``download()`` +function with a single argument, giving the package identifier for the +package that should be downloaded: + + >>> download('treebank') # doctest: +SKIP + [nltk_data] Downloading package 'treebank'... + [nltk_data] Unzipping corpora/treebank.zip. + +NLTK also provides a number of \"package collections\", consisting of +a group of related packages. To download all packages in a +colleciton, simply call ``download()`` with the collection's +identifier: + + >>> download('all-corpora') # doctest: +SKIP + [nltk_data] Downloading package 'abc'... + [nltk_data] Unzipping corpora/abc.zip. + [nltk_data] Downloading package 'alpino'... + [nltk_data] Unzipping corpora/alpino.zip. + ... + [nltk_data] Downloading package 'words'... + [nltk_data] Unzipping corpora/words.zip. + +Download Directory +================== +By default, packages are installed in either a system-wide directory +(if Python has sufficient access to write to it); or in the current +user's home directory. However, the ``download_dir`` argument may be +used to specify a different installation target, if desired. + +See ``Downloader.default_download_dir()`` for more a detailed +description of how the default download directory is chosen. + +NLTK Download Server +==================== +Before downloading any packages, the corpus and module downloader +contacts the NLTK download server, to retrieve an index file +describing the available packages. By default, this index file is +loaded from ``https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml``. +If necessary, it is possible to create a new ``Downloader`` object, +specifying a different URL for the package index file. + +Usage:: + + python nltk/downloader.py [-d DATADIR] [-q] [-f] [-k] PACKAGE_IDS + +or:: + + python -m nltk.downloader [-d DATADIR] [-q] [-f] [-k] PACKAGE_IDS +""" +# ---------------------------------------------------------------------- +from __future__ import print_function, division, unicode_literals + +""" + + 0 1 2 3 +[label][----][label][----] +[column ][column ] + +Notes +===== +Handling data files.. Some questions: + +* Should the data files be kept zipped or unzipped? I say zipped. + +* Should the data files be kept in svn at all? Advantages: history; + automatic version numbers; 'svn up' could be used rather than the + downloader to update the corpora. Disadvantages: they're big, + which makes working from svn a bit of a pain. And we're planning + to potentially make them much bigger. 
I don't think we want + people to have to download 400MB corpora just to use nltk from svn. + +* Compromise: keep the data files in trunk/data rather than in + trunk/nltk. That way you can check them out in svn if you want + to; but you don't need to, and you can use the downloader instead. + +* Also: keep models in mind. When we change the code, we'd + potentially like the models to get updated. This could require a + little thought. + +* So.. let's assume we have a trunk/data directory, containing a bunch + of packages. The packages should be kept as zip files, because we + really shouldn't be editing them much (well -- we may edit models + more, but they tend to be binary-ish files anyway, where diffs + aren't that helpful). So we'll have trunk/data, with a bunch of + files like abc.zip and treebank.zip and propbank.zip. For each + package we could also have eg treebank.xml and propbank.xml, + describing the contents of the package (name, copyright, license, + etc). Collections would also have .xml files. Finally, we would + pull all these together to form a single index.xml file. Some + directory structure wouldn't hurt. So how about:: + + /trunk/data/ ....................... root of data svn + index.xml ........................ main index file + src/ ............................. python scripts + packages/ ........................ dir for packages + corpora/ ....................... zip & xml files for corpora + grammars/ ...................... zip & xml files for grammars + taggers/ ....................... zip & xml files for taggers + tokenizers/ .................... zip & xml files for tokenizers + etc. + collections/ ..................... xml files for collections + + Where the root (/trunk/data) would contain a makefile; and src/ + would contain a script to update the info.xml file. It could also + contain scripts to rebuild some of the various model files. The + script that builds index.xml should probably check that each zip + file expands entirely into a single subdir, whose name matches the + package's uid. + +Changes I need to make: + - in index: change "size" to "filesize" or "compressed-size" + - in index: add "unzipped-size" + - when checking status: check both compressed & uncompressed size. + uncompressed size is important to make sure we detect a problem + if something got partially unzipped. define new status values + to differentiate stale vs corrupt vs corruptly-uncompressed?? + (we shouldn't need to re-download the file if the zip file is ok + but it didn't get uncompressed fully.) + - add other fields to the index: author, license, copyright, contact, + etc. + +the current grammars/ package would become a single new package (eg +toy-grammars or book-grammars). + +xml file should have: + - authorship info + - license info + - copyright info + - contact info + - info about what type of data/annotation it contains? + - recommended corpus reader? + +collections can contain other collections. they can also contain +multiple package types (corpora & models). Have a single 'basics' +package that includes everything we talk about in the book? + +n.b.: there will have to be a fallback to the punkt tokenizer, in case +they didn't download that model. + +default: unzip or not? 
+ +""" +import time, os, zipfile, sys, textwrap, threading, itertools, shutil, functools +import subprocess +from hashlib import md5 +from xml.etree import ElementTree + +try: + TKINTER = True + from six.moves.tkinter import ( + Tk, + Frame, + Label, + Entry, + Button, + Canvas, + Menu, + IntVar, + TclError, + ) + from six.moves.tkinter_messagebox import showerror + from nltk.draw.table import Table + from nltk.draw.util import ShowText +except ImportError: + TKINTER = False + TclError = ValueError + +from six import string_types, text_type +from six.moves import input +from six.moves.urllib.request import urlopen +from six.moves.urllib.error import HTTPError, URLError + +import nltk +from nltk.compat import python_2_unicode_compatible + +# urllib2 = nltk.internals.import_from_stdlib('urllib2') + + +###################################################################### +# Directory entry objects (from the data server's index file) +###################################################################### + + +@python_2_unicode_compatible +class Package(object): + """ + A directory entry for a downloadable package. These entries are + extracted from the XML index file that is downloaded by + ``Downloader``. Each package consists of a single file; but if + that file is a zip file, then it can be automatically decompressed + when the package is installed. + """ + + def __init__( + self, + id, + url, + name=None, + subdir='', + size=None, + unzipped_size=None, + checksum=None, + svn_revision=None, + copyright='Unknown', + contact='Unknown', + license='Unknown', + author='Unknown', + unzip=True, + **kw + ): + self.id = id + """A unique identifier for this package.""" + + self.name = name or id + """A string name for this package.""" + + self.subdir = subdir + """The subdirectory where this package should be installed. + E.g., ``'corpora'`` or ``'taggers'``.""" + + self.url = url + """A URL that can be used to download this package's file.""" + + self.size = int(size) + """The filesize (in bytes) of the package file.""" + + self.unzipped_size = int(unzipped_size) + """The total filesize of the files contained in the package's + zipfile.""" + + self.checksum = checksum + """The MD-5 checksum of the package file.""" + + self.svn_revision = svn_revision + """A subversion revision number for this package.""" + + self.copyright = copyright + """Copyright holder for this package.""" + + self.contact = contact + """Name & email of the person who should be contacted with + questions about this package.""" + + self.license = license + """License information for this package.""" + + self.author = author + """Author of this package.""" + + ext = os.path.splitext(url.split('/')[-1])[1] + self.filename = os.path.join(subdir, id + ext) + """The filename that should be used for this package's file. It + is formed by joining ``self.subdir`` with ``self.id``, and + using the same extension as ``url``.""" + + self.unzip = bool(int(unzip)) # '0' or '1' + """A flag indicating whether this corpus should be unzipped by + default.""" + + # Include any other attributes provided by the XML file. 
+ self.__dict__.update(kw) + + @staticmethod + def fromxml(xml): + if isinstance(xml, string_types): + xml = ElementTree.parse(xml) + for key in xml.attrib: + xml.attrib[key] = text_type(xml.attrib[key]) + return Package(**xml.attrib) + + def __lt__(self, other): + return self.id < other.id + + def __repr__(self): + return '' % self.id + + +@python_2_unicode_compatible +class Collection(object): + """ + A directory entry for a collection of downloadable packages. + These entries are extracted from the XML index file that is + downloaded by ``Downloader``. + """ + + def __init__(self, id, children, name=None, **kw): + self.id = id + """A unique identifier for this collection.""" + + self.name = name or id + """A string name for this collection.""" + + self.children = children + """A list of the ``Collections`` or ``Packages`` directly + contained by this collection.""" + + self.packages = None + """A list of ``Packages`` contained by this collection or any + collections it recursively contains.""" + + # Include any other attributes provided by the XML file. + self.__dict__.update(kw) + + @staticmethod + def fromxml(xml): + if isinstance(xml, string_types): + xml = ElementTree.parse(xml) + for key in xml.attrib: + xml.attrib[key] = text_type(xml.attrib[key]) + children = [child.get('ref') for child in xml.findall('item')] + return Collection(children=children, **xml.attrib) + + def __lt__(self, other): + return self.id < other.id + + def __repr__(self): + return '' % self.id + + +###################################################################### +# Message Passing Objects +###################################################################### + + +class DownloaderMessage(object): + """A status message object, used by ``incr_download`` to + communicate its progress.""" + + +class StartCollectionMessage(DownloaderMessage): + """Data server has started working on a collection of packages.""" + + def __init__(self, collection): + self.collection = collection + + +class FinishCollectionMessage(DownloaderMessage): + """Data server has finished working on a collection of packages.""" + + def __init__(self, collection): + self.collection = collection + + +class StartPackageMessage(DownloaderMessage): + """Data server has started working on a package.""" + + def __init__(self, package): + self.package = package + + +class FinishPackageMessage(DownloaderMessage): + """Data server has finished working on a package.""" + + def __init__(self, package): + self.package = package + + +class StartDownloadMessage(DownloaderMessage): + """Data server has started downloading a package.""" + + def __init__(self, package): + self.package = package + + +class FinishDownloadMessage(DownloaderMessage): + """Data server has finished downloading a package.""" + + def __init__(self, package): + self.package = package + + +class StartUnzipMessage(DownloaderMessage): + """Data server has started unzipping a package.""" + + def __init__(self, package): + self.package = package + + +class FinishUnzipMessage(DownloaderMessage): + """Data server has finished unzipping a package.""" + + def __init__(self, package): + self.package = package + + +class UpToDateMessage(DownloaderMessage): + """The package download file is already up-to-date""" + + def __init__(self, package): + self.package = package + + +class StaleMessage(DownloaderMessage): + """The package download file is out-of-date or corrupt""" + + def __init__(self, package): + self.package = package + + +class ErrorMessage(DownloaderMessage): + """Data server 
encountered an error""" + + def __init__(self, package, message): + self.package = package + if isinstance(message, Exception): + self.message = str(message) + else: + self.message = message + + +class ProgressMessage(DownloaderMessage): + """Indicates how much progress the data server has made""" + + def __init__(self, progress): + self.progress = progress + + +class SelectDownloadDirMessage(DownloaderMessage): + """Indicates what download directory the data server is using""" + + def __init__(self, download_dir): + self.download_dir = download_dir + + +###################################################################### +# NLTK Data Server +###################################################################### + + +class Downloader(object): + """ + A class used to access the NLTK data server, which can be used to + download corpora and other data packages. + """ + + # ///////////////////////////////////////////////////////////////// + # Configuration + # ///////////////////////////////////////////////////////////////// + + INDEX_TIMEOUT = 60 * 60 # 1 hour + """The amount of time after which the cached copy of the data + server index will be considered 'stale,' and will be + re-downloaded.""" + + DEFAULT_URL = 'https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml' + """The default URL for the NLTK data server's index. An + alternative URL can be specified when creating a new + ``Downloader`` object.""" + + # ///////////////////////////////////////////////////////////////// + # Status Constants + # ///////////////////////////////////////////////////////////////// + + INSTALLED = 'installed' + """A status string indicating that a package or collection is + installed and up-to-date.""" + NOT_INSTALLED = 'not installed' + """A status string indicating that a package or collection is + not installed.""" + STALE = 'out of date' + """A status string indicating that a package or collection is + corrupt or out-of-date.""" + PARTIAL = 'partial' + """A status string indicating that a collection is partially + installed (i.e., only some of its packages are installed.)""" + + # ///////////////////////////////////////////////////////////////// + # Cosntructor + # ///////////////////////////////////////////////////////////////// + + def __init__(self, server_index_url=None, download_dir=None): + self._url = server_index_url or self.DEFAULT_URL + """The URL for the data server's index file.""" + + self._collections = {} + """Dictionary from collection identifier to ``Collection``""" + + self._packages = {} + """Dictionary from package identifier to ``Package``""" + + self._download_dir = download_dir + """The default directory to which packages will be downloaded.""" + + self._index = None + """The XML index file downloaded from the data server""" + + self._index_timestamp = None + """Time at which ``self._index`` was downloaded. If it is more + than ``INDEX_TIMEOUT`` seconds old, it will be re-downloaded.""" + + self._status_cache = {} + """Dictionary from package/collection identifier to status + string (``INSTALLED``, ``NOT_INSTALLED``, ``STALE``, or + ``PARTIAL``). Cache is used for packages only, not + collections.""" + + self._errors = None + """Flag for telling if all packages got successfully downloaded or not.""" + + # decide where we're going to save things to. 
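+ # (default_download_dir() prefers an existing, writable entry of
+ # nltk.data.path and otherwise falls back to a per-user location such as
+ # ~/nltk_data, or %APPDATA%/nltk_data on Windows.)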
+ if self._download_dir is None: + self._download_dir = self.default_download_dir() + + # ///////////////////////////////////////////////////////////////// + # Information + # ///////////////////////////////////////////////////////////////// + + def list( + self, + download_dir=None, + show_packages=True, + show_collections=True, + header=True, + more_prompt=False, + skip_installed=False, + ): + lines = 0 # for more_prompt + if download_dir is None: + download_dir = self._download_dir + print('Using default data directory (%s)' % download_dir) + if header: + print('=' * (26 + len(self._url))) + print(' Data server index for <%s>' % self._url) + print('=' * (26 + len(self._url))) + lines += 3 # for more_prompt + stale = partial = False + + categories = [] + if show_packages: + categories.append('packages') + if show_collections: + categories.append('collections') + for category in categories: + print('%s:' % category.capitalize()) + lines += 1 # for more_prompt + for info in sorted(getattr(self, category)(), key=str): + status = self.status(info, download_dir) + if status == self.INSTALLED and skip_installed: + continue + if status == self.STALE: + stale = True + if status == self.PARTIAL: + partial = True + prefix = { + self.INSTALLED: '*', + self.STALE: '-', + self.PARTIAL: 'P', + self.NOT_INSTALLED: ' ', + }[status] + name = textwrap.fill( + '-' * 27 + (info.name or info.id), 75, subsequent_indent=27 * ' ' + )[27:] + print(' [%s] %s %s' % (prefix, info.id.ljust(20, '.'), name)) + lines += len(name.split('\n')) # for more_prompt + if more_prompt and lines > 20: + user_input = input("Hit Enter to continue: ") + if user_input.lower() in ('x', 'q'): + return + lines = 0 + print() + msg = '([*] marks installed packages' + if stale: + msg += '; [-] marks out-of-date or corrupt packages' + if partial: + msg += '; [P] marks partially installed collections' + print(textwrap.fill(msg + ')', subsequent_indent=' ', width=76)) + + def packages(self): + self._update_index() + return self._packages.values() + + def corpora(self): + self._update_index() + return [pkg for (id, pkg) in self._packages.items() if pkg.subdir == 'corpora'] + + def models(self): + self._update_index() + return [pkg for (id, pkg) in self._packages.items() if pkg.subdir != 'corpora'] + + def collections(self): + self._update_index() + return self._collections.values() + + # ///////////////////////////////////////////////////////////////// + # Downloading + # ///////////////////////////////////////////////////////////////// + + def _info_or_id(self, info_or_id): + if isinstance(info_or_id, string_types): + return self.info(info_or_id) + else: + return info_or_id + + # [xx] When during downloading is it 'safe' to abort? Only unsafe + # time is *during* an unzip -- we don't want to leave a + # partially-unzipped corpus in place because we wouldn't notice + # it. But if we had the exact total size of the unzipped corpus, + # then that would be fine. Then we could abort anytime we want! + # So this is really what we should do. That way the threaded + # downloader in the gui can just kill the download thread anytime + # it wants. + + def incr_download(self, info_or_id, download_dir=None, force=False): + # If they didn't specify a download_dir, then use the default one. + if download_dir is None: + download_dir = self._download_dir + yield SelectDownloadDirMessage(download_dir) + + # If they gave us a list of ids, then download each one. 
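+ # (Collections are handled below by recursing into their children, and a
+ # single package is delegated to _download_package(); every branch yields
+ # DownloaderMessage objects rather than printing, so the shell front-end
+ # and the GUI can both render progress from the same message stream.)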
+ if isinstance(info_or_id, (list, tuple)): + for msg in self._download_list(info_or_id, download_dir, force): + yield msg + return + + # Look up the requested collection or package. + try: + info = self._info_or_id(info_or_id) + except (IOError, ValueError) as e: + yield ErrorMessage(None, 'Error loading %s: %s' % (info_or_id, e)) + return + + # Handle collections. + if isinstance(info, Collection): + yield StartCollectionMessage(info) + for msg in self.incr_download(info.children, download_dir, force): + yield msg + yield FinishCollectionMessage(info) + + # Handle Packages (delegate to a helper function). + else: + for msg in self._download_package(info, download_dir, force): + yield msg + + def _num_packages(self, item): + if isinstance(item, Package): + return 1 + else: + return len(item.packages) + + def _download_list(self, items, download_dir, force): + # Look up the requested items. + for i in range(len(items)): + try: + items[i] = self._info_or_id(items[i]) + except (IOError, ValueError) as e: + yield ErrorMessage(items[i], e) + return + + # Download each item, re-scaling their progress. + num_packages = sum(self._num_packages(item) for item in items) + progress = 0 + for i, item in enumerate(items): + if isinstance(item, Package): + delta = 1.0 / num_packages + else: + delta = len(item.packages) / num_packages + for msg in self.incr_download(item, download_dir, force): + if isinstance(msg, ProgressMessage): + yield ProgressMessage(progress + msg.progress * delta) + else: + yield msg + + progress += 100 * delta + + def _download_package(self, info, download_dir, force): + yield StartPackageMessage(info) + yield ProgressMessage(0) + + # Do we already have the current version? + status = self.status(info, download_dir) + if not force and status == self.INSTALLED: + yield UpToDateMessage(info) + yield ProgressMessage(100) + yield FinishPackageMessage(info) + return + + # Remove the package from our status cache + self._status_cache.pop(info.id, None) + + # Check for (and remove) any old/stale version. + filepath = os.path.join(download_dir, info.filename) + if os.path.exists(filepath): + if status == self.STALE: + yield StaleMessage(info) + os.remove(filepath) + + # Ensure the download_dir exists + if not os.path.exists(download_dir): + os.mkdir(download_dir) + if not os.path.exists(os.path.join(download_dir, info.subdir)): + os.mkdir(os.path.join(download_dir, info.subdir)) + + # Download the file. This will raise an IOError if the url + # is not found. + yield StartDownloadMessage(info) + yield ProgressMessage(5) + try: + infile = urlopen(info.url) + with open(filepath, 'wb') as outfile: + # print info.size + num_blocks = max(1, info.size / (1024 * 16)) + for block in itertools.count(): + s = infile.read(1024 * 16) # 16k blocks. + outfile.write(s) + if not s: + break + if block % 2 == 0: # how often? + yield ProgressMessage(min(80, 5 + 75 * (block / num_blocks))) + infile.close() + except IOError as e: + yield ErrorMessage( + info, + 'Error downloading %r from <%s>:' '\n %s' % (info.id, info.url, e), + ) + return + yield FinishDownloadMessage(info) + yield ProgressMessage(80) + + # If it's a zipfile, uncompress it. + if info.filename.endswith('.zip'): + zipdir = os.path.join(download_dir, info.subdir) + # Unzip if we're unzipping by default; *or* if it's already + # been unzipped (presumably a previous version). 
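+ # (Re-unzipping in that second case keeps the extracted directory in sync
+ # with the zip file that was just downloaded.)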
+ if info.unzip or os.path.exists(os.path.join(zipdir, info.id)): + yield StartUnzipMessage(info) + for msg in _unzip_iter(filepath, zipdir, verbose=False): + # Somewhat of a hack, but we need a proper package reference + msg.package = info + yield msg + yield FinishUnzipMessage(info) + + yield FinishPackageMessage(info) + + def download( + self, + info_or_id=None, + download_dir=None, + quiet=False, + force=False, + prefix='[nltk_data] ', + halt_on_error=True, + raise_on_error=False, + print_error_to=sys.stderr, + ): + + print_to = functools.partial(print, file=print_error_to) + # If no info or id is given, then use the interactive shell. + if info_or_id is None: + # [xx] hmm -- changing self._download_dir here seems like + # the wrong thing to do. Maybe the _interactive_download + # function should make a new copy of self to use? + if download_dir is not None: + self._download_dir = download_dir + self._interactive_download() + return True + + else: + # Define a helper function for displaying output: + def show(s, prefix2=''): + print_to( + textwrap.fill( + s, + initial_indent=prefix + prefix2, + subsequent_indent=prefix + prefix2 + ' ' * 4, + ) + ) + + for msg in self.incr_download(info_or_id, download_dir, force): + # Error messages + if isinstance(msg, ErrorMessage): + show(msg.message) + if raise_on_error: + raise ValueError(msg.message) + if halt_on_error: + return False + self._errors = True + if not quiet: + print_to("Error installing package. Retry? [n/y/e]") + choice = input().strip() + if choice in ['y', 'Y']: + if not self.download( + msg.package.id, + download_dir, + quiet, + force, + prefix, + halt_on_error, + raise_on_error, + ): + return False + elif choice in ['e', 'E']: + return False + + # All other messages + if not quiet: + # Collection downloading messages: + if isinstance(msg, StartCollectionMessage): + show('Downloading collection %r' % msg.collection.id) + prefix += ' | ' + print_to(prefix) + elif isinstance(msg, FinishCollectionMessage): + print_to(prefix) + prefix = prefix[:-4] + if self._errors: + show( + 'Downloaded collection %r with errors' + % msg.collection.id + ) + else: + show('Done downloading collection %s' % msg.collection.id) + + # Package downloading messages: + elif isinstance(msg, StartPackageMessage): + show( + 'Downloading package %s to %s...' + % (msg.package.id, download_dir) + ) + elif isinstance(msg, UpToDateMessage): + show('Package %s is already up-to-date!' % msg.package.id, ' ') + # elif isinstance(msg, StaleMessage): + # show('Package %s is out-of-date or corrupt' % + # msg.package.id, ' ') + elif isinstance(msg, StartUnzipMessage): + show('Unzipping %s.' % msg.package.filename, ' ') + + # Data directory message: + elif isinstance(msg, SelectDownloadDirMessage): + download_dir = msg.download_dir + return True + + def is_stale(self, info_or_id, download_dir=None): + return self.status(info_or_id, download_dir) == self.STALE + + def is_installed(self, info_or_id, download_dir=None): + return self.status(info_or_id, download_dir) == self.INSTALLED + + def clear_status_cache(self, id=None): + if id is None: + self._status_cache.clear() + else: + self._status_cache.pop(id, None) + + def status(self, info_or_id, download_dir=None): + """ + Return a constant describing the status of the given package + or collection. Status can be one of ``INSTALLED``, + ``NOT_INSTALLED``, ``STALE``, or ``PARTIAL``. 
+ """ + if download_dir is None: + download_dir = self._download_dir + info = self._info_or_id(info_or_id) + + # Handle collections: + if isinstance(info, Collection): + pkg_status = [self.status(pkg.id) for pkg in info.packages] + if self.STALE in pkg_status: + return self.STALE + elif self.PARTIAL in pkg_status: + return self.PARTIAL + elif self.INSTALLED in pkg_status and self.NOT_INSTALLED in pkg_status: + return self.PARTIAL + elif self.NOT_INSTALLED in pkg_status: + return self.NOT_INSTALLED + else: + return self.INSTALLED + + # Handle packages: + else: + filepath = os.path.join(download_dir, info.filename) + if download_dir != self._download_dir: + return self._pkg_status(info, filepath) + else: + if info.id not in self._status_cache: + self._status_cache[info.id] = self._pkg_status(info, filepath) + return self._status_cache[info.id] + + def _pkg_status(self, info, filepath): + if not os.path.exists(filepath): + return self.NOT_INSTALLED + + # Check if the file has the correct size. + try: + filestat = os.stat(filepath) + except OSError: + return self.NOT_INSTALLED + if filestat.st_size != int(info.size): + return self.STALE + + # Check if the file's checksum matches + if md5_hexdigest(filepath) != info.checksum: + return self.STALE + + # If it's a zipfile, and it's been at least partially + # unzipped, then check if it's been fully unzipped. + if filepath.endswith('.zip'): + unzipdir = filepath[:-4] + if not os.path.exists(unzipdir): + return self.INSTALLED # but not unzipped -- ok! + if not os.path.isdir(unzipdir): + return self.STALE + + unzipped_size = sum( + os.stat(os.path.join(d, f)).st_size + for d, _, files in os.walk(unzipdir) + for f in files + ) + if unzipped_size != info.unzipped_size: + return self.STALE + + # Otherwise, everything looks good. + return self.INSTALLED + + def update(self, quiet=False, prefix='[nltk_data] '): + """ + Re-download any packages whose status is STALE. + """ + self.clear_status_cache() + for pkg in self.packages(): + if self.status(pkg) == self.STALE: + self.download(pkg, quiet=quiet, prefix=prefix) + + # ///////////////////////////////////////////////////////////////// + # Index + # ///////////////////////////////////////////////////////////////// + + def _update_index(self, url=None): + """A helper function that ensures that self._index is + up-to-date. If the index is older than self.INDEX_TIMEOUT, + then download it again.""" + # Check if the index is aleady up-to-date. If so, do nothing. + if not ( + self._index is None + or url is not None + or time.time() - self._index_timestamp > self.INDEX_TIMEOUT + ): + return + + # If a URL was specified, then update our URL. + self._url = url or self._url + + # Download the index file. + self._index = nltk.internals.ElementWrapper( + ElementTree.parse(urlopen(self._url)).getroot() + ) + self._index_timestamp = time.time() + + # Build a dictionary of packages. + packages = [Package.fromxml(p) for p in self._index.findall('packages/package')] + self._packages = dict((p.id, p) for p in packages) + + # Build a dictionary of collections. + collections = [ + Collection.fromxml(c) for c in self._index.findall('collections/collection') + ] + self._collections = dict((c.id, c) for c in collections) + + # Replace identifiers with actual children in collection.children. 
+ for collection in self._collections.values(): + for i, child_id in enumerate(collection.children): + if child_id in self._packages: + collection.children[i] = self._packages[child_id] + elif child_id in self._collections: + collection.children[i] = self._collections[child_id] + else: + print( + 'removing collection member with no package: {}'.format( + child_id + ) + ) + del collection.children[i] + + # Fill in collection.packages for each collection. + for collection in self._collections.values(): + packages = {} + queue = [collection] + for child in queue: + if isinstance(child, Collection): + queue.extend(child.children) + elif isinstance(child, Package): + packages[child.id] = child + else: + pass + collection.packages = packages.values() + + # Flush the status cache + self._status_cache.clear() + + def index(self): + """ + Return the XML index describing the packages available from + the data server. If necessary, this index will be downloaded + from the data server. + """ + self._update_index() + return self._index + + def info(self, id): + """Return the ``Package`` or ``Collection`` record for the + given item.""" + self._update_index() + if id in self._packages: + return self._packages[id] + if id in self._collections: + return self._collections[id] + raise ValueError('Package %r not found in index' % id) + + def xmlinfo(self, id): + """Return the XML info record for the given item""" + self._update_index() + for package in self._index.findall('packages/package'): + if package.get('id') == id: + return package + for collection in self._index.findall('collections/collection'): + if collection.get('id') == id: + return collection + raise ValueError('Package %r not found in index' % id) + + # ///////////////////////////////////////////////////////////////// + # URL & Data Directory + # ///////////////////////////////////////////////////////////////// + + def _get_url(self): + """The URL for the data server's index file.""" + return self._url + + def _set_url(self, url): + """ + Set a new URL for the data server. If we're unable to contact + the given url, then the original url is kept. + """ + original_url = self._url + try: + self._update_index(url) + except: + self._url = original_url + raise + + url = property(_get_url, _set_url) + + def default_download_dir(self): + """ + Return the directory to which packages will be downloaded by + default. This value can be overridden using the constructor, + or on a case-by-case basis using the ``download_dir`` argument when + calling ``download()``. + + On Windows, the default download directory is + ``PYTHONHOME/lib/nltk``, where *PYTHONHOME* is the + directory containing Python, e.g. ``C:\\Python25``. + + On all other platforms, the default directory is the first of + the following which exists or which can be created with write + permission: ``/usr/share/nltk_data``, ``/usr/local/share/nltk_data``, + ``/usr/lib/nltk_data``, ``/usr/local/lib/nltk_data``, ``~/nltk_data``. + """ + # Check if we are on GAE where we cannot write into filesystem. + if 'APPENGINE_RUNTIME' in os.environ: + return + + # Check if we have sufficient permissions to install in a + # variety of system-wide locations. + for nltkdir in nltk.data.path: + if os.path.exists(nltkdir) and nltk.internals.is_writable(nltkdir): + return nltkdir + + # On Windows, use %APPDATA% + if sys.platform == 'win32' and 'APPDATA' in os.environ: + homedir = os.environ['APPDATA'] + + # Otherwise, install in the user's home directory. 
+ else: + homedir = os.path.expanduser('~/') + if homedir == '~/': + raise ValueError("Could not find a default download directory") + + # append "nltk_data" to the home directory + return os.path.join(homedir, 'nltk_data') + + def _get_download_dir(self): + """ + The default directory to which packages will be downloaded. + This defaults to the value returned by ``default_download_dir()``. + To override this default on a case-by-case basis, use the + ``download_dir`` argument when calling ``download()``. + """ + return self._download_dir + + def _set_download_dir(self, download_dir): + self._download_dir = download_dir + # Clear the status cache. + self._status_cache.clear() + + download_dir = property(_get_download_dir, _set_download_dir) + + # ///////////////////////////////////////////////////////////////// + # Interactive Shell + # ///////////////////////////////////////////////////////////////// + + def _interactive_download(self): + # Try the GUI first; if that doesn't work, try the simple + # interactive shell. + if TKINTER: + try: + DownloaderGUI(self).mainloop() + except TclError: + DownloaderShell(self).run() + else: + DownloaderShell(self).run() + + +class DownloaderShell(object): + def __init__(self, dataserver): + self._ds = dataserver + + def _simple_interactive_menu(self, *options): + print('-' * 75) + spc = (68 - sum(len(o) for o in options)) // (len(options) - 1) * ' ' + print(' ' + spc.join(options)) + # w = 76/len(options) + # fmt = ' ' + ('%-'+str(w)+'s')*(len(options)-1) + '%s' + # print fmt % options + print('-' * 75) + + def run(self): + print('NLTK Downloader') + while True: + self._simple_interactive_menu( + 'd) Download', + 'l) List', + ' u) Update', + 'c) Config', + 'h) Help', + 'q) Quit', + ) + user_input = input('Downloader> ').strip() + if not user_input: + print() + continue + command = user_input.lower().split()[0] + args = user_input.split()[1:] + try: + if command == 'l': + print() + self._ds.list(self._ds.download_dir, header=False, more_prompt=True) + elif command == 'h': + self._simple_interactive_help() + elif command == 'c': + self._simple_interactive_config() + elif command in ('q', 'x'): + return + elif command == 'd': + self._simple_interactive_download(args) + elif command == 'u': + self._simple_interactive_update() + else: + print('Command %r unrecognized' % user_input) + except HTTPError as e: + print('Error reading from server: %s' % e) + except URLError as e: + print('Error connecting to server: %s' % e.reason) + # try checking if user_input is a package name, & + # downloading it? 
+ print() + + def _simple_interactive_download(self, args): + if args: + for arg in args: + try: + self._ds.download(arg, prefix=' ') + except (IOError, ValueError) as e: + print(e) + else: + while True: + print() + print('Download which package (l=list; x=cancel)?') + user_input = input(' Identifier> ') + if user_input.lower() == 'l': + self._ds.list( + self._ds.download_dir, + header=False, + more_prompt=True, + skip_installed=True, + ) + continue + elif user_input.lower() in ('x', 'q', ''): + return + elif user_input: + for id in user_input.split(): + try: + self._ds.download(id, prefix=' ') + except (IOError, ValueError) as e: + print(e) + break + + def _simple_interactive_update(self): + while True: + stale_packages = [] + stale = partial = False + for info in sorted(getattr(self._ds, 'packages')(), key=str): + if self._ds.status(info) == self._ds.STALE: + stale_packages.append((info.id, info.name)) + + print() + if stale_packages: + print('Will update following packages (o=ok; x=cancel)') + for pid, pname in stale_packages: + name = textwrap.fill( + '-' * 27 + (pname), 75, subsequent_indent=27 * ' ' + )[27:] + print(' [ ] %s %s' % (pid.ljust(20, '.'), name)) + print() + + user_input = input(' Identifier> ') + if user_input.lower() == 'o': + for pid, pname in stale_packages: + try: + self._ds.download(pid, prefix=' ') + except (IOError, ValueError) as e: + print(e) + break + elif user_input.lower() in ('x', 'q', ''): + return + else: + print('Nothing to update.') + return + + def _simple_interactive_help(self): + print() + print('Commands:') + print( + ' d) Download a package or collection u) Update out of date packages' + ) + print(' l) List packages & collections h) Help') + print(' c) View & Modify Configuration q) Quit') + + def _show_config(self): + print() + print('Data Server:') + print(' - URL: <%s>' % self._ds.url) + print((' - %d Package Collections Available' % len(self._ds.collections()))) + print((' - %d Individual Packages Available' % len(self._ds.packages()))) + print() + print('Local Machine:') + print(' - Data directory: %s' % self._ds.download_dir) + + def _simple_interactive_config(self): + self._show_config() + while True: + print() + self._simple_interactive_menu( + 's) Show Config', 'u) Set Server URL', 'd) Set Data Dir', 'm) Main Menu' + ) + user_input = input('Config> ').strip().lower() + if user_input == 's': + self._show_config() + elif user_input == 'd': + new_dl_dir = input(' New Directory> ').strip() + if new_dl_dir in ('', 'x', 'q', 'X', 'Q'): + print(' Cancelled!') + elif os.path.isdir(new_dl_dir): + self._ds.download_dir = new_dl_dir + else: + print(('Directory %r not found! Create it first.' % new_dl_dir)) + elif user_input == 'u': + new_url = input(' New URL> ').strip() + if new_url in ('', 'x', 'q', 'X', 'Q'): + print(' Cancelled!') + else: + if not new_url.startswith(('http://', 'https://')): + new_url = 'http://' + new_url + try: + self._ds.url = new_url + except Exception as e: + print('Error reading <%r>:\n %s' % (new_url, e)) + elif user_input == 'm': + break + + +class DownloaderGUI(object): + """ + Graphical interface for downloading packages from the NLTK data + server. 
+ """ + + # ///////////////////////////////////////////////////////////////// + # Column Configuration + # ///////////////////////////////////////////////////////////////// + + COLUMNS = [ + '', + 'Identifier', + 'Name', + 'Size', + 'Status', + 'Unzipped Size', + 'Copyright', + 'Contact', + 'License', + 'Author', + 'Subdir', + 'Checksum', + ] + """A list of the names of columns. This controls the order in + which the columns will appear. If this is edited, then + ``_package_to_columns()`` may need to be edited to match.""" + + COLUMN_WEIGHTS = {'': 0, 'Name': 5, 'Size': 0, 'Status': 0} + """A dictionary specifying how columns should be resized when the + table is resized. Columns with weight 0 will not be resized at + all; and columns with high weight will be resized more. + Default weight (for columns not explicitly listed) is 1.""" + + COLUMN_WIDTHS = { + '': 1, + 'Identifier': 20, + 'Name': 45, + 'Size': 10, + 'Unzipped Size': 10, + 'Status': 12, + } + """A dictionary specifying how wide each column should be, in + characters. The default width (for columns not explicitly + listed) is specified by ``DEFAULT_COLUMN_WIDTH``.""" + + DEFAULT_COLUMN_WIDTH = 30 + """The default width for columns that are not explicitly listed + in ``COLUMN_WIDTHS``.""" + + INITIAL_COLUMNS = ['', 'Identifier', 'Name', 'Size', 'Status'] + """The set of columns that should be displayed by default.""" + + # Perform a few import-time sanity checks to make sure that the + # column configuration variables are defined consistently: + for c in COLUMN_WEIGHTS: + assert c in COLUMNS + for c in COLUMN_WIDTHS: + assert c in COLUMNS + for c in INITIAL_COLUMNS: + assert c in COLUMNS + + # ///////////////////////////////////////////////////////////////// + # Color Configuration + # ///////////////////////////////////////////////////////////////// + + _BACKDROP_COLOR = ('#000', '#ccc') + + _ROW_COLOR = { + Downloader.INSTALLED: ('#afa', '#080'), + Downloader.PARTIAL: ('#ffa', '#880'), + Downloader.STALE: ('#faa', '#800'), + Downloader.NOT_INSTALLED: ('#fff', '#888'), + } + + _MARK_COLOR = ('#000', '#ccc') + + # _FRONT_TAB_COLOR = ('#ccf', '#008') + # _BACK_TAB_COLOR = ('#88a', '#448') + _FRONT_TAB_COLOR = ('#fff', '#45c') + _BACK_TAB_COLOR = ('#aaa', '#67a') + + _PROGRESS_COLOR = ('#f00', '#aaa') + + _TAB_FONT = 'helvetica -16 bold' + + # ///////////////////////////////////////////////////////////////// + # Constructor + # ///////////////////////////////////////////////////////////////// + + def __init__(self, dataserver, use_threads=True): + self._ds = dataserver + self._use_threads = use_threads + + # For the threaded downloader: + self._download_lock = threading.Lock() + self._download_msg_queue = [] + self._download_abort_queue = [] + self._downloading = False + + # For tkinter after callbacks: + self._afterid = {} + + # A message log. + self._log_messages = [] + self._log_indent = 0 + self._log('NLTK Downloader Started!') + + # Create the main window. + top = self.top = Tk() + top.geometry('+50+50') + top.title('NLTK Downloader') + top.configure(background=self._BACKDROP_COLOR[1]) + + # Set up some bindings now, in case anything goes wrong. + top.bind('', self.destroy) + top.bind('', self.destroy) + self._destroyed = False + + self._column_vars = {} + + # Initialize the GUI. 
+ self._init_widgets() + self._init_menu() + try: + self._fill_table() + except HTTPError as e: + showerror('Error reading from server', e) + except URLError as e: + showerror('Error connecting to server', e.reason) + + self._show_info() + self._select_columns() + self._table.select(0) + + # Make sure we get notified when we're destroyed, so we can + # cancel any download in progress. + self._table.bind('', self._destroy) + + def _log(self, msg): + self._log_messages.append( + '%s %s%s' % (time.ctime(), ' | ' * self._log_indent, msg) + ) + + # ///////////////////////////////////////////////////////////////// + # Internals + # ///////////////////////////////////////////////////////////////// + + def _init_widgets(self): + # Create the top-level frame structures + f1 = Frame(self.top, relief='raised', border=2, padx=8, pady=0) + f1.pack(sid='top', expand=True, fill='both') + f1.grid_rowconfigure(2, weight=1) + f1.grid_columnconfigure(0, weight=1) + Frame(f1, height=8).grid(column=0, row=0) # spacer + tabframe = Frame(f1) + tabframe.grid(column=0, row=1, sticky='news') + tableframe = Frame(f1) + tableframe.grid(column=0, row=2, sticky='news') + buttonframe = Frame(f1) + buttonframe.grid(column=0, row=3, sticky='news') + Frame(f1, height=8).grid(column=0, row=4) # spacer + infoframe = Frame(f1) + infoframe.grid(column=0, row=5, sticky='news') + Frame(f1, height=8).grid(column=0, row=6) # spacer + progressframe = Frame( + self.top, padx=3, pady=3, background=self._BACKDROP_COLOR[1] + ) + progressframe.pack(side='bottom', fill='x') + self.top['border'] = 0 + self.top['highlightthickness'] = 0 + + # Create the tabs + self._tab_names = ['Collections', 'Corpora', 'Models', 'All Packages'] + self._tabs = {} + for i, tab in enumerate(self._tab_names): + label = Label(tabframe, text=tab, font=self._TAB_FONT) + label.pack(side='left', padx=((i + 1) % 2) * 10) + label.bind('', self._select_tab) + self._tabs[tab.lower()] = label + + # Create the table. + column_weights = [self.COLUMN_WEIGHTS.get(column, 1) for column in self.COLUMNS] + self._table = Table( + tableframe, + self.COLUMNS, + column_weights=column_weights, + highlightthickness=0, + listbox_height=16, + reprfunc=self._table_reprfunc, + ) + self._table.columnconfig(0, foreground=self._MARK_COLOR[0]) # marked + for i, column in enumerate(self.COLUMNS): + width = self.COLUMN_WIDTHS.get(column, self.DEFAULT_COLUMN_WIDTH) + self._table.columnconfig(i, width=width) + self._table.pack(expand=True, fill='both') + self._table.focus() + self._table.bind_to_listboxes('', self._download) + self._table.bind('', self._table_mark) + self._table.bind('', self._download) + self._table.bind('', self._prev_tab) + self._table.bind('', self._next_tab) + self._table.bind('', self._mark_all) + + # Create entry boxes for URL & download_dir + infoframe.grid_columnconfigure(1, weight=1) + + info = [ + ('url', 'Server Index:', self._set_url), + ('download_dir', 'Download Directory:', self._set_download_dir), + ] + self._info = {} + for (i, (key, label, callback)) in enumerate(info): + Label(infoframe, text=label).grid(column=0, row=i, sticky='e') + entry = Entry( + infoframe, font='courier', relief='groove', disabledforeground='black' + ) + self._info[key] = (entry, callback) + entry.bind('', self._info_save) + entry.bind('', lambda e, key=key: self._info_edit(key)) + entry.grid(column=1, row=i, sticky='ew') + + # If the user edits url or download_dir, and then clicks outside + # the entry box, then save their results. 
+ self.top.bind('', self._info_save) + + # Create Download & Refresh buttons. + self._download_button = Button( + buttonframe, text='Download', command=self._download, width=8 + ) + self._download_button.pack(side='left') + self._refresh_button = Button( + buttonframe, text='Refresh', command=self._refresh, width=8 + ) + self._refresh_button.pack(side='right') + + # Create Progress bar + self._progresslabel = Label( + progressframe, + text='', + foreground=self._BACKDROP_COLOR[0], + background=self._BACKDROP_COLOR[1], + ) + self._progressbar = Canvas( + progressframe, + width=200, + height=16, + background=self._PROGRESS_COLOR[1], + relief='sunken', + border=1, + ) + self._init_progressbar() + self._progressbar.pack(side='right') + self._progresslabel.pack(side='left') + + def _init_menu(self): + menubar = Menu(self.top) + + filemenu = Menu(menubar, tearoff=0) + filemenu.add_command( + label='Download', underline=0, command=self._download, accelerator='Return' + ) + filemenu.add_separator() + filemenu.add_command( + label='Change Server Index', + underline=7, + command=lambda: self._info_edit('url'), + ) + filemenu.add_command( + label='Change Download Directory', + underline=0, + command=lambda: self._info_edit('download_dir'), + ) + filemenu.add_separator() + filemenu.add_command(label='Show Log', underline=5, command=self._show_log) + filemenu.add_separator() + filemenu.add_command( + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-x' + ) + menubar.add_cascade(label='File', underline=0, menu=filemenu) + + # Create a menu to control which columns of the table are + # shown. n.b.: we never hide the first two columns (mark and + # identifier). + viewmenu = Menu(menubar, tearoff=0) + for column in self._table.column_names[2:]: + var = IntVar(self.top) + assert column not in self._column_vars + self._column_vars[column] = var + if column in self.INITIAL_COLUMNS: + var.set(1) + viewmenu.add_checkbutton( + label=column, underline=0, variable=var, command=self._select_columns + ) + menubar.add_cascade(label='View', underline=0, menu=viewmenu) + + # Create a sort menu + # [xx] this should be selectbuttons; and it should include + # reversed sorts as options. 
+ sortmenu = Menu(menubar, tearoff=0) + for column in self._table.column_names[1:]: + sortmenu.add_command( + label='Sort by %s' % column, + command=(lambda c=column: self._table.sort_by(c, 'ascending')), + ) + sortmenu.add_separator() + # sortmenu.add_command(label='Descending Sort:') + for column in self._table.column_names[1:]: + sortmenu.add_command( + label='Reverse sort by %s' % column, + command=(lambda c=column: self._table.sort_by(c, 'descending')), + ) + menubar.add_cascade(label='Sort', underline=0, menu=sortmenu) + + helpmenu = Menu(menubar, tearoff=0) + helpmenu.add_command(label='About', underline=0, command=self.about) + helpmenu.add_command( + label='Instructions', underline=0, command=self.help, accelerator='F1' + ) + menubar.add_cascade(label='Help', underline=0, menu=helpmenu) + self.top.bind('', self.help) + + self.top.config(menu=menubar) + + def _select_columns(self): + for (column, var) in self._column_vars.items(): + if var.get(): + self._table.show_column(column) + else: + self._table.hide_column(column) + + def _refresh(self): + self._ds.clear_status_cache() + try: + self._fill_table() + except HTTPError as e: + showerror('Error reading from server', e) + except URLError as e: + showerror('Error connecting to server', e.reason) + self._table.select(0) + + def _info_edit(self, info_key): + self._info_save() # just in case. + (entry, callback) = self._info[info_key] + entry['state'] = 'normal' + entry['relief'] = 'sunken' + entry.focus() + + def _info_save(self, e=None): + focus = self._table + for entry, callback in self._info.values(): + if entry['state'] == 'disabled': + continue + if e is not None and e.widget is entry and e.keysym != 'Return': + focus = entry + else: + entry['state'] = 'disabled' + entry['relief'] = 'groove' + callback(entry.get()) + focus.focus() + + def _table_reprfunc(self, row, col, val): + if self._table.column_names[col].endswith('Size'): + if isinstance(val, string_types): + return ' %s' % val + elif val < 1024 ** 2: + return ' %.1f KB' % (val / 1024.0 ** 1) + elif val < 1024 ** 3: + return ' %.1f MB' % (val / 1024.0 ** 2) + else: + return ' %.1f GB' % (val / 1024.0 ** 3) + + if col in (0, ''): + return str(val) + else: + return ' %s' % val + + def _set_url(self, url): + if url == self._ds.url: + return + try: + self._ds.url = url + self._fill_table() + except IOError as e: + showerror('Error Setting Server Index', str(e)) + self._show_info() + + def _set_download_dir(self, download_dir): + if self._ds.download_dir == download_dir: + return + # check if the dir exists, and if not, ask if we should create it? 
+ + # Clear our status cache, & re-check what's installed + self._ds.download_dir = download_dir + try: + self._fill_table() + except HTTPError as e: + showerror('Error reading from server', e) + except URLError as e: + showerror('Error connecting to server', e.reason) + self._show_info() + + def _show_info(self): + print('showing info', self._ds.url) + for entry, cb in self._info.values(): + entry['state'] = 'normal' + entry.delete(0, 'end') + self._info['url'][0].insert(0, self._ds.url) + self._info['download_dir'][0].insert(0, self._ds.download_dir) + for entry, cb in self._info.values(): + entry['state'] = 'disabled' + + def _prev_tab(self, *e): + for i, tab in enumerate(self._tab_names): + if tab.lower() == self._tab and i > 0: + self._tab = self._tab_names[i - 1].lower() + try: + return self._fill_table() + except HTTPError as e: + showerror('Error reading from server', e) + except URLError as e: + showerror('Error connecting to server', e.reason) + + def _next_tab(self, *e): + for i, tab in enumerate(self._tab_names): + if tab.lower() == self._tab and i < (len(self._tabs) - 1): + self._tab = self._tab_names[i + 1].lower() + try: + return self._fill_table() + except HTTPError as e: + showerror('Error reading from server', e) + except URLError as e: + showerror('Error connecting to server', e.reason) + + def _select_tab(self, event): + self._tab = event.widget['text'].lower() + try: + self._fill_table() + except HTTPError as e: + showerror('Error reading from server', e) + except URLError as e: + showerror('Error connecting to server', e.reason) + + _tab = 'collections' + # _tab = 'corpora' + _rows = None + + def _fill_table(self): + selected_row = self._table.selected_row() + self._table.clear() + if self._tab == 'all packages': + items = self._ds.packages() + elif self._tab == 'corpora': + items = self._ds.corpora() + elif self._tab == 'models': + items = self._ds.models() + elif self._tab == 'collections': + items = self._ds.collections() + else: + assert 0, 'bad tab value %r' % self._tab + rows = [self._package_to_columns(item) for item in items] + self._table.extend(rows) + + # Highlight the active tab. + for tab, label in self._tabs.items(): + if tab == self._tab: + label.configure( + foreground=self._FRONT_TAB_COLOR[0], + background=self._FRONT_TAB_COLOR[1], + ) + else: + label.configure( + foreground=self._BACK_TAB_COLOR[0], + background=self._BACK_TAB_COLOR[1], + ) + + self._table.sort_by('Identifier', order='ascending') + self._color_table() + self._table.select(selected_row) + + # This is a hack, because the scrollbar isn't updating its + # position right -- I'm not sure what the underlying cause is + # though. (This is on OS X w/ python 2.5) The length of + # delay that's necessary seems to depend on how fast the + # comptuer is. :-/ + self.top.after(150, self._table._scrollbar.set, *self._table._mlb.yview()) + self.top.after(300, self._table._scrollbar.set, *self._table._mlb.yview()) + + def _update_table_status(self): + for row_num in range(len(self._table)): + status = self._ds.status(self._table[row_num, 'Identifier']) + self._table[row_num, 'Status'] = status + self._color_table() + + def _download(self, *e): + # If we're using threads, then delegate to the threaded + # downloader instead. 
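+ # (The non-threaded path below drives incr_download() through repeated
+ # self.top.after() callbacks in _download_cb, so the Tk event loop stays
+ # responsive between downloaded blocks.)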
+ if self._use_threads: + return self._download_threaded(*e) + + marked = [ + self._table[row, 'Identifier'] + for row in range(len(self._table)) + if self._table[row, 0] != '' + ] + selection = self._table.selected_row() + if not marked and selection is not None: + marked = [self._table[selection, 'Identifier']] + + download_iter = self._ds.incr_download(marked, self._ds.download_dir) + self._log_indent = 0 + self._download_cb(download_iter, marked) + + _DL_DELAY = 10 + + def _download_cb(self, download_iter, ids): + try: + msg = next(download_iter) + except StopIteration: + # self._fill_table(sort=False) + self._update_table_status() + afterid = self.top.after(10, self._show_progress, 0) + self._afterid['_download_cb'] = afterid + return + + def show(s): + self._progresslabel['text'] = s + self._log(s) + + if isinstance(msg, ProgressMessage): + self._show_progress(msg.progress) + elif isinstance(msg, ErrorMessage): + show(msg.message) + if msg.package is not None: + self._select(msg.package.id) + self._show_progress(None) + return # halt progress. + elif isinstance(msg, StartCollectionMessage): + show('Downloading collection %s' % msg.collection.id) + self._log_indent += 1 + elif isinstance(msg, StartPackageMessage): + show('Downloading package %s' % msg.package.id) + elif isinstance(msg, UpToDateMessage): + show('Package %s is up-to-date!' % msg.package.id) + # elif isinstance(msg, StaleMessage): + # show('Package %s is out-of-date or corrupt' % msg.package.id) + elif isinstance(msg, FinishDownloadMessage): + show('Finished downloading %r.' % msg.package.id) + elif isinstance(msg, StartUnzipMessage): + show('Unzipping %s' % msg.package.filename) + elif isinstance(msg, FinishCollectionMessage): + self._log_indent -= 1 + show('Finished downloading collection %r.' % msg.collection.id) + self._clear_mark(msg.collection.id) + elif isinstance(msg, FinishPackageMessage): + self._clear_mark(msg.package.id) + afterid = self.top.after(self._DL_DELAY, self._download_cb, download_iter, ids) + self._afterid['_download_cb'] = afterid + + def _select(self, id): + for row in range(len(self._table)): + if self._table[row, 'Identifier'] == id: + self._table.select(row) + return + + def _color_table(self): + # Color rows according to status. + for row in range(len(self._table)): + bg, sbg = self._ROW_COLOR[self._table[row, 'Status']] + fg, sfg = ('black', 'white') + self._table.rowconfig( + row, + foreground=fg, + selectforeground=sfg, + background=bg, + selectbackground=sbg, + ) + # Color the marked column + self._table.itemconfigure( + row, 0, foreground=self._MARK_COLOR[0], background=self._MARK_COLOR[1] + ) + + def _clear_mark(self, id): + for row in range(len(self._table)): + if self._table[row, 'Identifier'] == id: + self._table[row, 0] = '' + + def _mark_all(self, *e): + for row in range(len(self._table)): + self._table[row, 0] = 'X' + + def _table_mark(self, *e): + selection = self._table.selected_row() + if selection >= 0: + if self._table[selection][0] != '': + self._table[selection, 0] = '' + else: + self._table[selection, 0] = 'X' + self._table.select(delta=1) + + def _show_log(self): + text = '\n'.join(self._log_messages) + ShowText(self.top, 'NLTK Downloader Log', text) + + def _package_to_columns(self, pkg): + """ + Given a package, return a list of values describing that + package, one for each column in ``self.COLUMNS``. 
+ """ + row = [] + for column_index, column_name in enumerate(self.COLUMNS): + if column_index == 0: # Mark: + row.append('') + elif column_name == 'Identifier': + row.append(pkg.id) + elif column_name == 'Status': + row.append(self._ds.status(pkg)) + else: + attr = column_name.lower().replace(' ', '_') + row.append(getattr(pkg, attr, 'n/a')) + return row + + # ///////////////////////////////////////////////////////////////// + # External Interface + # ///////////////////////////////////////////////////////////////// + + def destroy(self, *e): + if self._destroyed: + return + self.top.destroy() + self._destroyed = True + + def _destroy(self, *e): + if self.top is not None: + for afterid in self._afterid.values(): + self.top.after_cancel(afterid) + + # Abort any download in progress. + if self._downloading and self._use_threads: + self._abort_download() + + # Make sure the garbage collector destroys these now; + # otherwise, they may get destroyed when we're not in the main + # thread, which would make Tkinter unhappy. + self._column_vars.clear() + + def mainloop(self, *args, **kwargs): + self.top.mainloop(*args, **kwargs) + + # ///////////////////////////////////////////////////////////////// + # HELP + # ///////////////////////////////////////////////////////////////// + + HELP = textwrap.dedent( + """\ + This tool can be used to download a variety of corpora and models + that can be used with NLTK. Each corpus or model is distributed + in a single zip file, known as a \"package file.\" You can + download packages individually, or you can download pre-defined + collections of packages. + + When you download a package, it will be saved to the \"download + directory.\" A default download directory is chosen when you run + + the downloader; but you may also select a different download + directory. On Windows, the default download directory is + + + \"package.\" + + The NLTK downloader can be used to download a variety of corpora, + models, and other data packages. + + Keyboard shortcuts:: + [return]\t Download + [up]\t Select previous package + [down]\t Select next package + [left]\t Select previous tab + [right]\t Select next tab + """ + ) + + def help(self, *e): + # The default font's not very legible; try using 'fixed' instead. 
+ try: + ShowText( + self.top, + 'Help: NLTK Dowloader', + self.HELP.strip(), + width=75, + font='fixed', + ) + except: + ShowText(self.top, 'Help: NLTK Downloader', self.HELP.strip(), width=75) + + def about(self, *e): + ABOUT = "NLTK Downloader\n" + "Written by Edward Loper" + TITLE = 'About: NLTK Downloader' + try: + from six.moves.tkinter_messagebox import Message + + Message(message=ABOUT, title=TITLE).show() + except ImportError: + ShowText(self.top, TITLE, ABOUT) + + # ///////////////////////////////////////////////////////////////// + # Progress Bar + # ///////////////////////////////////////////////////////////////// + + _gradient_width = 5 + + def _init_progressbar(self): + c = self._progressbar + width, height = int(c['width']), int(c['height']) + for i in range(0, (int(c['width']) * 2) // self._gradient_width): + c.create_line( + i * self._gradient_width + 20, + -20, + i * self._gradient_width - height - 20, + height + 20, + width=self._gradient_width, + fill='#%02x0000' % (80 + abs(i % 6 - 3) * 12), + ) + c.addtag_all('gradient') + c.itemconfig('gradient', state='hidden') + + # This is used to display progress + c.addtag_withtag( + 'redbox', c.create_rectangle(0, 0, 0, 0, fill=self._PROGRESS_COLOR[0]) + ) + + def _show_progress(self, percent): + c = self._progressbar + if percent is None: + c.coords('redbox', 0, 0, 0, 0) + c.itemconfig('gradient', state='hidden') + else: + width, height = int(c['width']), int(c['height']) + x = percent * int(width) // 100 + 1 + c.coords('redbox', 0, 0, x, height + 1) + + def _progress_alive(self): + c = self._progressbar + if not self._downloading: + c.itemconfig('gradient', state='hidden') + else: + c.itemconfig('gradient', state='normal') + x1, y1, x2, y2 = c.bbox('gradient') + if x1 <= -100: + c.move('gradient', (self._gradient_width * 6) - 4, 0) + else: + c.move('gradient', -4, 0) + afterid = self.top.after(200, self._progress_alive) + self._afterid['_progress_alive'] = afterid + + # ///////////////////////////////////////////////////////////////// + # Threaded downloader + # ///////////////////////////////////////////////////////////////// + + def _download_threaded(self, *e): + # If the user tries to start a new download while we're already + # downloading something, then abort the current download instead. + if self._downloading: + self._abort_download() + return + + # Change the 'download' button to an 'abort' button. + self._download_button['text'] = 'Cancel' + + marked = [ + self._table[row, 'Identifier'] + for row in range(len(self._table)) + if self._table[row, 0] != '' + ] + selection = self._table.selected_row() + if not marked and selection is not None: + marked = [self._table[selection, 'Identifier']] + + # Create a new data server object for the download operation, + # just in case the user modifies our data server during the + # download (e.g., clicking 'refresh' or editing the index url). + ds = Downloader(self._ds.url, self._ds.download_dir) + + # Start downloading in a separate thread. + assert self._download_msg_queue == [] + assert self._download_abort_queue == [] + self._DownloadThread( + ds, + marked, + self._download_lock, + self._download_msg_queue, + self._download_abort_queue, + ).start() + + # Monitor the download message queue & display its progress. + self._log_indent = 0 + self._downloading = True + self._monitor_message_queue() + + # Display an indication that we're still alive and well by + # cycling the progress bar. 
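+ # The hand-off below is a simple locked-queue protocol (described here
+ # as a sketch of what the surrounding code does, nothing extra): the
+ # worker thread appends message objects (ProgressMessage, ErrorMessage,
+ # etc.), and finally the string 'finished' (or 'aborted' if an abort was
+ # requested), to self._download_msg_queue while holding
+ # self._download_lock; the GUI side never blocks on the thread, it just
+ # polls the queue from a Tk after() timer.  Requesting an abort is only:
+ #
+ #     self._download_lock.acquire()
+ #     self._download_abort_queue.append('abort')
+ #     self._download_lock.release()
+ #
+ # which is exactly what _abort_download() below does.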
+ self._progress_alive() + + def _abort_download(self): + if self._downloading: + self._download_lock.acquire() + self._download_abort_queue.append('abort') + self._download_lock.release() + + class _DownloadThread(threading.Thread): + def __init__(self, data_server, items, lock, message_queue, abort): + self.data_server = data_server + self.items = items + self.lock = lock + self.message_queue = message_queue + self.abort = abort + threading.Thread.__init__(self) + + def run(self): + for msg in self.data_server.incr_download(self.items): + self.lock.acquire() + self.message_queue.append(msg) + # Check if we've been told to kill ourselves: + if self.abort: + self.message_queue.append('aborted') + self.lock.release() + return + self.lock.release() + self.lock.acquire() + self.message_queue.append('finished') + self.lock.release() + + _MONITOR_QUEUE_DELAY = 100 + + def _monitor_message_queue(self): + def show(s): + self._progresslabel['text'] = s + self._log(s) + + # Try to acquire the lock; if it's busy, then just try again later. + if not self._download_lock.acquire(): + return + for msg in self._download_msg_queue: + + # Done downloading? + if msg == 'finished' or msg == 'aborted': + # self._fill_table(sort=False) + self._update_table_status() + self._downloading = False + self._download_button['text'] = 'Download' + del self._download_msg_queue[:] + del self._download_abort_queue[:] + self._download_lock.release() + if msg == 'aborted': + show('Download aborted!') + self._show_progress(None) + else: + afterid = self.top.after(100, self._show_progress, None) + self._afterid['_monitor_message_queue'] = afterid + return + + # All other messages + elif isinstance(msg, ProgressMessage): + self._show_progress(msg.progress) + elif isinstance(msg, ErrorMessage): + show(msg.message) + if msg.package is not None: + self._select(msg.package.id) + self._show_progress(None) + self._downloading = False + return # halt progress. + elif isinstance(msg, StartCollectionMessage): + show('Downloading collection %r' % msg.collection.id) + self._log_indent += 1 + elif isinstance(msg, StartPackageMessage): + self._ds.clear_status_cache(msg.package.id) + show('Downloading package %r' % msg.package.id) + elif isinstance(msg, UpToDateMessage): + show('Package %s is up-to-date!' % msg.package.id) + # elif isinstance(msg, StaleMessage): + # show('Package %s is out-of-date or corrupt; updating it' % + # msg.package.id) + elif isinstance(msg, FinishDownloadMessage): + show('Finished downloading %r.' % msg.package.id) + elif isinstance(msg, StartUnzipMessage): + show('Unzipping %s' % msg.package.filename) + elif isinstance(msg, FinishUnzipMessage): + show('Finished installing %s' % msg.package.id) + elif isinstance(msg, FinishCollectionMessage): + self._log_indent -= 1 + show('Finished downloading collection %r.' % msg.collection.id) + self._clear_mark(msg.collection.id) + elif isinstance(msg, FinishPackageMessage): + self._update_table_status() + self._clear_mark(msg.package.id) + + # Let the user know when we're aborting a download (but + # waiting for a good point to abort it, so we don't end up + # with a partially unzipped package or anything like that). + if self._download_abort_queue: + self._progresslabel['text'] = 'Aborting download...' + + # Clear the message queue and then release the lock + del self._download_msg_queue[:] + self._download_lock.release() + + # Check the queue again after MONITOR_QUEUE_DELAY msec. 
+ afterid = self.top.after(self._MONITOR_QUEUE_DELAY, self._monitor_message_queue) + self._afterid['_monitor_message_queue'] = afterid + + +###################################################################### +# Helper Functions +###################################################################### +# [xx] It may make sense to move these to nltk.internals. + + +def md5_hexdigest(file): + """ + Calculate and return the MD5 checksum for a given file. + ``file`` may either be a filename or an open stream. + """ + if isinstance(file, string_types): + with open(file, 'rb') as infile: + return _md5_hexdigest(infile) + return _md5_hexdigest(file) + + +def _md5_hexdigest(fp): + md5_digest = md5() + while True: + block = fp.read(1024 * 16) # 16k blocks + if not block: + break + md5_digest.update(block) + return md5_digest.hexdigest() + + +# change this to periodically yield progress messages? +# [xx] get rid of topdir parameter -- we should be checking +# this when we build the index, anyway. +def unzip(filename, root, verbose=True): + """ + Extract the contents of the zip file ``filename`` into the + directory ``root``. + """ + for message in _unzip_iter(filename, root, verbose): + if isinstance(message, ErrorMessage): + raise Exception(message) + + +def _unzip_iter(filename, root, verbose=True): + if verbose: + sys.stdout.write('Unzipping %s' % os.path.split(filename)[1]) + sys.stdout.flush() + + try: + zf = zipfile.ZipFile(filename) + except zipfile.error as e: + yield ErrorMessage(filename, 'Error with downloaded zip file') + return + except Exception as e: + yield ErrorMessage(filename, e) + return + + zf.extractall(root) + + if verbose: + print() + + +###################################################################### +# Index Builder +###################################################################### +# This may move to a different file sometime. + + +def build_index(root, base_url): + """ + Create a new data.xml index file, by combining the xml description + files for various packages and collections. ``root`` should be the + path to a directory containing the package xml and zip files; and + the collection xml files. The ``root`` directory is expected to + have the following subdirectories:: + + root/ + packages/ .................. subdirectory for packages + corpora/ ................. zip & xml files for corpora + grammars/ ................ zip & xml files for grammars + taggers/ ................. zip & xml files for taggers + tokenizers/ .............. zip & xml files for tokenizers + etc. + collections/ ............... xml files for collections + + For each package, there should be two files: ``package.zip`` + (where *package* is the package name) + which contains the package itself as a compressed zip file; and + ``package.xml``, which is an xml description of the package. The + zipfile ``package.zip`` should expand to a single subdirectory + named ``package/``. The base filename ``package`` must match + the identifier given in the package's xml file. + + For each collection, there should be a single file ``collection.zip`` + describing the collection, where *collection* is the name of the collection. + + All identifiers (for both packages and collections) must be unique. + """ + # Find all packages. 
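+ # A hedged usage sketch for build_index() (the path and URL below are
+ # invented for illustration):
+ #
+ #     >>> from xml.etree import ElementTree
+ #     >>> index = build_index('/path/to/nltk_data_repo',
+ #     ...                     'https://example.org/nltk_data')
+ #     >>> ElementTree.ElementTree(index).write('index.xml')
+ #
+ # The return value is the root 'nltk_data' element, i.e. the index that
+ # a download server publishes as its data.xml file.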
+ packages = [] + for pkg_xml, zf, subdir in _find_packages(os.path.join(root, 'packages')): + zipstat = os.stat(zf.filename) + url = '%s/%s/%s' % (base_url, subdir, os.path.split(zf.filename)[1]) + unzipped_size = sum(zf_info.file_size for zf_info in zf.infolist()) + + # Fill in several fields of the package xml with calculated values. + pkg_xml.set('unzipped_size', '%s' % unzipped_size) + pkg_xml.set('size', '%s' % zipstat.st_size) + pkg_xml.set('checksum', '%s' % md5_hexdigest(zf.filename)) + pkg_xml.set('subdir', subdir) + # pkg_xml.set('svn_revision', _svn_revision(zf.filename)) + if not pkg_xml.get('url'): + pkg_xml.set('url', url) + + # Record the package. + packages.append(pkg_xml) + + # Find all collections + collections = list(_find_collections(os.path.join(root, 'collections'))) + + # Check that all UIDs are unique + uids = set() + for item in packages + collections: + if item.get('id') in uids: + raise ValueError('Duplicate UID: %s' % item.get('id')) + uids.add(item.get('id')) + + # Put it all together + top_elt = ElementTree.Element('nltk_data') + top_elt.append(ElementTree.Element('packages')) + for package in packages: + top_elt[0].append(package) + top_elt.append(ElementTree.Element('collections')) + for collection in collections: + top_elt[1].append(collection) + + _indent_xml(top_elt) + return top_elt + + +def _indent_xml(xml, prefix=''): + """ + Helper for ``build_index()``: Given an XML ``ElementTree``, modify it + (and its descendents) ``text`` and ``tail`` attributes to generate + an indented tree, where each nested element is indented by 2 + spaces with respect to its parent. + """ + if len(xml) > 0: + xml.text = (xml.text or '').strip() + '\n' + prefix + ' ' + for child in xml: + _indent_xml(child, prefix + ' ') + for child in xml[:-1]: + child.tail = (child.tail or '').strip() + '\n' + prefix + ' ' + xml[-1].tail = (xml[-1].tail or '').strip() + '\n' + prefix + + +def _check_package(pkg_xml, zipfilename, zf): + """ + Helper for ``build_index()``: Perform some checks to make sure that + the given package is consistent. + """ + # The filename must patch the id given in the XML file. + uid = os.path.splitext(os.path.split(zipfilename)[1])[0] + if pkg_xml.get('id') != uid: + raise ValueError( + 'package identifier mismatch (%s vs %s)' % (pkg_xml.get('id'), uid) + ) + + # Zip file must expand to a subdir whose name matches uid. + if sum((name != uid and not name.startswith(uid + '/')) for name in zf.namelist()): + raise ValueError( + 'Zipfile %s.zip does not expand to a single ' + 'subdirectory %s/' % (uid, uid) + ) + + +# update for git? +def _svn_revision(filename): + """ + Helper for ``build_index()``: Calculate the subversion revision + number for a given file (by using ``subprocess`` to run ``svn``). + """ + p = subprocess.Popen( + ['svn', 'status', '-v', filename], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + (stdout, stderr) = p.communicate() + if p.returncode != 0 or stderr or not stdout: + raise ValueError( + 'Error determining svn_revision for %s: %s' + % (os.path.split(filename)[1], textwrap.fill(stderr)) + ) + return stdout.split()[2] + + +def _find_collections(root): + """ + Helper for ``build_index()``: Yield a list of ElementTree.Element + objects, each holding the xml for a single package collection. 
+ """ + packages = [] + for dirname, subdirs, files in os.walk(root): + for filename in files: + if filename.endswith('.xml'): + xmlfile = os.path.join(dirname, filename) + yield ElementTree.parse(xmlfile).getroot() + + +def _find_packages(root): + """ + Helper for ``build_index()``: Yield a list of tuples + ``(pkg_xml, zf, subdir)``, where: + - ``pkg_xml`` is an ``ElementTree.Element`` holding the xml for a + package + - ``zf`` is a ``zipfile.ZipFile`` for the package's contents. + - ``subdir`` is the subdirectory (relative to ``root``) where + the package was found (e.g. 'corpora' or 'grammars'). + """ + from nltk.corpus.reader.util import _path_from + + # Find all packages. + packages = [] + for dirname, subdirs, files in os.walk(root): + relpath = '/'.join(_path_from(root, dirname)) + for filename in files: + if filename.endswith('.xml'): + xmlfilename = os.path.join(dirname, filename) + zipfilename = xmlfilename[:-4] + '.zip' + try: + zf = zipfile.ZipFile(zipfilename) + except Exception as e: + raise ValueError('Error reading file %r!\n%s' % (zipfilename, e)) + try: + pkg_xml = ElementTree.parse(xmlfilename).getroot() + except Exception as e: + raise ValueError('Error reading file %r!\n%s' % (xmlfilename, e)) + + # Check that the UID matches the filename + uid = os.path.split(xmlfilename[:-4])[1] + if pkg_xml.get('id') != uid: + raise ValueError( + 'package identifier mismatch (%s ' + 'vs %s)' % (pkg_xml.get('id'), uid) + ) + + # Check that the zipfile expands to a subdir whose + # name matches the uid. + if sum( + (name != uid and not name.startswith(uid + '/')) + for name in zf.namelist() + ): + raise ValueError( + 'Zipfile %s.zip does not expand to a ' + 'single subdirectory %s/' % (uid, uid) + ) + + yield pkg_xml, zf, relpath + # Don't recurse into svn subdirectories: + try: + subdirs.remove('.svn') + except ValueError: + pass + + +###################################################################### +# Main: +###################################################################### + +# There should be a command-line interface + +# Aliases +_downloader = Downloader() +download = _downloader.download + + +def download_shell(): + DownloaderShell(_downloader).run() + + +def download_gui(): + DownloaderGUI(_downloader).mainloop() + + +def update(): + _downloader.update() + + +if __name__ == '__main__': + from optparse import OptionParser + + parser = OptionParser() + parser.add_option( + "-d", + "--dir", + dest="dir", + help="download package to directory DIR", + metavar="DIR", + ) + parser.add_option( + "-q", + "--quiet", + dest="quiet", + action="store_true", + default=False, + help="work quietly", + ) + parser.add_option( + "-f", + "--force", + dest="force", + action="store_true", + default=False, + help="download even if already installed", + ) + parser.add_option( + "-e", + "--exit-on-error", + dest="halt_on_error", + action="store_true", + default=False, + help="exit if an error occurs", + ) + parser.add_option( + "-u", + "--url", + dest="server_index_url", + default=os.environ.get('NLTK_DOWNLOAD_URL'), + help="download server index url", + ) + + (options, args) = parser.parse_args() + + downloader = Downloader(server_index_url=options.server_index_url) + + if args: + for pkg_id in args: + rv = downloader.download( + info_or_id=pkg_id, + download_dir=options.dir, + quiet=options.quiet, + force=options.force, + halt_on_error=options.halt_on_error, + ) + if rv == False and options.halt_on_error: + break + else: + downloader.download( + download_dir=options.dir, + 
quiet=options.quiet, + force=options.force, + halt_on_error=options.halt_on_error, + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/draw/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/draw/__init__.py new file mode 100644 index 0000000..f5c6a6e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/draw/__init__.py @@ -0,0 +1,33 @@ +# Natural Language Toolkit: graphical representations package +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird +# URL: +# For license information, see LICENSE.TXT + +# Import Tkinter-based modules if Tkinter is installed +try: + from six.moves import tkinter +except ImportError: + import warnings + + warnings.warn("nltk.draw package not loaded " "(please install Tkinter library).") +else: + from nltk.draw.cfg import ProductionList, CFGEditor, CFGDemo + from nltk.draw.tree import ( + TreeSegmentWidget, + tree_to_treesegment, + TreeWidget, + TreeView, + draw_trees, + ) + from nltk.draw.table import Table + +from nltk.draw.dispersion import dispersion_plot + +# skip doctests from this package +def setup_module(module): + from nose import SkipTest + + raise SkipTest("nltk.draw examples are not doctests") diff --git a/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..8bb83c9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/cfg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/cfg.cpython-37.pyc new file mode 100644 index 0000000..cc665a0 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/cfg.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/dispersion.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/dispersion.cpython-37.pyc new file mode 100644 index 0000000..552c5f3 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/dispersion.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/table.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/table.cpython-37.pyc new file mode 100644 index 0000000..238a65b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/table.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/tree.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/tree.cpython-37.pyc new file mode 100644 index 0000000..3c376db Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/tree.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000..1883a00 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/draw/__pycache__/util.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/draw/cfg.py b/venv.bak/lib/python3.7/site-packages/nltk/draw/cfg.py new file mode 100644 index 0000000..3afb3e4 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/draw/cfg.py @@ -0,0 +1,861 @@ +# Natural Language Toolkit: CFG visualization +# +# Copyright (C) 
2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +Visualization tools for CFGs. +""" + +# Idea for a nice demo: +# - 3 panes: grammar, treelet, working area +# - grammar is a list of productions +# - when you select a production, the treelet that it licenses appears +# in the treelet area +# - the working area has the text on the bottom, and S at top. When +# you select a production, it shows (ghosted) the locations where +# that production's treelet could be attached to either the text +# or the tree rooted at S. +# - the user can drag the treelet onto one of those (or click on them?) +# - the user can delete pieces of the tree from the working area +# (right click?) +# - connecting top to bottom? drag one NP onto another? +# +# +-------------------------------------------------------------+ +# | S -> NP VP | S | +# |[NP -> Det N ]| / \ | +# | ... | NP VP | +# | N -> 'dog' | | +# | N -> 'cat' | | +# | ... | | +# +--------------+ | +# | NP | Det N | +# | / \ | | | | +# | Det N | the cat saw the dog | +# | | | +# +--------------+----------------------------------------------+ +# +# Operations: +# - connect a new treelet -- drag or click shadow +# - delete a treelet -- right click +# - if only connected to top, delete everything below +# - if only connected to bottom, delete everything above +# - connect top & bottom -- drag a leaf to a root or a root to a leaf +# - disconnect top & bottom -- right click +# - if connected to top & bottom, then disconnect + +import re + +from six import string_types +from six.moves.tkinter import ( + Button, + Canvas, + Entry, + Frame, + IntVar, + Label, + Scrollbar, + Text, + Tk, + Toplevel, +) + +from nltk.grammar import CFG, _read_cfg_production, Nonterminal, nonterminals +from nltk.tree import Tree +from nltk.draw.tree import TreeSegmentWidget, tree_to_treesegment +from nltk.draw.util import ( + CanvasFrame, + ColorizedList, + ShowText, + SymbolWidget, + TextWidget, +) + +###################################################################### +# Production List +###################################################################### + + +class ProductionList(ColorizedList): + ARROW = SymbolWidget.SYMBOLS['rightarrow'] + + def _init_colortags(self, textwidget, options): + textwidget.tag_config('terminal', foreground='#006000') + textwidget.tag_config('arrow', font='symbol', underline='0') + textwidget.tag_config( + 'nonterminal', foreground='blue', font=('helvetica', -12, 'bold') + ) + + def _item_repr(self, item): + contents = [] + contents.append(('%s\t' % item.lhs(), 'nonterminal')) + contents.append((self.ARROW, 'arrow')) + for elt in item.rhs(): + if isinstance(elt, Nonterminal): + contents.append((' %s' % elt.symbol(), 'nonterminal')) + else: + contents.append((' %r' % elt, 'terminal')) + return contents + + +###################################################################### +# CFG Editor +###################################################################### + +_CFGEditor_HELP = """ + +The CFG Editor can be used to create or modify context free grammars. +A context free grammar consists of a start symbol and a list of +productions. The start symbol is specified by the text entry field in +the upper right hand corner of the editor; and the list of productions +are specified in the main text editing box. + +Every non-blank line specifies a single production. Each production +has the form "LHS -> RHS," where LHS is a single nonterminal, and RHS +is a list of nonterminals and terminals. 
+ +Nonterminals must be a single word, such as S or NP or NP_subj. +Currently, nonterminals must consists of alphanumeric characters and +underscores (_). Nonterminals are colored blue. If you place the +mouse over any nonterminal, then all occurrences of that nonterminal +will be highlighted. + +Terminals must be surrounded by single quotes (') or double +quotes(\"). For example, "dog" and "New York" are terminals. +Currently, the string within the quotes must consist of alphanumeric +characters, underscores, and spaces. + +To enter a new production, go to a blank line, and type a nonterminal, +followed by an arrow (->), followed by a sequence of terminals and +nonterminals. Note that "->" (dash + greater-than) is automatically +converted to an arrow symbol. When you move your cursor to a +different line, your production will automatically be colorized. If +there are any errors, they will be highlighted in red. + +Note that the order of the productions is significant for some +algorithms. To re-order the productions, use cut and paste to move +them. + +Use the buttons at the bottom of the window when you are done editing +the CFG: + - Ok: apply the new CFG, and exit the editor. + - Apply: apply the new CFG, and do not exit the editor. + - Reset: revert to the original CFG, and do not exit the editor. + - Cancel: revert to the original CFG, and exit the editor. + +""" + + +class CFGEditor(object): + """ + A dialog window for creating and editing context free grammars. + ``CFGEditor`` imposes the following restrictions: + + - All nonterminals must be strings consisting of word + characters. + - All terminals must be strings consisting of word characters + and space characters. + """ + + # Regular expressions used by _analyze_line. Precompile them, so + # we can process the text faster. + ARROW = SymbolWidget.SYMBOLS['rightarrow'] + _LHS_RE = re.compile(r"(^\s*\w+\s*)(->|(" + ARROW + "))") + _ARROW_RE = re.compile("\s*(->|(" + ARROW + "))\s*") + _PRODUCTION_RE = re.compile( + r"(^\s*\w+\s*)" + + "(->|(" # LHS + + ARROW + + "))\s*" + + r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$" # arrow + ) # RHS + _TOKEN_RE = re.compile("\\w+|->|'[\\w ]+'|\"[\\w ]+\"|(" + ARROW + ")") + _BOLD = ('helvetica', -12, 'bold') + + def __init__(self, parent, cfg=None, set_cfg_callback=None): + self._parent = parent + if cfg is not None: + self._cfg = cfg + else: + self._cfg = CFG(Nonterminal('S'), []) + self._set_cfg_callback = set_cfg_callback + + self._highlight_matching_nonterminals = 1 + + # Create the top-level window. 
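+ # Typical usage, sketched with illustrative names (the callback simply
+ # receives the edited grammar whenever Ok or Apply is pressed):
+ #
+ #     >>> top = Tk()
+ #     >>> g = CFG.fromstring("S -> NP VP \n NP -> 'I' \n VP -> 'ran'")
+ #     >>> CFGEditor(top, g, set_cfg_callback=lambda cfg: print(cfg))
+ #     >>> top.mainloop()
+ #
+ # The demo() function at the bottom of this module does essentially this.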
+ self._top = Toplevel(parent) + self._init_bindings() + + self._init_startframe() + self._startframe.pack(side='top', fill='x', expand=0) + self._init_prodframe() + self._prodframe.pack(side='top', fill='both', expand=1) + self._init_buttons() + self._buttonframe.pack(side='bottom', fill='x', expand=0) + + self._textwidget.focus() + + def _init_startframe(self): + frame = self._startframe = Frame(self._top) + self._start = Entry(frame) + self._start.pack(side='right') + Label(frame, text='Start Symbol:').pack(side='right') + Label(frame, text='Productions:').pack(side='left') + self._start.insert(0, self._cfg.start().symbol()) + + def _init_buttons(self): + frame = self._buttonframe = Frame(self._top) + Button(frame, text='Ok', command=self._ok, underline=0, takefocus=0).pack( + side='left' + ) + Button(frame, text='Apply', command=self._apply, underline=0, takefocus=0).pack( + side='left' + ) + Button(frame, text='Reset', command=self._reset, underline=0, takefocus=0).pack( + side='left' + ) + Button( + frame, text='Cancel', command=self._cancel, underline=0, takefocus=0 + ).pack(side='left') + Button(frame, text='Help', command=self._help, underline=0, takefocus=0).pack( + side='right' + ) + + def _init_bindings(self): + self._top.title('CFG Editor') + self._top.bind('', self._cancel) + self._top.bind('', self._cancel) + self._top.bind('', self._cancel) + # self._top.bind('', self._cancel) + self._top.bind('', self._cancel) + self._top.bind('', self._cancel) + # self._top.bind('', self._cancel) + self._top.bind('', self._cancel) + + self._top.bind('', self._ok) + self._top.bind('', self._ok) + self._top.bind('', self._apply) + self._top.bind('', self._apply) + self._top.bind('', self._reset) + self._top.bind('', self._reset) + self._top.bind('', self._help) + self._top.bind('', self._help) + self._top.bind('', self._help) + + def _init_prodframe(self): + self._prodframe = Frame(self._top) + + # Create the basic Text widget & scrollbar. + self._textwidget = Text( + self._prodframe, background='#e0e0e0', exportselection=1 + ) + self._textscroll = Scrollbar(self._prodframe, takefocus=0, orient='vertical') + self._textwidget.config(yscrollcommand=self._textscroll.set) + self._textscroll.config(command=self._textwidget.yview) + self._textscroll.pack(side='right', fill='y') + self._textwidget.pack(expand=1, fill='both', side='left') + + # Initialize the colorization tags. Each nonterminal gets its + # own tag, so they aren't listed here. + self._textwidget.tag_config('terminal', foreground='#006000') + self._textwidget.tag_config('arrow', font='symbol') + self._textwidget.tag_config('error', background='red') + + # Keep track of what line they're on. We use that to remember + # to re-analyze a line whenever they leave it. + self._linenum = 0 + + # Expand "->" to an arrow. + self._top.bind('>', self._replace_arrows) + + # Re-colorize lines when appropriate. + self._top.bind('<>', self._analyze) + self._top.bind('', self._check_analyze) + self._top.bind('', self._check_analyze) + + # Tab cycles focus. (why doesn't this work??) 
+ def cycle(e, textwidget=self._textwidget): + textwidget.tk_focusNext().focus() + + self._textwidget.bind('', cycle) + + prod_tuples = [(p.lhs(), [p.rhs()]) for p in self._cfg.productions()] + for i in range(len(prod_tuples) - 1, 0, -1): + if prod_tuples[i][0] == prod_tuples[i - 1][0]: + if () in prod_tuples[i][1]: + continue + if () in prod_tuples[i - 1][1]: + continue + print(prod_tuples[i - 1][1]) + print(prod_tuples[i][1]) + prod_tuples[i - 1][1].extend(prod_tuples[i][1]) + del prod_tuples[i] + + for lhs, rhss in prod_tuples: + print(lhs, rhss) + s = '%s ->' % lhs + for rhs in rhss: + for elt in rhs: + if isinstance(elt, Nonterminal): + s += ' %s' % elt + else: + s += ' %r' % elt + s += ' |' + s = s[:-2] + '\n' + self._textwidget.insert('end', s) + + self._analyze() + + # # Add the producitons to the text widget, and colorize them. + # prod_by_lhs = {} + # for prod in self._cfg.productions(): + # if len(prod.rhs()) > 0: + # prod_by_lhs.setdefault(prod.lhs(),[]).append(prod) + # for (lhs, prods) in prod_by_lhs.items(): + # self._textwidget.insert('end', '%s ->' % lhs) + # self._textwidget.insert('end', self._rhs(prods[0])) + # for prod in prods[1:]: + # print '\t|'+self._rhs(prod), + # self._textwidget.insert('end', '\t|'+self._rhs(prod)) + # print + # self._textwidget.insert('end', '\n') + # for prod in self._cfg.productions(): + # if len(prod.rhs()) == 0: + # self._textwidget.insert('end', '%s' % prod) + # self._analyze() + + # def _rhs(self, prod): + # s = '' + # for elt in prod.rhs(): + # if isinstance(elt, Nonterminal): s += ' %s' % elt.symbol() + # else: s += ' %r' % elt + # return s + + def _clear_tags(self, linenum): + """ + Remove all tags (except ``arrow`` and ``sel``) from the given + line of the text widget used for editing the productions. + """ + start = '%d.0' % linenum + end = '%d.end' % linenum + for tag in self._textwidget.tag_names(): + if tag not in ('arrow', 'sel'): + self._textwidget.tag_remove(tag, start, end) + + def _check_analyze(self, *e): + """ + Check if we've moved to a new line. If we have, then remove + all colorization from the line we moved to, and re-colorize + the line that we moved from. + """ + linenum = int(self._textwidget.index('insert').split('.')[0]) + if linenum != self._linenum: + self._clear_tags(linenum) + self._analyze_line(self._linenum) + self._linenum = linenum + + def _replace_arrows(self, *e): + """ + Replace any ``'->'`` text strings with arrows (char \\256, in + symbol font). This searches the whole buffer, but is fast + enough to be done anytime they press '>'. + """ + arrow = '1.0' + while True: + arrow = self._textwidget.search('->', arrow, 'end+1char') + if arrow == '': + break + self._textwidget.delete(arrow, arrow + '+2char') + self._textwidget.insert(arrow, self.ARROW, 'arrow') + self._textwidget.insert(arrow, '\t') + + arrow = '1.0' + while True: + arrow = self._textwidget.search(self.ARROW, arrow + '+1char', 'end+1char') + if arrow == '': + break + self._textwidget.tag_add('arrow', arrow, arrow + '+1char') + + def _analyze_token(self, match, linenum): + """ + Given a line number and a regexp match for a token on that + line, colorize the token. Note that the regexp match gives us + the token's text, start index (on the line), and end index (on + the line). + """ + # What type of token is it? 
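+ # For example (illustrative of the branches below): a quoted token such
+ # as 'dog' or "New York" gets the 'terminal' tag; '->' or the arrow
+ # glyph gets the 'arrow' tag; and a token such as NP gets its own
+ # 'nonterminal_NP' tag, so that every occurrence of that nonterminal
+ # can be highlighted together on mouse-over.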
+ if match.group()[0] in "'\"": + tag = 'terminal' + elif match.group() in ('->', self.ARROW): + tag = 'arrow' + else: + # If it's a nonterminal, then set up new bindings, so we + # can highlight all instances of that nonterminal when we + # put the mouse over it. + tag = 'nonterminal_' + match.group() + if tag not in self._textwidget.tag_names(): + self._init_nonterminal_tag(tag) + + start = '%d.%d' % (linenum, match.start()) + end = '%d.%d' % (linenum, match.end()) + self._textwidget.tag_add(tag, start, end) + + def _init_nonterminal_tag(self, tag, foreground='blue'): + self._textwidget.tag_config(tag, foreground=foreground, font=CFGEditor._BOLD) + if not self._highlight_matching_nonterminals: + return + + def enter(e, textwidget=self._textwidget, tag=tag): + textwidget.tag_config(tag, background='#80ff80') + + def leave(e, textwidget=self._textwidget, tag=tag): + textwidget.tag_config(tag, background='') + + self._textwidget.tag_bind(tag, '', enter) + self._textwidget.tag_bind(tag, '', leave) + + def _analyze_line(self, linenum): + """ + Colorize a given line. + """ + # Get rid of any tags that were previously on the line. + self._clear_tags(linenum) + + # Get the line line's text string. + line = self._textwidget.get(repr(linenum) + '.0', repr(linenum) + '.end') + + # If it's a valid production, then colorize each token. + if CFGEditor._PRODUCTION_RE.match(line): + # It's valid; Use _TOKEN_RE to tokenize the production, + # and call analyze_token on each token. + def analyze_token(match, self=self, linenum=linenum): + self._analyze_token(match, linenum) + return '' + + CFGEditor._TOKEN_RE.sub(analyze_token, line) + elif line.strip() != '': + # It's invalid; show the user where the error is. + self._mark_error(linenum, line) + + def _mark_error(self, linenum, line): + """ + Mark the location of an error in a line. + """ + arrowmatch = CFGEditor._ARROW_RE.search(line) + if not arrowmatch: + # If there's no arrow at all, highlight the whole line. + start = '%d.0' % linenum + end = '%d.end' % linenum + elif not CFGEditor._LHS_RE.match(line): + # Otherwise, if the LHS is bad, highlight it. + start = '%d.0' % linenum + end = '%d.%d' % (linenum, arrowmatch.start()) + else: + # Otherwise, highlight the RHS. + start = '%d.%d' % (linenum, arrowmatch.end()) + end = '%d.end' % linenum + + # If we're highlighting 0 chars, highlight the whole line. + if self._textwidget.compare(start, '==', end): + start = '%d.0' % linenum + end = '%d.end' % linenum + self._textwidget.tag_add('error', start, end) + + def _analyze(self, *e): + """ + Replace ``->`` with arrows, and colorize the entire buffer. + """ + self._replace_arrows() + numlines = int(self._textwidget.index('end').split('.')[0]) + for linenum in range(1, numlines + 1): # line numbers start at 1. + self._analyze_line(linenum) + + def _parse_productions(self): + """ + Parse the current contents of the textwidget buffer, to create + a list of productions. + """ + productions = [] + + # Get the text, normalize it, and split it into lines. 
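+ # Each non-blank line is handed to _read_cfg_production(), which can
+ # return several productions for one line when alternatives are used;
+ # for example (illustrative) the line
+ #
+ #     NP -> Det N | 'dog'
+ #
+ # yields the two productions NP -> Det N and NP -> 'dog'.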
+ text = self._textwidget.get('1.0', 'end') + text = re.sub(self.ARROW, '->', text) + text = re.sub('\t', ' ', text) + lines = text.split('\n') + + # Convert each line to a CFG production + for line in lines: + line = line.strip() + if line == '': + continue + productions += _read_cfg_production(line) + # if line.strip() == '': continue + # if not CFGEditor._PRODUCTION_RE.match(line): + # raise ValueError('Bad production string %r' % line) + # + # (lhs_str, rhs_str) = line.split('->') + # lhs = Nonterminal(lhs_str.strip()) + # rhs = [] + # def parse_token(match, rhs=rhs): + # token = match.group() + # if token[0] in "'\"": rhs.append(token[1:-1]) + # else: rhs.append(Nonterminal(token)) + # return '' + # CFGEditor._TOKEN_RE.sub(parse_token, rhs_str) + # + # productions.append(Production(lhs, *rhs)) + + return productions + + def _destroy(self, *e): + if self._top is None: + return + self._top.destroy() + self._top = None + + def _ok(self, *e): + self._apply() + self._destroy() + + def _apply(self, *e): + productions = self._parse_productions() + start = Nonterminal(self._start.get()) + cfg = CFG(start, productions) + if self._set_cfg_callback is not None: + self._set_cfg_callback(cfg) + + def _reset(self, *e): + self._textwidget.delete('1.0', 'end') + for production in self._cfg.productions(): + self._textwidget.insert('end', '%s\n' % production) + self._analyze() + if self._set_cfg_callback is not None: + self._set_cfg_callback(self._cfg) + + def _cancel(self, *e): + try: + self._reset() + except: + pass + self._destroy() + + def _help(self, *e): + # The default font's not very legible; try using 'fixed' instead. + try: + ShowText( + self._parent, + 'Help: Chart Parser Demo', + (_CFGEditor_HELP).strip(), + width=75, + font='fixed', + ) + except: + ShowText( + self._parent, + 'Help: Chart Parser Demo', + (_CFGEditor_HELP).strip(), + width=75, + ) + + +###################################################################### +# New Demo (built tree based on cfg) +###################################################################### + + +class CFGDemo(object): + def __init__(self, grammar, text): + self._grammar = grammar + self._text = text + + # Set up the main window. 
+ self._top = Tk() + self._top.title('Context Free Grammar Demo') + + # Base font size + self._size = IntVar(self._top) + self._size.set(12) # = medium + + # Set up the key bindings + self._init_bindings(self._top) + + # Create the basic frames + frame1 = Frame(self._top) + frame1.pack(side='left', fill='y', expand=0) + self._init_menubar(self._top) + self._init_buttons(self._top) + self._init_grammar(frame1) + self._init_treelet(frame1) + self._init_workspace(self._top) + + # ////////////////////////////////////////////////// + # Initialization + # ////////////////////////////////////////////////// + + def _init_bindings(self, top): + top.bind('', self.destroy) + + def _init_menubar(self, parent): + pass + + def _init_buttons(self, parent): + pass + + def _init_grammar(self, parent): + self._prodlist = ProductionList(parent, self._grammar, width=20) + self._prodlist.pack(side='top', fill='both', expand=1) + self._prodlist.focus() + self._prodlist.add_callback('select', self._selectprod_cb) + self._prodlist.add_callback('move', self._selectprod_cb) + + def _init_treelet(self, parent): + self._treelet_canvas = Canvas(parent, background='white') + self._treelet_canvas.pack(side='bottom', fill='x') + self._treelet = None + + def _init_workspace(self, parent): + self._workspace = CanvasFrame(parent, background='white') + self._workspace.pack(side='right', fill='both', expand=1) + self._tree = None + self.reset_workspace() + + # ////////////////////////////////////////////////// + # Workspace + # ////////////////////////////////////////////////// + + def reset_workspace(self): + c = self._workspace.canvas() + fontsize = int(self._size.get()) + node_font = ('helvetica', -(fontsize + 4), 'bold') + leaf_font = ('helvetica', -(fontsize + 2)) + + # Remove the old tree + if self._tree is not None: + self._workspace.remove_widget(self._tree) + + # The root of the tree. + start = self._grammar.start().symbol() + rootnode = TextWidget(c, start, font=node_font, draggable=1) + + # The leaves of the tree. + leaves = [] + for word in self._text: + leaves.append(TextWidget(c, word, font=leaf_font, draggable=1)) + + # Put it all together into one tree + self._tree = TreeSegmentWidget(c, rootnode, leaves, color='white') + + # Add it to the workspace. + self._workspace.add_widget(self._tree) + + # Move the leaves to the bottom of the workspace. + for leaf in leaves: + leaf.move(0, 100) + + # self._nodes = {start:1} + # self._leaves = dict([(l,1) for l in leaves]) + + def workspace_markprod(self, production): + pass + + def _markproduction(self, prod, tree=None): + if tree is None: + tree = self._tree + for i in range(len(tree.subtrees()) - len(prod.rhs())): + if tree['color', i] == 'white': + self._markproduction # FIXME: Is this necessary at all? + + for j, node in enumerate(prod.rhs()): + widget = tree.subtrees()[i + j] + if ( + isinstance(node, Nonterminal) + and isinstance(widget, TreeSegmentWidget) + and node.symbol == widget.label().text() + ): + pass # matching nonterminal + elif ( + isinstance(node, string_types) + and isinstance(widget, TextWidget) + and node == widget.text() + ): + pass # matching nonterminal + else: + break + else: + # Everything matched! 
+ print('MATCH AT', i) + + # ////////////////////////////////////////////////// + # Grammar + # ////////////////////////////////////////////////// + + def _selectprod_cb(self, production): + canvas = self._treelet_canvas + + self._prodlist.highlight(production) + if self._treelet is not None: + self._treelet.destroy() + + # Convert the production to a tree. + rhs = production.rhs() + for (i, elt) in enumerate(rhs): + if isinstance(elt, Nonterminal): + elt = Tree(elt) + tree = Tree(production.lhs().symbol(), *rhs) + + # Draw the tree in the treelet area. + fontsize = int(self._size.get()) + node_font = ('helvetica', -(fontsize + 4), 'bold') + leaf_font = ('helvetica', -(fontsize + 2)) + self._treelet = tree_to_treesegment( + canvas, tree, node_font=node_font, leaf_font=leaf_font + ) + self._treelet['draggable'] = 1 + + # Center the treelet. + (x1, y1, x2, y2) = self._treelet.bbox() + w, h = int(canvas['width']), int(canvas['height']) + self._treelet.move((w - x1 - x2) / 2, (h - y1 - y2) / 2) + + # Mark the places where we can add it to the workspace. + self._markproduction(production) + + def destroy(self, *args): + self._top.destroy() + + def mainloop(self, *args, **kwargs): + self._top.mainloop(*args, **kwargs) + + +def demo2(): + from nltk import Nonterminal, Production, CFG + + nonterminals = 'S VP NP PP P N Name V Det' + (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()] + productions = ( + # Syntactic Productions + Production(S, [NP, VP]), + Production(NP, [Det, N]), + Production(NP, [NP, PP]), + Production(VP, [VP, PP]), + Production(VP, [V, NP, PP]), + Production(VP, [V, NP]), + Production(PP, [P, NP]), + Production(PP, []), + Production(PP, ['up', 'over', NP]), + # Lexical Productions + Production(NP, ['I']), + Production(Det, ['the']), + Production(Det, ['a']), + Production(N, ['man']), + Production(V, ['saw']), + Production(P, ['in']), + Production(P, ['with']), + Production(N, ['park']), + Production(N, ['dog']), + Production(N, ['statue']), + Production(Det, ['my']), + ) + grammar = CFG(S, productions) + + text = 'I saw a man in the park'.split() + d = CFGDemo(grammar, text) + d.mainloop() + + +###################################################################### +# Old Demo +###################################################################### + + +def demo(): + from nltk import Nonterminal, CFG + + nonterminals = 'S VP NP PP P N Name V Det' + (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()] + + grammar = CFG.fromstring( + """ + S -> NP VP + PP -> P NP + NP -> Det N + NP -> NP PP + VP -> V NP + VP -> VP PP + Det -> 'a' + Det -> 'the' + Det -> 'my' + NP -> 'I' + N -> 'dog' + N -> 'man' + N -> 'park' + N -> 'statue' + V -> 'saw' + P -> 'in' + P -> 'up' + P -> 'over' + P -> 'with' + """ + ) + + def cb(grammar): + print(grammar) + + top = Tk() + editor = CFGEditor(top, grammar, cb) + Label(top, text='\nTesting CFG Editor\n').pack() + Button(top, text='Quit', command=top.destroy).pack() + top.mainloop() + + +def demo3(): + from nltk import Production + + (S, VP, NP, PP, P, N, Name, V, Det) = nonterminals( + 'S, VP, NP, PP, P, N, Name, V, Det' + ) + + productions = ( + # Syntactic Productions + Production(S, [NP, VP]), + Production(NP, [Det, N]), + Production(NP, [NP, PP]), + Production(VP, [VP, PP]), + Production(VP, [V, NP, PP]), + Production(VP, [V, NP]), + Production(PP, [P, NP]), + Production(PP, []), + Production(PP, ['up', 'over', NP]), + # Lexical Productions + Production(NP, ['I']), + Production(Det, 
['the']), + Production(Det, ['a']), + Production(N, ['man']), + Production(V, ['saw']), + Production(P, ['in']), + Production(P, ['with']), + Production(N, ['park']), + Production(N, ['dog']), + Production(N, ['statue']), + Production(Det, ['my']), + ) + + t = Tk() + + def destroy(e, t=t): + t.destroy() + + t.bind('q', destroy) + p = ProductionList(t, productions) + p.pack(expand=1, fill='both') + p.add_callback('select', p.markonly) + p.add_callback('move', p.markonly) + p.focus() + p.mark(productions[2]) + p.mark(productions[8]) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/draw/dispersion.py b/venv.bak/lib/python3.7/site-packages/nltk/draw/dispersion.py new file mode 100644 index 0000000..40b2a9a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/draw/dispersion.py @@ -0,0 +1,66 @@ +# Natural Language Toolkit: Dispersion Plots +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# URL: +# For license information, see LICENSE.TXT + +""" +A utility for displaying lexical dispersion. +""" + + +def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Plot"): + """ + Generate a lexical dispersion plot. + + :param text: The source text + :type text: list(str) or enum(str) + :param words: The target words + :type words: list of str + :param ignore_case: flag to set if case should be ignored when searching text + :type ignore_case: bool + """ + + try: + from matplotlib import pylab + except ImportError: + raise ValueError( + 'The plot function requires matplotlib to be installed.' + 'See http://matplotlib.org/' + ) + + text = list(text) + words.reverse() + + if ignore_case: + words_to_comp = list(map(str.lower, words)) + text_to_comp = list(map(str.lower, text)) + else: + words_to_comp = words + text_to_comp = text + + points = [ + (x, y) + for x in range(len(text_to_comp)) + for y in range(len(words_to_comp)) + if text_to_comp[x] == words_to_comp[y] + ] + if points: + x, y = list(zip(*points)) + else: + x = y = () + pylab.plot(x, y, "b|", scalex=0.1) + pylab.yticks(list(range(len(words))), words, color="b") + pylab.ylim(-1, len(words)) + pylab.title(title) + pylab.xlabel("Word Offset") + pylab.show() + + +if __name__ == '__main__': + import nltk.compat + from nltk.corpus import gutenberg + + words = ['Elinor', 'Marianne', 'Edward', 'Willoughby'] + dispersion_plot(gutenberg.words('austen-sense.txt'), words) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/draw/table.py b/venv.bak/lib/python3.7/site-packages/nltk/draw/table.py new file mode 100644 index 0000000..aea70b4 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/draw/table.py @@ -0,0 +1,1183 @@ +# Natural Language Toolkit: Table widget +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +Tkinter widgets for displaying multi-column listboxes and tables. +""" + +from __future__ import division + + +import operator + +from six.moves.tkinter import Frame, Label, Listbox, Scrollbar, Tk + + +###################################################################### +# Multi-Column Listbox +###################################################################### + + +class MultiListbox(Frame): + """ + A multi-column listbox, where the current selection applies to an + entire row. 
Based on the MultiListbox Tkinter widget + recipe from the Python Cookbook (http://code.activestate.com/recipes/52266/) + + For the most part, ``MultiListbox`` methods delegate to its + contained listboxes. For any methods that do not have docstrings, + see ``Tkinter.Listbox`` for a description of what that method does. + """ + + # ///////////////////////////////////////////////////////////////// + # Configuration + # ///////////////////////////////////////////////////////////////// + + #: Default configuration values for the frame. + FRAME_CONFIG = dict(background='#888', takefocus=True, highlightthickness=1) + + #: Default configurations for the column labels. + LABEL_CONFIG = dict( + borderwidth=1, + relief='raised', + font='helvetica -16 bold', + background='#444', + foreground='white', + ) + + #: Default configuration for the column listboxes. + LISTBOX_CONFIG = dict( + borderwidth=1, + selectborderwidth=0, + highlightthickness=0, + exportselection=False, + selectbackground='#888', + activestyle='none', + takefocus=False, + ) + + # ///////////////////////////////////////////////////////////////// + # Constructor + # ///////////////////////////////////////////////////////////////// + + def __init__(self, master, columns, column_weights=None, cnf={}, **kw): + """ + Construct a new multi-column listbox widget. + + :param master: The widget that should contain the new + multi-column listbox. + + :param columns: Specifies what columns should be included in + the new multi-column listbox. If ``columns`` is an integer, + the it is the number of columns to include. If it is + a list, then its length indicates the number of columns + to include; and each element of the list will be used as + a label for the corresponding column. + + :param cnf, kw: Configuration parameters for this widget. + Use ``label_*`` to configure all labels; and ``listbox_*`` + to configure all listboxes. E.g.: + + >>> mlb = MultiListbox(master, 5, label_foreground='red') + """ + # If columns was specified as an int, convert it to a list. + if isinstance(columns, int): + columns = list(range(columns)) + include_labels = False + else: + include_labels = True + + if len(columns) == 0: + raise ValueError("Expected at least one column") + + # Instance variables + self._column_names = tuple(columns) + self._listboxes = [] + self._labels = [] + + # Pick a default value for column_weights, if none was specified. + if column_weights is None: + column_weights = [1] * len(columns) + elif len(column_weights) != len(columns): + raise ValueError('Expected one column_weight for each column') + self._column_weights = column_weights + + # Configure our widgets. 
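+ # A small usage sketch for the constructor documented above (the column
+ # names and row values are invented for illustration):
+ #
+ #     >>> root = Tk()
+ #     >>> mlb = MultiListbox(root, ['Identifier', 'Name', 'Size'],
+ #     ...                    column_weights=[1, 3, 1])
+ #     >>> mlb.insert('end', ('abc', 'ABC Corpus', '1.2 MB'))
+ #     >>> mlb.pack(expand=True, fill='both')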
+ Frame.__init__(self, master, **self.FRAME_CONFIG) + self.grid_rowconfigure(1, weight=1) + for i, label in enumerate(self._column_names): + self.grid_columnconfigure(i, weight=column_weights[i]) + + # Create a label for the column + if include_labels: + l = Label(self, text=label, **self.LABEL_CONFIG) + self._labels.append(l) + l.grid(column=i, row=0, sticky='news', padx=0, pady=0) + l.column_index = i + + # Create a listbox for the column + lb = Listbox(self, **self.LISTBOX_CONFIG) + self._listboxes.append(lb) + lb.grid(column=i, row=1, sticky='news', padx=0, pady=0) + lb.column_index = i + + # Clicking or dragging selects: + lb.bind('', self._select) + lb.bind('', self._select) + # Scroll whell scrolls: + lb.bind('', lambda e: self._scroll(-1)) + lb.bind('', lambda e: self._scroll(+1)) + lb.bind('', lambda e: self._scroll(e.delta)) + # Button 2 can be used to scan: + lb.bind('', lambda e: self.scan_mark(e.x, e.y)) + lb.bind('', lambda e: self.scan_dragto(e.x, e.y)) + # Dragging outside the window has no effect (diable + # the default listbox behavior, which scrolls): + lb.bind('', lambda e: 'break') + # Columns can be resized by dragging them: + l.bind('', self._resize_column) + + # Columns can be resized by dragging them. (This binding is + # used if they click on the grid between columns:) + self.bind('', self._resize_column) + + # Set up key bindings for the widget: + self.bind('', lambda e: self.select(delta=-1)) + self.bind('', lambda e: self.select(delta=1)) + self.bind('', lambda e: self.select(delta=-self._pagesize())) + self.bind('', lambda e: self.select(delta=self._pagesize())) + + # Configuration customizations + self.configure(cnf, **kw) + + # ///////////////////////////////////////////////////////////////// + # Column Resizing + # ///////////////////////////////////////////////////////////////// + + def _resize_column(self, event): + """ + Callback used to resize a column of the table. Return ``True`` + if the column is actually getting resized (if the user clicked + on the far left or far right 5 pixels of a label); and + ``False`` otherwies. + """ + # If we're already waiting for a button release, then ignore + # the new button press. + if event.widget.bind(''): + return False + + # Decide which column (if any) to resize. + self._resize_column_index = None + if event.widget is self: + for i, lb in enumerate(self._listboxes): + if abs(event.x - (lb.winfo_x() + lb.winfo_width())) < 10: + self._resize_column_index = i + elif event.x > (event.widget.winfo_width() - 5): + self._resize_column_index = event.widget.column_index + elif event.x < 5 and event.widget.column_index != 0: + self._resize_column_index = event.widget.column_index - 1 + + # Bind callbacks that are used to resize it. 
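+ # (Worked example of the resize arithmetic in the motion callback
+ # below: if the listbox is 200 px wide and its 'width' option is 20
+ # characters, charwidth is 10 px per character, so dragging the column
+ # edge 30 px to the right widens the column by 30 // 10 = 3 characters.)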
+ if self._resize_column_index is not None: + event.widget.bind('', self._resize_column_motion_cb) + event.widget.bind( + '' % event.num, self._resize_column_buttonrelease_cb + ) + return True + else: + return False + + def _resize_column_motion_cb(self, event): + lb = self._listboxes[self._resize_column_index] + charwidth = lb.winfo_width() / lb['width'] + + x1 = event.x + event.widget.winfo_x() + x2 = lb.winfo_x() + lb.winfo_width() + + lb['width'] = max(3, lb['width'] + (x1 - x2) // charwidth) + + def _resize_column_buttonrelease_cb(self, event): + event.widget.unbind('' % event.num) + event.widget.unbind('') + + # ///////////////////////////////////////////////////////////////// + # Properties + # ///////////////////////////////////////////////////////////////// + + @property + def column_names(self): + """ + A tuple containing the names of the columns used by this + multi-column listbox. + """ + return self._column_names + + @property + def column_labels(self): + """ + A tuple containing the ``Tkinter.Label`` widgets used to + display the label of each column. If this multi-column + listbox was created without labels, then this will be an empty + tuple. These widgets will all be augmented with a + ``column_index`` attribute, which can be used to determine + which column they correspond to. This can be convenient, + e.g., when defining callbacks for bound events. + """ + return tuple(self._labels) + + @property + def listboxes(self): + """ + A tuple containing the ``Tkinter.Listbox`` widgets used to + display individual columns. These widgets will all be + augmented with a ``column_index`` attribute, which can be used + to determine which column they correspond to. This can be + convenient, e.g., when defining callbacks for bound events. + """ + return tuple(self._listboxes) + + # ///////////////////////////////////////////////////////////////// + # Mouse & Keyboard Callback Functions + # ///////////////////////////////////////////////////////////////// + + def _select(self, e): + i = e.widget.nearest(e.y) + self.selection_clear(0, 'end') + self.selection_set(i) + self.activate(i) + self.focus() + + def _scroll(self, delta): + for lb in self._listboxes: + lb.yview_scroll(delta, 'unit') + return 'break' + + def _pagesize(self): + """:return: The number of rows that makes up one page""" + return int(self.index('@0,1000000')) - int(self.index('@0,0')) + + # ///////////////////////////////////////////////////////////////// + # Row selection + # ///////////////////////////////////////////////////////////////// + + def select(self, index=None, delta=None, see=True): + """ + Set the selected row. If ``index`` is specified, then select + row ``index``. Otherwise, if ``delta`` is specified, then move + the current selection by ``delta`` (negative numbers for up, + positive numbers for down). This will not move the selection + past the top or the bottom of the list. + + :param see: If true, then call ``self.see()`` with the newly + selected index, to ensure that it is visible. + """ + if (index is not None) and (delta is not None): + raise ValueError('specify index or delta, but not both') + + # If delta was given, then calculate index. + if delta is not None: + if len(self.curselection()) == 0: + index = -1 + delta + else: + index = int(self.curselection()[0]) + delta + + # Clear all selected rows. 
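# Callback sketch: every label and listbox created by the constructor carries a
# ``column_index`` attribute, so a single handler can serve all columns
# (continues the ``mlb`` sketch; the <Button-3> sequence is an illustrative choice):
def _on_column_click(event):
    print('clicked column', mlb.column_names[event.widget.column_index])

for widget in mlb.column_labels + mlb.listboxes:
    widget.bind('<Button-3>', _on_column_click)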
+ self.selection_clear(0, 'end') + + # Select the specified index + if index is not None: + index = min(max(index, 0), self.size() - 1) + # self.activate(index) + self.selection_set(index) + if see: + self.see(index) + + # ///////////////////////////////////////////////////////////////// + # Configuration + # ///////////////////////////////////////////////////////////////// + + def configure(self, cnf={}, **kw): + """ + Configure this widget. Use ``label_*`` to configure all + labels; and ``listbox_*`` to configure all listboxes. E.g.: + + >>> mlb = MultiListbox(master, 5) + >>> mlb.configure(label_foreground='red') + >>> mlb.configure(listbox_foreground='red') + """ + cnf = dict(list(cnf.items()) + list(kw.items())) + for (key, val) in list(cnf.items()): + if key.startswith('label_') or key.startswith('label-'): + for label in self._labels: + label.configure({key[6:]: val}) + elif key.startswith('listbox_') or key.startswith('listbox-'): + for listbox in self._listboxes: + listbox.configure({key[8:]: val}) + else: + Frame.configure(self, {key: val}) + + def __setitem__(self, key, val): + """ + Configure this widget. This is equivalent to + ``self.configure({key,val``)}. See ``configure()``. + """ + self.configure({key: val}) + + def rowconfigure(self, row_index, cnf={}, **kw): + """ + Configure all table cells in the given row. Valid keyword + arguments are: ``background``, ``bg``, ``foreground``, ``fg``, + ``selectbackground``, ``selectforeground``. + """ + for lb in self._listboxes: + lb.itemconfigure(row_index, cnf, **kw) + + def columnconfigure(self, col_index, cnf={}, **kw): + """ + Configure all table cells in the given column. Valid keyword + arguments are: ``background``, ``bg``, ``foreground``, ``fg``, + ``selectbackground``, ``selectforeground``. + """ + lb = self._listboxes[col_index] + + cnf = dict(list(cnf.items()) + list(kw.items())) + for (key, val) in list(cnf.items()): + if key in ( + 'background', + 'bg', + 'foreground', + 'fg', + 'selectbackground', + 'selectforeground', + ): + for i in range(lb.size()): + lb.itemconfigure(i, {key: val}) + else: + lb.configure({key: val}) + + def itemconfigure(self, row_index, col_index, cnf=None, **kw): + """ + Configure the table cell at the given row and column. Valid + keyword arguments are: ``background``, ``bg``, ``foreground``, + ``fg``, ``selectbackground``, ``selectforeground``. + """ + lb = self._listboxes[col_index] + return lb.itemconfigure(row_index, cnf, **kw) + + # ///////////////////////////////////////////////////////////////// + # Value Access + # ///////////////////////////////////////////////////////////////// + + def insert(self, index, *rows): + """ + Insert the given row or rows into the table, at the given + index. Each row value should be a tuple of cell values, one + for each column in the row. Index may be an integer or any of + the special strings (such as ``'end'``) accepted by + ``Tkinter.Listbox``. + """ + for elt in rows: + if len(elt) != len(self._column_names): + raise ValueError( + 'rows should be tuples whose length ' + 'is equal to the number of columns' + ) + for (lb, elts) in zip(self._listboxes, list(zip(*rows))): + lb.insert(index, *elts) + + def get(self, first, last=None): + """ + Return the value(s) of the specified row(s). If ``last`` is + not specified, then return a single row value; otherwise, + return a list of row values. Each row value is a tuple of + cell values, one for each column in the row. 
+ """ + values = [lb.get(first, last) for lb in self._listboxes] + if last: + return [tuple(row) for row in zip(*values)] + else: + return tuple(values) + + def bbox(self, row, col): + """ + Return the bounding box for the given table cell, relative to + this widget's top-left corner. The bounding box is a tuple + of integers ``(left, top, width, height)``. + """ + dx, dy, _, _ = self.grid_bbox(row=0, column=col) + x, y, w, h = self._listboxes[col].bbox(row) + return int(x) + int(dx), int(y) + int(dy), int(w), int(h) + + # ///////////////////////////////////////////////////////////////// + # Hide/Show Columns + # ///////////////////////////////////////////////////////////////// + + def hide_column(self, col_index): + """ + Hide the given column. The column's state is still + maintained: its values will still be returned by ``get()``, and + you must supply its values when calling ``insert()``. It is + safe to call this on a column that is already hidden. + + :see: ``show_column()`` + """ + if self._labels: + self._labels[col_index].grid_forget() + self.listboxes[col_index].grid_forget() + self.grid_columnconfigure(col_index, weight=0) + + def show_column(self, col_index): + """ + Display a column that has been hidden using ``hide_column()``. + It is safe to call this on a column that is not hidden. + """ + weight = self._column_weights[col_index] + if self._labels: + self._labels[col_index].grid( + column=col_index, row=0, sticky='news', padx=0, pady=0 + ) + self._listboxes[col_index].grid( + column=col_index, row=1, sticky='news', padx=0, pady=0 + ) + self.grid_columnconfigure(col_index, weight=weight) + + # ///////////////////////////////////////////////////////////////// + # Binding Methods + # ///////////////////////////////////////////////////////////////// + + def bind_to_labels(self, sequence=None, func=None, add=None): + """ + Add a binding to each ``Tkinter.Label`` widget in this + mult-column listbox that will call ``func`` in response to the + event sequence. + + :return: A list of the identifiers of replaced binding + functions (if any), allowing for their deletion (to + prevent a memory leak). + """ + return [label.bind(sequence, func, add) for label in self.column_labels] + + def bind_to_listboxes(self, sequence=None, func=None, add=None): + """ + Add a binding to each ``Tkinter.Listbox`` widget in this + mult-column listbox that will call ``func`` in response to the + event sequence. + + :return: A list of the identifiers of replaced binding + functions (if any), allowing for their deletion (to + prevent a memory leak). + """ + for listbox in self.listboxes: + listbox.bind(sequence, func, add) + + def bind_to_columns(self, sequence=None, func=None, add=None): + """ + Add a binding to each ``Tkinter.Label`` and ``Tkinter.Listbox`` + widget in this mult-column listbox that will call ``func`` in + response to the event sequence. + + :return: A list of the identifiers of replaced binding + functions (if any), allowing for their deletion (to + prevent a memory leak). 
+ """ + return self.bind_to_labels(sequence, func, add) + self.bind_to_listboxes( + sequence, func, add + ) + + # ///////////////////////////////////////////////////////////////// + # Simple Delegation + # ///////////////////////////////////////////////////////////////// + + # These methods delegate to the first listbox: + def curselection(self, *args, **kwargs): + return self._listboxes[0].curselection(*args, **kwargs) + + def selection_includes(self, *args, **kwargs): + return self._listboxes[0].selection_includes(*args, **kwargs) + + def itemcget(self, *args, **kwargs): + return self._listboxes[0].itemcget(*args, **kwargs) + + def size(self, *args, **kwargs): + return self._listboxes[0].size(*args, **kwargs) + + def index(self, *args, **kwargs): + return self._listboxes[0].index(*args, **kwargs) + + def nearest(self, *args, **kwargs): + return self._listboxes[0].nearest(*args, **kwargs) + + # These methods delegate to each listbox (and return None): + def activate(self, *args, **kwargs): + for lb in self._listboxes: + lb.activate(*args, **kwargs) + + def delete(self, *args, **kwargs): + for lb in self._listboxes: + lb.delete(*args, **kwargs) + + def scan_mark(self, *args, **kwargs): + for lb in self._listboxes: + lb.scan_mark(*args, **kwargs) + + def scan_dragto(self, *args, **kwargs): + for lb in self._listboxes: + lb.scan_dragto(*args, **kwargs) + + def see(self, *args, **kwargs): + for lb in self._listboxes: + lb.see(*args, **kwargs) + + def selection_anchor(self, *args, **kwargs): + for lb in self._listboxes: + lb.selection_anchor(*args, **kwargs) + + def selection_clear(self, *args, **kwargs): + for lb in self._listboxes: + lb.selection_clear(*args, **kwargs) + + def selection_set(self, *args, **kwargs): + for lb in self._listboxes: + lb.selection_set(*args, **kwargs) + + def yview(self, *args, **kwargs): + for lb in self._listboxes: + v = lb.yview(*args, **kwargs) + return v # if called with no arguments + + def yview_moveto(self, *args, **kwargs): + for lb in self._listboxes: + lb.yview_moveto(*args, **kwargs) + + def yview_scroll(self, *args, **kwargs): + for lb in self._listboxes: + lb.yview_scroll(*args, **kwargs) + + # ///////////////////////////////////////////////////////////////// + # Aliases + # ///////////////////////////////////////////////////////////////// + + itemconfig = itemconfigure + rowconfig = rowconfigure + columnconfig = columnconfigure + select_anchor = selection_anchor + select_clear = selection_clear + select_includes = selection_includes + select_set = selection_set + + # ///////////////////////////////////////////////////////////////// + # These listbox methods are not defined for multi-listbox + # ///////////////////////////////////////////////////////////////// + # def xview(self, *what): pass + # def xview_moveto(self, fraction): pass + # def xview_scroll(self, number, what): pass + + +###################################################################### +# Table +###################################################################### + + +class Table(object): + """ + A display widget for a table of values, based on a ``MultiListbox`` + widget. For many purposes, ``Table`` can be treated as a + list-of-lists. E.g., table[i] is a list of the values for row i; + and table.append(row) adds a new row with the given lits of + values. Individual cells can be accessed using table[i,j], which + refers to the j-th column of the i-th row. This can be used to + both read and write values from the table. 
E.g.: + + >>> table[i,j] = 'hello' + + The column (j) can be given either as an index number, or as a + column name. E.g., the following prints the value in the 3rd row + for the 'First Name' column: + + >>> print(table[3, 'First Name']) + John + + You can configure the colors for individual rows, columns, or + cells using ``rowconfig()``, ``columnconfig()``, and ``itemconfig()``. + The color configuration for each row will be preserved if the + table is modified; however, when new rows are added, any color + configurations that have been made for *columns* will not be + applied to the new row. + + Note: Although ``Table`` acts like a widget in some ways (e.g., it + defines ``grid()``, ``pack()``, and ``bind()``), it is not itself a + widget; it just contains one. This is because widgets need to + define ``__getitem__()``, ``__setitem__()``, and ``__nonzero__()`` in + a way that's incompatible with the fact that ``Table`` behaves as a + list-of-lists. + + :ivar _mlb: The multi-column listbox used to display this table's data. + :ivar _rows: A list-of-lists used to hold the cell values of this + table. Each element of _rows is a row value, i.e., a list of + cell values, one for each column in the row. + """ + + def __init__( + self, + master, + column_names, + rows=None, + column_weights=None, + scrollbar=True, + click_to_sort=True, + reprfunc=None, + cnf={}, + **kw + ): + """ + Construct a new Table widget. + + :type master: Tkinter.Widget + :param master: The widget that should contain the new table. + :type column_names: list(str) + :param column_names: A list of names for the columns; these + names will be used to create labels for each column; + and can be used as an index when reading or writing + cell values from the table. + :type rows: list(list) + :param rows: A list of row values used to initialze the table. + Each row value should be a tuple of cell values, one for + each column in the row. + :type scrollbar: bool + :param scrollbar: If true, then create a scrollbar for the + new table widget. + :type click_to_sort: bool + :param click_to_sort: If true, then create bindings that will + sort the table's rows by a given column's values if the + user clicks on that colum's label. + :type reprfunc: function + :param reprfunc: If specified, then use this function to + convert each table cell value to a string suitable for + display. ``reprfunc`` has the following signature: + reprfunc(row_index, col_index, cell_value) -> str + (Note that the column is specified by index, not by name.) + :param cnf, kw: Configuration parameters for this widget's + contained ``MultiListbox``. See ``MultiListbox.__init__()`` + for details. + """ + self._num_columns = len(column_names) + self._reprfunc = reprfunc + self._frame = Frame(master) + + self._column_name_to_index = dict((c, i) for (i, c) in enumerate(column_names)) + + # Make a copy of the rows & check that it's valid. + if rows is None: + self._rows = [] + else: + self._rows = [[v for v in row] for row in rows] + for row in self._rows: + self._checkrow(row) + + # Create our multi-list box. 
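# Construction sketch for the signature documented above (reuses the Tk ``root``
# from the earlier MultiListbox sketch; column names, rows and the reprfunc are
# illustrative):
def pad(row_index, col_index, value):
    # reprfunc: turn each cell value into the string actually displayed
    return ' %s' % value

table = Table(root, ['Word', 'Count'],
              rows=[('dog', 42), ('cat', 17)],
              scrollbar=True, click_to_sort=True, reprfunc=pad)
table.pack(expand=True, fill='both')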
+ self._mlb = MultiListbox(self._frame, column_names, column_weights, cnf, **kw) + self._mlb.pack(side='left', expand=True, fill='both') + + # Optional scrollbar + if scrollbar: + sb = Scrollbar(self._frame, orient='vertical', command=self._mlb.yview) + self._mlb.listboxes[0]['yscrollcommand'] = sb.set + # for listbox in self._mlb.listboxes: + # listbox['yscrollcommand'] = sb.set + sb.pack(side='right', fill='y') + self._scrollbar = sb + + # Set up sorting + self._sortkey = None + if click_to_sort: + for i, l in enumerate(self._mlb.column_labels): + l.bind('', self._sort) + + # Fill in our multi-list box. + self._fill_table() + + # ///////////////////////////////////////////////////////////////// + # { Widget-like Methods + # ///////////////////////////////////////////////////////////////// + # These all just delegate to either our frame or our MLB. + + def pack(self, *args, **kwargs): + """Position this table's main frame widget in its parent + widget. See ``Tkinter.Frame.pack()`` for more info.""" + self._frame.pack(*args, **kwargs) + + def grid(self, *args, **kwargs): + """Position this table's main frame widget in its parent + widget. See ``Tkinter.Frame.grid()`` for more info.""" + self._frame.grid(*args, **kwargs) + + def focus(self): + """Direct (keyboard) input foxus to this widget.""" + self._mlb.focus() + + def bind(self, sequence=None, func=None, add=None): + """Add a binding to this table's main frame that will call + ``func`` in response to the event sequence.""" + self._mlb.bind(sequence, func, add) + + def rowconfigure(self, row_index, cnf={}, **kw): + """:see: ``MultiListbox.rowconfigure()``""" + self._mlb.rowconfigure(row_index, cnf, **kw) + + def columnconfigure(self, col_index, cnf={}, **kw): + """:see: ``MultiListbox.columnconfigure()``""" + col_index = self.column_index(col_index) + self._mlb.columnconfigure(col_index, cnf, **kw) + + def itemconfigure(self, row_index, col_index, cnf=None, **kw): + """:see: ``MultiListbox.itemconfigure()``""" + col_index = self.column_index(col_index) + return self._mlb.itemconfigure(row_index, col_index, cnf, **kw) + + def bind_to_labels(self, sequence=None, func=None, add=None): + """:see: ``MultiListbox.bind_to_labels()``""" + return self._mlb.bind_to_labels(sequence, func, add) + + def bind_to_listboxes(self, sequence=None, func=None, add=None): + """:see: ``MultiListbox.bind_to_listboxes()``""" + return self._mlb.bind_to_listboxes(sequence, func, add) + + def bind_to_columns(self, sequence=None, func=None, add=None): + """:see: ``MultiListbox.bind_to_columns()``""" + return self._mlb.bind_to_columns(sequence, func, add) + + rowconfig = rowconfigure + columnconfig = columnconfigure + itemconfig = itemconfigure + + # ///////////////////////////////////////////////////////////////// + # { Table as list-of-lists + # ///////////////////////////////////////////////////////////////// + + def insert(self, row_index, rowvalue): + """ + Insert a new row into the table, so that its row index will be + ``row_index``. If the table contains any rows whose row index + is greater than or equal to ``row_index``, then they will be + shifted down. + + :param rowvalue: A tuple of cell values, one for each column + in the new row. 
+ """ + self._checkrow(rowvalue) + self._rows.insert(row_index, rowvalue) + if self._reprfunc is not None: + rowvalue = [ + self._reprfunc(row_index, j, v) for (j, v) in enumerate(rowvalue) + ] + self._mlb.insert(row_index, rowvalue) + if self._DEBUG: + self._check_table_vs_mlb() + + def extend(self, rowvalues): + """ + Add new rows at the end of the table. + + :param rowvalues: A list of row values used to initialze the + table. Each row value should be a tuple of cell values, + one for each column in the row. + """ + for rowvalue in rowvalues: + self.append(rowvalue) + if self._DEBUG: + self._check_table_vs_mlb() + + def append(self, rowvalue): + """ + Add a new row to the end of the table. + + :param rowvalue: A tuple of cell values, one for each column + in the new row. + """ + self.insert(len(self._rows), rowvalue) + if self._DEBUG: + self._check_table_vs_mlb() + + def clear(self): + """ + Delete all rows in this table. + """ + self._rows = [] + self._mlb.delete(0, 'end') + if self._DEBUG: + self._check_table_vs_mlb() + + def __getitem__(self, index): + """ + Return the value of a row or a cell in this table. If + ``index`` is an integer, then the row value for the ``index``th + row. This row value consists of a tuple of cell values, one + for each column in the row. If ``index`` is a tuple of two + integers, ``(i,j)``, then return the value of the cell in the + ``i``th row and the ``j``th column. + """ + if isinstance(index, slice): + raise ValueError('Slicing not supported') + elif isinstance(index, tuple) and len(index) == 2: + return self._rows[index[0]][self.column_index(index[1])] + else: + return tuple(self._rows[index]) + + def __setitem__(self, index, val): + """ + Replace the value of a row or a cell in this table with + ``val``. + + If ``index`` is an integer, then ``val`` should be a row value + (i.e., a tuple of cell values, one for each column). In this + case, the values of the ``index``th row of the table will be + replaced with the values in ``val``. + + If ``index`` is a tuple of integers, ``(i,j)``, then replace the + value of the cell in the ``i``th row and ``j``th column with + ``val``. + """ + if isinstance(index, slice): + raise ValueError('Slicing not supported') + + # table[i,j] = val + elif isinstance(index, tuple) and len(index) == 2: + i, j = index[0], self.column_index(index[1]) + config_cookie = self._save_config_info([i]) + self._rows[i][j] = val + if self._reprfunc is not None: + val = self._reprfunc(i, j, val) + self._mlb.listboxes[j].insert(i, val) + self._mlb.listboxes[j].delete(i + 1) + self._restore_config_info(config_cookie) + + # table[i] = val + else: + config_cookie = self._save_config_info([index]) + self._checkrow(val) + self._rows[index] = list(val) + if self._reprfunc is not None: + val = [self._reprfunc(index, j, v) for (j, v) in enumerate(val)] + self._mlb.insert(index, val) + self._mlb.delete(index + 1) + self._restore_config_info(config_cookie) + + def __delitem__(self, row_index): + """ + Delete the ``row_index``th row from this table. + """ + if isinstance(row_index, slice): + raise ValueError('Slicing not supported') + if isinstance(row_index, tuple) and len(row_index) == 2: + raise ValueError('Cannot delete a single cell!') + del self._rows[row_index] + self._mlb.delete(row_index) + if self._DEBUG: + self._check_table_vs_mlb() + + def __len__(self): + """ + :return: the number of rows in this table. 
+ """ + return len(self._rows) + + def _checkrow(self, rowvalue): + """ + Helper function: check that a given row value has the correct + number of elements; and if not, raise an exception. + """ + if len(rowvalue) != self._num_columns: + raise ValueError( + 'Row %r has %d columns; expected %d' + % (rowvalue, len(rowvalue), self._num_columns) + ) + + # ///////////////////////////////////////////////////////////////// + # Columns + # ///////////////////////////////////////////////////////////////// + + @property + def column_names(self): + """A list of the names of the columns in this table.""" + return self._mlb.column_names + + def column_index(self, i): + """ + If ``i`` is a valid column index integer, then return it as is. + Otherwise, check if ``i`` is used as the name for any column; + if so, return that column's index. Otherwise, raise a + ``KeyError`` exception. + """ + if isinstance(i, int) and 0 <= i < self._num_columns: + return i + else: + # This raises a key error if the column is not found. + return self._column_name_to_index[i] + + def hide_column(self, column_index): + """:see: ``MultiListbox.hide_column()``""" + self._mlb.hide_column(self.column_index(column_index)) + + def show_column(self, column_index): + """:see: ``MultiListbox.show_column()``""" + self._mlb.show_column(self.column_index(column_index)) + + # ///////////////////////////////////////////////////////////////// + # Selection + # ///////////////////////////////////////////////////////////////// + + def selected_row(self): + """ + Return the index of the currently selected row, or None if + no row is selected. To get the row value itself, use + ``table[table.selected_row()]``. + """ + sel = self._mlb.curselection() + if sel: + return int(sel[0]) + else: + return None + + def select(self, index=None, delta=None, see=True): + """:see: ``MultiListbox.select()``""" + self._mlb.select(index, delta, see) + + # ///////////////////////////////////////////////////////////////// + # Sorting + # ///////////////////////////////////////////////////////////////// + + def sort_by(self, column_index, order='toggle'): + """ + Sort the rows in this table, using the specified column's + values as a sort key. + + :param column_index: Specifies which column to sort, using + either a column index (int) or a column's label name + (str). + + :param order: Specifies whether to sort the values in + ascending or descending order: + + - ``'ascending'``: Sort from least to greatest. + - ``'descending'``: Sort from greatest to least. + - ``'toggle'``: If the most recent call to ``sort_by()`` + sorted the table by the same column (``column_index``), + then reverse the rows; otherwise sort in ascending + order. + """ + if order not in ('ascending', 'descending', 'toggle'): + raise ValueError( + 'sort_by(): order should be "ascending", ' '"descending", or "toggle".' + ) + column_index = self.column_index(column_index) + config_cookie = self._save_config_info(index_by_id=True) + + # Sort the rows. + if order == 'toggle' and column_index == self._sortkey: + self._rows.reverse() + else: + self._rows.sort( + key=operator.itemgetter(column_index), reverse=(order == 'descending') + ) + self._sortkey = column_index + + # Redraw the table. 
+ self._fill_table() + self._restore_config_info(config_cookie, index_by_id=True, see=True) + if self._DEBUG: + self._check_table_vs_mlb() + + def _sort(self, event): + """Event handler for clicking on a column label -- sort by + that column.""" + column_index = event.widget.column_index + + # If they click on the far-left of far-right of a column's + # label, then resize rather than sorting. + if self._mlb._resize_column(event): + return 'continue' + + # Otherwise, sort. + else: + self.sort_by(column_index) + return 'continue' + + # ///////////////////////////////////////////////////////////////// + # { Table Drawing Helpers + # ///////////////////////////////////////////////////////////////// + + def _fill_table(self, save_config=True): + """ + Re-draw the table from scratch, by clearing out the table's + multi-column listbox; and then filling it in with values from + ``self._rows``. Note that any cell-, row-, or column-specific + color configuration that has been done will be lost. The + selection will also be lost -- i.e., no row will be selected + after this call completes. + """ + self._mlb.delete(0, 'end') + for i, row in enumerate(self._rows): + if self._reprfunc is not None: + row = [self._reprfunc(i, j, v) for (j, v) in enumerate(row)] + self._mlb.insert('end', row) + + def _get_itemconfig(self, r, c): + return dict( + (k, self._mlb.itemconfig(r, c, k)[-1]) + for k in ( + 'foreground', + 'selectforeground', + 'background', + 'selectbackground', + ) + ) + + def _save_config_info(self, row_indices=None, index_by_id=False): + """ + Return a 'cookie' containing information about which row is + selected, and what color configurations have been applied. + this information can the be re-applied to the table (after + making modifications) using ``_restore_config_info()``. Color + configuration information will be saved for any rows in + ``row_indices``, or in the entire table, if + ``row_indices=None``. If ``index_by_id=True``, the the cookie + will associate rows with their configuration information based + on the rows' python id. This is useful when performing + operations that re-arrange the rows (e.g. ``sort``). If + ``index_by_id=False``, then it is assumed that all rows will be + in the same order when ``_restore_config_info()`` is called. + """ + # Default value for row_indices is all rows. + if row_indices is None: + row_indices = list(range(len(self._rows))) + + # Look up our current selection. + selection = self.selected_row() + if index_by_id and selection is not None: + selection = id(self._rows[selection]) + + # Look up the color configuration info for each row. + if index_by_id: + config = dict( + ( + id(self._rows[r]), + [self._get_itemconfig(r, c) for c in range(self._num_columns)], + ) + for r in row_indices + ) + else: + config = dict( + (r, [self._get_itemconfig(r, c) for c in range(self._num_columns)]) + for r in row_indices + ) + + return selection, config + + def _restore_config_info(self, cookie, index_by_id=False, see=False): + """ + Restore selection & color configuration information that was + saved using ``_save_config_info``. + """ + selection, config = cookie + + # Clear the selection. 
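# Sketch of the save / redraw / restore pattern these helpers implement; this
# mirrors what sort_by() does internally and is not public API (continues the
# ``table`` sketch above):
cookie = table._save_config_info(index_by_id=True)
table._rows.reverse()                   # any operation that reorders the rows
table._fill_table()                     # redrawing loses colours and selection...
table._restore_config_info(cookie, index_by_id=True, see=True)   # ...restore them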
+ if selection is None: + self._mlb.selection_clear(0, 'end') + + # Restore selection & color config + if index_by_id: + for r, row in enumerate(self._rows): + if id(row) in config: + for c in range(self._num_columns): + self._mlb.itemconfigure(r, c, config[id(row)][c]) + if id(row) == selection: + self._mlb.select(r, see=see) + else: + if selection is not None: + self._mlb.select(selection, see=see) + for r in config: + for c in range(self._num_columns): + self._mlb.itemconfigure(r, c, config[r][c]) + + # ///////////////////////////////////////////////////////////////// + # Debugging (Invariant Checker) + # ///////////////////////////////////////////////////////////////// + + _DEBUG = False + """If true, then run ``_check_table_vs_mlb()`` after any operation + that modifies the table.""" + + def _check_table_vs_mlb(self): + """ + Verify that the contents of the table's ``_rows`` variable match + the contents of its multi-listbox (``_mlb``). This is just + included for debugging purposes, to make sure that the + list-modifying operations are working correctly. + """ + for col in self._mlb.listboxes: + assert len(self) == col.size() + for row in self: + assert len(row) == self._num_columns + assert self._num_columns == len(self._mlb.column_names) + # assert self._column_names == self._mlb.column_names + for i, row in enumerate(self): + for j, cell in enumerate(row): + if self._reprfunc is not None: + cell = self._reprfunc(i, j, cell) + assert self._mlb.get(i)[j] == cell + + +###################################################################### +# Demo/Test Function +###################################################################### + +# update this to use new WordNet API +def demo(): + root = Tk() + root.bind('', lambda e: root.destroy()) + + table = Table( + root, + 'Word Synset Hypernym Hyponym'.split(), + column_weights=[0, 1, 1, 1], + reprfunc=(lambda i, j, s: ' %s' % s), + ) + table.pack(expand=True, fill='both') + + from nltk.corpus import wordnet + from nltk.corpus import brown + + for word, pos in sorted(set(brown.tagged_words()[:500])): + if pos[0] != 'N': + continue + word = word.lower() + for synset in wordnet.synsets(word): + try: + hyper_def = synset.hypernyms()[0].definition() + except: + hyper_def = '*none*' + try: + hypo_def = synset.hypernyms()[0].definition() + except: + hypo_def = '*none*' + table.append([word, synset.definition(), hyper_def, hypo_def]) + + table.columnconfig('Word', background='#afa') + table.columnconfig('Synset', background='#efe') + table.columnconfig('Hypernym', background='#fee') + table.columnconfig('Hyponym', background='#ffe') + for row in range(len(table)): + for column in ('Hypernym', 'Hyponym'): + if table[row, column] == '*none*': + table.itemconfig( + row, column, foreground='#666', selectforeground='#666' + ) + root.mainloop() + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/draw/tree.py b/venv.bak/lib/python3.7/site-packages/nltk/draw/tree.py new file mode 100644 index 0000000..8124f5e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/draw/tree.py @@ -0,0 +1,1129 @@ +# Natural Language Toolkit: Graphical Representations for Trees +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +Graphically display a Tree. 
+""" + +from six.moves.tkinter import IntVar, Menu, Tk + +from nltk.util import in_idle +from nltk.tree import Tree +from nltk.draw.util import ( + CanvasFrame, + CanvasWidget, + BoxWidget, + TextWidget, + ParenWidget, + OvalWidget, +) + +##////////////////////////////////////////////////////// +## Tree Segment +##////////////////////////////////////////////////////// + + +class TreeSegmentWidget(CanvasWidget): + """ + A canvas widget that displays a single segment of a hierarchical + tree. Each ``TreeSegmentWidget`` connects a single "node widget" + to a sequence of zero or more "subtree widgets". By default, the + bottom of the node is connected to the top of each subtree by a + single line. However, if the ``roof`` attribute is set, then a + single triangular "roof" will connect the node to all of its + children. + + Attributes: + - ``roof``: What sort of connection to draw between the node and + its subtrees. If ``roof`` is true, draw a single triangular + "roof" over the subtrees. If ``roof`` is false, draw a line + between each subtree and the node. Default value is false. + - ``xspace``: The amount of horizontal space to leave between + subtrees when managing this widget. Default value is 10. + - ``yspace``: The amount of space to place between the node and + its children when managing this widget. Default value is 15. + - ``color``: The color of the lines connecting the node to its + subtrees; and of the outline of the triangular roof. Default + value is ``'#006060'``. + - ``fill``: The fill color for the triangular roof. Default + value is ``''`` (no fill). + - ``width``: The width of the lines connecting the node to its + subtrees; and of the outline of the triangular roof. Default + value is 1. + - ``orientation``: Determines whether the tree branches downwards + or rightwards. Possible values are ``'horizontal'`` and + ``'vertical'``. The default value is ``'vertical'`` (i.e., + branch downwards). + - ``draggable``: whether the widget can be dragged by the user. + """ + + def __init__(self, canvas, label, subtrees, **attribs): + """ + :type node: + :type subtrees: list(CanvasWidgetI) + """ + self._label = label + self._subtrees = subtrees + + # Attributes + self._horizontal = 0 + self._roof = 0 + self._xspace = 10 + self._yspace = 15 + self._ordered = False + + # Create canvas objects. + self._lines = [canvas.create_line(0, 0, 0, 0, fill='#006060') for c in subtrees] + self._polygon = canvas.create_polygon( + 0, 0, fill='', state='hidden', outline='#006060' + ) + + # Register child widgets (label + subtrees) + self._add_child_widget(label) + for subtree in subtrees: + self._add_child_widget(subtree) + + # Are we currently managing? 
+ self._managing = False + + CanvasWidget.__init__(self, canvas, **attribs) + + def __setitem__(self, attr, value): + canvas = self.canvas() + if attr == 'roof': + self._roof = value + if self._roof: + for l in self._lines: + canvas.itemconfig(l, state='hidden') + canvas.itemconfig(self._polygon, state='normal') + else: + for l in self._lines: + canvas.itemconfig(l, state='normal') + canvas.itemconfig(self._polygon, state='hidden') + elif attr == 'orientation': + if value == 'horizontal': + self._horizontal = 1 + elif value == 'vertical': + self._horizontal = 0 + else: + raise ValueError('orientation must be horizontal or vertical') + elif attr == 'color': + for l in self._lines: + canvas.itemconfig(l, fill=value) + canvas.itemconfig(self._polygon, outline=value) + elif isinstance(attr, tuple) and attr[0] == 'color': + # Set the color of an individual line. + l = self._lines[int(attr[1])] + canvas.itemconfig(l, fill=value) + elif attr == 'fill': + canvas.itemconfig(self._polygon, fill=value) + elif attr == 'width': + canvas.itemconfig(self._polygon, {attr: value}) + for l in self._lines: + canvas.itemconfig(l, {attr: value}) + elif attr in ('xspace', 'yspace'): + if attr == 'xspace': + self._xspace = value + elif attr == 'yspace': + self._yspace = value + self.update(self._label) + elif attr == 'ordered': + self._ordered = value + else: + CanvasWidget.__setitem__(self, attr, value) + + def __getitem__(self, attr): + if attr == 'roof': + return self._roof + elif attr == 'width': + return self.canvas().itemcget(self._polygon, attr) + elif attr == 'color': + return self.canvas().itemcget(self._polygon, 'outline') + elif isinstance(attr, tuple) and attr[0] == 'color': + l = self._lines[int(attr[1])] + return self.canvas().itemcget(l, 'fill') + elif attr == 'xspace': + return self._xspace + elif attr == 'yspace': + return self._yspace + elif attr == 'orientation': + if self._horizontal: + return 'horizontal' + else: + return 'vertical' + elif attr == 'ordered': + return self._ordered + else: + return CanvasWidget.__getitem__(self, attr) + + def label(self): + return self._label + + def subtrees(self): + return self._subtrees[:] + + def set_label(self, label): + """ + Set the node label to ``label``. + """ + self._remove_child_widget(self._label) + self._add_child_widget(label) + self._label = label + self.update(self._label) + + def replace_child(self, oldchild, newchild): + """ + Replace the child ``oldchild`` with ``newchild``. + """ + index = self._subtrees.index(oldchild) + self._subtrees[index] = newchild + self._remove_child_widget(oldchild) + self._add_child_widget(newchild) + self.update(newchild) + + def remove_child(self, child): + index = self._subtrees.index(child) + del self._subtrees[index] + self._remove_child_widget(child) + self.canvas().delete(self._lines.pop()) + self.update(self._label) + + def insert_child(self, index, child): + canvas = self.canvas() + self._subtrees.insert(index, child) + self._add_child_widget(child) + self._lines.append(canvas.create_line(0, 0, 0, 0, fill='#006060')) + self.update(self._label) + + # but.. lines??? 
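# Attribute / structure sketch (continues the ``seg`` sketch above):
seg['color'] = '#880000'                    # recolour all connecting lines
seg['color', 0] = 'blue'                    # ...or only the line to child 0
print(seg['orientation'], seg.subtrees())   # 'vertical' by default
seg.insert_child(1, TextWidget(cf.canvas(), 'big'))   # add a child and its line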
+ + def _tags(self): + if self._roof: + return [self._polygon] + else: + return self._lines + + def _subtree_top(self, child): + if isinstance(child, TreeSegmentWidget): + bbox = child.label().bbox() + else: + bbox = child.bbox() + if self._horizontal: + return (bbox[0], (bbox[1] + bbox[3]) / 2.0) + else: + return ((bbox[0] + bbox[2]) / 2.0, bbox[1]) + + def _node_bottom(self): + bbox = self._label.bbox() + if self._horizontal: + return (bbox[2], (bbox[1] + bbox[3]) / 2.0) + else: + return ((bbox[0] + bbox[2]) / 2.0, bbox[3]) + + def _update(self, child): + if len(self._subtrees) == 0: + return + if self._label.bbox() is None: + return # [XX] ??? + + # Which lines need to be redrawn? + if child is self._label: + need_update = self._subtrees + else: + need_update = [child] + + if self._ordered and not self._managing: + need_update = self._maintain_order(child) + + # Update the polygon. + (nodex, nodey) = self._node_bottom() + (xmin, ymin, xmax, ymax) = self._subtrees[0].bbox() + for subtree in self._subtrees[1:]: + bbox = subtree.bbox() + xmin = min(xmin, bbox[0]) + ymin = min(ymin, bbox[1]) + xmax = max(xmax, bbox[2]) + ymax = max(ymax, bbox[3]) + + if self._horizontal: + self.canvas().coords( + self._polygon, nodex, nodey, xmin, ymin, xmin, ymax, nodex, nodey + ) + else: + self.canvas().coords( + self._polygon, nodex, nodey, xmin, ymin, xmax, ymin, nodex, nodey + ) + + # Redraw all lines that need it. + for subtree in need_update: + (nodex, nodey) = self._node_bottom() + line = self._lines[self._subtrees.index(subtree)] + (subtreex, subtreey) = self._subtree_top(subtree) + self.canvas().coords(line, nodex, nodey, subtreex, subtreey) + + def _maintain_order(self, child): + if self._horizontal: + return self._maintain_order_horizontal(child) + else: + return self._maintain_order_vertical(child) + + def _maintain_order_vertical(self, child): + (left, top, right, bot) = child.bbox() + + if child is self._label: + # Check all the leaves + for subtree in self._subtrees: + (x1, y1, x2, y2) = subtree.bbox() + if bot + self._yspace > y1: + subtree.move(0, bot + self._yspace - y1) + + return self._subtrees + else: + moved = [child] + index = self._subtrees.index(child) + + # Check leaves to our right. + x = right + self._xspace + for i in range(index + 1, len(self._subtrees)): + (x1, y1, x2, y2) = self._subtrees[i].bbox() + if x > x1: + self._subtrees[i].move(x - x1, 0) + x += x2 - x1 + self._xspace + moved.append(self._subtrees[i]) + + # Check leaves to our left. + x = left - self._xspace + for i in range(index - 1, -1, -1): + (x1, y1, x2, y2) = self._subtrees[i].bbox() + if x < x2: + self._subtrees[i].move(x - x2, 0) + x -= x2 - x1 + self._xspace + moved.append(self._subtrees[i]) + + # Check the node + (x1, y1, x2, y2) = self._label.bbox() + if y2 > top - self._yspace: + self._label.move(0, top - self._yspace - y2) + moved = self._subtrees + + # Return a list of the nodes we moved + return moved + + def _maintain_order_horizontal(self, child): + (left, top, right, bot) = child.bbox() + + if child is self._label: + # Check all the leaves + for subtree in self._subtrees: + (x1, y1, x2, y2) = subtree.bbox() + if right + self._xspace > x1: + subtree.move(right + self._xspace - x1) + + return self._subtrees + else: + moved = [child] + index = self._subtrees.index(child) + + # Check leaves below us. 
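# The ``ordered`` attribute switches on the _maintain_order_* bookkeeping above:
# when a draggable child is moved, its siblings are pushed aside instead of
# being overlapped (continues the ``seg`` sketch; per-child dragging assumed):
seg['ordered'] = True
for leaf in seg.subtrees():
    leaf['draggable'] = 1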
+ y = bot + self._yspace + for i in range(index + 1, len(self._subtrees)): + (x1, y1, x2, y2) = self._subtrees[i].bbox() + if y > y1: + self._subtrees[i].move(0, y - y1) + y += y2 - y1 + self._yspace + moved.append(self._subtrees[i]) + + # Check leaves above us + y = top - self._yspace + for i in range(index - 1, -1, -1): + (x1, y1, x2, y2) = self._subtrees[i].bbox() + if y < y2: + self._subtrees[i].move(0, y - y2) + y -= y2 - y1 + self._yspace + moved.append(self._subtrees[i]) + + # Check the node + (x1, y1, x2, y2) = self._label.bbox() + if x2 > left - self._xspace: + self._label.move(left - self._xspace - x2, 0) + moved = self._subtrees + + # Return a list of the nodes we moved + return moved + + def _manage_horizontal(self): + (nodex, nodey) = self._node_bottom() + + # Put the subtrees in a line. + y = 20 + for subtree in self._subtrees: + subtree_bbox = subtree.bbox() + dx = nodex - subtree_bbox[0] + self._xspace + dy = y - subtree_bbox[1] + subtree.move(dx, dy) + y += subtree_bbox[3] - subtree_bbox[1] + self._yspace + + # Find the center of their tops. + center = 0.0 + for subtree in self._subtrees: + center += self._subtree_top(subtree)[1] + center /= len(self._subtrees) + + # Center the subtrees with the node. + for subtree in self._subtrees: + subtree.move(0, nodey - center) + + def _manage_vertical(self): + (nodex, nodey) = self._node_bottom() + + # Put the subtrees in a line. + x = 0 + for subtree in self._subtrees: + subtree_bbox = subtree.bbox() + dy = nodey - subtree_bbox[1] + self._yspace + dx = x - subtree_bbox[0] + subtree.move(dx, dy) + x += subtree_bbox[2] - subtree_bbox[0] + self._xspace + + # Find the center of their tops. + center = 0.0 + for subtree in self._subtrees: + center += self._subtree_top(subtree)[0] / len(self._subtrees) + + # Center the subtrees with the node. + for subtree in self._subtrees: + subtree.move(nodex - center, 0) + + def _manage(self): + self._managing = True + (nodex, nodey) = self._node_bottom() + if len(self._subtrees) == 0: + return + + if self._horizontal: + self._manage_horizontal() + else: + self._manage_vertical() + + # Update lines to subtrees. + for subtree in self._subtrees: + self._update(subtree) + + self._managing = False + + def __repr__(self): + return '[TreeSeg %s: %s]' % (self._label, self._subtrees) + + +def _tree_to_treeseg( + canvas, + t, + make_node, + make_leaf, + tree_attribs, + node_attribs, + leaf_attribs, + loc_attribs, +): + if isinstance(t, Tree): + label = make_node(canvas, t.label(), **node_attribs) + subtrees = [ + _tree_to_treeseg( + canvas, + child, + make_node, + make_leaf, + tree_attribs, + node_attribs, + leaf_attribs, + loc_attribs, + ) + for child in t + ] + return TreeSegmentWidget(canvas, label, subtrees, **tree_attribs) + else: + return make_leaf(canvas, t, **leaf_attribs) + + +def tree_to_treesegment( + canvas, t, make_node=TextWidget, make_leaf=TextWidget, **attribs +): + """ + Convert a Tree into a ``TreeSegmentWidget``. + + :param make_node: A ``CanvasWidget`` constructor or a function that + creates ``CanvasWidgets``. ``make_node`` is used to convert + the Tree's nodes into ``CanvasWidgets``. If no constructor + is specified, then ``TextWidget`` will be used. + :param make_leaf: A ``CanvasWidget`` constructor or a function that + creates ``CanvasWidgets``. ``make_leaf`` is used to convert + the Tree's leafs into ``CanvasWidgets``. If no constructor + is specified, then ``TextWidget`` will be used. 
+ :param attribs: Attributes for the canvas widgets that make up the + returned ``TreeSegmentWidget``. Any attribute beginning with + ``'tree_'`` will be passed to all ``TreeSegmentWidgets`` (with + the ``'tree_'`` prefix removed. Any attribute beginning with + ``'node_'`` will be passed to all nodes. Any attribute + beginning with ``'leaf_'`` will be passed to all leaves. And + any attribute beginning with ``'loc_'`` will be passed to all + text locations (for Trees). + """ + # Process attribs. + tree_attribs = {} + node_attribs = {} + leaf_attribs = {} + loc_attribs = {} + + for (key, value) in list(attribs.items()): + if key[:5] == 'tree_': + tree_attribs[key[5:]] = value + elif key[:5] == 'node_': + node_attribs[key[5:]] = value + elif key[:5] == 'leaf_': + leaf_attribs[key[5:]] = value + elif key[:4] == 'loc_': + loc_attribs[key[4:]] = value + else: + raise ValueError('Bad attribute: %s' % key) + return _tree_to_treeseg( + canvas, + t, + make_node, + make_leaf, + tree_attribs, + node_attribs, + leaf_attribs, + loc_attribs, + ) + + +##////////////////////////////////////////////////////// +## Tree Widget +##////////////////////////////////////////////////////// + + +class TreeWidget(CanvasWidget): + """ + A canvas widget that displays a single Tree. + ``TreeWidget`` manages a group of ``TreeSegmentWidgets`` that are + used to display a Tree. + + Attributes: + + - ``node_attr``: Sets the attribute ``attr`` on all of the + node widgets for this ``TreeWidget``. + - ``node_attr``: Sets the attribute ``attr`` on all of the + leaf widgets for this ``TreeWidget``. + - ``loc_attr``: Sets the attribute ``attr`` on all of the + location widgets for this ``TreeWidget`` (if it was built from + a Tree). Note that a location widget is a ``TextWidget``. + + - ``xspace``: The amount of horizontal space to leave between + subtrees when managing this widget. Default value is 10. + - ``yspace``: The amount of space to place between the node and + its children when managing this widget. Default value is 15. + + - ``line_color``: The color of the lines connecting each expanded + node to its subtrees. + - ``roof_color``: The color of the outline of the triangular roof + for collapsed trees. + - ``roof_fill``: The fill color for the triangular roof for + collapsed trees. + - ``width`` + + - ``orientation``: Determines whether the tree branches downwards + or rightwards. Possible values are ``'horizontal'`` and + ``'vertical'``. The default value is ``'vertical'`` (i.e., + branch downwards). + + - ``shapeable``: whether the subtrees can be independently + dragged by the user. THIS property simply sets the + ``DRAGGABLE`` property on all of the ``TreeWidget``'s tree + segments. + - ``draggable``: whether the widget can be dragged by the user. + """ + + def __init__( + self, canvas, t, make_node=TextWidget, make_leaf=TextWidget, **attribs + ): + # Node & leaf canvas widget constructors + self._make_node = make_node + self._make_leaf = make_leaf + self._tree = t + + # Attributes. + self._nodeattribs = {} + self._leafattribs = {} + self._locattribs = {'color': '#008000'} + self._line_color = '#008080' + self._line_width = 1 + self._roof_color = '#008080' + self._roof_fill = '#c0c0c0' + self._shapeable = False + self._xspace = 10 + self._yspace = 10 + self._orientation = 'vertical' + self._ordered = False + + # Build trees. 
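# TreeWidget sketch mirroring the demo at the bottom of this module (continues
# the ``cf`` sketch above; tree string, fonts and colours are illustrative):
t = Tree.fromstring('(S (NP the dog) (VP barked))')
tw = TreeWidget(cf.canvas(), t,
                node_font=('helvetica', -14, 'bold'),
                node_color='blue2', leaf_color='green4',
                roof_color='black', roof_fill='white',
                line_color='#004040', draggable=1)
cf.add_widget(tw, 10, 120)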
+ self._keys = {} # treeseg -> key + self._expanded_trees = {} + self._collapsed_trees = {} + self._nodes = [] + self._leaves = [] + # self._locs = [] + self._make_collapsed_trees(canvas, t, ()) + self._treeseg = self._make_expanded_tree(canvas, t, ()) + self._add_child_widget(self._treeseg) + + CanvasWidget.__init__(self, canvas, **attribs) + + def expanded_tree(self, *path_to_tree): + """ + Return the ``TreeSegmentWidget`` for the specified subtree. + + :param path_to_tree: A list of indices i1, i2, ..., in, where + the desired widget is the widget corresponding to + ``tree.children()[i1].children()[i2]....children()[in]``. + For the root, the path is ``()``. + """ + return self._expanded_trees[path_to_tree] + + def collapsed_tree(self, *path_to_tree): + """ + Return the ``TreeSegmentWidget`` for the specified subtree. + + :param path_to_tree: A list of indices i1, i2, ..., in, where + the desired widget is the widget corresponding to + ``tree.children()[i1].children()[i2]....children()[in]``. + For the root, the path is ``()``. + """ + return self._collapsed_trees[path_to_tree] + + def bind_click_trees(self, callback, button=1): + """ + Add a binding to all tree segments. + """ + for tseg in list(self._expanded_trees.values()): + tseg.bind_click(callback, button) + for tseg in list(self._collapsed_trees.values()): + tseg.bind_click(callback, button) + + def bind_drag_trees(self, callback, button=1): + """ + Add a binding to all tree segments. + """ + for tseg in list(self._expanded_trees.values()): + tseg.bind_drag(callback, button) + for tseg in list(self._collapsed_trees.values()): + tseg.bind_drag(callback, button) + + def bind_click_leaves(self, callback, button=1): + """ + Add a binding to all leaves. + """ + for leaf in self._leaves: + leaf.bind_click(callback, button) + for leaf in self._leaves: + leaf.bind_click(callback, button) + + def bind_drag_leaves(self, callback, button=1): + """ + Add a binding to all leaves. + """ + for leaf in self._leaves: + leaf.bind_drag(callback, button) + for leaf in self._leaves: + leaf.bind_drag(callback, button) + + def bind_click_nodes(self, callback, button=1): + """ + Add a binding to all nodes. + """ + for node in self._nodes: + node.bind_click(callback, button) + for node in self._nodes: + node.bind_click(callback, button) + + def bind_drag_nodes(self, callback, button=1): + """ + Add a binding to all nodes. + """ + for node in self._nodes: + node.bind_drag(callback, button) + for node in self._nodes: + node.bind_drag(callback, button) + + def _make_collapsed_trees(self, canvas, t, key): + if not isinstance(t, Tree): + return + make_node = self._make_node + make_leaf = self._make_leaf + + node = make_node(canvas, t.label(), **self._nodeattribs) + self._nodes.append(node) + leaves = [make_leaf(canvas, l, **self._leafattribs) for l in t.leaves()] + self._leaves += leaves + treeseg = TreeSegmentWidget( + canvas, + node, + leaves, + roof=1, + color=self._roof_color, + fill=self._roof_fill, + width=self._line_width, + ) + + self._collapsed_trees[key] = treeseg + self._keys[treeseg] = key + # self._add_child_widget(treeseg) + treeseg.hide() + + # Build trees for children. 
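# Path-indexing sketch: expanded_tree()/collapsed_tree() take a sequence of
# child indices, and clicking can toggle between the two forms (continues the
# ``tw`` sketch above):
tw.bind_click_trees(tw.toggle_collapsed)   # click any node to collapse/expand
vp_seg = tw.expanded_tree(1)               # segment for t[1], the VP subtree
root_seg = tw.expanded_tree()              # empty path -> the whole tree
vp_seg['color'] = 'red'                    # restyle just that segment's lines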
+ for i in range(len(t)): + child = t[i] + self._make_collapsed_trees(canvas, child, key + (i,)) + + def _make_expanded_tree(self, canvas, t, key): + make_node = self._make_node + make_leaf = self._make_leaf + + if isinstance(t, Tree): + node = make_node(canvas, t.label(), **self._nodeattribs) + self._nodes.append(node) + children = t + subtrees = [ + self._make_expanded_tree(canvas, children[i], key + (i,)) + for i in range(len(children)) + ] + treeseg = TreeSegmentWidget( + canvas, node, subtrees, color=self._line_color, width=self._line_width + ) + self._expanded_trees[key] = treeseg + self._keys[treeseg] = key + return treeseg + else: + leaf = make_leaf(canvas, t, **self._leafattribs) + self._leaves.append(leaf) + return leaf + + def __setitem__(self, attr, value): + if attr[:5] == 'node_': + for node in self._nodes: + node[attr[5:]] = value + elif attr[:5] == 'leaf_': + for leaf in self._leaves: + leaf[attr[5:]] = value + elif attr == 'line_color': + self._line_color = value + for tseg in list(self._expanded_trees.values()): + tseg['color'] = value + elif attr == 'line_width': + self._line_width = value + for tseg in list(self._expanded_trees.values()): + tseg['width'] = value + for tseg in list(self._collapsed_trees.values()): + tseg['width'] = value + elif attr == 'roof_color': + self._roof_color = value + for tseg in list(self._collapsed_trees.values()): + tseg['color'] = value + elif attr == 'roof_fill': + self._roof_fill = value + for tseg in list(self._collapsed_trees.values()): + tseg['fill'] = value + elif attr == 'shapeable': + self._shapeable = value + for tseg in list(self._expanded_trees.values()): + tseg['draggable'] = value + for tseg in list(self._collapsed_trees.values()): + tseg['draggable'] = value + for leaf in self._leaves: + leaf['draggable'] = value + elif attr == 'xspace': + self._xspace = value + for tseg in list(self._expanded_trees.values()): + tseg['xspace'] = value + for tseg in list(self._collapsed_trees.values()): + tseg['xspace'] = value + self.manage() + elif attr == 'yspace': + self._yspace = value + for tseg in list(self._expanded_trees.values()): + tseg['yspace'] = value + for tseg in list(self._collapsed_trees.values()): + tseg['yspace'] = value + self.manage() + elif attr == 'orientation': + self._orientation = value + for tseg in list(self._expanded_trees.values()): + tseg['orientation'] = value + for tseg in list(self._collapsed_trees.values()): + tseg['orientation'] = value + self.manage() + elif attr == 'ordered': + self._ordered = value + for tseg in list(self._expanded_trees.values()): + tseg['ordered'] = value + for tseg in list(self._collapsed_trees.values()): + tseg['ordered'] = value + else: + CanvasWidget.__setitem__(self, attr, value) + + def __getitem__(self, attr): + if attr[:5] == 'node_': + return self._nodeattribs.get(attr[5:], None) + elif attr[:5] == 'leaf_': + return self._leafattribs.get(attr[5:], None) + elif attr[:4] == 'loc_': + return self._locattribs.get(attr[4:], None) + elif attr == 'line_color': + return self._line_color + elif attr == 'line_width': + return self._line_width + elif attr == 'roof_color': + return self._roof_color + elif attr == 'roof_fill': + return self._roof_fill + elif attr == 'shapeable': + return self._shapeable + elif attr == 'xspace': + return self._xspace + elif attr == 'yspace': + return self._yspace + elif attr == 'orientation': + return self._orientation + else: + return CanvasWidget.__getitem__(self, attr) + + def _tags(self): + return [] + + def _manage(self): + segs = 
list(self._expanded_trees.values()) + list( + self._collapsed_trees.values() + ) + for tseg in segs: + if tseg.hidden(): + tseg.show() + tseg.manage() + tseg.hide() + + def toggle_collapsed(self, treeseg): + """ + Collapse/expand a tree. + """ + old_treeseg = treeseg + if old_treeseg['roof']: + new_treeseg = self._expanded_trees[self._keys[old_treeseg]] + else: + new_treeseg = self._collapsed_trees[self._keys[old_treeseg]] + + # Replace the old tree with the new tree. + if old_treeseg.parent() is self: + self._remove_child_widget(old_treeseg) + self._add_child_widget(new_treeseg) + self._treeseg = new_treeseg + else: + old_treeseg.parent().replace_child(old_treeseg, new_treeseg) + + # Move the new tree to where the old tree was. Show it first, + # so we can find its bounding box. + new_treeseg.show() + (newx, newy) = new_treeseg.label().bbox()[:2] + (oldx, oldy) = old_treeseg.label().bbox()[:2] + new_treeseg.move(oldx - newx, oldy - newy) + + # Hide the old tree + old_treeseg.hide() + + # We could do parent.manage() here instead, if we wanted. + new_treeseg.parent().update(new_treeseg) + + +##////////////////////////////////////////////////////// +## draw_trees +##////////////////////////////////////////////////////// + + +class TreeView(object): + def __init__(self, *trees): + from math import sqrt, ceil + + self._trees = trees + + self._top = Tk() + self._top.title('NLTK') + self._top.bind('', self.destroy) + self._top.bind('', self.destroy) + + cf = self._cframe = CanvasFrame(self._top) + self._top.bind('', self._cframe.print_to_file) + + # Size is variable. + self._size = IntVar(self._top) + self._size.set(12) + bold = ('helvetica', -self._size.get(), 'bold') + helv = ('helvetica', -self._size.get()) + + # Lay the trees out in a square. + self._width = int(ceil(sqrt(len(trees)))) + self._widgets = [] + for i in range(len(trees)): + widget = TreeWidget( + cf.canvas(), + trees[i], + node_font=bold, + leaf_color='#008040', + node_color='#004080', + roof_color='#004040', + roof_fill='white', + line_color='#004040', + draggable=1, + leaf_font=helv, + ) + widget.bind_click_trees(widget.toggle_collapsed) + self._widgets.append(widget) + cf.add_widget(widget, 0, 0) + + self._layout() + self._cframe.pack(expand=1, fill='both') + self._init_menubar() + + def _layout(self): + i = x = y = ymax = 0 + width = self._width + for i in range(len(self._widgets)): + widget = self._widgets[i] + (oldx, oldy) = widget.bbox()[:2] + if i % width == 0: + y = ymax + x = 0 + widget.move(x - oldx, y - oldy) + x = widget.bbox()[2] + 10 + ymax = max(ymax, widget.bbox()[3] + 10) + + def _init_menubar(self): + menubar = Menu(self._top) + + filemenu = Menu(menubar, tearoff=0) + filemenu.add_command( + label='Print to Postscript', + underline=0, + command=self._cframe.print_to_file, + accelerator='Ctrl-p', + ) + filemenu.add_command( + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-x' + ) + menubar.add_cascade(label='File', underline=0, menu=filemenu) + + zoommenu = Menu(menubar, tearoff=0) + zoommenu.add_radiobutton( + label='Tiny', + variable=self._size, + underline=0, + value=10, + command=self.resize, + ) + zoommenu.add_radiobutton( + label='Small', + variable=self._size, + underline=0, + value=12, + command=self.resize, + ) + zoommenu.add_radiobutton( + label='Medium', + variable=self._size, + underline=0, + value=14, + command=self.resize, + ) + zoommenu.add_radiobutton( + label='Large', + variable=self._size, + underline=0, + value=28, + command=self.resize, + ) + zoommenu.add_radiobutton( + 
label='Huge', + variable=self._size, + underline=0, + value=50, + command=self.resize, + ) + menubar.add_cascade(label='Zoom', underline=0, menu=zoommenu) + + self._top.config(menu=menubar) + + def resize(self, *e): + bold = ('helvetica', -self._size.get(), 'bold') + helv = ('helvetica', -self._size.get()) + xspace = self._size.get() + yspace = self._size.get() + for widget in self._widgets: + widget['node_font'] = bold + widget['leaf_font'] = helv + widget['xspace'] = xspace + widget['yspace'] = yspace + if self._size.get() < 20: + widget['line_width'] = 1 + elif self._size.get() < 30: + widget['line_width'] = 2 + else: + widget['line_width'] = 3 + self._layout() + + def destroy(self, *e): + if self._top is None: + return + self._top.destroy() + self._top = None + + def mainloop(self, *args, **kwargs): + """ + Enter the Tkinter mainloop. This function must be called if + this demo is created from a non-interactive program (e.g. + from a secript); otherwise, the demo will close as soon as + the script completes. + """ + if in_idle(): + return + self._top.mainloop(*args, **kwargs) + + +def draw_trees(*trees): + """ + Open a new window containing a graphical diagram of the given + trees. + + :rtype: None + """ + TreeView(*trees).mainloop() + return + + +##////////////////////////////////////////////////////// +## Demo Code +##////////////////////////////////////////////////////// + + +def demo(): + import random + + def fill(cw): + cw['fill'] = '#%06d' % random.randint(0, 999999) + + cf = CanvasFrame(width=550, height=450, closeenough=2) + + t = Tree.fromstring( + ''' + (S (NP the very big cat) + (VP (Adv sorta) (V saw) (NP (Det the) (N dog))))''' + ) + + tc = TreeWidget( + cf.canvas(), + t, + draggable=1, + node_font=('helvetica', -14, 'bold'), + leaf_font=('helvetica', -12, 'italic'), + roof_fill='white', + roof_color='black', + leaf_color='green4', + node_color='blue2', + ) + cf.add_widget(tc, 10, 10) + + def boxit(canvas, text): + big = ('helvetica', -16, 'bold') + return BoxWidget(canvas, TextWidget(canvas, text, font=big), fill='green') + + def ovalit(canvas, text): + return OvalWidget(canvas, TextWidget(canvas, text), fill='cyan') + + treetok = Tree.fromstring('(S (NP this tree) (VP (V is) (AdjP shapeable)))') + tc2 = TreeWidget(cf.canvas(), treetok, boxit, ovalit, shapeable=1) + + def color(node): + node['color'] = '#%04d00' % random.randint(0, 9999) + + def color2(treeseg): + treeseg.label()['fill'] = '#%06d' % random.randint(0, 9999) + treeseg.label().child()['color'] = 'white' + + tc.bind_click_trees(tc.toggle_collapsed) + tc2.bind_click_trees(tc2.toggle_collapsed) + tc.bind_click_nodes(color, 3) + tc2.expanded_tree(1).bind_click(color2, 3) + tc2.expanded_tree().bind_click(color2, 3) + + paren = ParenWidget(cf.canvas(), tc2) + cf.add_widget(paren, tc.bbox()[2] + 10, 10) + + tree3 = Tree.fromstring( + ''' + (S (NP this tree) (AUX was) + (VP (V built) (PP (P with) (NP (N tree_to_treesegment)))))''' + ) + tc3 = tree_to_treesegment( + cf.canvas(), tree3, tree_color='green4', tree_xspace=2, tree_width=2 + ) + tc3['draggable'] = 1 + cf.add_widget(tc3, 10, tc.bbox()[3] + 10) + + def orientswitch(treewidget): + if treewidget['orientation'] == 'horizontal': + treewidget.expanded_tree(1, 1).subtrees()[0].set_text('vertical') + treewidget.collapsed_tree(1, 1).subtrees()[0].set_text('vertical') + treewidget.collapsed_tree(1).subtrees()[1].set_text('vertical') + treewidget.collapsed_tree().subtrees()[3].set_text('vertical') + treewidget['orientation'] = 'vertical' + else: + 
treewidget.expanded_tree(1, 1).subtrees()[0].set_text('horizontal') + treewidget.collapsed_tree(1, 1).subtrees()[0].set_text('horizontal') + treewidget.collapsed_tree(1).subtrees()[1].set_text('horizontal') + treewidget.collapsed_tree().subtrees()[3].set_text('horizontal') + treewidget['orientation'] = 'horizontal' + + text = """ +Try clicking, right clicking, and dragging +different elements of each of the trees. +The top-left tree is a TreeWidget built from +a Tree. The top-right is a TreeWidget built +from a Tree, using non-default widget +constructors for the nodes & leaves (BoxWidget +and OvalWidget). The bottom-left tree is +built from tree_to_treesegment.""" + twidget = TextWidget(cf.canvas(), text.strip()) + textbox = BoxWidget(cf.canvas(), twidget, fill='white', draggable=1) + cf.add_widget(textbox, tc3.bbox()[2] + 10, tc2.bbox()[3] + 10) + + tree4 = Tree.fromstring('(S (NP this tree) (VP (V is) (Adj horizontal)))') + tc4 = TreeWidget( + cf.canvas(), + tree4, + draggable=1, + line_color='brown2', + roof_color='brown2', + node_font=('helvetica', -12, 'bold'), + node_color='brown4', + orientation='horizontal', + ) + tc4.manage() + cf.add_widget(tc4, tc3.bbox()[2] + 10, textbox.bbox()[3] + 10) + tc4.bind_click(orientswitch) + tc4.bind_click_trees(tc4.toggle_collapsed, 3) + + # Run mainloop + cf.mainloop() + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/draw/util.py b/venv.bak/lib/python3.7/site-packages/nltk/draw/util.py new file mode 100644 index 0000000..9daebbb --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/draw/util.py @@ -0,0 +1,2574 @@ +# Natural Language Toolkit: Drawing utilities +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +Tools for graphically displaying and interacting with the objects and +processing classes defined by the Toolkit. These tools are primarily +intended to help students visualize the objects that they create. + +The graphical tools are typically built using "canvas widgets", each +of which encapsulates the graphical elements and bindings used to +display a complex object on a Tkinter ``Canvas``. For example, NLTK +defines canvas widgets for displaying trees and directed graphs, as +well as a number of simpler widgets. These canvas widgets make it +easier to build new graphical tools and demos. See the class +documentation for ``CanvasWidget`` for more information. + +The ``nltk.draw`` module defines the abstract ``CanvasWidget`` base +class, and a number of simple canvas widgets. The remaining canvas +widgets are defined by submodules, such as ``nltk.draw.tree``. + +The ``nltk.draw`` module also defines ``CanvasFrame``, which +encapsulates a ``Canvas`` and its scrollbars. It uses a +``ScrollWatcherWidget`` to ensure that all canvas widgets contained on +its canvas are within the scroll region. + +Acknowledgements: Many of the ideas behind the canvas widget system +are derived from ``CLIG``, a Tk-based grapher for linguistic data +structures. For more information, see the CLIG +homepage (http://www.ags.uni-sb.de/~konrad/clig.html). 
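For example, the tree widgets defined in ``nltk.draw.tree`` can be used to display a parsed sentence in a new window. A minimal sketch, assuming the standard ``nltk`` imports (the example tree string is illustrative only):

>>> from nltk import Tree
>>> from nltk.draw.tree import draw_trees
>>> t = Tree.fromstring('(S (NP the cat) (VP sat))')
>>> draw_trees(t)    # opens a TreeView window; returns when it is closed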
+ +""" +from abc import ABCMeta, abstractmethod +from six import add_metaclass +from six.moves.tkinter import ( + Button, + Canvas, + Entry, + Frame, + Label, + Menu, + Menubutton, + Scrollbar, + StringVar, + Text, + Tk, + Toplevel, + Widget, + RAISED, +) +from six.moves.tkinter_tkfiledialog import asksaveasfilename + +from nltk.util import in_idle + +##////////////////////////////////////////////////////// +## CanvasWidget +##////////////////////////////////////////////////////// + + +@add_metaclass(ABCMeta) +class CanvasWidget(object): + """ + A collection of graphical elements and bindings used to display a + complex object on a Tkinter ``Canvas``. A canvas widget is + responsible for managing the ``Canvas`` tags and callback bindings + necessary to display and interact with the object. Canvas widgets + are often organized into hierarchies, where parent canvas widgets + control aspects of their child widgets. + + Each canvas widget is bound to a single ``Canvas``. This ``Canvas`` + is specified as the first argument to the ``CanvasWidget``'s + constructor. + + Attributes. Each canvas widget can support a variety of + "attributes", which control how the canvas widget is displayed. + Some typical examples attributes are ``color``, ``font``, and + ``radius``. Each attribute has a default value. This default + value can be overridden in the constructor, using keyword + arguments of the form ``attribute=value``: + + >>> from nltk.draw.util import TextWidget + >>> cn = TextWidget(c, 'test', color='red') + + Attribute values can also be changed after a canvas widget has + been constructed, using the ``__setitem__`` operator: + + >>> cn['font'] = 'times' + + The current value of an attribute value can be queried using the + ``__getitem__`` operator: + + >>> cn['color'] + red + + For a list of the attributes supported by a type of canvas widget, + see its class documentation. + + Interaction. The attribute ``'draggable'`` controls whether the + user can drag a canvas widget around the canvas. By default, + canvas widgets are not draggable. + + ``CanvasWidget`` provides callback support for two types of user + interaction: clicking and dragging. The method ``bind_click`` + registers a callback function that is called whenever the canvas + widget is clicked. The method ``bind_drag`` registers a callback + function that is called after the canvas widget is dragged. If + the user clicks or drags a canvas widget with no registered + callback function, then the interaction event will propagate to + its parent. For each canvas widget, only one callback function + may be registered for an interaction event. Callback functions + can be deregistered with the ``unbind_click`` and ``unbind_drag`` + methods. + + Subclassing. ``CanvasWidget`` is an abstract class. Subclasses + are required to implement the following methods: + + - ``__init__``: Builds a new canvas widget. It must perform the + following three tasks (in order): + - Create any new graphical elements. + - Call ``_add_child_widget`` on each child widget. + - Call the ``CanvasWidget`` constructor. + - ``_tags``: Returns a list of the canvas tags for all graphical + elements managed by this canvas widget, not including + graphical elements managed by its child widgets. + - ``_manage``: Arranges the child widgets of this canvas widget. + This is typically only called when the canvas widget is + created. + - ``_update``: Update this canvas widget in response to a + change in a single child. 
+ + For a ``CanvasWidget`` with no child widgets, the default + definitions for ``_manage`` and ``_update`` may be used. + + If a subclass defines any attributes, then it should implement + ``__getitem__`` and ``__setitem__``. If either of these methods is + called with an unknown attribute, then they should propagate the + request to ``CanvasWidget``. + + Most subclasses implement a number of additional methods that + modify the ``CanvasWidget`` in some way. These methods must call + ``parent.update(self)`` after making any changes to the canvas + widget's graphical elements. The canvas widget must also call + ``parent.update(self)`` after changing any attribute value that + affects the shape or position of the canvas widget's graphical + elements. + + :type __canvas: Tkinter.Canvas + :ivar __canvas: This ``CanvasWidget``'s canvas. + + :type __parent: CanvasWidget or None + :ivar __parent: This ``CanvasWidget``'s hierarchical parent widget. + :type __children: list(CanvasWidget) + :ivar __children: This ``CanvasWidget``'s hierarchical child widgets. + + :type __updating: bool + :ivar __updating: Is this canvas widget currently performing an + update? If it is, then it will ignore any new update requests + from child widgets. + + :type __draggable: bool + :ivar __draggable: Is this canvas widget draggable? + :type __press: event + :ivar __press: The ButtonPress event that we're currently handling. + :type __drag_x: int + :ivar __drag_x: Where it's been moved to (to find dx) + :type __drag_y: int + :ivar __drag_y: Where it's been moved to (to find dy) + :type __callbacks: dictionary + :ivar __callbacks: Registered callbacks. Currently, four keys are + used: ``1``, ``2``, ``3``, and ``'drag'``. The values are + callback functions. Each callback function takes a single + argument, which is the ``CanvasWidget`` that triggered the + callback. + """ + + def __init__(self, canvas, parent=None, **attribs): + """ + Create a new canvas widget. This constructor should only be + called by subclass constructors; and it should be called only + "after" the subclass has constructed all graphical canvas + objects and registered all child widgets. + + :param canvas: This canvas widget's canvas. + :type canvas: Tkinter.Canvas + :param parent: This canvas widget's hierarchical parent. + :type parent: CanvasWidget + :param attribs: The new canvas widget's attributes. + """ + if self.__class__ == CanvasWidget: + raise TypeError('CanvasWidget is an abstract base class') + + if not isinstance(canvas, Canvas): + raise TypeError('Expected a canvas!') + + self.__canvas = canvas + self.__parent = parent + + # If the subclass constructor called _add_child_widget, then + # self.__children will already exist. + if not hasattr(self, '_CanvasWidget__children'): + self.__children = [] + + # Is this widget hidden? + self.__hidden = 0 + + # Update control (prevents infinite loops) + self.__updating = 0 + + # Button-press and drag callback handling. + self.__press = None + self.__drag_x = self.__drag_y = 0 + self.__callbacks = {} + self.__draggable = 0 + + # Set up attributes. + for (attr, value) in list(attribs.items()): + self[attr] = value + + # Manage this canvas widget + self._manage() + + # Register any new bindings + for tag in self._tags(): + self.__canvas.tag_bind(tag, '', self.__press_cb) + self.__canvas.tag_bind(tag, '', self.__press_cb) + self.__canvas.tag_bind(tag, '', self.__press_cb) + + ##////////////////////////////////////////////////////// + ## Inherited methods. 
+ ##////////////////////////////////////////////////////// + + def bbox(self): + """ + :return: A bounding box for this ``CanvasWidget``. The bounding + box is a tuple of four coordinates, *(xmin, ymin, xmax, ymax)*, + for a rectangle which encloses all of the canvas + widget's graphical elements. Bounding box coordinates are + specified with respect to the coordinate space of the ``Canvas``. + :rtype: tuple(int, int, int, int) + """ + if self.__hidden: + return (0, 0, 0, 0) + if len(self.tags()) == 0: + raise ValueError('No tags') + return self.__canvas.bbox(*self.tags()) + + def width(self): + """ + :return: The width of this canvas widget's bounding box, in + its ``Canvas``'s coordinate space. + :rtype: int + """ + if len(self.tags()) == 0: + raise ValueError('No tags') + bbox = self.__canvas.bbox(*self.tags()) + return bbox[2] - bbox[0] + + def height(self): + """ + :return: The height of this canvas widget's bounding box, in + its ``Canvas``'s coordinate space. + :rtype: int + """ + if len(self.tags()) == 0: + raise ValueError('No tags') + bbox = self.__canvas.bbox(*self.tags()) + return bbox[3] - bbox[1] + + def parent(self): + """ + :return: The hierarchical parent of this canvas widget. + ``self`` is considered a subpart of its parent for + purposes of user interaction. + :rtype: CanvasWidget or None + """ + return self.__parent + + def child_widgets(self): + """ + :return: A list of the hierarchical children of this canvas + widget. These children are considered part of ``self`` + for purposes of user interaction. + :rtype: list of CanvasWidget + """ + return self.__children + + def canvas(self): + """ + :return: The canvas that this canvas widget is bound to. + :rtype: Tkinter.Canvas + """ + return self.__canvas + + def move(self, dx, dy): + """ + Move this canvas widget by a given distance. In particular, + shift the canvas widget right by ``dx`` pixels, and down by + ``dy`` pixels. Both ``dx`` and ``dy`` may be negative, resulting + in leftward or upward movement. + + :type dx: int + :param dx: The number of pixels to move this canvas widget + rightwards. + :type dy: int + :param dy: The number of pixels to move this canvas widget + downwards. + :rtype: None + """ + if dx == dy == 0: + return + for tag in self.tags(): + self.__canvas.move(tag, dx, dy) + if self.__parent: + self.__parent.update(self) + + def moveto(self, x, y, anchor='NW'): + """ + Move this canvas widget to the given location. In particular, + shift the canvas widget such that the corner or side of the + bounding box specified by ``anchor`` is at location (``x``, + ``y``). + + :param x,y: The location that the canvas widget should be moved + to. + :param anchor: The corner or side of the canvas widget that + should be moved to the specified location. ``'N'`` + specifies the top center; ``'NE'`` specifies the top right + corner; etc. + """ + x1, y1, x2, y2 = self.bbox() + if anchor == 'NW': + self.move(x - x1, y - y1) + if anchor == 'N': + self.move(x - x1 / 2 - x2 / 2, y - y1) + if anchor == 'NE': + self.move(x - x2, y - y1) + if anchor == 'E': + self.move(x - x2, y - y1 / 2 - y2 / 2) + if anchor == 'SE': + self.move(x - x2, y - y2) + if anchor == 'S': + self.move(x - x1 / 2 - x2 / 2, y - y2) + if anchor == 'SW': + self.move(x - x1, y - y2) + if anchor == 'W': + self.move(x - x1, y - y1 / 2 - y2 / 2) + + def destroy(self): + """ + Remove this ``CanvasWidget`` from its ``Canvas``. After a + ``CanvasWidget`` has been destroyed, it should not be accessed. 
+ + Note that you only need to destroy a top-level + ``CanvasWidget``; its child widgets will be destroyed + automatically. If you destroy a non-top-level + ``CanvasWidget``, then the entire top-level widget will be + destroyed. + + :raise ValueError: if this ``CanvasWidget`` has a parent. + :rtype: None + """ + if self.__parent is not None: + self.__parent.destroy() + return + + for tag in self.tags(): + self.__canvas.tag_unbind(tag, '') + self.__canvas.tag_unbind(tag, '') + self.__canvas.tag_unbind(tag, '') + self.__canvas.delete(*self.tags()) + self.__canvas = None + + def update(self, child): + """ + Update the graphical display of this canvas widget, and all of + its ancestors, in response to a change in one of this canvas + widget's children. + + :param child: The child widget that changed. + :type child: CanvasWidget + """ + if self.__hidden or child.__hidden: + return + # If we're already updating, then do nothing. This prevents + # infinite loops when _update modifies its children. + if self.__updating: + return + self.__updating = 1 + + # Update this CanvasWidget. + self._update(child) + + # Propagate update request to the parent. + if self.__parent: + self.__parent.update(self) + + # We're done updating. + self.__updating = 0 + + def manage(self): + """ + Arrange this canvas widget and all of its descendants. + + :rtype: None + """ + if self.__hidden: + return + for child in self.__children: + child.manage() + self._manage() + + def tags(self): + """ + :return: a list of the canvas tags for all graphical + elements managed by this canvas widget, including + graphical elements managed by its child widgets. + :rtype: list of int + """ + if self.__canvas is None: + raise ValueError('Attempt to access a destroyed canvas widget') + tags = [] + tags += self._tags() + for child in self.__children: + tags += child.tags() + return tags + + def __setitem__(self, attr, value): + """ + Set the value of the attribute ``attr`` to ``value``. See the + class documentation for a list of attributes supported by this + canvas widget. + + :rtype: None + """ + if attr == 'draggable': + self.__draggable = value + else: + raise ValueError('Unknown attribute %r' % attr) + + def __getitem__(self, attr): + """ + :return: the value of the attribute ``attr``. See the class + documentation for a list of attributes supported by this + canvas widget. + :rtype: (any) + """ + if attr == 'draggable': + return self.__draggable + else: + raise ValueError('Unknown attribute %r' % attr) + + def __repr__(self): + """ + :return: a string representation of this canvas widget. + :rtype: str + """ + return '<%s>' % self.__class__.__name__ + + def hide(self): + """ + Temporarily hide this canvas widget. + + :rtype: None + """ + self.__hidden = 1 + for tag in self.tags(): + self.__canvas.itemconfig(tag, state='hidden') + + def show(self): + """ + Show a hidden canvas widget. + + :rtype: None + """ + self.__hidden = 0 + for tag in self.tags(): + self.__canvas.itemconfig(tag, state='normal') + + def hidden(self): + """ + :return: True if this canvas widget is hidden. + :rtype: bool + """ + return self.__hidden + + ##////////////////////////////////////////////////////// + ## Callback interface + ##////////////////////////////////////////////////////// + + def bind_click(self, callback, button=1): + """ + Register a new callback that will be called whenever this + ``CanvasWidget`` is clicked on. 
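For example, continuing with the ``cn`` text widget from the class documentation above, a hypothetical callback could recolor the widget on a left click:

>>> def on_click(widget):
...     widget['color'] = 'red'
>>> cn.bind_click(on_click, button=1)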
+ + :type callback: function + :param callback: The callback function that will be called + whenever this ``CanvasWidget`` is clicked. This function + will be called with this ``CanvasWidget`` as its argument. + :type button: int + :param button: Which button the user should use to click on + this ``CanvasWidget``. Typically, this should be 1 (left + button), 3 (right button), or 2 (middle button). + """ + self.__callbacks[button] = callback + + def bind_drag(self, callback): + """ + Register a new callback that will be called after this + ``CanvasWidget`` is dragged. This implicitly makes this + ``CanvasWidget`` draggable. + + :type callback: function + :param callback: The callback function that will be called + whenever this ``CanvasWidget`` is clicked. This function + will be called with this ``CanvasWidget`` as its argument. + """ + self.__draggable = 1 + self.__callbacks['drag'] = callback + + def unbind_click(self, button=1): + """ + Remove a callback that was registered with ``bind_click``. + + :type button: int + :param button: Which button the user should use to click on + this ``CanvasWidget``. Typically, this should be 1 (left + button), 3 (right button), or 2 (middle button). + """ + try: + del self.__callbacks[button] + except: + pass + + def unbind_drag(self): + """ + Remove a callback that was registered with ``bind_drag``. + """ + try: + del self.__callbacks['drag'] + except: + pass + + ##////////////////////////////////////////////////////// + ## Callback internals + ##////////////////////////////////////////////////////// + + def __press_cb(self, event): + """ + Handle a button-press event: + - record the button press event in ``self.__press`` + - register a button-release callback. + - if this CanvasWidget or any of its ancestors are + draggable, then register the appropriate motion callback. + """ + # If we're already waiting for a button release, then ignore + # this new button press. + if ( + self.__canvas.bind('') + or self.__canvas.bind('') + or self.__canvas.bind('') + ): + return + + # Unbind motion (just in case; this shouldn't be necessary) + self.__canvas.unbind('') + + # Record the button press event. + self.__press = event + + # If any ancestor is draggable, set up a motion callback. + # (Only if they pressed button number 1) + if event.num == 1: + widget = self + while widget is not None: + if widget['draggable']: + widget.__start_drag(event) + break + widget = widget.parent() + + # Set up the button release callback. + self.__canvas.bind('' % event.num, self.__release_cb) + + def __start_drag(self, event): + """ + Begin dragging this object: + - register a motion callback + - record the drag coordinates + """ + self.__canvas.bind('', self.__motion_cb) + self.__drag_x = event.x + self.__drag_y = event.y + + def __motion_cb(self, event): + """ + Handle a motion event: + - move this object to the new location + - record the new drag coordinates + """ + self.move(event.x - self.__drag_x, event.y - self.__drag_y) + self.__drag_x = event.x + self.__drag_y = event.y + + def __release_cb(self, event): + """ + Handle a release callback: + - unregister motion & button release callbacks. + - decide whether they clicked, dragged, or cancelled + - call the appropriate handler. + """ + # Unbind the button release & motion callbacks. + self.__canvas.unbind('' % event.num) + self.__canvas.unbind('') + + # Is it a click or a drag? 
+ if ( + event.time - self.__press.time < 100 + and abs(event.x - self.__press.x) + abs(event.y - self.__press.y) < 5 + ): + # Move it back, if we were dragging. + if self.__draggable and event.num == 1: + self.move( + self.__press.x - self.__drag_x, self.__press.y - self.__drag_y + ) + self.__click(event.num) + elif event.num == 1: + self.__drag() + + self.__press = None + + def __drag(self): + """ + If this ``CanvasWidget`` has a drag callback, then call it; + otherwise, find the closest ancestor with a drag callback, and + call it. If no ancestors have a drag callback, do nothing. + """ + if self.__draggable: + if 'drag' in self.__callbacks: + cb = self.__callbacks['drag'] + try: + cb(self) + except: + print('Error in drag callback for %r' % self) + elif self.__parent is not None: + self.__parent.__drag() + + def __click(self, button): + """ + If this ``CanvasWidget`` has a drag callback, then call it; + otherwise, find the closest ancestor with a click callback, and + call it. If no ancestors have a click callback, do nothing. + """ + if button in self.__callbacks: + cb = self.__callbacks[button] + # try: + cb(self) + # except: + # print 'Error in click callback for %r' % self + # raise + elif self.__parent is not None: + self.__parent.__click(button) + + ##////////////////////////////////////////////////////// + ## Child/parent Handling + ##////////////////////////////////////////////////////// + + def _add_child_widget(self, child): + """ + Register a hierarchical child widget. The child will be + considered part of this canvas widget for purposes of user + interaction. ``_add_child_widget`` has two direct effects: + - It sets ``child``'s parent to this canvas widget. + - It adds ``child`` to the list of canvas widgets returned by + the ``child_widgets`` member function. + + :param child: The new child widget. ``child`` must not already + have a parent. + :type child: CanvasWidget + """ + if not hasattr(self, '_CanvasWidget__children'): + self.__children = [] + if child.__parent is not None: + raise ValueError('{} already has a parent'.format(child)) + child.__parent = self + self.__children.append(child) + + def _remove_child_widget(self, child): + """ + Remove a hierarchical child widget. This child will no longer + be considered part of this canvas widget for purposes of user + interaction. ``_add_child_widget`` has two direct effects: + - It sets ``child``'s parent to None. + - It removes ``child`` from the list of canvas widgets + returned by the ``child_widgets`` member function. + + :param child: The child widget to remove. ``child`` must be a + child of this canvas widget. + :type child: CanvasWidget + """ + self.__children.remove(child) + child.__parent = None + + ##////////////////////////////////////////////////////// + ## Defined by subclass + ##////////////////////////////////////////////////////// + + @abstractmethod + def _tags(self): + """ + :return: a list of canvas tags for all graphical elements + managed by this canvas widget, not including graphical + elements managed by its child widgets. + :rtype: list of int + """ + + def _manage(self): + """ + Arrange the child widgets of this canvas widget. This method + is called when the canvas widget is initially created. It is + also called if the user calls the ``manage`` method on this + canvas widget or any of its ancestors. + + :rtype: None + """ + + def _update(self, child): + """ + Update this canvas widget in response to a change in one of + its children. + + :param child: The child that changed. 
+ :type child: CanvasWidget + :rtype: None + """ + + +##////////////////////////////////////////////////////// +## Basic widgets. +##////////////////////////////////////////////////////// + + +class TextWidget(CanvasWidget): + """ + A canvas widget that displays a single string of text. + + Attributes: + - ``color``: the color of the text. + - ``font``: the font used to display the text. + - ``justify``: justification for multi-line texts. Valid values + are ``left``, ``center``, and ``right``. + - ``width``: the width of the text. If the text is wider than + this width, it will be line-wrapped at whitespace. + - ``draggable``: whether the text can be dragged by the user. + """ + + def __init__(self, canvas, text, **attribs): + """ + Create a new text widget. + + :type canvas: Tkinter.Canvas + :param canvas: This canvas widget's canvas. + :type text: str + :param text: The string of text to display. + :param attribs: The new canvas widget's attributes. + """ + self._text = text + self._tag = canvas.create_text(1, 1, text=text) + CanvasWidget.__init__(self, canvas, **attribs) + + def __setitem__(self, attr, value): + if attr in ('color', 'font', 'justify', 'width'): + if attr == 'color': + attr = 'fill' + self.canvas().itemconfig(self._tag, {attr: value}) + else: + CanvasWidget.__setitem__(self, attr, value) + + def __getitem__(self, attr): + if attr == 'width': + return int(self.canvas().itemcget(self._tag, attr)) + elif attr in ('color', 'font', 'justify'): + if attr == 'color': + attr = 'fill' + return self.canvas().itemcget(self._tag, attr) + else: + return CanvasWidget.__getitem__(self, attr) + + def _tags(self): + return [self._tag] + + def text(self): + """ + :return: The text displayed by this text widget. + :rtype: str + """ + return self.canvas().itemcget(self._tag, 'TEXT') + + def set_text(self, text): + """ + Change the text that is displayed by this text widget. + + :type text: str + :param text: The string of text to display. + :rtype: None + """ + self.canvas().itemconfig(self._tag, text=text) + if self.parent() is not None: + self.parent().update(self) + + def __repr__(self): + return '[Text: %r]' % self._text + + +class SymbolWidget(TextWidget): + """ + A canvas widget that displays special symbols, such as the + negation sign and the exists operator. Symbols are specified by + name. Currently, the following symbol names are defined: ``neg``, + ``disj``, ``conj``, ``lambda``, ``merge``, ``forall``, ``exists``, + ``subseteq``, ``subset``, ``notsubset``, ``emptyset``, ``imp``, + ``rightarrow``, ``equal``, ``notequal``, ``epsilon``. + + Attributes: + + - ``color``: the color of the text. + - ``draggable``: whether the text can be dragged by the user. + + :cvar SYMBOLS: A dictionary mapping from symbols to the character + in the ``symbol`` font used to render them. + """ + + SYMBOLS = { + 'neg': '\330', + 'disj': '\332', + 'conj': '\331', + 'lambda': '\154', + 'merge': '\304', + 'forall': '\042', + 'exists': '\044', + 'subseteq': '\315', + 'subset': '\314', + 'notsubset': '\313', + 'emptyset': '\306', + 'imp': '\336', + 'rightarrow': chr(222), #'\256', + 'equal': '\75', + 'notequal': '\271', + 'intersection': '\307', + 'union': '\310', + 'epsilon': 'e', + } + + def __init__(self, canvas, symbol, **attribs): + """ + Create a new symbol widget. + + :type canvas: Tkinter.Canvas + :param canvas: This canvas widget's canvas. + :type symbol: str + :param symbol: The name of the symbol to display. + :param attribs: The new canvas widget's attributes. 
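A short sketch of typical use, assuming ``c`` is an existing Tkinter ``Canvas`` (as in the ``CanvasWidget`` examples above):

>>> sym = SymbolWidget(c, 'forall')
>>> sym.set_symbol('exists')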
+ """ + attribs['font'] = 'symbol' + TextWidget.__init__(self, canvas, '', **attribs) + self.set_symbol(symbol) + + def symbol(self): + """ + :return: the name of the symbol that is displayed by this + symbol widget. + :rtype: str + """ + return self._symbol + + def set_symbol(self, symbol): + """ + Change the symbol that is displayed by this symbol widget. + + :type symbol: str + :param symbol: The name of the symbol to display. + """ + if symbol not in SymbolWidget.SYMBOLS: + raise ValueError('Unknown symbol: %s' % symbol) + self._symbol = symbol + self.set_text(SymbolWidget.SYMBOLS[symbol]) + + def __repr__(self): + return '[Symbol: %r]' % self._symbol + + @staticmethod + def symbolsheet(size=20): + """ + Open a new Tkinter window that displays the entire alphabet + for the symbol font. This is useful for constructing the + ``SymbolWidget.SYMBOLS`` dictionary. + """ + top = Tk() + + def destroy(e, top=top): + top.destroy() + + top.bind('q', destroy) + Button(top, text='Quit', command=top.destroy).pack(side='bottom') + text = Text(top, font=('helvetica', -size), width=20, height=30) + text.pack(side='left') + sb = Scrollbar(top, command=text.yview) + text['yscrollcommand'] = sb.set + sb.pack(side='right', fill='y') + text.tag_config('symbol', font=('symbol', -size)) + for i in range(256): + if i in (0, 10): + continue # null and newline + for k, v in list(SymbolWidget.SYMBOLS.items()): + if v == chr(i): + text.insert('end', '%-10s\t' % k) + break + else: + text.insert('end', '%-10d \t' % i) + text.insert('end', '[%s]\n' % chr(i), 'symbol') + top.mainloop() + + +class AbstractContainerWidget(CanvasWidget): + """ + An abstract class for canvas widgets that contain a single child, + such as ``BoxWidget`` and ``OvalWidget``. Subclasses must define + a constructor, which should create any new graphical elements and + then call the ``AbstractCanvasContainer`` constructor. Subclasses + must also define the ``_update`` method and the ``_tags`` method; + and any subclasses that define attributes should define + ``__setitem__`` and ``__getitem__``. + """ + + def __init__(self, canvas, child, **attribs): + """ + Create a new container widget. This constructor should only + be called by subclass constructors. + + :type canvas: Tkinter.Canvas + :param canvas: This canvas widget's canvas. + :param child: The container's child widget. ``child`` must not + have a parent. + :type child: CanvasWidget + :param attribs: The new canvas widget's attributes. + """ + self._child = child + self._add_child_widget(child) + CanvasWidget.__init__(self, canvas, **attribs) + + def _manage(self): + self._update(self._child) + + def child(self): + """ + :return: The child widget contained by this container widget. + :rtype: CanvasWidget + """ + return self._child + + def set_child(self, child): + """ + Change the child widget contained by this container widget. + + :param child: The new child widget. ``child`` must not have a + parent. + :type child: CanvasWidget + :rtype: None + """ + self._remove_child_widget(self._child) + self._add_child_widget(child) + self._child = child + self.update(child) + + def __repr__(self): + name = self.__class__.__name__ + if name[-6:] == 'Widget': + name = name[:-6] + return '[%s: %r]' % (name, self._child) + + +class BoxWidget(AbstractContainerWidget): + """ + A canvas widget that places a box around a child widget. + + Attributes: + - ``fill``: The color used to fill the interior of the box. + - ``outline``: The color used to draw the outline of the box. 
+ - ``width``: The width of the outline of the box. + - ``margin``: The number of pixels space left between the child + and the box. + - ``draggable``: whether the text can be dragged by the user. + """ + + def __init__(self, canvas, child, **attribs): + """ + Create a new box widget. + + :type canvas: Tkinter.Canvas + :param canvas: This canvas widget's canvas. + :param child: The child widget. ``child`` must not have a + parent. + :type child: CanvasWidget + :param attribs: The new canvas widget's attributes. + """ + self._child = child + self._margin = 1 + self._box = canvas.create_rectangle(1, 1, 1, 1) + canvas.tag_lower(self._box) + AbstractContainerWidget.__init__(self, canvas, child, **attribs) + + def __setitem__(self, attr, value): + if attr == 'margin': + self._margin = value + elif attr in ('outline', 'fill', 'width'): + self.canvas().itemconfig(self._box, {attr: value}) + else: + CanvasWidget.__setitem__(self, attr, value) + + def __getitem__(self, attr): + if attr == 'margin': + return self._margin + elif attr == 'width': + return float(self.canvas().itemcget(self._box, attr)) + elif attr in ('outline', 'fill', 'width'): + return self.canvas().itemcget(self._box, attr) + else: + return CanvasWidget.__getitem__(self, attr) + + def _update(self, child): + (x1, y1, x2, y2) = child.bbox() + margin = self._margin + self['width'] / 2 + self.canvas().coords( + self._box, x1 - margin, y1 - margin, x2 + margin, y2 + margin + ) + + def _tags(self): + return [self._box] + + +class OvalWidget(AbstractContainerWidget): + """ + A canvas widget that places a oval around a child widget. + + Attributes: + - ``fill``: The color used to fill the interior of the oval. + - ``outline``: The color used to draw the outline of the oval. + - ``width``: The width of the outline of the oval. + - ``margin``: The number of pixels space left between the child + and the oval. + - ``draggable``: whether the text can be dragged by the user. + - ``double``: If true, then a double-oval is drawn. + """ + + def __init__(self, canvas, child, **attribs): + """ + Create a new oval widget. + + :type canvas: Tkinter.Canvas + :param canvas: This canvas widget's canvas. + :param child: The child widget. ``child`` must not have a + parent. + :type child: CanvasWidget + :param attribs: The new canvas widget's attributes. + """ + self._child = child + self._margin = 1 + self._oval = canvas.create_oval(1, 1, 1, 1) + self._circle = attribs.pop('circle', False) + self._double = attribs.pop('double', False) + if self._double: + self._oval2 = canvas.create_oval(1, 1, 1, 1) + else: + self._oval2 = None + canvas.tag_lower(self._oval) + AbstractContainerWidget.__init__(self, canvas, child, **attribs) + + def __setitem__(self, attr, value): + c = self.canvas() + if attr == 'margin': + self._margin = value + elif attr == 'double': + if value == True and self._oval2 is None: + # Copy attributes & position from self._oval. 
+ x1, y1, x2, y2 = c.bbox(self._oval) + w = self['width'] * 2 + self._oval2 = c.create_oval( + x1 - w, + y1 - w, + x2 + w, + y2 + w, + outline=c.itemcget(self._oval, 'outline'), + width=c.itemcget(self._oval, 'width'), + ) + c.tag_lower(self._oval2) + if value == False and self._oval2 is not None: + c.delete(self._oval2) + self._oval2 = None + elif attr in ('outline', 'fill', 'width'): + c.itemconfig(self._oval, {attr: value}) + if self._oval2 is not None and attr != 'fill': + c.itemconfig(self._oval2, {attr: value}) + if self._oval2 is not None and attr != 'fill': + self.canvas().itemconfig(self._oval2, {attr: value}) + else: + CanvasWidget.__setitem__(self, attr, value) + + def __getitem__(self, attr): + if attr == 'margin': + return self._margin + elif attr == 'double': + return self._double is not None + elif attr == 'width': + return float(self.canvas().itemcget(self._oval, attr)) + elif attr in ('outline', 'fill', 'width'): + return self.canvas().itemcget(self._oval, attr) + else: + return CanvasWidget.__getitem__(self, attr) + + # The ratio between inscribed & circumscribed ovals + RATIO = 1.4142135623730949 + + def _update(self, child): + R = OvalWidget.RATIO + (x1, y1, x2, y2) = child.bbox() + margin = self._margin + + # If we're a circle, pretend our contents are square. + if self._circle: + dx, dy = abs(x1 - x2), abs(y1 - y2) + if dx > dy: + y = (y1 + y2) / 2 + y1, y2 = y - dx / 2, y + dx / 2 + elif dy > dx: + x = (x1 + x2) / 2 + x1, x2 = x - dy / 2, x + dy / 2 + + # Find the four corners. + left = int((x1 * (1 + R) + x2 * (1 - R)) / 2) + right = left + int((x2 - x1) * R) + top = int((y1 * (1 + R) + y2 * (1 - R)) / 2) + bot = top + int((y2 - y1) * R) + self.canvas().coords( + self._oval, left - margin, top - margin, right + margin, bot + margin + ) + if self._oval2 is not None: + self.canvas().coords( + self._oval2, + left - margin + 2, + top - margin + 2, + right + margin - 2, + bot + margin - 2, + ) + + def _tags(self): + if self._oval2 is None: + return [self._oval] + else: + return [self._oval, self._oval2] + + +class ParenWidget(AbstractContainerWidget): + """ + A canvas widget that places a pair of parenthases around a child + widget. + + Attributes: + - ``color``: The color used to draw the parenthases. + - ``width``: The width of the parenthases. + - ``draggable``: whether the text can be dragged by the user. + """ + + def __init__(self, canvas, child, **attribs): + """ + Create a new parenthasis widget. + + :type canvas: Tkinter.Canvas + :param canvas: This canvas widget's canvas. + :param child: The child widget. ``child`` must not have a + parent. + :type child: CanvasWidget + :param attribs: The new canvas widget's attributes. 
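For example, assuming ``c`` is an existing Tkinter ``Canvas``, a parenthesized text widget might be created with:

>>> p = ParenWidget(c, TextWidget(c, 'hello'), color='blue')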
+ """ + self._child = child + self._oparen = canvas.create_arc(1, 1, 1, 1, style='arc', start=90, extent=180) + self._cparen = canvas.create_arc(1, 1, 1, 1, style='arc', start=-90, extent=180) + AbstractContainerWidget.__init__(self, canvas, child, **attribs) + + def __setitem__(self, attr, value): + if attr == 'color': + self.canvas().itemconfig(self._oparen, outline=value) + self.canvas().itemconfig(self._cparen, outline=value) + elif attr == 'width': + self.canvas().itemconfig(self._oparen, width=value) + self.canvas().itemconfig(self._cparen, width=value) + else: + CanvasWidget.__setitem__(self, attr, value) + + def __getitem__(self, attr): + if attr == 'color': + return self.canvas().itemcget(self._oparen, 'outline') + elif attr == 'width': + return self.canvas().itemcget(self._oparen, 'width') + else: + return CanvasWidget.__getitem__(self, attr) + + def _update(self, child): + (x1, y1, x2, y2) = child.bbox() + width = max((y2 - y1) / 6, 4) + self.canvas().coords(self._oparen, x1 - width, y1, x1 + width, y2) + self.canvas().coords(self._cparen, x2 - width, y1, x2 + width, y2) + + def _tags(self): + return [self._oparen, self._cparen] + + +class BracketWidget(AbstractContainerWidget): + """ + A canvas widget that places a pair of brackets around a child + widget. + + Attributes: + - ``color``: The color used to draw the brackets. + - ``width``: The width of the brackets. + - ``draggable``: whether the text can be dragged by the user. + """ + + def __init__(self, canvas, child, **attribs): + """ + Create a new bracket widget. + + :type canvas: Tkinter.Canvas + :param canvas: This canvas widget's canvas. + :param child: The child widget. ``child`` must not have a + parent. + :type child: CanvasWidget + :param attribs: The new canvas widget's attributes. + """ + self._child = child + self._obrack = canvas.create_line(1, 1, 1, 1, 1, 1, 1, 1) + self._cbrack = canvas.create_line(1, 1, 1, 1, 1, 1, 1, 1) + AbstractContainerWidget.__init__(self, canvas, child, **attribs) + + def __setitem__(self, attr, value): + if attr == 'color': + self.canvas().itemconfig(self._obrack, fill=value) + self.canvas().itemconfig(self._cbrack, fill=value) + elif attr == 'width': + self.canvas().itemconfig(self._obrack, width=value) + self.canvas().itemconfig(self._cbrack, width=value) + else: + CanvasWidget.__setitem__(self, attr, value) + + def __getitem__(self, attr): + if attr == 'color': + return self.canvas().itemcget(self._obrack, 'outline') + elif attr == 'width': + return self.canvas().itemcget(self._obrack, 'width') + else: + return CanvasWidget.__getitem__(self, attr) + + def _update(self, child): + (x1, y1, x2, y2) = child.bbox() + width = max((y2 - y1) / 8, 2) + self.canvas().coords( + self._obrack, x1, y1, x1 - width, y1, x1 - width, y2, x1, y2 + ) + self.canvas().coords( + self._cbrack, x2, y1, x2 + width, y1, x2 + width, y2, x2, y2 + ) + + def _tags(self): + return [self._obrack, self._cbrack] + + +class SequenceWidget(CanvasWidget): + """ + A canvas widget that keeps a list of canvas widgets in a + horizontal line. + + Attributes: + - ``align``: The vertical alignment of the children. Possible + values are ``'top'``, ``'center'``, and ``'bottom'``. By + default, children are center-aligned. + - ``space``: The amount of horizontal space to place between + children. By default, one pixel of space is used. + - ``ordered``: If true, then keep the children in their + original order. + """ + + def __init__(self, canvas, *children, **attribs): + """ + Create a new sequence widget. 
+ + :type canvas: Tkinter.Canvas + :param canvas: This canvas widget's canvas. + :param children: The widgets that should be aligned + horizontally. Each child must not have a parent. + :type children: list(CanvasWidget) + :param attribs: The new canvas widget's attributes. + """ + self._align = 'center' + self._space = 1 + self._ordered = False + self._children = list(children) + for child in children: + self._add_child_widget(child) + CanvasWidget.__init__(self, canvas, **attribs) + + def __setitem__(self, attr, value): + if attr == 'align': + if value not in ('top', 'bottom', 'center'): + raise ValueError('Bad alignment: %r' % value) + self._align = value + elif attr == 'space': + self._space = value + elif attr == 'ordered': + self._ordered = value + else: + CanvasWidget.__setitem__(self, attr, value) + + def __getitem__(self, attr): + if attr == 'align': + return self._align + elif attr == 'space': + return self._space + elif attr == 'ordered': + return self._ordered + else: + return CanvasWidget.__getitem__(self, attr) + + def _tags(self): + return [] + + def _yalign(self, top, bot): + if self._align == 'top': + return top + if self._align == 'bottom': + return bot + if self._align == 'center': + return (top + bot) / 2 + + def _update(self, child): + # Align all children with child. + (left, top, right, bot) = child.bbox() + y = self._yalign(top, bot) + for c in self._children: + (x1, y1, x2, y2) = c.bbox() + c.move(0, y - self._yalign(y1, y2)) + + if self._ordered and len(self._children) > 1: + index = self._children.index(child) + + x = right + self._space + for i in range(index + 1, len(self._children)): + (x1, y1, x2, y2) = self._children[i].bbox() + if x > x1: + self._children[i].move(x - x1, 0) + x += x2 - x1 + self._space + + x = left - self._space + for i in range(index - 1, -1, -1): + (x1, y1, x2, y2) = self._children[i].bbox() + if x < x2: + self._children[i].move(x - x2, 0) + x -= x2 - x1 + self._space + + def _manage(self): + if len(self._children) == 0: + return + child = self._children[0] + + # Align all children with child. + (left, top, right, bot) = child.bbox() + y = self._yalign(top, bot) + + index = self._children.index(child) + + # Line up children to the right of child. + x = right + self._space + for i in range(index + 1, len(self._children)): + (x1, y1, x2, y2) = self._children[i].bbox() + self._children[i].move(x - x1, y - self._yalign(y1, y2)) + x += x2 - x1 + self._space + + # Line up children to the left of child. + x = left - self._space + for i in range(index - 1, -1, -1): + (x1, y1, x2, y2) = self._children[i].bbox() + self._children[i].move(x - x2, y - self._yalign(y1, y2)) + x -= x2 - x1 + self._space + + def __repr__(self): + return '[Sequence: ' + repr(self._children)[1:-1] + ']' + + # Provide an alias for the child_widgets() member. + children = CanvasWidget.child_widgets + + def replace_child(self, oldchild, newchild): + """ + Replace the child canvas widget ``oldchild`` with ``newchild``. + ``newchild`` must not have a parent. ``oldchild``'s parent will + be set to None. + + :type oldchild: CanvasWidget + :param oldchild: The child canvas widget to remove. + :type newchild: CanvasWidget + :param newchild: The canvas widget that should replace + ``oldchild``. + """ + index = self._children.index(oldchild) + self._children[index] = newchild + self._remove_child_widget(oldchild) + self._add_child_widget(newchild) + self.update(newchild) + + def remove_child(self, child): + """ + Remove the given child canvas widget. 
``child``'s parent will + be set ot None. + + :type child: CanvasWidget + :param child: The child canvas widget to remove. + """ + index = self._children.index(child) + del self._children[index] + self._remove_child_widget(child) + if len(self._children) > 0: + self.update(self._children[0]) + + def insert_child(self, index, child): + """ + Insert a child canvas widget before a given index. + + :type child: CanvasWidget + :param child: The canvas widget that should be inserted. + :type index: int + :param index: The index where the child widget should be + inserted. In particular, the index of ``child`` will be + ``index``; and the index of any children whose indices were + greater than equal to ``index`` before ``child`` was + inserted will be incremented by one. + """ + self._children.insert(index, child) + self._add_child_widget(child) + + +class StackWidget(CanvasWidget): + """ + A canvas widget that keeps a list of canvas widgets in a vertical + line. + + Attributes: + - ``align``: The horizontal alignment of the children. Possible + values are ``'left'``, ``'center'``, and ``'right'``. By + default, children are center-aligned. + - ``space``: The amount of vertical space to place between + children. By default, one pixel of space is used. + - ``ordered``: If true, then keep the children in their + original order. + """ + + def __init__(self, canvas, *children, **attribs): + """ + Create a new stack widget. + + :type canvas: Tkinter.Canvas + :param canvas: This canvas widget's canvas. + :param children: The widgets that should be aligned + vertically. Each child must not have a parent. + :type children: list(CanvasWidget) + :param attribs: The new canvas widget's attributes. + """ + self._align = 'center' + self._space = 1 + self._ordered = False + self._children = list(children) + for child in children: + self._add_child_widget(child) + CanvasWidget.__init__(self, canvas, **attribs) + + def __setitem__(self, attr, value): + if attr == 'align': + if value not in ('left', 'right', 'center'): + raise ValueError('Bad alignment: %r' % value) + self._align = value + elif attr == 'space': + self._space = value + elif attr == 'ordered': + self._ordered = value + else: + CanvasWidget.__setitem__(self, attr, value) + + def __getitem__(self, attr): + if attr == 'align': + return self._align + elif attr == 'space': + return self._space + elif attr == 'ordered': + return self._ordered + else: + return CanvasWidget.__getitem__(self, attr) + + def _tags(self): + return [] + + def _xalign(self, left, right): + if self._align == 'left': + return left + if self._align == 'right': + return right + if self._align == 'center': + return (left + right) / 2 + + def _update(self, child): + # Align all children with child. + (left, top, right, bot) = child.bbox() + x = self._xalign(left, right) + for c in self._children: + (x1, y1, x2, y2) = c.bbox() + c.move(x - self._xalign(x1, x2), 0) + + if self._ordered and len(self._children) > 1: + index = self._children.index(child) + + y = bot + self._space + for i in range(index + 1, len(self._children)): + (x1, y1, x2, y2) = self._children[i].bbox() + if y > y1: + self._children[i].move(0, y - y1) + y += y2 - y1 + self._space + + y = top - self._space + for i in range(index - 1, -1, -1): + (x1, y1, x2, y2) = self._children[i].bbox() + if y < y2: + self._children[i].move(0, y - y2) + y -= y2 - y1 + self._space + + def _manage(self): + if len(self._children) == 0: + return + child = self._children[0] + + # Align all children with child. 
+ (left, top, right, bot) = child.bbox() + x = self._xalign(left, right) + + index = self._children.index(child) + + # Line up children below the child. + y = bot + self._space + for i in range(index + 1, len(self._children)): + (x1, y1, x2, y2) = self._children[i].bbox() + self._children[i].move(x - self._xalign(x1, x2), y - y1) + y += y2 - y1 + self._space + + # Line up children above the child. + y = top - self._space + for i in range(index - 1, -1, -1): + (x1, y1, x2, y2) = self._children[i].bbox() + self._children[i].move(x - self._xalign(x1, x2), y - y2) + y -= y2 - y1 + self._space + + def __repr__(self): + return '[Stack: ' + repr(self._children)[1:-1] + ']' + + # Provide an alias for the child_widgets() member. + children = CanvasWidget.child_widgets + + def replace_child(self, oldchild, newchild): + """ + Replace the child canvas widget ``oldchild`` with ``newchild``. + ``newchild`` must not have a parent. ``oldchild``'s parent will + be set to None. + + :type oldchild: CanvasWidget + :param oldchild: The child canvas widget to remove. + :type newchild: CanvasWidget + :param newchild: The canvas widget that should replace + ``oldchild``. + """ + index = self._children.index(oldchild) + self._children[index] = newchild + self._remove_child_widget(oldchild) + self._add_child_widget(newchild) + self.update(newchild) + + def remove_child(self, child): + """ + Remove the given child canvas widget. ``child``'s parent will + be set ot None. + + :type child: CanvasWidget + :param child: The child canvas widget to remove. + """ + index = self._children.index(child) + del self._children[index] + self._remove_child_widget(child) + if len(self._children) > 0: + self.update(self._children[0]) + + def insert_child(self, index, child): + """ + Insert a child canvas widget before a given index. + + :type child: CanvasWidget + :param child: The canvas widget that should be inserted. + :type index: int + :param index: The index where the child widget should be + inserted. In particular, the index of ``child`` will be + ``index``; and the index of any children whose indices were + greater than equal to ``index`` before ``child`` was + inserted will be incremented by one. + """ + self._children.insert(index, child) + self._add_child_widget(child) + + +class SpaceWidget(CanvasWidget): + """ + A canvas widget that takes up space but does not display + anything. A ``SpaceWidget`` can be used to add space between + elements. Each space widget is characterized by a width and a + height. If you wish to only create horizontal space, then use a + height of zero; and if you wish to only create vertical space, use + a width of zero. + """ + + def __init__(self, canvas, width, height, **attribs): + """ + Create a new space widget. + + :type canvas: Tkinter.Canvas + :param canvas: This canvas widget's canvas. + :type width: int + :param width: The width of the new space widget. + :type height: int + :param height: The height of the new space widget. + :param attribs: The new canvas widget's attributes. + """ + # For some reason, + if width > 4: + width -= 4 + if height > 4: + height -= 4 + self._tag = canvas.create_line(1, 1, width, height, fill='') + CanvasWidget.__init__(self, canvas, **attribs) + + # note: width() and height() are already defined by CanvasWidget. + def set_width(self, width): + """ + Change the width of this space widget. + + :param width: The new width. 
+ :type width: int + :rtype: None + """ + [x1, y1, x2, y2] = self.bbox() + self.canvas().coords(self._tag, x1, y1, x1 + width, y2) + + def set_height(self, height): + """ + Change the height of this space widget. + + :param height: The new height. + :type height: int + :rtype: None + """ + [x1, y1, x2, y2] = self.bbox() + self.canvas().coords(self._tag, x1, y1, x2, y1 + height) + + def _tags(self): + return [self._tag] + + def __repr__(self): + return '[Space]' + + +class ScrollWatcherWidget(CanvasWidget): + """ + A special canvas widget that adjusts its ``Canvas``'s scrollregion + to always include the bounding boxes of all of its children. The + scroll-watcher widget will only increase the size of the + ``Canvas``'s scrollregion; it will never decrease it. + """ + + def __init__(self, canvas, *children, **attribs): + """ + Create a new scroll-watcher widget. + + :type canvas: Tkinter.Canvas + :param canvas: This canvas widget's canvas. + :type children: list(CanvasWidget) + :param children: The canvas widgets watched by the + scroll-watcher. The scroll-watcher will ensure that these + canvas widgets are always contained in their canvas's + scrollregion. + :param attribs: The new canvas widget's attributes. + """ + for child in children: + self._add_child_widget(child) + CanvasWidget.__init__(self, canvas, **attribs) + + def add_child(self, canvaswidget): + """ + Add a new canvas widget to the scroll-watcher. The + scroll-watcher will ensure that the new canvas widget is + always contained in its canvas's scrollregion. + + :param canvaswidget: The new canvas widget. + :type canvaswidget: CanvasWidget + :rtype: None + """ + self._add_child_widget(canvaswidget) + self.update(canvaswidget) + + def remove_child(self, canvaswidget): + """ + Remove a canvas widget from the scroll-watcher. The + scroll-watcher will no longer ensure that the new canvas + widget is always contained in its canvas's scrollregion. + + :param canvaswidget: The canvas widget to remove. + :type canvaswidget: CanvasWidget + :rtype: None + """ + self._remove_child_widget(canvaswidget) + + def _tags(self): + return [] + + def _update(self, child): + self._adjust_scrollregion() + + def _adjust_scrollregion(self): + """ + Adjust the scrollregion of this scroll-watcher's ``Canvas`` to + include the bounding boxes of all of its children. + """ + bbox = self.bbox() + canvas = self.canvas() + scrollregion = [int(n) for n in canvas['scrollregion'].split()] + if len(scrollregion) != 4: + return + if ( + bbox[0] < scrollregion[0] + or bbox[1] < scrollregion[1] + or bbox[2] > scrollregion[2] + or bbox[3] > scrollregion[3] + ): + scrollregion = '%d %d %d %d' % ( + min(bbox[0], scrollregion[0]), + min(bbox[1], scrollregion[1]), + max(bbox[2], scrollregion[2]), + max(bbox[3], scrollregion[3]), + ) + canvas['scrollregion'] = scrollregion + + +##////////////////////////////////////////////////////// +## Canvas Frame +##////////////////////////////////////////////////////// + + +class CanvasFrame(object): + """ + A ``Tkinter`` frame containing a canvas and scrollbars. + ``CanvasFrame`` uses a ``ScrollWatcherWidget`` to ensure that all of + the canvas widgets contained on its canvas are within its + scrollregion. In order for ``CanvasFrame`` to make these checks, + all canvas widgets must be registered with ``add_widget`` when they + are added to the canvas; and destroyed with ``destroy_widget`` when + they are no longer needed. 
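A minimal, self-contained sketch of this workflow (widget contents and coordinates are illustrative only):

>>> from nltk.draw.util import CanvasFrame, TextWidget, BoxWidget
>>> cf = CanvasFrame(width=300, height=200)    # no parent given, so it creates its own window
>>> box = BoxWidget(cf.canvas(), TextWidget(cf.canvas(), 'hello'), margin=10)
>>> cf.add_widget(box, 20, 20)                 # keeps box inside the scrollregion
>>> cf.mainloop()                              # needed when run from a script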
+ + If a ``CanvasFrame`` is created with no parent, then it will create + its own main window, including a "Done" button and a "Print" + button. + """ + + def __init__(self, parent=None, **kw): + """ + Create a new ``CanvasFrame``. + + :type parent: Tkinter.BaseWidget or Tkinter.Tk + :param parent: The parent ``Tkinter`` widget. If no parent is + specified, then ``CanvasFrame`` will create a new main + window. + :param kw: Keyword arguments for the new ``Canvas``. See the + documentation for ``Tkinter.Canvas`` for more information. + """ + # If no parent was given, set up a top-level window. + if parent is None: + self._parent = Tk() + self._parent.title('NLTK') + self._parent.bind('', lambda e: self.print_to_file()) + self._parent.bind('', self.destroy) + self._parent.bind('', self.destroy) + else: + self._parent = parent + + # Create a frame for the canvas & scrollbars + self._frame = frame = Frame(self._parent) + self._canvas = canvas = Canvas(frame, **kw) + xscrollbar = Scrollbar(self._frame, orient='horizontal') + yscrollbar = Scrollbar(self._frame, orient='vertical') + xscrollbar['command'] = canvas.xview + yscrollbar['command'] = canvas.yview + canvas['xscrollcommand'] = xscrollbar.set + canvas['yscrollcommand'] = yscrollbar.set + yscrollbar.pack(fill='y', side='right') + xscrollbar.pack(fill='x', side='bottom') + canvas.pack(expand=1, fill='both', side='left') + + # Set initial scroll region. + scrollregion = '0 0 %s %s' % (canvas['width'], canvas['height']) + canvas['scrollregion'] = scrollregion + + self._scrollwatcher = ScrollWatcherWidget(canvas) + + # If no parent was given, pack the frame, and add a menu. + if parent is None: + self.pack(expand=1, fill='both') + self._init_menubar() + + def _init_menubar(self): + menubar = Menu(self._parent) + + filemenu = Menu(menubar, tearoff=0) + filemenu.add_command( + label='Print to Postscript', + underline=0, + command=self.print_to_file, + accelerator='Ctrl-p', + ) + filemenu.add_command( + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-x' + ) + menubar.add_cascade(label='File', underline=0, menu=filemenu) + + self._parent.config(menu=menubar) + + def print_to_file(self, filename=None): + """ + Print the contents of this ``CanvasFrame`` to a postscript + file. If no filename is given, then prompt the user for one. + + :param filename: The name of the file to print the tree to. + :type filename: str + :rtype: None + """ + if filename is None: + ftypes = [('Postscript files', '.ps'), ('All files', '*')] + filename = asksaveasfilename(filetypes=ftypes, defaultextension='.ps') + if not filename: + return + (x0, y0, w, h) = self.scrollregion() + postscript = self._canvas.postscript( + x=x0, + y=y0, + width=w + 2, + height=h + 2, + pagewidth=w + 2, # points = 1/72 inch + pageheight=h + 2, # points = 1/72 inch + pagex=0, + pagey=0, + ) + # workaround for bug in Tk font handling + postscript = postscript.replace(' 0 scalefont ', ' 9 scalefont ') + with open(filename, 'wb') as f: + f.write(postscript.encode('utf8')) + + def scrollregion(self): + """ + :return: The current scroll region for the canvas managed by + this ``CanvasFrame``. + :rtype: 4-tuple of int + """ + (x1, y1, x2, y2) = self._canvas['scrollregion'].split() + return (int(x1), int(y1), int(x2), int(y2)) + + def canvas(self): + """ + :return: The canvas managed by this ``CanvasFrame``. + :rtype: Tkinter.Canvas + """ + return self._canvas + + def add_widget(self, canvaswidget, x=None, y=None): + """ + Register a canvas widget with this ``CanvasFrame``. 
The + ``CanvasFrame`` will ensure that this canvas widget is always + within the ``Canvas``'s scrollregion. If no coordinates are + given for the canvas widget, then the ``CanvasFrame`` will + attempt to find a clear area of the canvas for it. + + :type canvaswidget: CanvasWidget + :param canvaswidget: The new canvas widget. ``canvaswidget`` + must have been created on this ``CanvasFrame``'s canvas. + :type x: int + :param x: The initial x coordinate for the upper left hand + corner of ``canvaswidget``, in the canvas's coordinate + space. + :type y: int + :param y: The initial y coordinate for the upper left hand + corner of ``canvaswidget``, in the canvas's coordinate + space. + """ + if x is None or y is None: + (x, y) = self._find_room(canvaswidget, x, y) + + # Move to (x,y) + (x1, y1, x2, y2) = canvaswidget.bbox() + canvaswidget.move(x - x1, y - y1) + + # Register with scrollwatcher. + self._scrollwatcher.add_child(canvaswidget) + + def _find_room(self, widget, desired_x, desired_y): + """ + Try to find a space for a given widget. + """ + (left, top, right, bot) = self.scrollregion() + w = widget.width() + h = widget.height() + + if w >= (right - left): + return (0, 0) + if h >= (bot - top): + return (0, 0) + + # Move the widget out of the way, for now. + (x1, y1, x2, y2) = widget.bbox() + widget.move(left - x2 - 50, top - y2 - 50) + + if desired_x is not None: + x = desired_x + for y in range(top, bot - h, int((bot - top - h) / 10)): + if not self._canvas.find_overlapping( + x - 5, y - 5, x + w + 5, y + h + 5 + ): + return (x, y) + + if desired_y is not None: + y = desired_y + for x in range(left, right - w, int((right - left - w) / 10)): + if not self._canvas.find_overlapping( + x - 5, y - 5, x + w + 5, y + h + 5 + ): + return (x, y) + + for y in range(top, bot - h, int((bot - top - h) / 10)): + for x in range(left, right - w, int((right - left - w) / 10)): + if not self._canvas.find_overlapping( + x - 5, y - 5, x + w + 5, y + h + 5 + ): + return (x, y) + return (0, 0) + + def destroy_widget(self, canvaswidget): + """ + Remove a canvas widget from this ``CanvasFrame``. This + deregisters the canvas widget, and destroys it. + """ + self.remove_widget(canvaswidget) + canvaswidget.destroy() + + def remove_widget(self, canvaswidget): + # Deregister with scrollwatcher. + self._scrollwatcher.remove_child(canvaswidget) + + def pack(self, cnf={}, **kw): + """ + Pack this ``CanvasFrame``. See the documentation for + ``Tkinter.Pack`` for more information. + """ + self._frame.pack(cnf, **kw) + # Adjust to be big enough for kids? + + def destroy(self, *e): + """ + Destroy this ``CanvasFrame``. If this ``CanvasFrame`` created a + top-level window, then this will close that window. + """ + if self._parent is None: + return + self._parent.destroy() + self._parent = None + + def mainloop(self, *args, **kwargs): + """ + Enter the Tkinter mainloop. This function must be called if + this frame is created from a non-interactive program (e.g. + from a secript); otherwise, the frame will close as soon as + the script completes. + """ + if in_idle(): + return + self._parent.mainloop(*args, **kwargs) + + +##////////////////////////////////////////////////////// +## Text display +##////////////////////////////////////////////////////// + + +class ShowText(object): + """ + A ``Tkinter`` window used to display a text. ``ShowText`` is + typically used by graphical tools to display help text, or similar + information. 
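+
+    A small illustrative sketch (it assumes a Tk display is available and
+    is therefore skipped by doctest; passing ``None`` as ``root`` makes
+    ``ShowText`` create its own window):
+
+        >>> ShowText(None, 'Help', 'This is the help text.')  # doctest: +SKIP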
+ """ + + def __init__(self, root, title, text, width=None, height=None, **textbox_options): + if width is None or height is None: + (width, height) = self.find_dimentions(text, width, height) + + # Create the main window. + if root is None: + self._top = top = Tk() + else: + self._top = top = Toplevel(root) + top.title(title) + + b = Button(top, text='Ok', command=self.destroy) + b.pack(side='bottom') + + tbf = Frame(top) + tbf.pack(expand=1, fill='both') + scrollbar = Scrollbar(tbf, orient='vertical') + scrollbar.pack(side='right', fill='y') + textbox = Text(tbf, wrap='word', width=width, height=height, **textbox_options) + textbox.insert('end', text) + textbox['state'] = 'disabled' + textbox.pack(side='left', expand=1, fill='both') + scrollbar['command'] = textbox.yview + textbox['yscrollcommand'] = scrollbar.set + + # Make it easy to close the window. + top.bind('q', self.destroy) + top.bind('x', self.destroy) + top.bind('c', self.destroy) + top.bind('', self.destroy) + top.bind('', self.destroy) + + # Focus the scrollbar, so they can use up/down, etc. + scrollbar.focus() + + def find_dimentions(self, text, width, height): + lines = text.split('\n') + if width is None: + maxwidth = max(len(line) for line in lines) + width = min(maxwidth, 80) + + # Now, find height. + height = 0 + for line in lines: + while len(line) > width: + brk = line[:width].rfind(' ') + line = line[brk:] + height += 1 + height += 1 + height = min(height, 25) + + return (width, height) + + def destroy(self, *e): + if self._top is None: + return + self._top.destroy() + self._top = None + + def mainloop(self, *args, **kwargs): + """ + Enter the Tkinter mainloop. This function must be called if + this window is created from a non-interactive program (e.g. + from a secript); otherwise, the window will close as soon as + the script completes. + """ + if in_idle(): + return + self._top.mainloop(*args, **kwargs) + + +##////////////////////////////////////////////////////// +## Entry dialog +##////////////////////////////////////////////////////// + + +class EntryDialog(object): + """ + A dialog box for entering + """ + + def __init__( + self, parent, original_text='', instructions='', set_callback=None, title=None + ): + self._parent = parent + self._original_text = original_text + self._set_callback = set_callback + + width = int(max(30, len(original_text) * 3 / 2)) + self._top = Toplevel(parent) + + if title: + self._top.title(title) + + # The text entry box. + entryframe = Frame(self._top) + entryframe.pack(expand=1, fill='both', padx=5, pady=5, ipady=10) + if instructions: + l = Label(entryframe, text=instructions) + l.pack(side='top', anchor='w', padx=30) + self._entry = Entry(entryframe, width=width) + self._entry.pack(expand=1, fill='x', padx=30) + self._entry.insert(0, original_text) + + # A divider + divider = Frame(self._top, borderwidth=1, relief='sunken') + divider.pack(fill='x', ipady=1, padx=10) + + # The buttons. 
+ buttons = Frame(self._top) + buttons.pack(expand=0, fill='x', padx=5, pady=5) + b = Button(buttons, text='Cancel', command=self._cancel, width=8) + b.pack(side='right', padx=5) + b = Button(buttons, text='Ok', command=self._ok, width=8, default='active') + b.pack(side='left', padx=5) + b = Button(buttons, text='Apply', command=self._apply, width=8) + b.pack(side='left') + + self._top.bind('', self._ok) + self._top.bind('', self._cancel) + self._top.bind('', self._cancel) + + self._entry.focus() + + def _reset(self, *e): + self._entry.delete(0, 'end') + self._entry.insert(0, self._original_text) + if self._set_callback: + self._set_callback(self._original_text) + + def _cancel(self, *e): + try: + self._reset() + except: + pass + self._destroy() + + def _ok(self, *e): + self._apply() + self._destroy() + + def _apply(self, *e): + if self._set_callback: + self._set_callback(self._entry.get()) + + def _destroy(self, *e): + if self._top is None: + return + self._top.destroy() + self._top = None + + +##////////////////////////////////////////////////////// +## Colorized List +##////////////////////////////////////////////////////// + + +class ColorizedList(object): + """ + An abstract base class for displaying a colorized list of items. + Subclasses should define: + - ``_init_colortags``, which sets up Text color tags that + will be used by the list. + - ``_item_repr``, which returns a list of (text,colortag) + tuples that make up the colorized representation of the + item. + :note: Typically, you will want to register a callback for + ``'select'`` that calls ``mark`` on the given item. + """ + + def __init__(self, parent, items=[], **options): + """ + Construct a new list. + + :param parent: The Tk widget that contains the colorized list + :param items: The initial contents of the colorized list. + :param options: + """ + self._parent = parent + self._callbacks = {} + + # Which items are marked? + self._marks = {} + + # Initialize the Tkinter frames. + self._init_itemframe(options.copy()) + + # Set up key & mouse bindings. + self._textwidget.bind('', self._keypress) + self._textwidget.bind('', self._buttonpress) + + # Fill in the given CFG's items. + self._items = None + self.set(items) + + # //////////////////////////////////////////////////////////// + # Abstract methods + # //////////////////////////////////////////////////////////// + @abstractmethod + def _init_colortags(self, textwidget, options): + """ + Set up any colortags that will be used by this colorized list. + E.g.: + >>> textwidget.tag_config('terminal', foreground='black') + """ + + @abstractmethod + def _item_repr(self, item): + """ + Return a list of (text, colortag) tuples that make up the + colorized representation of the item. Colorized + representations may not span multiple lines. I.e., the text + strings returned may not contain newline characters. + """ + + # //////////////////////////////////////////////////////////// + # Item Access + # //////////////////////////////////////////////////////////// + + def get(self, index=None): + """ + :return: A list of the items contained by this list. + """ + if index is None: + return self._items[:] + else: + return self._items[index] + + def set(self, items): + """ + Modify the list of items contained by this list. 
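+
+        Illustrative only (``mylist`` is a stand-in for an instance of a
+        concrete ``ColorizedList`` subclass):
+
+            >>> mylist.set(['item1', 'item2'])    # doctest: +SKIP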
+ """ + items = list(items) + if self._items == items: + return + self._items = list(items) + + self._textwidget['state'] = 'normal' + self._textwidget.delete('1.0', 'end') + for item in items: + for (text, colortag) in self._item_repr(item): + assert '\n' not in text, 'item repr may not contain newline' + self._textwidget.insert('end', text, colortag) + self._textwidget.insert('end', '\n') + # Remove the final newline + self._textwidget.delete('end-1char', 'end') + self._textwidget.mark_set('insert', '1.0') + self._textwidget['state'] = 'disabled' + # Clear all marks + self._marks.clear() + + def unmark(self, item=None): + """ + Remove highlighting from the given item; or from every item, + if no item is given. + :raise ValueError: If ``item`` is not contained in the list. + :raise KeyError: If ``item`` is not marked. + """ + if item is None: + self._marks.clear() + self._textwidget.tag_remove('highlight', '1.0', 'end+1char') + else: + index = self._items.index(item) + del self._marks[item] + (start, end) = ('%d.0' % (index + 1), '%d.0' % (index + 2)) + self._textwidget.tag_remove('highlight', start, end) + + def mark(self, item): + """ + Highlight the given item. + :raise ValueError: If ``item`` is not contained in the list. + """ + self._marks[item] = 1 + index = self._items.index(item) + (start, end) = ('%d.0' % (index + 1), '%d.0' % (index + 2)) + self._textwidget.tag_add('highlight', start, end) + + def markonly(self, item): + """ + Remove any current highlighting, and mark the given item. + :raise ValueError: If ``item`` is not contained in the list. + """ + self.unmark() + self.mark(item) + + def view(self, item): + """ + Adjust the view such that the given item is visible. If + the item is already visible, then do nothing. + """ + index = self._items.index(item) + self._textwidget.see('%d.0' % (index + 1)) + + # //////////////////////////////////////////////////////////// + # Callbacks + # //////////////////////////////////////////////////////////// + + def add_callback(self, event, func): + """ + Register a callback function with the list. This function + will be called whenever the given event occurs. + + :param event: The event that will trigger the callback + function. Valid events are: click1, click2, click3, + space, return, select, up, down, next, prior, move + :param func: The function that should be called when + the event occurs. ``func`` will be called with a + single item as its argument. (The item selected + or the item moved to). + """ + if event == 'select': + events = ['click1', 'space', 'return'] + elif event == 'move': + events = ['up', 'down', 'next', 'prior'] + else: + events = [event] + + for e in events: + self._callbacks.setdefault(e, {})[func] = 1 + + def remove_callback(self, event, func=None): + """ + Deregister a callback function. If ``func`` is none, then + all callbacks are removed for the given event. 
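+
+        Illustrative only (``mylist`` is a stand-in for an instance of a
+        concrete ``ColorizedList`` subclass; ``print`` here receives the
+        selected item):
+
+            >>> mylist.add_callback('select', print)     # doctest: +SKIP
+            >>> mylist.remove_callback('select', print)  # doctest: +SKIP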
+ """ + if event is None: + events = list(self._callbacks.keys()) + elif event == 'select': + events = ['click1', 'space', 'return'] + elif event == 'move': + events = ['up', 'down', 'next', 'prior'] + else: + events = [event] + + for e in events: + if func is None: + del self._callbacks[e] + else: + try: + del self._callbacks[e][func] + except: + pass + + # //////////////////////////////////////////////////////////// + # Tkinter Methods + # //////////////////////////////////////////////////////////// + + def pack(self, cnf={}, **kw): + # "@include: Tkinter.Pack.pack" + self._itemframe.pack(cnf, **kw) + + def grid(self, cnf={}, **kw): + # "@include: Tkinter.Grid.grid" + self._itemframe.grid(cnf, *kw) + + def focus(self): + # "@include: Tkinter.Widget.focus" + self._textwidget.focus() + + # //////////////////////////////////////////////////////////// + # Internal Methods + # //////////////////////////////////////////////////////////// + + def _init_itemframe(self, options): + self._itemframe = Frame(self._parent) + + # Create the basic Text widget & scrollbar. + options.setdefault('background', '#e0e0e0') + self._textwidget = Text(self._itemframe, **options) + self._textscroll = Scrollbar(self._itemframe, takefocus=0, orient='vertical') + self._textwidget.config(yscrollcommand=self._textscroll.set) + self._textscroll.config(command=self._textwidget.yview) + self._textscroll.pack(side='right', fill='y') + self._textwidget.pack(expand=1, fill='both', side='left') + + # Initialize the colorization tags + self._textwidget.tag_config( + 'highlight', background='#e0ffff', border='1', relief='raised' + ) + self._init_colortags(self._textwidget, options) + + # How do I want to mark keyboard selection? + self._textwidget.tag_config('sel', foreground='') + self._textwidget.tag_config( + 'sel', foreground='', background='', border='', underline=1 + ) + self._textwidget.tag_lower('highlight', 'sel') + + def _fire_callback(self, event, itemnum): + if event not in self._callbacks: + return + if 0 <= itemnum < len(self._items): + item = self._items[itemnum] + else: + item = None + for cb_func in list(self._callbacks[event].keys()): + cb_func(item) + + def _buttonpress(self, event): + clickloc = '@%d,%d' % (event.x, event.y) + insert_point = self._textwidget.index(clickloc) + itemnum = int(insert_point.split('.')[0]) - 1 + self._fire_callback('click%d' % event.num, itemnum) + + def _keypress(self, event): + if event.keysym == 'Return' or event.keysym == 'space': + insert_point = self._textwidget.index('insert') + itemnum = int(insert_point.split('.')[0]) - 1 + self._fire_callback(event.keysym.lower(), itemnum) + return + elif event.keysym == 'Down': + delta = '+1line' + elif event.keysym == 'Up': + delta = '-1line' + elif event.keysym == 'Next': + delta = '+10lines' + elif event.keysym == 'Prior': + delta = '-10lines' + else: + return 'continue' + + self._textwidget.mark_set('insert', 'insert' + delta) + self._textwidget.see('insert') + self._textwidget.tag_remove('sel', '1.0', 'end+1char') + self._textwidget.tag_add('sel', 'insert linestart', 'insert lineend') + + insert_point = self._textwidget.index('insert') + itemnum = int(insert_point.split('.')[0]) - 1 + self._fire_callback(event.keysym.lower(), itemnum) + + return 'break' + + +##////////////////////////////////////////////////////// +## Improved OptionMenu +##////////////////////////////////////////////////////// + + +class MutableOptionMenu(Menubutton): + def __init__(self, master, values, **options): + self._callback = options.get('command') + if 
'command' in options: + del options['command'] + + # Create a variable + self._variable = variable = StringVar() + if len(values) > 0: + variable.set(values[0]) + + kw = { + "borderwidth": 2, + "textvariable": variable, + "indicatoron": 1, + "relief": RAISED, + "anchor": "c", + "highlightthickness": 2, + } + kw.update(options) + Widget.__init__(self, master, "menubutton", kw) + self.widgetName = 'tk_optionMenu' + self._menu = Menu(self, name="menu", tearoff=0) + self.menuname = self._menu._w + + self._values = [] + for value in values: + self.add(value) + + self["menu"] = self._menu + + def add(self, value): + if value in self._values: + return + + def set(value=value): + self.set(value) + + self._menu.add_command(label=value, command=set) + self._values.append(value) + + def set(self, value): + self._variable.set(value) + if self._callback: + self._callback(value) + + def remove(self, value): + # Might raise indexerror: pass to parent. + i = self._values.index(value) + del self._values[i] + self._menu.delete(i, i) + + def __getitem__(self, name): + if name == 'menu': + return self.__menu + return Widget.__getitem__(self, name) + + def destroy(self): + """Destroy this widget and the associated menu.""" + Menubutton.destroy(self) + self._menu = None + + +##////////////////////////////////////////////////////// +## Test code. +##////////////////////////////////////////////////////// + + +def demo(): + """ + A simple demonstration showing how to use canvas widgets. + """ + + def fill(cw): + from random import randint + + cw['fill'] = '#00%04d' % randint(0, 9999) + + def color(cw): + from random import randint + + cw['color'] = '#ff%04d' % randint(0, 9999) + + cf = CanvasFrame(closeenough=10, width=300, height=300) + c = cf.canvas() + ct3 = TextWidget(c, 'hiya there', draggable=1) + ct2 = TextWidget(c, 'o o\n||\n___\n U', draggable=1, justify='center') + co = OvalWidget(c, ct2, outline='red') + ct = TextWidget(c, 'o o\n||\n\\___/', draggable=1, justify='center') + cp = ParenWidget(c, ct, color='red') + cb = BoxWidget(c, cp, fill='cyan', draggable=1, width=3, margin=10) + equation = SequenceWidget( + c, + SymbolWidget(c, 'forall'), + TextWidget(c, 'x'), + SymbolWidget(c, 'exists'), + TextWidget(c, 'y: '), + TextWidget(c, 'x'), + SymbolWidget(c, 'notequal'), + TextWidget(c, 'y'), + ) + space = SpaceWidget(c, 0, 30) + cstack = StackWidget(c, cb, ct3, space, co, equation, align='center') + prompt_msg = TextWidget( + c, 'try clicking\nand dragging', draggable=1, justify='center' + ) + cs = SequenceWidget(c, cstack, prompt_msg) + zz = BracketWidget(c, cs, color='green4', width=3) + cf.add_widget(zz, 60, 30) + + cb.bind_click(fill) + ct.bind_click(color) + co.bind_click(fill) + ct2.bind_click(color) + ct3.bind_click(color) + + cf.mainloop() + # ShowText(None, 'title', ((('this is text'*150)+'\n')*5)) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/featstruct.py b/venv.bak/lib/python3.7/site-packages/nltk/featstruct.py new file mode 100644 index 0000000..9c2cdeb --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/featstruct.py @@ -0,0 +1,2796 @@ +# Natural Language Toolkit: Feature Structures +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper , +# Rob Speer, +# Steven Bird +# URL: +# For license information, see LICENSE.TXT + +""" +Basic data classes for representing feature structures, and for +performing basic operations on those feature structures. 
A feature +structure is a mapping from feature identifiers to feature values, +where each feature value is either a basic value (such as a string or +an integer), or a nested feature structure. There are two types of +feature structure, implemented by two subclasses of ``FeatStruct``: + + - feature dictionaries, implemented by ``FeatDict``, act like + Python dictionaries. Feature identifiers may be strings or + instances of the ``Feature`` class. + - feature lists, implemented by ``FeatList``, act like Python + lists. Feature identifiers are integers. + +Feature structures are typically used to represent partial information +about objects. A feature identifier that is not mapped to a value +stands for a feature whose value is unknown (*not* a feature without +a value). Two feature structures that represent (potentially +overlapping) information about the same object can be combined by +unification. When two inconsistent feature structures are unified, +the unification fails and returns None. + +Features can be specified using "feature paths", or tuples of feature +identifiers that specify path through the nested feature structures to +a value. Feature structures may contain reentrant feature values. A +"reentrant feature value" is a single feature value that can be +accessed via multiple feature paths. Unification preserves the +reentrance relations imposed by both of the unified feature +structures. In the feature structure resulting from unification, any +modifications to a reentrant feature value will be visible using any +of its feature paths. + +Feature structure variables are encoded using the ``nltk.sem.Variable`` +class. The variables' values are tracked using a bindings +dictionary, which maps variables to their values. When two feature +structures are unified, a fresh bindings dictionary is created to +track their values; and before unification completes, all bound +variables are replaced by their values. Thus, the bindings +dictionaries are usually strictly internal to the unification process. +However, it is possible to track the bindings of variables if you +choose to, by supplying your own initial bindings dictionary to the +``unify()`` function. + +When unbound variables are unified with one another, they become +aliased. This is encoded by binding one variable to the other. + +Lightweight Feature Structures +============================== +Many of the functions defined by ``nltk.featstruct`` can be applied +directly to simple Python dictionaries and lists, rather than to +full-fledged ``FeatDict`` and ``FeatList`` objects. In other words, +Python ``dicts`` and ``lists`` can be used as "light-weight" feature +structures. + + >>> from nltk.featstruct import unify + >>> unify(dict(x=1, y=dict()), dict(a='a', y=dict(b='b'))) # doctest: +SKIP + {'y': {'b': 'b'}, 'x': 1, 'a': 'a'} + +However, you should keep in mind the following caveats: + + - Python dictionaries & lists ignore reentrance when checking for + equality between values. But two FeatStructs with different + reentrances are considered nonequal, even if all their base + values are equal. + + - FeatStructs can be easily frozen, allowing them to be used as + keys in hash tables. Python dictionaries and lists can not. + + - FeatStructs display reentrance in their string representations; + Python dictionaries and lists do not. + + - FeatStructs may *not* be mixed with Python dictionaries and lists + (e.g., when performing unification). 
+ + - FeatStructs provide a number of useful methods, such as ``walk()`` + and ``cyclic()``, which are not available for Python dicts and lists. + +In general, if your feature structures will contain any reentrances, +or if you plan to use them as dictionary keys, it is strongly +recommended that you use full-fledged ``FeatStruct`` objects. +""" +from __future__ import print_function, unicode_literals, division + +import re +import copy +from functools import total_ordering + +from six import integer_types, string_types + +from nltk.internals import read_str, raise_unorderable_types +from nltk.sem.logic import ( + Variable, + Expression, + SubstituteBindingsI, + LogicParser, + LogicalExpressionException, +) +from nltk.compat import python_2_unicode_compatible, unicode_repr + + +###################################################################### +# Feature Structure +###################################################################### + + +@total_ordering +class FeatStruct(SubstituteBindingsI): + """ + A mapping from feature identifiers to feature values, where each + feature value is either a basic value (such as a string or an + integer), or a nested feature structure. There are two types of + feature structure: + + - feature dictionaries, implemented by ``FeatDict``, act like + Python dictionaries. Feature identifiers may be strings or + instances of the ``Feature`` class. + - feature lists, implemented by ``FeatList``, act like Python + lists. Feature identifiers are integers. + + Feature structures may be indexed using either simple feature + identifiers or 'feature paths.' A feature path is a sequence + of feature identifiers that stand for a corresponding sequence of + indexing operations. In particular, ``fstruct[(f1,f2,...,fn)]`` is + equivalent to ``fstruct[f1][f2]...[fn]``. + + Feature structures may contain reentrant feature structures. A + "reentrant feature structure" is a single feature structure + object that can be accessed via multiple feature paths. Feature + structures may also be cyclic. A feature structure is "cyclic" + if there is any feature path from the feature structure to itself. + + Two feature structures are considered equal if they assign the + same values to all features, and have the same reentrancies. + + By default, feature structures are mutable. They may be made + immutable with the ``freeze()`` method. Once they have been + frozen, they may be hashed, and thus used as dictionary keys. + """ + + _frozen = False + """:ivar: A flag indicating whether this feature structure is + frozen or not. Once this flag is set, it should never be + un-set; and no further modification should be made to this + feature structue.""" + + ##//////////////////////////////////////////////////////////// + # { Constructor + ##//////////////////////////////////////////////////////////// + + def __new__(cls, features=None, **morefeatures): + """ + Construct and return a new feature structure. If this + constructor is called directly, then the returned feature + structure will be an instance of either the ``FeatDict`` class + or the ``FeatList`` class. + + :param features: The initial feature values for this feature + structure: + - FeatStruct(string) -> FeatStructReader().read(string) + - FeatStruct(mapping) -> FeatDict(mapping) + - FeatStruct(sequence) -> FeatList(sequence) + - FeatStruct() -> FeatDict() + :param morefeatures: If ``features`` is a mapping or None, + then ``morefeatures`` provides additional features for the + ``FeatDict`` constructor. 
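+
+        An illustrative sketch of this dispatch (a mapping yields a
+        ``FeatDict``, a sequence a ``FeatList``; the reprs shown are
+        assumed, so the examples are skipped by doctest):
+
+            >>> FeatStruct(number='sg')     # doctest: +SKIP
+            [number='sg']
+            >>> FeatStruct(['a', 'b'])      # doctest: +SKIP
+            ['a', 'b']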
+ """ + # If the FeatStruct constructor is called directly, then decide + # whether to create a FeatDict or a FeatList, based on the + # contents of the `features` argument. + if cls is FeatStruct: + if features is None: + return FeatDict.__new__(FeatDict, **morefeatures) + elif _is_mapping(features): + return FeatDict.__new__(FeatDict, features, **morefeatures) + elif morefeatures: + raise TypeError( + 'Keyword arguments may only be specified ' + 'if features is None or is a mapping.' + ) + if isinstance(features, string_types): + if FeatStructReader._START_FDICT_RE.match(features): + return FeatDict.__new__(FeatDict, features, **morefeatures) + else: + return FeatList.__new__(FeatList, features, **morefeatures) + elif _is_sequence(features): + return FeatList.__new__(FeatList, features) + else: + raise TypeError('Expected string or mapping or sequence') + + # Otherwise, construct the object as normal. + else: + return super(FeatStruct, cls).__new__(cls, features, **morefeatures) + + ##//////////////////////////////////////////////////////////// + # { Uniform Accessor Methods + ##//////////////////////////////////////////////////////////// + # These helper functions allow the methods defined by FeatStruct + # to treat all feature structures as mappings, even if they're + # really lists. (Lists are treated as mappings from ints to vals) + + def _keys(self): + """Return an iterable of the feature identifiers used by this + FeatStruct.""" + raise NotImplementedError() # Implemented by subclasses. + + def _values(self): + """Return an iterable of the feature values directly defined + by this FeatStruct.""" + raise NotImplementedError() # Implemented by subclasses. + + def _items(self): + """Return an iterable of (fid,fval) pairs, where fid is a + feature identifier and fval is the corresponding feature + value, for all features defined by this FeatStruct.""" + raise NotImplementedError() # Implemented by subclasses. + + ##//////////////////////////////////////////////////////////// + # { Equality & Hashing + ##//////////////////////////////////////////////////////////// + + def equal_values(self, other, check_reentrance=False): + """ + Return True if ``self`` and ``other`` assign the same value to + to every feature. In particular, return true if + ``self[p]==other[p]`` for every feature path *p* such + that ``self[p]`` or ``other[p]`` is a base value (i.e., + not a nested feature structure). + + :param check_reentrance: If True, then also return False if + there is any difference between the reentrances of ``self`` + and ``other``. + :note: the ``==`` is equivalent to ``equal_values()`` with + ``check_reentrance=True``. + """ + return self._equal(other, check_reentrance, set(), set(), set()) + + def __eq__(self, other): + """ + Return true if ``self`` and ``other`` are both feature structures, + assign the same values to all features, and contain the same + reentrances. I.e., return + ``self.equal_values(other, check_reentrance=True)``. 
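+
+        For instance (illustrative; skipped as a doctest):
+
+            >>> FeatStruct(a=1) == FeatStruct(a=1)    # doctest: +SKIP
+            True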
+ + :see: ``equal_values()`` + """ + return self._equal(other, True, set(), set(), set()) + + def __ne__(self, other): + return not self == other + + def __lt__(self, other): + if not isinstance(other, FeatStruct): + # raise_unorderable_types("<", self, other) + # Sometimes feature values can be pure strings, + # so we need to be able to compare with non-featstructs: + return self.__class__.__name__ < other.__class__.__name__ + else: + return len(self) < len(other) + + def __hash__(self): + """ + If this feature structure is frozen, return its hash value; + otherwise, raise ``TypeError``. + """ + if not self._frozen: + raise TypeError('FeatStructs must be frozen before they ' 'can be hashed.') + try: + return self._hash + except AttributeError: + self._hash = self._calculate_hashvalue(set()) + return self._hash + + def _equal( + self, other, check_reentrance, visited_self, visited_other, visited_pairs + ): + """ + Return True iff self and other have equal values. + + :param visited_self: A set containing the ids of all ``self`` + feature structures we've already visited. + :param visited_other: A set containing the ids of all ``other`` + feature structures we've already visited. + :param visited_pairs: A set containing ``(selfid, otherid)`` pairs + for all pairs of feature structures we've already visited. + """ + # If we're the same object, then we're equal. + if self is other: + return True + + # If we have different classes, we're definitely not equal. + if self.__class__ != other.__class__: + return False + + # If we define different features, we're definitely not equal. + # (Perform len test first because it's faster -- we should + # do profiling to see if this actually helps) + if len(self) != len(other): + return False + if set(self._keys()) != set(other._keys()): + return False + + # If we're checking reentrance, then any time we revisit a + # structure, make sure that it was paired with the same + # feature structure that it is now. Note: if check_reentrance, + # then visited_pairs will never contain two pairs whose first + # values are equal, or two pairs whose second values are equal. + if check_reentrance: + if id(self) in visited_self or id(other) in visited_other: + return (id(self), id(other)) in visited_pairs + + # If we're not checking reentrance, then we still need to deal + # with cycles. If we encounter the same (self, other) pair a + # second time, then we won't learn anything more by examining + # their children a second time, so just return true. + else: + if (id(self), id(other)) in visited_pairs: + return True + + # Keep track of which nodes we've visited. + visited_self.add(id(self)) + visited_other.add(id(other)) + visited_pairs.add((id(self), id(other))) + + # Now we have to check all values. If any of them don't match, + # then return false. + for (fname, self_fval) in self._items(): + other_fval = other[fname] + if isinstance(self_fval, FeatStruct): + if not self_fval._equal( + other_fval, + check_reentrance, + visited_self, + visited_other, + visited_pairs, + ): + return False + else: + if self_fval != other_fval: + return False + + # Everything matched up; return true. + return True + + def _calculate_hashvalue(self, visited): + """ + Return a hash value for this feature structure. + + :require: ``self`` must be frozen. + :param visited: A set containing the ids of all feature + structures we've already visited while hashing. 
+ """ + if id(self) in visited: + return 1 + visited.add(id(self)) + + hashval = 5831 + for (fname, fval) in sorted(self._items()): + hashval *= 37 + hashval += hash(fname) + hashval *= 37 + if isinstance(fval, FeatStruct): + hashval += fval._calculate_hashvalue(visited) + else: + hashval += hash(fval) + # Convert to a 32 bit int. + hashval = int(hashval & 0x7FFFFFFF) + return hashval + + ##//////////////////////////////////////////////////////////// + # { Freezing + ##//////////////////////////////////////////////////////////// + + #: Error message used by mutating methods when called on a frozen + #: feature structure. + _FROZEN_ERROR = "Frozen FeatStructs may not be modified." + + def freeze(self): + """ + Make this feature structure, and any feature structures it + contains, immutable. Note: this method does not attempt to + 'freeze' any feature value that is not a ``FeatStruct``; it + is recommended that you use only immutable feature values. + """ + if self._frozen: + return + self._freeze(set()) + + def frozen(self): + """ + Return True if this feature structure is immutable. Feature + structures can be made immutable with the ``freeze()`` method. + Immutable feature structures may not be made mutable again, + but new mutable copies can be produced with the ``copy()`` method. + """ + return self._frozen + + def _freeze(self, visited): + """ + Make this feature structure, and any feature structure it + contains, immutable. + + :param visited: A set containing the ids of all feature + structures we've already visited while freezing. + """ + if id(self) in visited: + return + visited.add(id(self)) + self._frozen = True + for (fname, fval) in sorted(self._items()): + if isinstance(fval, FeatStruct): + fval._freeze(visited) + + ##//////////////////////////////////////////////////////////// + # { Copying + ##//////////////////////////////////////////////////////////// + + def copy(self, deep=True): + """ + Return a new copy of ``self``. The new copy will not be frozen. + + :param deep: If true, create a deep copy; if false, create + a shallow copy. + """ + if deep: + return copy.deepcopy(self) + else: + return self.__class__(self) + + # Subclasses should define __deepcopy__ to ensure that the new + # copy will not be frozen. + def __deepcopy__(self, memo): + raise NotImplementedError() # Implemented by subclasses. + + ##//////////////////////////////////////////////////////////// + # { Structural Information + ##//////////////////////////////////////////////////////////// + + def cyclic(self): + """ + Return True if this feature structure contains itself. + """ + return self._find_reentrances({})[id(self)] + + def walk(self): + """ + Return an iterator that generates this feature structure, and + each feature structure it contains. Each feature structure will + be generated exactly once. + """ + return self._walk(set()) + + def _walk(self, visited): + """ + Return an iterator that generates this feature structure, and + each feature structure it contains. + + :param visited: A set containing the ids of all feature + structures we've already visited while freezing. + """ + raise NotImplementedError() # Implemented by subclasses. + + def _walk(self, visited): + if id(self) in visited: + return + visited.add(id(self)) + yield self + for fval in self._values(): + if isinstance(fval, FeatStruct): + for elt in fval._walk(visited): + yield elt + + # Walk through the feature tree. The first time we see a feature + # value, map it to False (not reentrant). 
If we see a feature + # value more than once, then map it to True (reentrant). + def _find_reentrances(self, reentrances): + """ + Return a dictionary that maps from the ``id`` of each feature + structure contained in ``self`` (including ``self``) to a + boolean value, indicating whether it is reentrant or not. + """ + if id(self) in reentrances: + # We've seen it more than once. + reentrances[id(self)] = True + else: + # This is the first time we've seen it. + reentrances[id(self)] = False + + # Recurse to contained feature structures. + for fval in self._values(): + if isinstance(fval, FeatStruct): + fval._find_reentrances(reentrances) + + return reentrances + + ##//////////////////////////////////////////////////////////// + # { Variables & Bindings + ##//////////////////////////////////////////////////////////// + + def substitute_bindings(self, bindings): + """:see: ``nltk.featstruct.substitute_bindings()``""" + return substitute_bindings(self, bindings) + + def retract_bindings(self, bindings): + """:see: ``nltk.featstruct.retract_bindings()``""" + return retract_bindings(self, bindings) + + def variables(self): + """:see: ``nltk.featstruct.find_variables()``""" + return find_variables(self) + + def rename_variables(self, vars=None, used_vars=(), new_vars=None): + """:see: ``nltk.featstruct.rename_variables()``""" + return rename_variables(self, vars, used_vars, new_vars) + + def remove_variables(self): + """ + Return the feature structure that is obtained by deleting + any feature whose value is a ``Variable``. + + :rtype: FeatStruct + """ + return remove_variables(self) + + ##//////////////////////////////////////////////////////////// + # { Unification + ##//////////////////////////////////////////////////////////// + + def unify(self, other, bindings=None, trace=False, fail=None, rename_vars=True): + return unify(self, other, bindings, trace, fail, rename_vars) + + def subsumes(self, other): + """ + Return True if ``self`` subsumes ``other``. I.e., return true + If unifying ``self`` with ``other`` would result in a feature + structure equal to ``other``. + """ + return subsumes(self, other) + + ##//////////////////////////////////////////////////////////// + # { String Representations + ##//////////////////////////////////////////////////////////// + + def __repr__(self): + """ + Display a single-line representation of this feature structure, + suitable for embedding in other representations. + """ + return self._repr(self._find_reentrances({}), {}) + + def _repr(self, reentrances, reentrance_ids): + """ + Return a string representation of this feature structure. + + :param reentrances: A dictionary that maps from the ``id`` of + each feature value in self, indicating whether that value + is reentrant or not. + :param reentrance_ids: A dictionary mapping from each ``id`` + of a feature value to a unique identifier. This is modified + by ``repr``: the first time a reentrant feature value is + displayed, an identifier is added to ``reentrance_ids`` for it. + """ + raise NotImplementedError() + + +# Mutation: disable if frozen. +_FROZEN_ERROR = "Frozen FeatStructs may not be modified." +_FROZEN_NOTICE = "\n%sIf self is frozen, raise ValueError." + + +def _check_frozen(method, indent=''): + """ + Given a method function, return a new method function that first + checks if ``self._frozen`` is true; and if so, raises ``ValueError`` + with an appropriate message. Otherwise, call the method and return + its result. 
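+
+    An illustrative sketch of the resulting behaviour (``pop`` below is one
+    of the methods wrapped this way by ``FeatDict``; skipped as a doctest):
+
+        >>> fs = FeatStruct(a=1)    # doctest: +SKIP
+        >>> fs.freeze()             # doctest: +SKIP
+        >>> fs.pop('a')             # doctest: +SKIP
+        Traceback (most recent call last):
+            ...
+        ValueError: Frozen FeatStructs may not be modified.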
+ """ + + def wrapped(self, *args, **kwargs): + if self._frozen: + raise ValueError(_FROZEN_ERROR) + else: + return method(self, *args, **kwargs) + + wrapped.__name__ = method.__name__ + wrapped.__doc__ = (method.__doc__ or '') + (_FROZEN_NOTICE % indent) + return wrapped + + +###################################################################### +# Feature Dictionary +###################################################################### + + +@python_2_unicode_compatible +class FeatDict(FeatStruct, dict): + """ + A feature structure that acts like a Python dictionary. I.e., a + mapping from feature identifiers to feature values, where a feature + identifier can be a string or a ``Feature``; and where a feature value + can be either a basic value (such as a string or an integer), or a nested + feature structure. A feature identifiers for a ``FeatDict`` is + sometimes called a "feature name". + + Two feature dicts are considered equal if they assign the same + values to all features, and have the same reentrances. + + :see: ``FeatStruct`` for information about feature paths, reentrance, + cyclic feature structures, mutability, freezing, and hashing. + """ + + def __init__(self, features=None, **morefeatures): + """ + Create a new feature dictionary, with the specified features. + + :param features: The initial value for this feature + dictionary. If ``features`` is a ``FeatStruct``, then its + features are copied (shallow copy). If ``features`` is a + dict, then a feature is created for each item, mapping its + key to its value. If ``features`` is a string, then it is + processed using ``FeatStructReader``. If ``features`` is a list of + tuples ``(name, val)``, then a feature is created for each tuple. + :param morefeatures: Additional features for the new feature + dictionary. If a feature is listed under both ``features`` and + ``morefeatures``, then the value from ``morefeatures`` will be + used. + """ + if isinstance(features, string_types): + FeatStructReader().fromstring(features, self) + self.update(**morefeatures) + else: + # update() checks the types of features. + self.update(features, **morefeatures) + + # //////////////////////////////////////////////////////////// + # { Dict methods + # //////////////////////////////////////////////////////////// + _INDEX_ERROR = str("Expected feature name or path. 
Got %r.") + + def __getitem__(self, name_or_path): + """If the feature with the given name or path exists, return + its value; otherwise, raise ``KeyError``.""" + if isinstance(name_or_path, (string_types, Feature)): + return dict.__getitem__(self, name_or_path) + elif isinstance(name_or_path, tuple): + try: + val = self + for fid in name_or_path: + if not isinstance(val, FeatStruct): + raise KeyError # path contains base value + val = val[fid] + return val + except (KeyError, IndexError): + raise KeyError(name_or_path) + else: + raise TypeError(self._INDEX_ERROR % name_or_path) + + def get(self, name_or_path, default=None): + """If the feature with the given name or path exists, return its + value; otherwise, return ``default``.""" + try: + return self[name_or_path] + except KeyError: + return default + + def __contains__(self, name_or_path): + """Return true if a feature with the given name or path exists.""" + try: + self[name_or_path] + return True + except KeyError: + return False + + def has_key(self, name_or_path): + """Return true if a feature with the given name or path exists.""" + return name_or_path in self + + def __delitem__(self, name_or_path): + """If the feature with the given name or path exists, delete + its value; otherwise, raise ``KeyError``.""" + if self._frozen: + raise ValueError(_FROZEN_ERROR) + if isinstance(name_or_path, (string_types, Feature)): + return dict.__delitem__(self, name_or_path) + elif isinstance(name_or_path, tuple): + if len(name_or_path) == 0: + raise ValueError("The path () can not be set") + else: + parent = self[name_or_path[:-1]] + if not isinstance(parent, FeatStruct): + raise KeyError(name_or_path) # path contains base value + del parent[name_or_path[-1]] + else: + raise TypeError(self._INDEX_ERROR % name_or_path) + + def __setitem__(self, name_or_path, value): + """Set the value for the feature with the given name or path + to ``value``. 
If ``name_or_path`` is an invalid path, raise + ``KeyError``.""" + if self._frozen: + raise ValueError(_FROZEN_ERROR) + if isinstance(name_or_path, (string_types, Feature)): + return dict.__setitem__(self, name_or_path, value) + elif isinstance(name_or_path, tuple): + if len(name_or_path) == 0: + raise ValueError("The path () can not be set") + else: + parent = self[name_or_path[:-1]] + if not isinstance(parent, FeatStruct): + raise KeyError(name_or_path) # path contains base value + parent[name_or_path[-1]] = value + else: + raise TypeError(self._INDEX_ERROR % name_or_path) + + clear = _check_frozen(dict.clear) + pop = _check_frozen(dict.pop) + popitem = _check_frozen(dict.popitem) + setdefault = _check_frozen(dict.setdefault) + + def update(self, features=None, **morefeatures): + if self._frozen: + raise ValueError(_FROZEN_ERROR) + if features is None: + items = () + elif hasattr(features, 'items') and callable(features.items): + items = features.items() + elif hasattr(features, '__iter__'): + items = features + else: + raise ValueError('Expected mapping or list of tuples') + + for key, val in items: + if not isinstance(key, (string_types, Feature)): + raise TypeError('Feature names must be strings') + self[key] = val + for key, val in morefeatures.items(): + if not isinstance(key, (string_types, Feature)): + raise TypeError('Feature names must be strings') + self[key] = val + + ##//////////////////////////////////////////////////////////// + # { Copying + ##//////////////////////////////////////////////////////////// + + def __deepcopy__(self, memo): + memo[id(self)] = selfcopy = self.__class__() + for (key, val) in self._items(): + selfcopy[copy.deepcopy(key, memo)] = copy.deepcopy(val, memo) + return selfcopy + + ##//////////////////////////////////////////////////////////// + # { Uniform Accessor Methods + ##//////////////////////////////////////////////////////////// + + def _keys(self): + return self.keys() + + def _values(self): + return self.values() + + def _items(self): + return self.items() + + ##//////////////////////////////////////////////////////////// + # { String Representations + ##//////////////////////////////////////////////////////////// + + def __str__(self): + """ + Display a multi-line representation of this feature dictionary + as an FVM (feature value matrix). + """ + return '\n'.join(self._str(self._find_reentrances({}), {})) + + def _repr(self, reentrances, reentrance_ids): + segments = [] + prefix = '' + suffix = '' + + # If this is the first time we've seen a reentrant structure, + # then assign it a unique identifier. + if reentrances[id(self)]: + assert id(self) not in reentrance_ids + reentrance_ids[id(self)] = repr(len(reentrance_ids) + 1) + + # sorting note: keys are unique strings, so we'll never fall + # through to comparing values. 
+ for (fname, fval) in sorted(self.items()): + display = getattr(fname, 'display', None) + if id(fval) in reentrance_ids: + segments.append('%s->(%s)' % (fname, reentrance_ids[id(fval)])) + elif ( + display == 'prefix' + and not prefix + and isinstance(fval, (Variable, string_types)) + ): + prefix = '%s' % fval + elif display == 'slash' and not suffix: + if isinstance(fval, Variable): + suffix = '/%s' % fval.name + else: + suffix = '/%s' % unicode_repr(fval) + elif isinstance(fval, Variable): + segments.append('%s=%s' % (fname, fval.name)) + elif fval is True: + segments.append('+%s' % fname) + elif fval is False: + segments.append('-%s' % fname) + elif isinstance(fval, Expression): + segments.append('%s=<%s>' % (fname, fval)) + elif not isinstance(fval, FeatStruct): + segments.append('%s=%s' % (fname, unicode_repr(fval))) + else: + fval_repr = fval._repr(reentrances, reentrance_ids) + segments.append('%s=%s' % (fname, fval_repr)) + # If it's reentrant, then add on an identifier tag. + if reentrances[id(self)]: + prefix = '(%s)%s' % (reentrance_ids[id(self)], prefix) + return '%s[%s]%s' % (prefix, ', '.join(segments), suffix) + + def _str(self, reentrances, reentrance_ids): + """ + :return: A list of lines composing a string representation of + this feature dictionary. + :param reentrances: A dictionary that maps from the ``id`` of + each feature value in self, indicating whether that value + is reentrant or not. + :param reentrance_ids: A dictionary mapping from each ``id`` + of a feature value to a unique identifier. This is modified + by ``repr``: the first time a reentrant feature value is + displayed, an identifier is added to ``reentrance_ids`` for + it. + """ + # If this is the first time we've seen a reentrant structure, + # then tack on an id string. + if reentrances[id(self)]: + assert id(self) not in reentrance_ids + reentrance_ids[id(self)] = repr(len(reentrance_ids) + 1) + + # Special case: empty feature dict. + if len(self) == 0: + if reentrances[id(self)]: + return ['(%s) []' % reentrance_ids[id(self)]] + else: + return ['[]'] + + # What's the longest feature name? Use this to align names. + maxfnamelen = max(len("%s" % k) for k in self.keys()) + + lines = [] + # sorting note: keys are unique strings, so we'll never fall + # through to comparing values. + for (fname, fval) in sorted(self.items()): + fname = ("%s" % fname).ljust(maxfnamelen) + if isinstance(fval, Variable): + lines.append('%s = %s' % (fname, fval.name)) + + elif isinstance(fval, Expression): + lines.append('%s = <%s>' % (fname, fval)) + + elif isinstance(fval, FeatList): + fval_repr = fval._repr(reentrances, reentrance_ids) + lines.append('%s = %s' % (fname, unicode_repr(fval_repr))) + + elif not isinstance(fval, FeatDict): + # It's not a nested feature structure -- just print it. + lines.append('%s = %s' % (fname, unicode_repr(fval))) + + elif id(fval) in reentrance_ids: + # It's a feature structure we've seen before -- print + # the reentrance id. + lines.append('%s -> (%s)' % (fname, reentrance_ids[id(fval)])) + + else: + # It's a new feature structure. Separate it from + # other values by a blank line. + if lines and lines[-1] != '': + lines.append('') + + # Recursively print the feature's value (fval). + fval_lines = fval._str(reentrances, reentrance_ids) + + # Indent each line to make room for fname. + fval_lines = [(' ' * (maxfnamelen + 3)) + l for l in fval_lines] + + # Pick which line we'll display fname on, & splice it in. 
+ nameline = (len(fval_lines) - 1) // 2 + fval_lines[nameline] = ( + fname + ' =' + fval_lines[nameline][maxfnamelen + 2 :] + ) + + # Add the feature structure to the output. + lines += fval_lines + + # Separate FeatStructs by a blank line. + lines.append('') + + # Get rid of any excess blank lines. + if lines[-1] == '': + lines.pop() + + # Add brackets around everything. + maxlen = max(len(line) for line in lines) + lines = ['[ %s%s ]' % (line, ' ' * (maxlen - len(line))) for line in lines] + + # If it's reentrant, then add on an identifier tag. + if reentrances[id(self)]: + idstr = '(%s) ' % reentrance_ids[id(self)] + lines = [(' ' * len(idstr)) + l for l in lines] + idline = (len(lines) - 1) // 2 + lines[idline] = idstr + lines[idline][len(idstr) :] + + return lines + + +###################################################################### +# Feature List +###################################################################### + + +class FeatList(FeatStruct, list): + """ + A list of feature values, where each feature value is either a + basic value (such as a string or an integer), or a nested feature + structure. + + Feature lists may contain reentrant feature values. A "reentrant + feature value" is a single feature value that can be accessed via + multiple feature paths. Feature lists may also be cyclic. + + Two feature lists are considered equal if they assign the same + values to all features, and have the same reentrances. + + :see: ``FeatStruct`` for information about feature paths, reentrance, + cyclic feature structures, mutability, freezing, and hashing. + """ + + def __init__(self, features=()): + """ + Create a new feature list, with the specified features. + + :param features: The initial list of features for this feature + list. If ``features`` is a string, then it is paresd using + ``FeatStructReader``. Otherwise, it should be a sequence + of basic values and nested feature structures. + """ + if isinstance(features, string_types): + FeatStructReader().fromstring(features, self) + else: + list.__init__(self, features) + + # //////////////////////////////////////////////////////////// + # { List methods + # //////////////////////////////////////////////////////////// + _INDEX_ERROR = "Expected int or feature path. Got %r." + + def __getitem__(self, name_or_path): + if isinstance(name_or_path, integer_types): + return list.__getitem__(self, name_or_path) + elif isinstance(name_or_path, tuple): + try: + val = self + for fid in name_or_path: + if not isinstance(val, FeatStruct): + raise KeyError # path contains base value + val = val[fid] + return val + except (KeyError, IndexError): + raise KeyError(name_or_path) + else: + raise TypeError(self._INDEX_ERROR % name_or_path) + + def __delitem__(self, name_or_path): + """If the feature with the given name or path exists, delete + its value; otherwise, raise ``KeyError``.""" + if self._frozen: + raise ValueError(_FROZEN_ERROR) + if isinstance(name_or_path, (integer_types, slice)): + return list.__delitem__(self, name_or_path) + elif isinstance(name_or_path, tuple): + if len(name_or_path) == 0: + raise ValueError("The path () can not be set") + else: + parent = self[name_or_path[:-1]] + if not isinstance(parent, FeatStruct): + raise KeyError(name_or_path) # path contains base value + del parent[name_or_path[-1]] + else: + raise TypeError(self._INDEX_ERROR % name_or_path) + + def __setitem__(self, name_or_path, value): + """Set the value for the feature with the given name or path + to ``value``. 
If ``name_or_path`` is an invalid path, raise + ``KeyError``.""" + if self._frozen: + raise ValueError(_FROZEN_ERROR) + if isinstance(name_or_path, (integer_types, slice)): + return list.__setitem__(self, name_or_path, value) + elif isinstance(name_or_path, tuple): + if len(name_or_path) == 0: + raise ValueError("The path () can not be set") + else: + parent = self[name_or_path[:-1]] + if not isinstance(parent, FeatStruct): + raise KeyError(name_or_path) # path contains base value + parent[name_or_path[-1]] = value + else: + raise TypeError(self._INDEX_ERROR % name_or_path) + + # __delslice__ = _check_frozen(list.__delslice__, ' ') + # __setslice__ = _check_frozen(list.__setslice__, ' ') + __iadd__ = _check_frozen(list.__iadd__) + __imul__ = _check_frozen(list.__imul__) + append = _check_frozen(list.append) + extend = _check_frozen(list.extend) + insert = _check_frozen(list.insert) + pop = _check_frozen(list.pop) + remove = _check_frozen(list.remove) + reverse = _check_frozen(list.reverse) + sort = _check_frozen(list.sort) + + ##//////////////////////////////////////////////////////////// + # { Copying + ##//////////////////////////////////////////////////////////// + + def __deepcopy__(self, memo): + memo[id(self)] = selfcopy = self.__class__() + selfcopy.extend(copy.deepcopy(fval, memo) for fval in self) + return selfcopy + + ##//////////////////////////////////////////////////////////// + # { Uniform Accessor Methods + ##//////////////////////////////////////////////////////////// + + def _keys(self): + return list(range(len(self))) + + def _values(self): + return self + + def _items(self): + return enumerate(self) + + ##//////////////////////////////////////////////////////////// + # { String Representations + ##//////////////////////////////////////////////////////////// + + # Special handling for: reentrances, variables, expressions. + def _repr(self, reentrances, reentrance_ids): + # If this is the first time we've seen a reentrant structure, + # then assign it a unique identifier. + if reentrances[id(self)]: + assert id(self) not in reentrance_ids + reentrance_ids[id(self)] = repr(len(reentrance_ids) + 1) + prefix = '(%s)' % reentrance_ids[id(self)] + else: + prefix = '' + + segments = [] + for fval in self: + if id(fval) in reentrance_ids: + segments.append('->(%s)' % reentrance_ids[id(fval)]) + elif isinstance(fval, Variable): + segments.append(fval.name) + elif isinstance(fval, Expression): + segments.append('%s' % fval) + elif isinstance(fval, FeatStruct): + segments.append(fval._repr(reentrances, reentrance_ids)) + else: + segments.append('%s' % unicode_repr(fval)) + + return '%s[%s]' % (prefix, ', '.join(segments)) + + +###################################################################### +# Variables & Bindings +###################################################################### + + +def substitute_bindings(fstruct, bindings, fs_class='default'): + """ + Return the feature structure that is obtained by replacing each + variable bound by ``bindings`` with its binding. If a variable is + aliased to a bound variable, then it will be replaced by that + variable's value. If a variable is aliased to an unbound + variable, then it will be replaced by that variable. + + :type bindings: dict(Variable -> any) + :param bindings: A dictionary mapping from variables to values. 
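+
+ For example (an illustrative doctest; the feature names are arbitrary
+ and the output assumes the default ``FeatStruct`` repr):
+
+ >>> from nltk.featstruct import FeatStruct, substitute_bindings
+ >>> from nltk.sem.logic import Variable
+ >>> fs = FeatStruct('[agr=[num=?n], head=[agr=[num=?n]]]')
+ >>> substitute_bindings(fs, {Variable('?n'): 'sg'})
+ [agr=[num='sg'], head=[agr=[num='sg']]]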
+ """ + if fs_class == 'default': + fs_class = _default_fs_class(fstruct) + fstruct = copy.deepcopy(fstruct) + _substitute_bindings(fstruct, bindings, fs_class, set()) + return fstruct + + +def _substitute_bindings(fstruct, bindings, fs_class, visited): + # Visit each node only once: + if id(fstruct) in visited: + return + visited.add(id(fstruct)) + + if _is_mapping(fstruct): + items = fstruct.items() + elif _is_sequence(fstruct): + items = enumerate(fstruct) + else: + raise ValueError('Expected mapping or sequence') + for (fname, fval) in items: + while isinstance(fval, Variable) and fval in bindings: + fval = fstruct[fname] = bindings[fval] + if isinstance(fval, fs_class): + _substitute_bindings(fval, bindings, fs_class, visited) + elif isinstance(fval, SubstituteBindingsI): + fstruct[fname] = fval.substitute_bindings(bindings) + + +def retract_bindings(fstruct, bindings, fs_class='default'): + """ + Return the feature structure that is obtained by replacing each + feature structure value that is bound by ``bindings`` with the + variable that binds it. A feature structure value must be + identical to a bound value (i.e., have equal id) to be replaced. + + ``bindings`` is modified to point to this new feature structure, + rather than the original feature structure. Feature structure + values in ``bindings`` may be modified if they are contained in + ``fstruct``. + """ + if fs_class == 'default': + fs_class = _default_fs_class(fstruct) + (fstruct, new_bindings) = copy.deepcopy((fstruct, bindings)) + bindings.update(new_bindings) + inv_bindings = dict((id(val), var) for (var, val) in bindings.items()) + _retract_bindings(fstruct, inv_bindings, fs_class, set()) + return fstruct + + +def _retract_bindings(fstruct, inv_bindings, fs_class, visited): + # Visit each node only once: + if id(fstruct) in visited: + return + visited.add(id(fstruct)) + + if _is_mapping(fstruct): + items = fstruct.items() + elif _is_sequence(fstruct): + items = enumerate(fstruct) + else: + raise ValueError('Expected mapping or sequence') + for (fname, fval) in items: + if isinstance(fval, fs_class): + if id(fval) in inv_bindings: + fstruct[fname] = inv_bindings[id(fval)] + _retract_bindings(fval, inv_bindings, fs_class, visited) + + +def find_variables(fstruct, fs_class='default'): + """ + :return: The set of variables used by this feature structure. + :rtype: set(Variable) + """ + if fs_class == 'default': + fs_class = _default_fs_class(fstruct) + return _variables(fstruct, set(), fs_class, set()) + + +def _variables(fstruct, vars, fs_class, visited): + # Visit each node only once: + if id(fstruct) in visited: + return + visited.add(id(fstruct)) + if _is_mapping(fstruct): + items = fstruct.items() + elif _is_sequence(fstruct): + items = enumerate(fstruct) + else: + raise ValueError('Expected mapping or sequence') + for (fname, fval) in items: + if isinstance(fval, Variable): + vars.add(fval) + elif isinstance(fval, fs_class): + _variables(fval, vars, fs_class, visited) + elif isinstance(fval, SubstituteBindingsI): + vars.update(fval.variables()) + return vars + + +def rename_variables( + fstruct, vars=None, used_vars=(), new_vars=None, fs_class='default' +): + """ + Return the feature structure that is obtained by replacing + any of this feature structure's variables that are in ``vars`` + with new variables. The names for these new variables will be + names that are not used by any variable in ``vars``, or in + ``used_vars``, or in this feature structure. 
+ + :type vars: set + :param vars: The set of variables that should be renamed. + If not specified, ``find_variables(fstruct)`` is used; i.e., all + variables will be given new names. + :type used_vars: set + :param used_vars: A set of variables whose names should not be + used by the new variables. + :type new_vars: dict(Variable -> Variable) + :param new_vars: A dictionary that is used to hold the mapping + from old variables to new variables. For each variable *v* + in this feature structure: + + - If ``new_vars`` maps *v* to *v'*, then *v* will be + replaced by *v'*. + - If ``new_vars`` does not contain *v*, but ``vars`` + does contain *v*, then a new entry will be added to + ``new_vars``, mapping *v* to the new variable that is used + to replace it. + + To consistently rename the variables in a set of feature + structures, simply apply rename_variables to each one, using + the same dictionary: + + >>> from nltk.featstruct import FeatStruct + >>> fstruct1 = FeatStruct('[subj=[agr=[gender=?y]], obj=[agr=[gender=?y]]]') + >>> fstruct2 = FeatStruct('[subj=[agr=[number=?z,gender=?y]], obj=[agr=[number=?z,gender=?y]]]') + >>> new_vars = {} # Maps old vars to alpha-renamed vars + >>> fstruct1.rename_variables(new_vars=new_vars) + [obj=[agr=[gender=?y2]], subj=[agr=[gender=?y2]]] + >>> fstruct2.rename_variables(new_vars=new_vars) + [obj=[agr=[gender=?y2, number=?z2]], subj=[agr=[gender=?y2, number=?z2]]] + + If new_vars is not specified, then an empty dictionary is used. + """ + if fs_class == 'default': + fs_class = _default_fs_class(fstruct) + + # Default values: + if new_vars is None: + new_vars = {} + if vars is None: + vars = find_variables(fstruct, fs_class) + else: + vars = set(vars) + + # Add our own variables to used_vars. + used_vars = find_variables(fstruct, fs_class).union(used_vars) + + # Copy ourselves, and rename variables in the copy. + return _rename_variables( + copy.deepcopy(fstruct), vars, used_vars, new_vars, fs_class, set() + ) + + +def _rename_variables(fstruct, vars, used_vars, new_vars, fs_class, visited): + if id(fstruct) in visited: + return + visited.add(id(fstruct)) + if _is_mapping(fstruct): + items = fstruct.items() + elif _is_sequence(fstruct): + items = enumerate(fstruct) + else: + raise ValueError('Expected mapping or sequence') + for (fname, fval) in items: + if isinstance(fval, Variable): + # If it's in new_vars, then rebind it. + if fval in new_vars: + fstruct[fname] = new_vars[fval] + # If it's in vars, pick a new name for it. + elif fval in vars: + new_vars[fval] = _rename_variable(fval, used_vars) + fstruct[fname] = new_vars[fval] + used_vars.add(new_vars[fval]) + elif isinstance(fval, fs_class): + _rename_variables(fval, vars, used_vars, new_vars, fs_class, visited) + elif isinstance(fval, SubstituteBindingsI): + # Pick new names for any variables in `vars` + for var in fval.variables(): + if var in vars and var not in new_vars: + new_vars[var] = _rename_variable(var, used_vars) + used_vars.add(new_vars[var]) + # Replace all variables in `new_vars`. + fstruct[fname] = fval.substitute_bindings(new_vars) + return fstruct + + +def _rename_variable(var, used_vars): + name, n = re.sub('\d+$', '', var.name), 2 + if not name: + name = '?' + while Variable('%s%s' % (name, n)) in used_vars: + n += 1 + return Variable('%s%s' % (name, n)) + + +def remove_variables(fstruct, fs_class='default'): + """ + :rtype: FeatStruct + :return: The feature structure that is obtained by deleting + all features whose values are ``Variables``. 
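+
+ For example (illustrative only; feature names are arbitrary and the
+ output assumes the default ``FeatStruct`` repr):
+
+ >>> from nltk.featstruct import FeatStruct, remove_variables
+ >>> remove_variables(FeatStruct('[cat=NP, agr=?x, head=[num=?n, per=3]]'))
+ [cat='NP', head=[per=3]]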
+ """ + if fs_class == 'default': + fs_class = _default_fs_class(fstruct) + return _remove_variables(copy.deepcopy(fstruct), fs_class, set()) + + +def _remove_variables(fstruct, fs_class, visited): + if id(fstruct) in visited: + return + visited.add(id(fstruct)) + + if _is_mapping(fstruct): + items = list(fstruct.items()) + elif _is_sequence(fstruct): + items = list(enumerate(fstruct)) + else: + raise ValueError('Expected mapping or sequence') + + for (fname, fval) in items: + if isinstance(fval, Variable): + del fstruct[fname] + elif isinstance(fval, fs_class): + _remove_variables(fval, fs_class, visited) + return fstruct + + +###################################################################### +# Unification +###################################################################### + + +@python_2_unicode_compatible +class _UnificationFailure(object): + def __repr__(self): + return 'nltk.featstruct.UnificationFailure' + + +UnificationFailure = _UnificationFailure() +"""A unique value used to indicate unification failure. It can be + returned by ``Feature.unify_base_values()`` or by custom ``fail()`` + functions to indicate that unificaiton should fail.""" + + +# The basic unification algorithm: +# 1. Make copies of self and other (preserving reentrance) +# 2. Destructively unify self and other +# 3. Apply forward pointers, to preserve reentrance. +# 4. Replace bound variables with their values. +def unify( + fstruct1, + fstruct2, + bindings=None, + trace=False, + fail=None, + rename_vars=True, + fs_class='default', +): + """ + Unify ``fstruct1`` with ``fstruct2``, and return the resulting feature + structure. This unified feature structure is the minimal + feature structure that contains all feature value assignments from both + ``fstruct1`` and ``fstruct2``, and that preserves all reentrancies. + + If no such feature structure exists (because ``fstruct1`` and + ``fstruct2`` specify incompatible values for some feature), then + unification fails, and ``unify`` returns None. + + Bound variables are replaced by their values. Aliased + variables are replaced by their representative variable + (if unbound) or the value of their representative variable + (if bound). I.e., if variable *v* is in ``bindings``, + then *v* is replaced by ``bindings[v]``. This will + be repeated until the variable is replaced by an unbound + variable or a non-variable value. + + Unbound variables are bound when they are unified with + values; and aliased when they are unified with variables. + I.e., if variable *v* is not in ``bindings``, and is + unified with a variable or value *x*, then + ``bindings[v]`` is set to *x*. + + If ``bindings`` is unspecified, then all variables are + assumed to be unbound. I.e., ``bindings`` defaults to an + empty dict. + + >>> from nltk.featstruct import FeatStruct + >>> FeatStruct('[a=?x]').unify(FeatStruct('[b=?x]')) + [a=?x, b=?x2] + + :type bindings: dict(Variable -> any) + :param bindings: A set of variable bindings to be used and + updated during unification. + :type trace: bool + :param trace: If true, generate trace output. + :type rename_vars: bool + :param rename_vars: If True, then rename any variables in + ``fstruct2`` that are also used in ``fstruct1``, in order to + avoid collisions on variable names. + """ + # Decide which class(es) will be treated as feature structures, + # for the purposes of unification. 
+ if fs_class == 'default': + fs_class = _default_fs_class(fstruct1) + if _default_fs_class(fstruct2) != fs_class: + raise ValueError( + "Mixing FeatStruct objects with Python " + "dicts and lists is not supported." + ) + assert isinstance(fstruct1, fs_class) + assert isinstance(fstruct2, fs_class) + + # If bindings are unspecified, use an empty set of bindings. + user_bindings = bindings is not None + if bindings is None: + bindings = {} + + # Make copies of fstruct1 and fstruct2 (since the unification + # algorithm is destructive). Do it all at once, to preserve + # reentrance links between fstruct1 and fstruct2. Copy bindings + # as well, in case there are any bound vars that contain parts + # of fstruct1 or fstruct2. + (fstruct1copy, fstruct2copy, bindings_copy) = copy.deepcopy( + (fstruct1, fstruct2, bindings) + ) + + # Copy the bindings back to the original bindings dict. + bindings.update(bindings_copy) + + if rename_vars: + vars1 = find_variables(fstruct1copy, fs_class) + vars2 = find_variables(fstruct2copy, fs_class) + _rename_variables(fstruct2copy, vars1, vars2, {}, fs_class, set()) + + # Do the actual unification. If it fails, return None. + forward = {} + if trace: + _trace_unify_start((), fstruct1copy, fstruct2copy) + try: + result = _destructively_unify( + fstruct1copy, fstruct2copy, bindings, forward, trace, fail, fs_class, () + ) + except _UnificationFailureError: + return None + + # _destructively_unify might return UnificationFailure, e.g. if we + # tried to unify a mapping with a sequence. + if result is UnificationFailure: + if fail is None: + return None + else: + return fail(fstruct1copy, fstruct2copy, ()) + + # Replace any feature structure that has a forward pointer + # with the target of its forward pointer. + result = _apply_forwards(result, forward, fs_class, set()) + if user_bindings: + _apply_forwards_to_bindings(forward, bindings) + + # Replace bound vars with values. + _resolve_aliases(bindings) + _substitute_bindings(result, bindings, fs_class, set()) + + # Return the result. + if trace: + _trace_unify_succeed((), result) + if trace: + _trace_bindings((), bindings) + return result + + +class _UnificationFailureError(Exception): + """An exception that is used by ``_destructively_unify`` to abort + unification when a failure is encountered.""" + + +def _destructively_unify( + fstruct1, fstruct2, bindings, forward, trace, fail, fs_class, path +): + """ + Attempt to unify ``fstruct1`` and ``fstruct2`` by modifying them + in-place. If the unification succeeds, then ``fstruct1`` will + contain the unified value, the value of ``fstruct2`` is undefined, + and forward[id(fstruct2)] is set to fstruct1. If the unification + fails, then a _UnificationFailureError is raised, and the + values of ``fstruct1`` and ``fstruct2`` are undefined. + + :param bindings: A dictionary mapping variables to values. + :param forward: A dictionary mapping feature structures ids + to replacement structures. When two feature structures + are merged, a mapping from one to the other will be added + to the forward dictionary; and changes will be made only + to the target of the forward dictionary. + ``_destructively_unify`` will always 'follow' any links + in the forward dictionary for fstruct1 and fstruct2 before + actually unifying them. + :param trace: If true, generate trace output + :param path: The feature path that led us to this unification + step. Used for trace output. + """ + # If fstruct1 is already identical to fstruct2, we're done. 
+ # Note: this, together with the forward pointers, ensures + # that unification will terminate even for cyclic structures. + if fstruct1 is fstruct2: + if trace: + _trace_unify_identity(path, fstruct1) + return fstruct1 + + # Set fstruct2's forward pointer to point to fstruct1; this makes + # fstruct1 the canonical copy for fstruct2. Note that we need to + # do this before we recurse into any child structures, in case + # they're cyclic. + forward[id(fstruct2)] = fstruct1 + + # Unifying two mappings: + if _is_mapping(fstruct1) and _is_mapping(fstruct2): + for fname in fstruct1: + if getattr(fname, 'default', None) is not None: + fstruct2.setdefault(fname, fname.default) + for fname in fstruct2: + if getattr(fname, 'default', None) is not None: + fstruct1.setdefault(fname, fname.default) + + # Unify any values that are defined in both fstruct1 and + # fstruct2. Copy any values that are defined in fstruct2 but + # not in fstruct1 to fstruct1. Note: sorting fstruct2's + # features isn't actually necessary; but we do it to give + # deterministic behavior, e.g. for tracing. + for fname, fval2 in sorted(fstruct2.items()): + if fname in fstruct1: + fstruct1[fname] = _unify_feature_values( + fname, + fstruct1[fname], + fval2, + bindings, + forward, + trace, + fail, + fs_class, + path + (fname,), + ) + else: + fstruct1[fname] = fval2 + + return fstruct1 # Contains the unified value. + + # Unifying two sequences: + elif _is_sequence(fstruct1) and _is_sequence(fstruct2): + # If the lengths don't match, fail. + if len(fstruct1) != len(fstruct2): + return UnificationFailure + + # Unify corresponding values in fstruct1 and fstruct2. + for findex in range(len(fstruct1)): + fstruct1[findex] = _unify_feature_values( + findex, + fstruct1[findex], + fstruct2[findex], + bindings, + forward, + trace, + fail, + fs_class, + path + (findex,), + ) + + return fstruct1 # Contains the unified value. + + # Unifying sequence & mapping: fail. The failure function + # doesn't get a chance to recover in this case. + elif (_is_sequence(fstruct1) or _is_mapping(fstruct1)) and ( + _is_sequence(fstruct2) or _is_mapping(fstruct2) + ): + return UnificationFailure + + # Unifying anything else: not allowed! + raise TypeError('Expected mappings or sequences') + + +def _unify_feature_values( + fname, fval1, fval2, bindings, forward, trace, fail, fs_class, fpath +): + """ + Attempt to unify ``fval1`` and and ``fval2``, and return the + resulting unified value. The method of unification will depend on + the types of ``fval1`` and ``fval2``: + + 1. If they're both feature structures, then destructively + unify them (see ``_destructively_unify()``. + 2. If they're both unbound variables, then alias one variable + to the other (by setting bindings[v2]=v1). + 3. If one is an unbound variable, and the other is a value, + then bind the unbound variable to the value. + 4. If one is a feature structure, and the other is a base value, + then fail. + 5. If they're both base values, then unify them. By default, + this will succeed if they are equal, and fail otherwise. + """ + if trace: + _trace_unify_start(fpath, fval1, fval2) + + # Look up the "canonical" copy of fval1 and fval2 + while id(fval1) in forward: + fval1 = forward[id(fval1)] + while id(fval2) in forward: + fval2 = forward[id(fval2)] + + # If fval1 or fval2 is a bound variable, then + # replace it by the variable's bound value. This + # includes aliased variables, which are encoded as + # variables bound to other variables. 
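+ # (fvar1/fvar2 remember the original bound variables so that their
+ # bindings can be updated with the unified result further below.)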
+ fvar1 = fvar2 = None + while isinstance(fval1, Variable) and fval1 in bindings: + fvar1 = fval1 + fval1 = bindings[fval1] + while isinstance(fval2, Variable) and fval2 in bindings: + fvar2 = fval2 + fval2 = bindings[fval2] + + # Case 1: Two feature structures (recursive case) + if isinstance(fval1, fs_class) and isinstance(fval2, fs_class): + result = _destructively_unify( + fval1, fval2, bindings, forward, trace, fail, fs_class, fpath + ) + + # Case 2: Two unbound variables (create alias) + elif isinstance(fval1, Variable) and isinstance(fval2, Variable): + if fval1 != fval2: + bindings[fval2] = fval1 + result = fval1 + + # Case 3: An unbound variable and a value (bind) + elif isinstance(fval1, Variable): + bindings[fval1] = fval2 + result = fval1 + elif isinstance(fval2, Variable): + bindings[fval2] = fval1 + result = fval2 + + # Case 4: A feature structure & a base value (fail) + elif isinstance(fval1, fs_class) or isinstance(fval2, fs_class): + result = UnificationFailure + + # Case 5: Two base values + else: + # Case 5a: Feature defines a custom unification method for base values + if isinstance(fname, Feature): + result = fname.unify_base_values(fval1, fval2, bindings) + # Case 5b: Feature value defines custom unification method + elif isinstance(fval1, CustomFeatureValue): + result = fval1.unify(fval2) + # Sanity check: unify value should be symmetric + if isinstance(fval2, CustomFeatureValue) and result != fval2.unify(fval1): + raise AssertionError( + 'CustomFeatureValue objects %r and %r disagree ' + 'about unification value: %r vs. %r' + % (fval1, fval2, result, fval2.unify(fval1)) + ) + elif isinstance(fval2, CustomFeatureValue): + result = fval2.unify(fval1) + # Case 5c: Simple values -- check if they're equal. + else: + if fval1 == fval2: + result = fval1 + else: + result = UnificationFailure + + # If either value was a bound variable, then update the + # bindings. (This is really only necessary if fname is a + # Feature or if either value is a CustomFeatureValue.) + if result is not UnificationFailure: + if fvar1 is not None: + bindings[fvar1] = result + result = fvar1 + if fvar2 is not None and fvar2 != fvar1: + bindings[fvar2] = result + result = fvar2 + + # If we unification failed, call the failure function; it + # might decide to continue anyway. + if result is UnificationFailure: + if fail is not None: + result = fail(fval1, fval2, fpath) + if trace: + _trace_unify_fail(fpath[:-1], result) + if result is UnificationFailure: + raise _UnificationFailureError + + # Normalize the result. + if isinstance(result, fs_class): + result = _apply_forwards(result, forward, fs_class, set()) + + if trace: + _trace_unify_succeed(fpath, result) + if trace and isinstance(result, fs_class): + _trace_bindings(fpath, bindings) + + return result + + +def _apply_forwards_to_bindings(forward, bindings): + """ + Replace any feature structure that has a forward pointer with + the target of its forward pointer (to preserve reentrancy). + """ + for (var, value) in bindings.items(): + while id(value) in forward: + value = forward[id(value)] + bindings[var] = value + + +def _apply_forwards(fstruct, forward, fs_class, visited): + """ + Replace any feature structure that has a forward pointer with + the target of its forward pointer (to preserve reentrancy). 
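+ Chains of forward pointers are followed to their final target, and
+ ``fstruct`` is updated in place.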
+ """ + # Follow our own forwards pointers (if any) + while id(fstruct) in forward: + fstruct = forward[id(fstruct)] + + # Visit each node only once: + if id(fstruct) in visited: + return + visited.add(id(fstruct)) + + if _is_mapping(fstruct): + items = fstruct.items() + elif _is_sequence(fstruct): + items = enumerate(fstruct) + else: + raise ValueError('Expected mapping or sequence') + for fname, fval in items: + if isinstance(fval, fs_class): + # Replace w/ forwarded value. + while id(fval) in forward: + fval = forward[id(fval)] + fstruct[fname] = fval + # Recurse to child. + _apply_forwards(fval, forward, fs_class, visited) + + return fstruct + + +def _resolve_aliases(bindings): + """ + Replace any bound aliased vars with their binding; and replace + any unbound aliased vars with their representative var. + """ + for (var, value) in bindings.items(): + while isinstance(value, Variable) and value in bindings: + value = bindings[var] = bindings[value] + + +def _trace_unify_start(path, fval1, fval2): + if path == (): + print('\nUnification trace:') + else: + fullname = '.'.join("%s" % n for n in path) + print(' ' + '| ' * (len(path) - 1) + '|') + print(' ' + '| ' * (len(path) - 1) + '| Unify feature: %s' % fullname) + print(' ' + '| ' * len(path) + ' / ' + _trace_valrepr(fval1)) + print(' ' + '| ' * len(path) + '|\\ ' + _trace_valrepr(fval2)) + + +def _trace_unify_identity(path, fval1): + print(' ' + '| ' * len(path) + '|') + print(' ' + '| ' * len(path) + '| (identical objects)') + print(' ' + '| ' * len(path) + '|') + print(' ' + '| ' * len(path) + '+-->' + unicode_repr(fval1)) + + +def _trace_unify_fail(path, result): + if result is UnificationFailure: + resume = '' + else: + resume = ' (nonfatal)' + print(' ' + '| ' * len(path) + '| |') + print(' ' + 'X ' * len(path) + 'X X <-- FAIL' + resume) + + +def _trace_unify_succeed(path, fval1): + # Print the result. + print(' ' + '| ' * len(path) + '|') + print(' ' + '| ' * len(path) + '+-->' + unicode_repr(fval1)) + + +def _trace_bindings(path, bindings): + # Print the bindings (if any). + if len(bindings) > 0: + binditems = sorted(bindings.items(), key=lambda v: v[0].name) + bindstr = '{%s}' % ', '.join( + '%s: %s' % (var, _trace_valrepr(val)) for (var, val) in binditems + ) + print(' ' + '| ' * len(path) + ' Bindings: ' + bindstr) + + +def _trace_valrepr(val): + if isinstance(val, Variable): + return '%s' % val + else: + return '%s' % unicode_repr(val) + + +def subsumes(fstruct1, fstruct2): + """ + Return True if ``fstruct1`` subsumes ``fstruct2``. I.e., return + true if unifying ``fstruct1`` with ``fstruct2`` would result in a + feature structure equal to ``fstruct2.`` + + :rtype: bool + """ + return fstruct2 == unify(fstruct1, fstruct2) + + +def conflicts(fstruct1, fstruct2, trace=0): + """ + Return a list of the feature paths of all features which are + assigned incompatible values by ``fstruct1`` and ``fstruct2``. 
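+
+ For example (illustrative; only the ``cat`` values clash here):
+
+ >>> from nltk.featstruct import FeatStruct, conflicts
+ >>> conflicts(FeatStruct('[cat=NP, agr=[num=pl]]'), FeatStruct('[cat=VP, agr=[num=pl]]'))
+ [('cat',)]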
+ + :rtype: list(tuple) + """ + conflict_list = [] + + def add_conflict(fval1, fval2, path): + conflict_list.append(path) + return fval1 + + unify(fstruct1, fstruct2, fail=add_conflict, trace=trace) + return conflict_list + + +###################################################################### +# Helper Functions +###################################################################### + + +def _is_mapping(v): + return hasattr(v, '__contains__') and hasattr(v, 'keys') + + +def _is_sequence(v): + return ( + hasattr(v, '__iter__') + and hasattr(v, '__len__') + and not isinstance(v, string_types) + ) + + +def _default_fs_class(obj): + if isinstance(obj, FeatStruct): + return FeatStruct + if isinstance(obj, (dict, list)): + return (dict, list) + else: + raise ValueError( + 'To unify objects of type %s, you must specify ' + 'fs_class explicitly.' % obj.__class__.__name__ + ) + + +###################################################################### +# FeatureValueSet & FeatureValueTuple +###################################################################### + + +class SubstituteBindingsSequence(SubstituteBindingsI): + """ + A mixin class for sequence clases that distributes variables() and + substitute_bindings() over the object's elements. + """ + + def variables(self): + return [elt for elt in self if isinstance(elt, Variable)] + sum( + [ + list(elt.variables()) + for elt in self + if isinstance(elt, SubstituteBindingsI) + ], + [], + ) + + def substitute_bindings(self, bindings): + return self.__class__([self.subst(v, bindings) for v in self]) + + def subst(self, v, bindings): + if isinstance(v, SubstituteBindingsI): + return v.substitute_bindings(bindings) + else: + return bindings.get(v, v) + + +@python_2_unicode_compatible +class FeatureValueTuple(SubstituteBindingsSequence, tuple): + """ + A base feature value that is a tuple of other base feature values. + FeatureValueTuple implements ``SubstituteBindingsI``, so it any + variable substitutions will be propagated to the elements + contained by the set. A ``FeatureValueTuple`` is immutable. + """ + + def __repr__(self): # [xx] really use %s here? + if len(self) == 0: + return '()' + return '(%s)' % ', '.join('%s' % (b,) for b in self) + + +@python_2_unicode_compatible +class FeatureValueSet(SubstituteBindingsSequence, frozenset): + """ + A base feature value that is a set of other base feature values. + FeatureValueSet implements ``SubstituteBindingsI``, so it any + variable substitutions will be propagated to the elements + contained by the set. A ``FeatureValueSet`` is immutable. + """ + + def __repr__(self): # [xx] really use %s here? + if len(self) == 0: + return '{/}' # distinguish from dict. + # n.b., we sort the string reprs of our elements, to ensure + # that our own repr is deterministic. + return '{%s}' % ', '.join(sorted('%s' % (b,) for b in self)) + + __str__ = __repr__ + + +@python_2_unicode_compatible +class FeatureValueUnion(SubstituteBindingsSequence, frozenset): + """ + A base feature value that represents the union of two or more + ``FeatureValueSet`` or ``Variable``. + """ + + def __new__(cls, values): + # If values contains FeatureValueUnions, then collapse them. + values = _flatten(values, FeatureValueUnion) + + # If the resulting list contains no variables, then + # use a simple FeatureValueSet instead. + if sum(isinstance(v, Variable) for v in values) == 0: + values = _flatten(values, FeatureValueSet) + return FeatureValueSet(values) + + # If we contain a single variable, return that variable. 
+ if len(values) == 1: + return list(values)[0] + + # Otherwise, build the FeatureValueUnion. + return frozenset.__new__(cls, values) + + def __repr__(self): + # n.b., we sort the string reprs of our elements, to ensure + # that our own repr is deterministic. also, note that len(self) + # is guaranteed to be 2 or more. + return '{%s}' % '+'.join(sorted('%s' % (b,) for b in self)) + + +@python_2_unicode_compatible +class FeatureValueConcat(SubstituteBindingsSequence, tuple): + """ + A base feature value that represents the concatenation of two or + more ``FeatureValueTuple`` or ``Variable``. + """ + + def __new__(cls, values): + # If values contains FeatureValueConcats, then collapse them. + values = _flatten(values, FeatureValueConcat) + + # If the resulting list contains no variables, then + # use a simple FeatureValueTuple instead. + if sum(isinstance(v, Variable) for v in values) == 0: + values = _flatten(values, FeatureValueTuple) + return FeatureValueTuple(values) + + # If we contain a single variable, return that variable. + if len(values) == 1: + return list(values)[0] + + # Otherwise, build the FeatureValueConcat. + return tuple.__new__(cls, values) + + def __repr__(self): + # n.b.: len(self) is guaranteed to be 2 or more. + return '(%s)' % '+'.join('%s' % (b,) for b in self) + + +def _flatten(lst, cls): + """ + Helper function -- return a copy of list, with all elements of + type ``cls`` spliced in rather than appended in. + """ + result = [] + for elt in lst: + if isinstance(elt, cls): + result.extend(elt) + else: + result.append(elt) + return result + + +###################################################################### +# Specialized Features +###################################################################### + + +@total_ordering +@python_2_unicode_compatible +class Feature(object): + """ + A feature identifier that's specialized to put additional + constraints, default values, etc. + """ + + def __init__(self, name, default=None, display=None): + assert display in (None, 'prefix', 'slash') + + self._name = name # [xx] rename to .identifier? + self._default = default # [xx] not implemented yet. + self._display = display + + if self._display == 'prefix': + self._sortkey = (-1, self._name) + elif self._display == 'slash': + self._sortkey = (1, self._name) + else: + self._sortkey = (0, self._name) + + @property + def name(self): + """The name of this feature.""" + return self._name + + @property + def default(self): + """Default value for this feature.""" + return self._default + + @property + def display(self): + """Custom display location: can be prefix, or slash.""" + return self._display + + def __repr__(self): + return '*%s*' % self.name + + def __lt__(self, other): + if isinstance(other, string_types): + return True + if not isinstance(other, Feature): + raise_unorderable_types("<", self, other) + return self._sortkey < other._sortkey + + def __eq__(self, other): + return type(self) == type(other) and self._name == other._name + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._name) + + # //////////////////////////////////////////////////////////// + # These can be overridden by subclasses: + # //////////////////////////////////////////////////////////// + + def read_value(self, s, position, reentrances, parser): + return parser.read_value(s, position, reentrances) + + def unify_base_values(self, fval1, fval2, bindings): + """ + If possible, return a single value.. If not, return + the value ``UnificationFailure``. 
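+
+ For the base ``Feature`` class this is just an equality test, as
+ the following illustrative example shows:
+
+ >>> from nltk.featstruct import Feature, UnificationFailure
+ >>> f = Feature('case')
+ >>> f.unify_base_values('nom', 'nom', {})
+ 'nom'
+ >>> f.unify_base_values('nom', 'acc', {}) is UnificationFailure
+ True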
+ """ + if fval1 == fval2: + return fval1 + else: + return UnificationFailure + + +class SlashFeature(Feature): + def read_value(self, s, position, reentrances, parser): + return parser.read_partial(s, position, reentrances) + + +class RangeFeature(Feature): + RANGE_RE = re.compile('(-?\d+):(-?\d+)') + + def read_value(self, s, position, reentrances, parser): + m = self.RANGE_RE.match(s, position) + if not m: + raise ValueError('range', position) + return (int(m.group(1)), int(m.group(2))), m.end() + + def unify_base_values(self, fval1, fval2, bindings): + if fval1 is None: + return fval2 + if fval2 is None: + return fval1 + rng = max(fval1[0], fval2[0]), min(fval1[1], fval2[1]) + if rng[1] < rng[0]: + return UnificationFailure + return rng + + +SLASH = SlashFeature('slash', default=False, display='slash') +TYPE = Feature('type', display='prefix') + + +###################################################################### +# Specialized Feature Values +###################################################################### + + +@total_ordering +class CustomFeatureValue(object): + """ + An abstract base class for base values that define a custom + unification method. The custom unification method of + ``CustomFeatureValue`` will be used during unification if: + + - The ``CustomFeatureValue`` is unified with another base value. + - The ``CustomFeatureValue`` is not the value of a customized + ``Feature`` (which defines its own unification method). + + If two ``CustomFeatureValue`` objects are unified with one another + during feature structure unification, then the unified base values + they return *must* be equal; otherwise, an ``AssertionError`` will + be raised. + + Subclasses must define ``unify()``, ``__eq__()`` and ``__lt__()``. + Subclasses may also wish to define ``__hash__()``. + """ + + def unify(self, other): + """ + If this base value unifies with ``other``, then return the + unified value. Otherwise, return ``UnificationFailure``. + """ + raise NotImplementedError('abstract base class') + + def __eq__(self, other): + raise NotImplementedError('abstract base class') + + def __ne__(self, other): + return not self == other + + def __lt__(self, other): + raise NotImplementedError('abstract base class') + + def __hash__(self): + raise TypeError('%s objects or unhashable' % self.__class__.__name__) + + +###################################################################### +# Feature Structure Reader +###################################################################### + + +class FeatStructReader(object): + def __init__( + self, + features=(SLASH, TYPE), + fdict_class=FeatStruct, + flist_class=FeatList, + logic_parser=None, + ): + self._features = dict((f.name, f) for f in features) + self._fdict_class = fdict_class + self._flist_class = flist_class + self._prefix_feature = None + self._slash_feature = None + for feature in features: + if feature.display == 'slash': + if self._slash_feature: + raise ValueError('Multiple features w/ display=slash') + self._slash_feature = feature + if feature.display == 'prefix': + if self._prefix_feature: + raise ValueError('Multiple features w/ display=prefix') + self._prefix_feature = feature + self._features_with_defaults = [ + feature for feature in features if feature.default is not None + ] + if logic_parser is None: + logic_parser = LogicParser() + self._logic_parser = logic_parser + + def fromstring(self, s, fstruct=None): + """ + Convert a string representation of a feature structure (as + displayed by repr) into a ``FeatStruct``. 
This process + imposes the following restrictions on the string + representation: + + - Feature names cannot contain any of the following: + whitespace, parentheses, quote marks, equals signs, + dashes, commas, and square brackets. Feature names may + not begin with plus signs or minus signs. + - Only the following basic feature value are supported: + strings, integers, variables, None, and unquoted + alphanumeric strings. + - For reentrant values, the first mention must specify + a reentrance identifier and a value; and any subsequent + mentions must use arrows (``'->'``) to reference the + reentrance identifier. + """ + s = s.strip() + value, position = self.read_partial(s, 0, {}, fstruct) + if position != len(s): + self._error(s, 'end of string', position) + return value + + _START_FSTRUCT_RE = re.compile(r'\s*(?:\((\d+)\)\s*)?(\??[\w-]+)?(\[)') + _END_FSTRUCT_RE = re.compile(r'\s*]\s*') + _SLASH_RE = re.compile(r'/') + _FEATURE_NAME_RE = re.compile(r'\s*([+-]?)([^\s\(\)<>"\'\-=\[\],]+)\s*') + _REENTRANCE_RE = re.compile(r'\s*->\s*') + _TARGET_RE = re.compile(r'\s*\((\d+)\)\s*') + _ASSIGN_RE = re.compile(r'\s*=\s*') + _COMMA_RE = re.compile(r'\s*,\s*') + _BARE_PREFIX_RE = re.compile(r'\s*(?:\((\d+)\)\s*)?(\??[\w-]+\s*)()') + # This one is used to distinguish fdicts from flists: + _START_FDICT_RE = re.compile( + r'(%s)|(%s\s*(%s\s*(=|->)|[+-]%s|\]))' + % ( + _BARE_PREFIX_RE.pattern, + _START_FSTRUCT_RE.pattern, + _FEATURE_NAME_RE.pattern, + _FEATURE_NAME_RE.pattern, + ) + ) + + def read_partial(self, s, position=0, reentrances=None, fstruct=None): + """ + Helper function that reads in a feature structure. + + :param s: The string to read. + :param position: The position in the string to start parsing. + :param reentrances: A dictionary from reentrance ids to values. + Defaults to an empty dictionary. + :return: A tuple (val, pos) of the feature structure created by + parsing and the position where the parsed feature structure ends. + :rtype: bool + """ + if reentrances is None: + reentrances = {} + try: + return self._read_partial(s, position, reentrances, fstruct) + except ValueError as e: + if len(e.args) != 2: + raise + self._error(s, *e.args) + + def _read_partial(self, s, position, reentrances, fstruct=None): + # Create the new feature structure + if fstruct is None: + if self._START_FDICT_RE.match(s, position): + fstruct = self._fdict_class() + else: + fstruct = self._flist_class() + + # Read up to the open bracket. + match = self._START_FSTRUCT_RE.match(s, position) + if not match: + match = self._BARE_PREFIX_RE.match(s, position) + if not match: + raise ValueError('open bracket or identifier', position) + position = match.end() + + # If there as an identifier, record it. + if match.group(1): + identifier = match.group(1) + if identifier in reentrances: + raise ValueError('new identifier', match.start(1)) + reentrances[identifier] = fstruct + + if isinstance(fstruct, FeatDict): + fstruct.clear() + return self._read_partial_featdict(s, position, match, reentrances, fstruct) + else: + del fstruct[:] + return self._read_partial_featlist(s, position, match, reentrances, fstruct) + + def _read_partial_featlist(self, s, position, match, reentrances, fstruct): + # Prefix features are not allowed: + if match.group(2): + raise ValueError('open bracket') + # Bare prefixes are not allowed: + if not match.group(3): + raise ValueError('open bracket') + + # Build a list of the features defined by the structure. + while position < len(s): + # Check for the close bracket. 
+ match = self._END_FSTRUCT_RE.match(s, position) + if match is not None: + return fstruct, match.end() + + # Reentances have the form "-> (target)" + match = self._REENTRANCE_RE.match(s, position) + if match: + position = match.end() + match = self._TARGET_RE.match(s, position) + if not match: + raise ValueError('identifier', position) + target = match.group(1) + if target not in reentrances: + raise ValueError('bound identifier', position) + position = match.end() + fstruct.append(reentrances[target]) + + # Anything else is a value. + else: + value, position = self._read_value(0, s, position, reentrances) + fstruct.append(value) + + # If there's a close bracket, handle it at the top of the loop. + if self._END_FSTRUCT_RE.match(s, position): + continue + + # Otherwise, there should be a comma + match = self._COMMA_RE.match(s, position) + if match is None: + raise ValueError('comma', position) + position = match.end() + + # We never saw a close bracket. + raise ValueError('close bracket', position) + + def _read_partial_featdict(self, s, position, match, reentrances, fstruct): + # If there was a prefix feature, record it. + if match.group(2): + if self._prefix_feature is None: + raise ValueError('open bracket or identifier', match.start(2)) + prefixval = match.group(2).strip() + if prefixval.startswith('?'): + prefixval = Variable(prefixval) + fstruct[self._prefix_feature] = prefixval + + # If group 3 is empty, then we just have a bare prefix, so + # we're done. + if not match.group(3): + return self._finalize(s, match.end(), reentrances, fstruct) + + # Build a list of the features defined by the structure. + # Each feature has one of the three following forms: + # name = value + # name -> (target) + # +name + # -name + while position < len(s): + # Use these variables to hold info about each feature: + name = value = None + + # Check for the close bracket. + match = self._END_FSTRUCT_RE.match(s, position) + if match is not None: + return self._finalize(s, match.end(), reentrances, fstruct) + + # Get the feature name's name + match = self._FEATURE_NAME_RE.match(s, position) + if match is None: + raise ValueError('feature name', position) + name = match.group(2) + position = match.end() + + # Check if it's a special feature. + if name[0] == '*' and name[-1] == '*': + name = self._features.get(name[1:-1]) + if name is None: + raise ValueError('known special feature', match.start(2)) + + # Check if this feature has a value already. + if name in fstruct: + raise ValueError('new name', match.start(2)) + + # Boolean value ("+name" or "-name") + if match.group(1) == '+': + value = True + if match.group(1) == '-': + value = False + + # Reentrance link ("-> (target)") + if value is None: + match = self._REENTRANCE_RE.match(s, position) + if match is not None: + position = match.end() + match = self._TARGET_RE.match(s, position) + if not match: + raise ValueError('identifier', position) + target = match.group(1) + if target not in reentrances: + raise ValueError('bound identifier', position) + position = match.end() + value = reentrances[target] + + # Assignment ("= value"). + if value is None: + match = self._ASSIGN_RE.match(s, position) + if match: + position = match.end() + value, position = self._read_value(name, s, position, reentrances) + # None of the above: error. + else: + raise ValueError('equals sign', position) + + # Store the value. + fstruct[name] = value + + # If there's a close bracket, handle it at the top of the loop. 
+ if self._END_FSTRUCT_RE.match(s, position):
+ continue
+
+ # Otherwise, there should be a comma
+ match = self._COMMA_RE.match(s, position)
+ if match is None:
+ raise ValueError('comma', position)
+ position = match.end()
+
+ # We never saw a close bracket.
+ raise ValueError('close bracket', position)
+
+ def _finalize(self, s, pos, reentrances, fstruct):
+ """
+ Called when we see the close brace -- checks for a slash feature,
+ and adds in default values.
+ """
+ # Add the slash feature (if any)
+ match = self._SLASH_RE.match(s, pos)
+ if match:
+ name = self._slash_feature
+ v, pos = self._read_value(name, s, match.end(), reentrances)
+ fstruct[name] = v
+ ## Add any default features. -- handle in unification instead?
+ # for feature in self._features_with_defaults:
+ # fstruct.setdefault(feature, feature.default)
+ # Return the value.
+ return fstruct, pos
+
+ def _read_value(self, name, s, position, reentrances):
+ if isinstance(name, Feature):
+ return name.read_value(s, position, reentrances, self)
+ else:
+ return self.read_value(s, position, reentrances)
+
+ def read_value(self, s, position, reentrances):
+ for (handler, regexp) in self.VALUE_HANDLERS:
+ match = regexp.match(s, position)
+ if match:
+ handler_func = getattr(self, handler)
+ return handler_func(s, position, reentrances, match)
+ raise ValueError('value', position)
+
+ def _error(self, s, expected, position):
+ lines = s.split('\n')
+ while position > len(lines[0]):
+ position -= len(lines.pop(0)) + 1 # +1 for the newline.
+ estr = (
+ 'Error parsing feature structure\n '
+ + lines[0]
+ + '\n '
+ + ' ' * position
+ + '^ '
+ + 'Expected %s' % expected
+ )
+ raise ValueError(estr)
+
+ # ////////////////////////////////////////////////////////////
+ # { Value Readers
+ # ////////////////////////////////////////////////////////////
+
+ #: A table indicating how feature values should be processed. Each
+ #: entry in the table is a pair (handler, regexp). The first entry
+ #: with a matching regexp will have its handler called. Handlers
+ #: should have the following signature::
+ #:
+ #: def handler(s, position, reentrances, match): ...
+ #:
+ #: and should return a tuple (value, position), where position is
+ #: the string position where the value ended. (n.b.: order is
+ #: important here!)
+ VALUE_HANDLERS = [
+ ('read_fstruct_value', _START_FSTRUCT_RE),
+ ('read_var_value', re.compile(r'\?[a-zA-Z_][a-zA-Z0-9_]*')),
+ ('read_str_value', re.compile("[uU]?[rR]?(['\"])")),
+ ('read_int_value', re.compile(r'-?\d+')),
+ ('read_sym_value', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*')),
+ (
+ 'read_app_value',
+ re.compile(r'<(app)\((\?[a-z][a-z]*)\s*,' r'\s*(\?[a-z][a-z]*)\)>'),
+ ),
+ # ('read_logic_value', re.compile(r'<([^>]*)>')),
+ # lazily match any character after '<' until we hit a '>' not preceded by '-'
+ ('read_logic_value', re.compile(r'<(.*?)(?<!-)>')),
+ ('read_set_value', re.compile(r'{')),
+ ('read_tuple_value', re.compile(r'\(')),
+ ]
+
+ def read_fstruct_value(self, s, position, reentrances, match):
+ return self.read_partial(s, position, reentrances)
+
+ def read_str_value(self, s, position, reentrances, match):
+ return read_str(s, position)
+
+ def read_int_value(self, s, position, reentrances, match):
+ return int(match.group()), match.end()
+
+ # Note: the '?' is included in the variable name.
+ def read_var_value(self, s, position, reentrances, match): + return Variable(match.group()), match.end() + + _SYM_CONSTS = {'None': None, 'True': True, 'False': False} + + def read_sym_value(self, s, position, reentrances, match): + val, end = match.group(), match.end() + return self._SYM_CONSTS.get(val, val), end + + def read_app_value(self, s, position, reentrances, match): + """Mainly included for backwards compat.""" + return self._logic_parser.parse('%s(%s)' % match.group(2, 3)), match.end() + + def read_logic_value(self, s, position, reentrances, match): + try: + try: + expr = self._logic_parser.parse(match.group(1)) + except LogicalExpressionException: + raise ValueError() + return expr, match.end() + except ValueError: + raise ValueError('logic expression', match.start(1)) + + def read_tuple_value(self, s, position, reentrances, match): + return self._read_seq_value( + s, position, reentrances, match, ')', FeatureValueTuple, FeatureValueConcat + ) + + def read_set_value(self, s, position, reentrances, match): + return self._read_seq_value( + s, position, reentrances, match, '}', FeatureValueSet, FeatureValueUnion + ) + + def _read_seq_value( + self, s, position, reentrances, match, close_paren, seq_class, plus_class + ): + """ + Helper function used by read_tuple_value and read_set_value. + """ + cp = re.escape(close_paren) + position = match.end() + # Special syntax fo empty tuples: + m = re.compile(r'\s*/?\s*%s' % cp).match(s, position) + if m: + return seq_class(), m.end() + # Read values: + values = [] + seen_plus = False + while True: + # Close paren: return value. + m = re.compile(r'\s*%s' % cp).match(s, position) + if m: + if seen_plus: + return plus_class(values), m.end() + else: + return seq_class(values), m.end() + + # Read the next value. + val, position = self.read_value(s, position, reentrances) + values.append(val) + + # Comma or looking at close paren + m = re.compile(r'\s*(,|\+|(?=%s))\s*' % cp).match(s, position) + if not m: + raise ValueError("',' or '+' or '%s'" % cp, position) + if m.group(1) == '+': + seen_plus = True + position = m.end() + + +###################################################################### +# { Demo +###################################################################### + + +def display_unification(fs1, fs2, indent=' '): + # Print the two input feature structures, side by side. 
+ fs1_lines = ("%s" % fs1).split('\n') + fs2_lines = ("%s" % fs2).split('\n') + if len(fs1_lines) > len(fs2_lines): + blankline = '[' + ' ' * (len(fs2_lines[0]) - 2) + ']' + fs2_lines += [blankline] * len(fs1_lines) + else: + blankline = '[' + ' ' * (len(fs1_lines[0]) - 2) + ']' + fs1_lines += [blankline] * len(fs2_lines) + for (fs1_line, fs2_line) in zip(fs1_lines, fs2_lines): + print(indent + fs1_line + ' ' + fs2_line) + print(indent + '-' * len(fs1_lines[0]) + ' ' + '-' * len(fs2_lines[0])) + + linelen = len(fs1_lines[0]) * 2 + 3 + print(indent + '| |'.center(linelen)) + print(indent + '+-----UNIFY-----+'.center(linelen)) + print(indent + '|'.center(linelen)) + print(indent + 'V'.center(linelen)) + + bindings = {} + + result = fs1.unify(fs2, bindings) + if result is None: + print(indent + '(FAILED)'.center(linelen)) + else: + print( + '\n'.join(indent + l.center(linelen) for l in ("%s" % result).split('\n')) + ) + if bindings and len(bindings.bound_variables()) > 0: + print(repr(bindings).center(linelen)) + return result + + +def interactive_demo(trace=False): + import random, sys + + HELP = ''' + 1-%d: Select the corresponding feature structure + q: Quit + t: Turn tracing on or off + l: List all feature structures + ?: Help + ''' + + print( + ''' + This demo will repeatedly present you with a list of feature + structures, and ask you to choose two for unification. Whenever a + new feature structure is generated, it is added to the list of + choices that you can pick from. However, since this can be a + large number of feature structures, the demo will only print out a + random subset for you to choose between at a given time. If you + want to see the complete lists, type "l". For a list of valid + commands, type "?". + ''' + ) + print('Press "Enter" to continue...') + sys.stdin.readline() + + fstruct_strings = [ + '[agr=[number=sing, gender=masc]]', + '[agr=[gender=masc, person=3]]', + '[agr=[gender=fem, person=3]]', + '[subj=[agr=(1)[]], agr->(1)]', + '[obj=?x]', + '[subj=?x]', + '[/=None]', + '[/=NP]', + '[cat=NP]', + '[cat=VP]', + '[cat=PP]', + '[subj=[agr=[gender=?y]], obj=[agr=[gender=?y]]]', + '[gender=masc, agr=?C]', + '[gender=?S, agr=[gender=?S,person=3]]', + ] + + all_fstructs = [ + (i, FeatStruct(fstruct_strings[i])) for i in range(len(fstruct_strings)) + ] + + def list_fstructs(fstructs): + for i, fstruct in fstructs: + print() + lines = ("%s" % fstruct).split('\n') + print('%3d: %s' % (i + 1, lines[0])) + for line in lines[1:]: + print(' ' + line) + print() + + while True: + # Pick 5 feature structures at random from the master list. 
+ MAX_CHOICES = 5 + if len(all_fstructs) > MAX_CHOICES: + fstructs = sorted(random.sample(all_fstructs, MAX_CHOICES)) + else: + fstructs = all_fstructs + + print('_' * 75) + + print('Choose two feature structures to unify:') + list_fstructs(fstructs) + + selected = [None, None] + for (nth, i) in (('First', 0), ('Second', 1)): + while selected[i] is None: + print( + ( + '%s feature structure (1-%d,q,t,l,?): ' + % (nth, len(all_fstructs)) + ), + end=' ', + ) + try: + input = sys.stdin.readline().strip() + if input in ('q', 'Q', 'x', 'X'): + return + if input in ('t', 'T'): + trace = not trace + print(' Trace = %s' % trace) + continue + if input in ('h', 'H', '?'): + print(HELP % len(fstructs)) + continue + if input in ('l', 'L'): + list_fstructs(all_fstructs) + continue + num = int(input) - 1 + selected[i] = all_fstructs[num][1] + print() + except: + print('Bad sentence number') + continue + + if trace: + result = selected[0].unify(selected[1], trace=1) + else: + result = display_unification(selected[0], selected[1]) + if result is not None: + for i, fstruct in all_fstructs: + if repr(result) == repr(fstruct): + break + else: + all_fstructs.append((len(all_fstructs), result)) + + print('\nType "Enter" to continue unifying; or "q" to quit.') + input = sys.stdin.readline().strip() + if input in ('q', 'Q', 'x', 'X'): + return + + +def demo(trace=False): + """ + Just for testing + """ + # import random + + # processor breaks with values like '3rd' + fstruct_strings = [ + '[agr=[number=sing, gender=masc]]', + '[agr=[gender=masc, person=3]]', + '[agr=[gender=fem, person=3]]', + '[subj=[agr=(1)[]], agr->(1)]', + '[obj=?x]', + '[subj=?x]', + '[/=None]', + '[/=NP]', + '[cat=NP]', + '[cat=VP]', + '[cat=PP]', + '[subj=[agr=[gender=?y]], obj=[agr=[gender=?y]]]', + '[gender=masc, agr=?C]', + '[gender=?S, agr=[gender=?S,person=3]]', + ] + all_fstructs = [FeatStruct(fss) for fss in fstruct_strings] + # MAX_CHOICES = 5 + # if len(all_fstructs) > MAX_CHOICES: + # fstructs = random.sample(all_fstructs, MAX_CHOICES) + # fstructs.sort() + # else: + # fstructs = all_fstructs + + for fs1 in all_fstructs: + for fs2 in all_fstructs: + print( + "\n*******************\nfs1 is:\n%s\n\nfs2 is:\n%s\n\nresult is:\n%s" + % (fs1, fs2, unify(fs1, fs2)) + ) + + +if __name__ == '__main__': + demo() + +__all__ = [ + 'FeatStruct', + 'FeatDict', + 'FeatList', + 'unify', + 'subsumes', + 'conflicts', + 'Feature', + 'SlashFeature', + 'RangeFeature', + 'SLASH', + 'TYPE', + 'FeatStructReader', +] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/grammar.py b/venv.bak/lib/python3.7/site-packages/nltk/grammar.py new file mode 100644 index 0000000..2408b2e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/grammar.py @@ -0,0 +1,1731 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Context Free Grammars +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# Jason Narad +# Peter Ljunglöf +# URL: +# For license information, see LICENSE.TXT +# + +""" +Basic data classes for representing context free grammars. A +"grammar" specifies which trees can represent the structure of a +given text. Each of these trees is called a "parse tree" for the +text (or simply a "parse"). In a "context free" grammar, the set of +parse trees for any piece of a text can depend only on that piece, and +not on the rest of the text (i.e., the piece's context). Context free +grammars are often used to find possible syntactic structures for +sentences. 
In this context, the leaves of a parse tree are word
+tokens; and the node values are phrasal categories, such as ``NP``
+and ``VP``.
+
+The ``CFG`` class is used to encode context free grammars. Each
+``CFG`` consists of a start symbol and a set of productions.
+The "start symbol" specifies the root node value for parse trees. For example,
+the start symbol for syntactic parsing is usually ``S``. Start
+symbols are encoded using the ``Nonterminal`` class, which is discussed
+below.
+
+A Grammar's "productions" specify what parent-child relationships a parse
+tree can contain. Each production specifies that a particular
+node can be the parent of a particular set of children. For example,
+the production ``<S> -> <NP> <VP>`` specifies that an ``S`` node can
+be the parent of an ``NP`` node and a ``VP`` node.
+
+Grammar productions are implemented by the ``Production`` class.
+Each ``Production`` consists of a left hand side and a right hand
+side. The "left hand side" is a ``Nonterminal`` that specifies the
+node type for a potential parent; and the "right hand side" is a list
+that specifies allowable children for that parent. This list
+consists of ``Nonterminals`` and text types: each ``Nonterminal``
+indicates that the corresponding child may be a ``TreeToken`` with the
+specified node type; and each text type indicates that the
+corresponding child may be a ``Token`` with that type.
+
+The ``Nonterminal`` class is used to distinguish node values from leaf
+values. This prevents the grammar from accidentally using a leaf
+value (such as the English word "A") as the node of a subtree. Within
+a ``CFG``, all node values are wrapped in the ``Nonterminal``
+class. Note, however, that the trees that are specified by the grammar do
+*not* include these ``Nonterminal`` wrappers.
+
+Grammars can also be given a more procedural interpretation. According to
+this interpretation, a Grammar specifies any tree structure *tree* that
+can be produced by the following procedure:
+
+| Set tree to the start symbol
+| Repeat until tree contains no more nonterminal leaves:
+| Choose a production prod whose left hand side
+| lhs is a nonterminal leaf of tree.
+| Replace the nonterminal leaf with a subtree, whose node
+| value is the value wrapped by the nonterminal lhs, and
+| whose children are the right hand side of prod.
+
+The operation of replacing the left hand side (*lhs*) of a production
+with the right hand side (*rhs*) in a tree (*tree*) is known as
+"expanding" *lhs* to *rhs* in *tree*.
+"""
+from __future__ import print_function, unicode_literals, division
+
+import re
+from functools import total_ordering
+
+from six import string_types
+
+from nltk.util import transitive_closure, invert_graph
+from nltk.compat import python_2_unicode_compatible, unicode_repr
+from nltk.internals import raise_unorderable_types
+
+from nltk.probability import ImmutableProbabilisticMixIn
+from nltk.featstruct import FeatStruct, FeatDict, FeatStructReader, SLASH, TYPE
+
+
+#################################################################
+# Nonterminal
+#################################################################
+
+
+@total_ordering
+@python_2_unicode_compatible
+class Nonterminal(object):
+ """
+ A non-terminal symbol for a context free grammar. ``Nonterminal``
+ is a wrapper class for node values; it is used by ``Production``
+ objects to distinguish node values from leaf values.
+ The node value that is wrapped by a ``Nonterminal`` is known as its
+ "symbol".
Symbols are typically strings representing phrasal + categories (such as ``"NP"`` or ``"VP"``). However, more complex + symbol types are sometimes used (e.g., for lexicalized grammars). + Since symbols are node values, they must be immutable and + hashable. Two ``Nonterminals`` are considered equal if their + symbols are equal. + + :see: ``CFG``, ``Production`` + :type _symbol: any + :ivar _symbol: The node value corresponding to this + ``Nonterminal``. This value must be immutable and hashable. + """ + + def __init__(self, symbol): + """ + Construct a new non-terminal from the given symbol. + + :type symbol: any + :param symbol: The node value corresponding to this + ``Nonterminal``. This value must be immutable and + hashable. + """ + self._symbol = symbol + self._hash = hash(symbol) + + def symbol(self): + """ + Return the node value corresponding to this ``Nonterminal``. + + :rtype: (any) + """ + return self._symbol + + def __eq__(self, other): + """ + Return True if this non-terminal is equal to ``other``. In + particular, return True if ``other`` is a ``Nonterminal`` + and this non-terminal's symbol is equal to ``other`` 's symbol. + + :rtype: bool + """ + return type(self) == type(other) and self._symbol == other._symbol + + def __ne__(self, other): + return not self == other + + def __lt__(self, other): + if not isinstance(other, Nonterminal): + raise_unorderable_types("<", self, other) + return self._symbol < other._symbol + + def __hash__(self): + return self._hash + + def __repr__(self): + """ + Return a string representation for this ``Nonterminal``. + + :rtype: str + """ + if isinstance(self._symbol, string_types): + return '%s' % self._symbol + else: + return '%s' % unicode_repr(self._symbol) + + def __str__(self): + """ + Return a string representation for this ``Nonterminal``. + + :rtype: str + """ + if isinstance(self._symbol, string_types): + return '%s' % self._symbol + else: + return '%s' % unicode_repr(self._symbol) + + def __div__(self, rhs): + """ + Return a new nonterminal whose symbol is ``A/B``, where ``A`` is + the symbol for this nonterminal, and ``B`` is the symbol for rhs. + + :param rhs: The nonterminal used to form the right hand side + of the new nonterminal. + :type rhs: Nonterminal + :rtype: Nonterminal + """ + return Nonterminal('%s/%s' % (self._symbol, rhs._symbol)) + + def __truediv__(self, rhs): + """ + Return a new nonterminal whose symbol is ``A/B``, where ``A`` is + the symbol for this nonterminal, and ``B`` is the symbol for rhs. + This function allows use of the slash ``/`` operator with + the future import of division. + + :param rhs: The nonterminal used to form the right hand side + of the new nonterminal. + :type rhs: Nonterminal + :rtype: Nonterminal + """ + return self.__div__(rhs) + + +def nonterminals(symbols): + """ + Given a string containing a list of symbol names, return a list of + ``Nonterminals`` constructed from those symbols. + + :param symbols: The symbol name string. This string can be + delimited by either spaces or commas. + :type symbols: str + :return: A list of ``Nonterminals`` constructed from the symbol + names given in ``symbols``. The ``Nonterminals`` are sorted + in the same order as the symbols names. + :rtype: list(Nonterminal) + """ + if ',' in symbols: + symbol_list = symbols.split(',') + else: + symbol_list = symbols.split() + return [Nonterminal(s.strip()) for s in symbol_list] + + +class FeatStructNonterminal(FeatDict, Nonterminal): + """A feature structure that's also a nonterminal. 
It acts as its + own symbol, and automatically freezes itself when hashed.""" + + def __hash__(self): + self.freeze() + return FeatStruct.__hash__(self) + + def symbol(self): + return self + + +def is_nonterminal(item): + """ + :return: True if the item is a ``Nonterminal``. + :rtype: bool + """ + return isinstance(item, Nonterminal) + + +################################################################# +# Terminals +################################################################# + + +def is_terminal(item): + """ + Return True if the item is a terminal, which currently is + if it is hashable and not a ``Nonterminal``. + + :rtype: bool + """ + return hasattr(item, '__hash__') and not isinstance(item, Nonterminal) + + +################################################################# +# Productions +################################################################# + + +@total_ordering +@python_2_unicode_compatible +class Production(object): + """ + A grammar production. Each production maps a single symbol + on the "left-hand side" to a sequence of symbols on the + "right-hand side". (In the case of context-free productions, + the left-hand side must be a ``Nonterminal``, and the right-hand + side is a sequence of terminals and ``Nonterminals``.) + "terminals" can be any immutable hashable object that is + not a ``Nonterminal``. Typically, terminals are strings + representing words, such as ``"dog"`` or ``"under"``. + + :see: ``CFG`` + :see: ``DependencyGrammar`` + :see: ``Nonterminal`` + :type _lhs: Nonterminal + :ivar _lhs: The left-hand side of the production. + :type _rhs: tuple(Nonterminal, terminal) + :ivar _rhs: The right-hand side of the production. + """ + + def __init__(self, lhs, rhs): + """ + Construct a new ``Production``. + + :param lhs: The left-hand side of the new ``Production``. + :type lhs: Nonterminal + :param rhs: The right-hand side of the new ``Production``. + :type rhs: sequence(Nonterminal and terminal) + """ + if isinstance(rhs, string_types): + raise TypeError( + 'production right hand side should be a list, ' 'not a string' + ) + self._lhs = lhs + self._rhs = tuple(rhs) + self._hash = hash((self._lhs, self._rhs)) + + def lhs(self): + """ + Return the left-hand side of this ``Production``. + + :rtype: Nonterminal + """ + return self._lhs + + def rhs(self): + """ + Return the right-hand side of this ``Production``. + + :rtype: sequence(Nonterminal and terminal) + """ + return self._rhs + + def __len__(self): + """ + Return the length of the right-hand side. + + :rtype: int + """ + return len(self._rhs) + + def is_nonlexical(self): + """ + Return True if the right-hand side only contains ``Nonterminals`` + + :rtype: bool + """ + return all(is_nonterminal(n) for n in self._rhs) + + def is_lexical(self): + """ + Return True if the right-hand contain at least one terminal token. + + :rtype: bool + """ + return not self.is_nonlexical() + + def __str__(self): + """ + Return a verbose string representation of the ``Production``. + + :rtype: str + """ + result = '%s -> ' % unicode_repr(self._lhs) + result += " ".join(unicode_repr(el) for el in self._rhs) + return result + + def __repr__(self): + """ + Return a concise string representation of the ``Production``. + + :rtype: str + """ + return '%s' % self + + def __eq__(self, other): + """ + Return True if this ``Production`` is equal to ``other``. 
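Illustration only: a small sketch of ``Production`` together with the ``nonterminals()`` helper and slash operator shown above; the toy symbols and words are assumptions made for the example.

from nltk.grammar import Production, nonterminals

S, NP, VP, V = nonterminals('S, NP, VP, V')
syntactic = Production(S, [NP, VP])   # right-hand side made up of Nonterminals only
lexical = Production(V, ['chased'])   # right-hand side contains a terminal (a plain string)
print(syntactic.is_nonlexical())      # True
print(lexical.is_lexical())           # True
print(VP / NP)                        # VP/NP -- the slash operator builds a new Nonterminal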
+ + :rtype: bool + """ + return ( + type(self) == type(other) + and self._lhs == other._lhs + and self._rhs == other._rhs + ) + + def __ne__(self, other): + return not self == other + + def __lt__(self, other): + if not isinstance(other, Production): + raise_unorderable_types("<", self, other) + return (self._lhs, self._rhs) < (other._lhs, other._rhs) + + def __hash__(self): + """ + Return a hash value for the ``Production``. + + :rtype: int + """ + return self._hash + + +@python_2_unicode_compatible +class DependencyProduction(Production): + """ + A dependency grammar production. Each production maps a single + head word to an unordered list of one or more modifier words. + """ + + def __str__(self): + """ + Return a verbose string representation of the ``DependencyProduction``. + + :rtype: str + """ + result = '\'%s\' ->' % (self._lhs,) + for elt in self._rhs: + result += ' \'%s\'' % (elt,) + return result + + +@python_2_unicode_compatible +class ProbabilisticProduction(Production, ImmutableProbabilisticMixIn): + """ + A probabilistic context free grammar production. + A PCFG ``ProbabilisticProduction`` is essentially just a ``Production`` that + has an associated probability, which represents how likely it is that + this production will be used. In particular, the probability of a + ``ProbabilisticProduction`` records the likelihood that its right-hand side is + the correct instantiation for any given occurrence of its left-hand side. + + :see: ``Production`` + """ + + def __init__(self, lhs, rhs, **prob): + """ + Construct a new ``ProbabilisticProduction``. + + :param lhs: The left-hand side of the new ``ProbabilisticProduction``. + :type lhs: Nonterminal + :param rhs: The right-hand side of the new ``ProbabilisticProduction``. + :type rhs: sequence(Nonterminal and terminal) + :param prob: Probability parameters of the new ``ProbabilisticProduction``. + """ + ImmutableProbabilisticMixIn.__init__(self, **prob) + Production.__init__(self, lhs, rhs) + + def __str__(self): + return Production.__unicode__(self) + ( + ' [1.0]' if (self.prob() == 1.0) else ' [%g]' % self.prob() + ) + + def __eq__(self, other): + return ( + type(self) == type(other) + and self._lhs == other._lhs + and self._rhs == other._rhs + and self.prob() == other.prob() + ) + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash((self._lhs, self._rhs, self.prob())) + + +################################################################# +# Grammars +################################################################# + + +@python_2_unicode_compatible +class CFG(object): + """ + A context-free grammar. A grammar consists of a start state and + a set of productions. The set of terminals and nonterminals is + implicitly specified by the productions. + + If you need efficient key-based access to productions, you + can use a subclass to implement it. + """ + + def __init__(self, start, productions, calculate_leftcorners=True): + """ + Create a new context-free grammar, from the given start state + and set of ``Production``s. + + :param start: The start symbol + :type start: Nonterminal + :param productions: The list of productions that defines the grammar + :type productions: list(Production) + :param calculate_leftcorners: False if we don't want to calculate the + leftcorner relation. In that case, some optimized chart parsers won't work. 
+ :type calculate_leftcorners: bool + """ + if not is_nonterminal(start): + raise TypeError( + "start should be a Nonterminal object," + " not a %s" % type(start).__name__ + ) + + self._start = start + self._productions = productions + self._categories = set(prod.lhs() for prod in productions) + self._calculate_indexes() + self._calculate_grammar_forms() + if calculate_leftcorners: + self._calculate_leftcorners() + + def _calculate_indexes(self): + self._lhs_index = {} + self._rhs_index = {} + self._empty_index = {} + self._lexical_index = {} + for prod in self._productions: + # Left hand side. + lhs = prod._lhs + if lhs not in self._lhs_index: + self._lhs_index[lhs] = [] + self._lhs_index[lhs].append(prod) + if prod._rhs: + # First item in right hand side. + rhs0 = prod._rhs[0] + if rhs0 not in self._rhs_index: + self._rhs_index[rhs0] = [] + self._rhs_index[rhs0].append(prod) + else: + # The right hand side is empty. + self._empty_index[prod.lhs()] = prod + # Lexical tokens in the right hand side. + for token in prod._rhs: + if is_terminal(token): + self._lexical_index.setdefault(token, set()).add(prod) + + def _calculate_leftcorners(self): + # Calculate leftcorner relations, for use in optimized parsing. + self._immediate_leftcorner_categories = dict( + (cat, set([cat])) for cat in self._categories + ) + self._immediate_leftcorner_words = dict( + (cat, set()) for cat in self._categories + ) + for prod in self.productions(): + if len(prod) > 0: + cat, left = prod.lhs(), prod.rhs()[0] + if is_nonterminal(left): + self._immediate_leftcorner_categories[cat].add(left) + else: + self._immediate_leftcorner_words[cat].add(left) + + lc = transitive_closure(self._immediate_leftcorner_categories, reflexive=True) + self._leftcorners = lc + self._leftcorner_parents = invert_graph(lc) + + nr_leftcorner_categories = sum( + map(len, self._immediate_leftcorner_categories.values()) + ) + nr_leftcorner_words = sum(map(len, self._immediate_leftcorner_words.values())) + if nr_leftcorner_words > nr_leftcorner_categories > 10000: + # If the grammar is big, the leftcorner-word dictionary will be too large. + # In that case it is better to calculate the relation on demand. + self._leftcorner_words = None + return + + self._leftcorner_words = {} + for cat in self._leftcorners: + lefts = self._leftcorners[cat] + lc = self._leftcorner_words[cat] = set() + for left in lefts: + lc.update(self._immediate_leftcorner_words.get(left, set())) + + @classmethod + def fromstring(cls, input, encoding=None): + """ + Return the grammar instance corresponding to the input string(s). + + :param input: a grammar, either in the form of a string or as a list of strings. + """ + start, productions = read_grammar( + input, standard_nonterm_parser, encoding=encoding + ) + return cls(start, productions) + + def start(self): + """ + Return the start symbol of the grammar + + :rtype: Nonterminal + """ + return self._start + + # tricky to balance readability and efficiency here! + # can't use set operations as they don't preserve ordering + def productions(self, lhs=None, rhs=None, empty=False): + """ + Return the grammar productions, filtered by the left-hand side + or the first item in the right-hand side. + + :param lhs: Only return productions with the given left-hand side. + :param rhs: Only return productions with the given first item + in the right-hand side. + :param empty: Only return productions with an empty right-hand side. + :return: A list of productions matching the given constraints. 
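Illustration only: a minimal sketch of ``CFG.fromstring`` and the ``productions()`` filter described above, assuming a tiny toy grammar.

from nltk.grammar import CFG, Nonterminal

grammar = CFG.fromstring("""
    S -> NP VP
    NP -> Det N
    VP -> V NP
    Det -> 'the'
    N -> 'dog' | 'cat'
    V -> 'chased'
""")
print(grammar.start())                            # S
print(grammar.productions(lhs=Nonterminal('N')))  # [N -> 'dog', N -> 'cat']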
+ :rtype: list(Production) + """ + if rhs and empty: + raise ValueError( + "You cannot select empty and non-empty " "productions at the same time." + ) + + # no constraints so return everything + if not lhs and not rhs: + if not empty: + return self._productions + else: + return self._empty_index.values() + + # only lhs specified so look up its index + elif lhs and not rhs: + if not empty: + return self._lhs_index.get(lhs, []) + elif lhs in self._empty_index: + return [self._empty_index[lhs]] + else: + return [] + + # only rhs specified so look up its index + elif rhs and not lhs: + return self._rhs_index.get(rhs, []) + + # intersect + else: + return [ + prod + for prod in self._lhs_index.get(lhs, []) + if prod in self._rhs_index.get(rhs, []) + ] + + def leftcorners(self, cat): + """ + Return the set of all nonterminals that the given nonterminal + can start with, including itself. + + This is the reflexive, transitive closure of the immediate + leftcorner relation: (A > B) iff (A -> B beta) + + :param cat: the parent of the leftcorners + :type cat: Nonterminal + :return: the set of all leftcorners + :rtype: set(Nonterminal) + """ + return self._leftcorners.get(cat, set([cat])) + + def is_leftcorner(self, cat, left): + """ + True if left is a leftcorner of cat, where left can be a + terminal or a nonterminal. + + :param cat: the parent of the leftcorner + :type cat: Nonterminal + :param left: the suggested leftcorner + :type left: Terminal or Nonterminal + :rtype: bool + """ + if is_nonterminal(left): + return left in self.leftcorners(cat) + elif self._leftcorner_words: + return left in self._leftcorner_words.get(cat, set()) + else: + return any( + left in self._immediate_leftcorner_words.get(parent, set()) + for parent in self.leftcorners(cat) + ) + + def leftcorner_parents(self, cat): + """ + Return the set of all nonterminals for which the given category + is a left corner. This is the inverse of the leftcorner relation. + + :param cat: the suggested leftcorner + :type cat: Nonterminal + :return: the set of all parents to the leftcorner + :rtype: set(Nonterminal) + """ + return self._leftcorner_parents.get(cat, set([cat])) + + def check_coverage(self, tokens): + """ + Check whether the grammar rules cover the given list of tokens. + If not, then raise an exception. + + :type tokens: list(str) + """ + missing = [tok for tok in tokens if not self._lexical_index.get(tok)] + if missing: + missing = ', '.join('%r' % (w,) for w in missing) + raise ValueError( + "Grammar does not cover some of the " "input words: %r." % missing + ) + + def _calculate_grammar_forms(self): + """ + Pre-calculate of which form(s) the grammar is. + """ + prods = self._productions + self._is_lexical = all(p.is_lexical() for p in prods) + self._is_nonlexical = all(p.is_nonlexical() for p in prods if len(p) != 1) + self._min_len = min(len(p) for p in prods) + self._max_len = max(len(p) for p in prods) + self._all_unary_are_lexical = all(p.is_lexical() for p in prods if len(p) == 1) + + def is_lexical(self): + """ + Return True if all productions are lexicalised. + """ + return self._is_lexical + + def is_nonlexical(self): + """ + Return True if all lexical rules are "preterminals", that is, + unary rules which can be separated in a preprocessing step. + + This means that all productions are of the forms + A -> B1 ... Bn (n>=0), or A -> "s". + + Note: is_lexical() and is_nonlexical() are not opposites. + There are grammars which are neither, and grammars which are both. 
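Illustration only: a sketch of the leftcorner and coverage helpers defined above, again over an assumed toy grammar.

from nltk.grammar import CFG, Nonterminal

grammar = CFG.fromstring("""
    S -> NP VP
    NP -> Det N
    VP -> V NP
    Det -> 'the'
    N -> 'dog'
    V -> 'chased'
""")
S = Nonterminal('S')
print(grammar.leftcorners(S))           # S, NP and Det (the reflexive, transitive closure)
print(grammar.is_leftcorner(S, 'the'))  # True: a sentence can begin with 'the'
grammar.check_coverage(['the', 'dog'])  # passes silently; unknown tokens raise ValueError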
+ """ + return self._is_nonlexical + + def min_len(self): + """ + Return the right-hand side length of the shortest grammar production. + """ + return self._min_len + + def max_len(self): + """ + Return the right-hand side length of the longest grammar production. + """ + return self._max_len + + def is_nonempty(self): + """ + Return True if there are no empty productions. + """ + return self._min_len > 0 + + def is_binarised(self): + """ + Return True if all productions are at most binary. + Note that there can still be empty and unary productions. + """ + return self._max_len <= 2 + + def is_flexible_chomsky_normal_form(self): + """ + Return True if all productions are of the forms + A -> B C, A -> B, or A -> "s". + """ + return self.is_nonempty() and self.is_nonlexical() and self.is_binarised() + + def is_chomsky_normal_form(self): + """ + Return True if the grammar is of Chomsky Normal Form, i.e. all productions + are of the form A -> B C, or A -> "s". + """ + return self.is_flexible_chomsky_normal_form() and self._all_unary_are_lexical + + def chomsky_normal_form(self, new_token_padding='@$@', flexible=False): + """ + Returns a new Grammer that is in chomsky normal + :param: new_token_padding + Customise new rule formation during binarisation + """ + if self.is_chomsky_normal_form(): + return + if self.productions(empty=True): + raise ValueError(('Grammar has Empty rules. ' + 'Cannot deal with them at the moment')) + + # check for mixed rules + for rule in self.productions(): + if rule.is_lexical() and len(rule.rhs()) > 1: + raise ValueError( + 'Cannot handled mixed rule {} => {}'.format(rule.lhs(), + rule.rhs())) + + step1 = CFG.eliminate_start(self) + step2 = CFG.binarize(step1, new_token_padding) + if flexible: + return step2 + step3 = CFG.remove_unitary_rules(step2) + return step3 + + @classmethod + def remove_unitary_rules(cls, grammar): + """ + Remove nonlexical unitary rules and convert them to + lexical + """ + result = [] + unitary = [] + for rule in grammar.productions(): + if len(rule) == 1 and rule.is_nonlexical(): + unitary.append(rule) + else: + result.append(rule) + + while unitary: + rule = unitary.pop(0) + for item in grammar.productions(lhs=rule.rhs()[0]): + new_rule = Production(rule.lhs(), item.rhs()) + if len(new_rule) != 1 or new_rule.is_lexical(): + result.append(new_rule) + else: + unitary.append(new_rule) + + n_grammar = CFG(grammar.start(), result) + return n_grammar + + @classmethod + def binarize(cls, grammar, padding='@$@'): + """ + Convert all non-binary rules into binary by introducing + new tokens. 
+ Example:: + Original: + A => B C D + After Conversion: + A => B A@$@B + A@$@B => C D + """ + result = [] + + for rule in grammar.productions(): + if len(rule.rhs()) > 2: + # this rule needs to be broken down + left_side = rule.lhs() + for k in range(0, len(rule.rhs()) - 2): + tsym = rule.rhs()[k] + new_sym = Nonterminal( + left_side.symbol() + padding + tsym.symbol() + ) + new_production = Production(left_side, (tsym, new_sym)) + left_side = new_sym + result.append(new_production) + last_prd = Production(left_side, rule.rhs()[-2:]) + result.append(last_prd) + else: + result.append(rule) + + n_grammar = CFG(grammar.start(), result) + return n_grammar + + @classmethod + def eliminate_start(cls, grammar): + """ + Eliminate start rule in case it appears on RHS + Example: S -> S0 S1 and S0 -> S1 S + Then another rule S0_Sigma -> S is added + """ + start = grammar.start() + result = [] + need_to_add = None + for rule in grammar.productions(): + if start in rule.rhs(): + need_to_add = True + result.append(rule) + if need_to_add: + start = Nonterminal('S0_SIGMA') + result.append(Production(start, grammar.start())) + n_grammar = CFG(start, result) + return n_grammar + return grammar + + def __repr__(self): + return '' % len(self._productions) + + def __str__(self): + result = 'Grammar with %d productions' % len(self._productions) + result += ' (start state = %r)' % self._start + for production in self._productions: + result += '\n %s' % production + return result + + +class FeatureGrammar(CFG): + """ + A feature-based grammar. This is equivalent to a + ``CFG`` whose nonterminals are all + ``FeatStructNonterminal``. + + A grammar consists of a start state and a set of + productions. The set of terminals and nonterminals + is implicitly specified by the productions. + """ + + def __init__(self, start, productions): + """ + Create a new feature-based grammar, from the given start + state and set of ``Productions``. + + :param start: The start symbol + :type start: FeatStructNonterminal + :param productions: The list of productions that defines the grammar + :type productions: list(Production) + """ + CFG.__init__(self, start, productions) + + # The difference with CFG is that the productions are + # indexed on the TYPE feature of the nonterminals. + # This is calculated by the method _get_type_if_possible(). + + def _calculate_indexes(self): + self._lhs_index = {} + self._rhs_index = {} + self._empty_index = {} + self._empty_productions = [] + self._lexical_index = {} + for prod in self._productions: + # Left hand side. + lhs = self._get_type_if_possible(prod._lhs) + if lhs not in self._lhs_index: + self._lhs_index[lhs] = [] + self._lhs_index[lhs].append(prod) + if prod._rhs: + # First item in right hand side. + rhs0 = self._get_type_if_possible(prod._rhs[0]) + if rhs0 not in self._rhs_index: + self._rhs_index[rhs0] = [] + self._rhs_index[rhs0].append(prod) + else: + # The right hand side is empty. + if lhs not in self._empty_index: + self._empty_index[lhs] = [] + self._empty_index[lhs].append(prod) + self._empty_productions.append(prod) + # Lexical tokens in the right hand side. + for token in prod._rhs: + if is_terminal(token): + self._lexical_index.setdefault(token, set()).add(prod) + + @classmethod + def fromstring( + cls, input, features=None, logic_parser=None, fstruct_reader=None, encoding=None + ): + """ + Return a feature structure based grammar. + + :param input: a grammar, either in the form of a string or else + as a list of strings. 
+ :param features: a tuple of features (default: SLASH, TYPE) + :param logic_parser: a parser for lambda-expressions, + by default, ``LogicParser()`` + :param fstruct_reader: a feature structure parser + (only if features and logic_parser is None) + """ + if features is None: + features = (SLASH, TYPE) + + if fstruct_reader is None: + fstruct_reader = FeatStructReader( + features, FeatStructNonterminal, logic_parser=logic_parser + ) + elif logic_parser is not None: + raise Exception( + '\'logic_parser\' and \'fstruct_reader\' must ' 'not both be set' + ) + + start, productions = read_grammar( + input, fstruct_reader.read_partial, encoding=encoding + ) + return cls(start, productions) + + def productions(self, lhs=None, rhs=None, empty=False): + """ + Return the grammar productions, filtered by the left-hand side + or the first item in the right-hand side. + + :param lhs: Only return productions with the given left-hand side. + :param rhs: Only return productions with the given first item + in the right-hand side. + :param empty: Only return productions with an empty right-hand side. + :rtype: list(Production) + """ + if rhs and empty: + raise ValueError( + "You cannot select empty and non-empty " "productions at the same time." + ) + + # no constraints so return everything + if not lhs and not rhs: + if empty: + return self._empty_productions + else: + return self._productions + + # only lhs specified so look up its index + elif lhs and not rhs: + if empty: + return self._empty_index.get(self._get_type_if_possible(lhs), []) + else: + return self._lhs_index.get(self._get_type_if_possible(lhs), []) + + # only rhs specified so look up its index + elif rhs and not lhs: + return self._rhs_index.get(self._get_type_if_possible(rhs), []) + + # intersect + else: + return [ + prod + for prod in self._lhs_index.get(self._get_type_if_possible(lhs), []) + if prod in self._rhs_index.get(self._get_type_if_possible(rhs), []) + ] + + def leftcorners(self, cat): + """ + Return the set of all words that the given category can start with. + Also called the "first set" in compiler construction. + """ + raise NotImplementedError("Not implemented yet") + + def leftcorner_parents(self, cat): + """ + Return the set of all categories for which the given category + is a left corner. + """ + raise NotImplementedError("Not implemented yet") + + def _get_type_if_possible(self, item): + """ + Helper function which returns the ``TYPE`` feature of the ``item``, + if it exists, otherwise it returns the ``item`` itself + """ + if isinstance(item, dict) and TYPE in item: + return FeatureValueType(item[TYPE]) + else: + return item + + +@total_ordering +@python_2_unicode_compatible +class FeatureValueType(object): + """ + A helper class for ``FeatureGrammars``, designed to be different + from ordinary strings. This is to stop the ``FeatStruct`` + ``FOO[]`` from being compare equal to the terminal "FOO". + """ + + def __init__(self, value): + self._value = value + self._hash = hash(value) + + def __repr__(self): + return '<%s>' % self._value + + def __eq__(self, other): + return type(self) == type(other) and self._value == other._value + + def __ne__(self, other): + return not self == other + + def __lt__(self, other): + if not isinstance(other, FeatureValueType): + raise_unorderable_types("<", self, other) + return self._value < other._value + + def __hash__(self): + return self._hash + + +@python_2_unicode_compatible +class DependencyGrammar(object): + """ + A dependency grammar. 
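Illustration only, referring back to ``FeatureGrammar.fromstring`` defined just above (before the dependency-grammar classes that follow): a sketch with a tiny hypothetical agreement grammar; the feature names and lexical items are assumptions made for the example.

from nltk.grammar import FeatureGrammar

fg = FeatureGrammar.fromstring("""
    % start S
    S -> NP[NUM=?n] VP[NUM=?n]
    NP[NUM=sg] -> 'Kim'
    VP[NUM=sg] -> 'sleeps'
""")
print(fg.start())              # the feature-structure start symbol
for prod in fg.productions():
    print(prod)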
A DependencyGrammar consists of a set of + productions. Each production specifies a head/modifier relationship + between a pair of words. + """ + + def __init__(self, productions): + """ + Create a new dependency grammar, from the set of ``Productions``. + + :param productions: The list of productions that defines the grammar + :type productions: list(Production) + """ + self._productions = productions + + @classmethod + def fromstring(cls, input): + productions = [] + for linenum, line in enumerate(input.split('\n')): + line = line.strip() + if line.startswith('#') or line == '': + continue + try: + productions += _read_dependency_production(line) + except ValueError: + raise ValueError('Unable to parse line %s: %s' % (linenum, line)) + if len(productions) == 0: + raise ValueError('No productions found!') + return cls(productions) + + def contains(self, head, mod): + """ + :param head: A head word. + :type head: str + :param mod: A mod word, to test as a modifier of 'head'. + :type mod: str + + :return: true if this ``DependencyGrammar`` contains a + ``DependencyProduction`` mapping 'head' to 'mod'. + :rtype: bool + """ + for production in self._productions: + for possibleMod in production._rhs: + if production._lhs == head and possibleMod == mod: + return True + return False + + def __contains__(self, head, mod): + """ + Return True if this ``DependencyGrammar`` contains a + ``DependencyProduction`` mapping 'head' to 'mod'. + + :param head: A head word. + :type head: str + :param mod: A mod word, to test as a modifier of 'head'. + :type mod: str + :rtype: bool + """ + for production in self._productions: + for possibleMod in production._rhs: + if production._lhs == head and possibleMod == mod: + return True + return False + + # # should be rewritten, the set comp won't work in all comparisons + # def contains_exactly(self, head, modlist): + # for production in self._productions: + # if(len(production._rhs) == len(modlist)): + # if(production._lhs == head): + # set1 = Set(production._rhs) + # set2 = Set(modlist) + # if(set1 == set2): + # return True + # return False + + def __str__(self): + """ + Return a verbose string representation of the ``DependencyGrammar`` + + :rtype: str + """ + str = 'Dependency grammar with %d productions' % len(self._productions) + for production in self._productions: + str += '\n %s' % production + return str + + def __repr__(self): + """ + Return a concise string representation of the ``DependencyGrammar`` + """ + return 'Dependency grammar with %d productions' % len(self._productions) + + +@python_2_unicode_compatible +class ProbabilisticDependencyGrammar(object): + """ + + """ + + def __init__(self, productions, events, tags): + self._productions = productions + self._events = events + self._tags = tags + + def contains(self, head, mod): + """ + Return True if this ``DependencyGrammar`` contains a + ``DependencyProduction`` mapping 'head' to 'mod'. + + :param head: A head word. + :type head: str + :param mod: A mod word, to test as a modifier of 'head'. 
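Illustration only: a sketch of ``DependencyGrammar.fromstring`` and ``contains()`` shown above, with head/modifier pairs invented for the example.

from nltk.grammar import DependencyGrammar

dep = DependencyGrammar.fromstring("""
    'walks' -> 'man' | 'dog'
    'man' -> 'the'
    'dog' -> 'the'
""")
print(dep.contains('man', 'the'))   # True: 'the' may modify 'man'
print(dep.contains('the', 'man'))   # False
print(dep)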
+ :type mod: str + :rtype: bool + """ + for production in self._productions: + for possibleMod in production._rhs: + if production._lhs == head and possibleMod == mod: + return True + return False + + def __str__(self): + """ + Return a verbose string representation of the ``ProbabilisticDependencyGrammar`` + + :rtype: str + """ + str = 'Statistical dependency grammar with %d productions' % len( + self._productions + ) + for production in self._productions: + str += '\n %s' % production + str += '\nEvents:' + for event in self._events: + str += '\n %d:%s' % (self._events[event], event) + str += '\nTags:' + for tag_word in self._tags: + str += '\n %s:\t(%s)' % (tag_word, self._tags[tag_word]) + return str + + def __repr__(self): + """ + Return a concise string representation of the ``ProbabilisticDependencyGrammar`` + """ + return 'Statistical Dependency grammar with %d productions' % len( + self._productions + ) + + +class PCFG(CFG): + """ + A probabilistic context-free grammar. A PCFG consists of a + start state and a set of productions with probabilities. The set of + terminals and nonterminals is implicitly specified by the productions. + + PCFG productions use the ``ProbabilisticProduction`` class. + ``PCFGs`` impose the constraint that the set of productions with + any given left-hand-side must have probabilities that sum to 1 + (allowing for a small margin of error). + + If you need efficient key-based access to productions, you can use + a subclass to implement it. + + :type EPSILON: float + :cvar EPSILON: The acceptable margin of error for checking that + productions with a given left-hand side have probabilities + that sum to 1. + """ + + EPSILON = 0.01 + + def __init__(self, start, productions, calculate_leftcorners=True): + """ + Create a new context-free grammar, from the given start state + and set of ``ProbabilisticProductions``. + + :param start: The start symbol + :type start: Nonterminal + :param productions: The list of productions that defines the grammar + :type productions: list(Production) + :raise ValueError: if the set of productions with any left-hand-side + do not have probabilities that sum to a value within + EPSILON of 1. + :param calculate_leftcorners: False if we don't want to calculate the + leftcorner relation. In that case, some optimized chart parsers won't work. + :type calculate_leftcorners: bool + """ + CFG.__init__(self, start, productions, calculate_leftcorners) + + # Make sure that the probabilities sum to one. + probs = {} + for production in productions: + probs[production.lhs()] = probs.get(production.lhs(), 0) + production.prob() + for (lhs, p) in probs.items(): + if not ((1 - PCFG.EPSILON) < p < (1 + PCFG.EPSILON)): + raise ValueError("Productions for %r do not sum to 1" % lhs) + + @classmethod + def fromstring(cls, input, encoding=None): + """ + Return a probabilistic context-free grammar corresponding to the + input string(s). + + :param input: a grammar, either in the form of a string or else + as a list of strings. + """ + start, productions = read_grammar( + input, standard_nonterm_parser, probabilistic=True, encoding=encoding + ) + return cls(start, productions) + + +################################################################# +# Inducing Grammars +################################################################# + +# Contributed by Nathan Bodenstab + + +def induce_pcfg(start, productions): + """ + Induce a PCFG grammar from a list of productions. 
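Illustration only, referring back to the ``PCFG`` class above: a sketch of ``PCFG.fromstring`` and the per-LHS probability check, using an assumed toy grammar.

from nltk.grammar import PCFG

pcfg = PCFG.fromstring("""
    S -> NP VP [1.0]
    NP -> 'cats' [0.3] | 'dogs' [0.7]
    VP -> 'sleep' [1.0]
""")
for prod in pcfg.productions():
    print(prod)                      # e.g. NP -> 'cats' [0.3]
# For each left-hand side the probabilities must sum to 1 within PCFG.EPSILON;
# giving the NP rules [0.3] and [0.5] instead would raise
# ValueError: Productions for NP do not sum to 1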
+ + The probability of a production A -> B C in a PCFG is: + + | count(A -> B C) + | P(B, C | A) = --------------- where \* is any right hand side + | count(A -> \*) + + :param start: The start symbol + :type start: Nonterminal + :param productions: The list of productions that defines the grammar + :type productions: list(Production) + """ + # Production count: the number of times a given production occurs + pcount = {} + + # LHS-count: counts the number of times a given lhs occurs + lcount = {} + + for prod in productions: + lcount[prod.lhs()] = lcount.get(prod.lhs(), 0) + 1 + pcount[prod] = pcount.get(prod, 0) + 1 + + prods = [ + ProbabilisticProduction(p.lhs(), p.rhs(), prob=pcount[p] / lcount[p.lhs()]) + for p in pcount + ] + return PCFG(start, prods) + + +################################################################# +# Helper functions for reading productions +################################################################# + + +def _read_cfg_production(input): + """ + Return a list of context-free ``Productions``. + """ + return _read_production(input, standard_nonterm_parser) + + +def _read_pcfg_production(input): + """ + Return a list of PCFG ``ProbabilisticProductions``. + """ + return _read_production(input, standard_nonterm_parser, probabilistic=True) + + +def _read_fcfg_production(input, fstruct_reader): + """ + Return a list of feature-based ``Productions``. + """ + return _read_production(input, fstruct_reader) + + +# Parsing generic grammars + +_ARROW_RE = re.compile(r'\s* -> \s*', re.VERBOSE) +_PROBABILITY_RE = re.compile(r'( \[ [\d\.]+ \] ) \s*', re.VERBOSE) +_TERMINAL_RE = re.compile(r'( "[^"]+" | \'[^\']+\' ) \s*', re.VERBOSE) +_DISJUNCTION_RE = re.compile(r'\| \s*', re.VERBOSE) + + +def _read_production(line, nonterm_parser, probabilistic=False): + """ + Parse a grammar rule, given as a string, and return + a list of productions. + """ + pos = 0 + + # Parse the left-hand side. + lhs, pos = nonterm_parser(line, pos) + + # Skip over the arrow. + m = _ARROW_RE.match(line, pos) + if not m: + raise ValueError('Expected an arrow') + pos = m.end() + + # Parse the right hand side. + probabilities = [0.0] + rhsides = [[]] + while pos < len(line): + # Probability. + m = _PROBABILITY_RE.match(line, pos) + if probabilistic and m: + pos = m.end() + probabilities[-1] = float(m.group(1)[1:-1]) + if probabilities[-1] > 1.0: + raise ValueError( + 'Production probability %f, ' + 'should not be greater than 1.0' % (probabilities[-1],) + ) + + # String -- add terminal. + elif line[pos] in "\'\"": + m = _TERMINAL_RE.match(line, pos) + if not m: + raise ValueError('Unterminated string') + rhsides[-1].append(m.group(1)[1:-1]) + pos = m.end() + + # Vertical bar -- start new rhside. + elif line[pos] == '|': + m = _DISJUNCTION_RE.match(line, pos) + probabilities.append(0.0) + rhsides.append([]) + pos = m.end() + + # Anything else -- nonterminal. + else: + nonterm, pos = nonterm_parser(line, pos) + rhsides[-1].append(nonterm) + + if probabilistic: + return [ + ProbabilisticProduction(lhs, rhs, prob=probability) + for (rhs, probability) in zip(rhsides, probabilities) + ] + else: + return [Production(lhs, rhs) for rhs in rhsides] + + +################################################################# +# Reading Phrase Structure Grammars +################################################################# + + +def read_grammar(input, nonterm_parser, probabilistic=False, encoding=None): + """ + Return a pair consisting of a starting category and a list of + ``Productions``. 
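Illustration only: a sketch of ``induce_pcfg`` defined above, applied to productions read off two assumed toy trees rather than treebank data.

from nltk import Tree
from nltk.grammar import Nonterminal, induce_pcfg

trees = [
    Tree.fromstring('(S (NP cats) (VP sleep))'),
    Tree.fromstring('(S (NP dogs) (VP (V chase) (NP cats)))'),
]
productions = []
for tree in trees:
    productions += tree.productions()
print(induce_pcfg(Nonterminal('S'), productions))
# NP -> 'cats' gets probability 2/3 and NP -> 'dogs' 1/3: count(A -> beta) / count(A -> *)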
+ + :param input: a grammar, either in the form of a string or else + as a list of strings. + :param nonterm_parser: a function for parsing nonterminals. + It should take a ``(string, position)`` as argument and + return a ``(nonterminal, position)`` as result. + :param probabilistic: are the grammar rules probabilistic? + :type probabilistic: bool + :param encoding: the encoding of the grammar, if it is a binary string + :type encoding: str + """ + if encoding is not None: + input = input.decode(encoding) + if isinstance(input, string_types): + lines = input.split('\n') + else: + lines = input + + start = None + productions = [] + continue_line = '' + for linenum, line in enumerate(lines): + line = continue_line + line.strip() + if line.startswith('#') or line == '': + continue + if line.endswith('\\'): + continue_line = line[:-1].rstrip() + ' ' + continue + continue_line = '' + try: + if line[0] == '%': + directive, args = line[1:].split(None, 1) + if directive == 'start': + start, pos = nonterm_parser(args, 0) + if pos != len(args): + raise ValueError('Bad argument to start directive') + else: + raise ValueError('Bad directive') + else: + # expand out the disjunctions on the RHS + productions += _read_production(line, nonterm_parser, probabilistic) + except ValueError as e: + raise ValueError('Unable to parse line %s: %s\n%s' % (linenum + 1, line, e)) + + if not productions: + raise ValueError('No productions found!') + if not start: + start = productions[0].lhs() + return (start, productions) + + +_STANDARD_NONTERM_RE = re.compile('( [\w/][\w/^<>-]* ) \s*', re.VERBOSE) + + +def standard_nonterm_parser(string, pos): + m = _STANDARD_NONTERM_RE.match(string, pos) + if not m: + raise ValueError('Expected a nonterminal, found: ' + string[pos:]) + return (Nonterminal(m.group(1)), m.end()) + + +################################################################# +# Reading Dependency Grammars +################################################################# + +_READ_DG_RE = re.compile( + r'''^\s* # leading whitespace + ('[^']+')\s* # single-quoted lhs + (?:[-=]+>)\s* # arrow + (?:( # rhs: + "[^"]+" # doubled-quoted terminal + | '[^']+' # single-quoted terminal + | \| # disjunction + ) + \s*) # trailing space + *$''', # zero or more copies + re.VERBOSE, +) +_SPLIT_DG_RE = re.compile(r'''('[^']'|[-=]+>|"[^"]+"|'[^']+'|\|)''') + + +def _read_dependency_production(s): + if not _READ_DG_RE.match(s): + raise ValueError('Bad production string') + pieces = _SPLIT_DG_RE.split(s) + pieces = [p for i, p in enumerate(pieces) if i % 2 == 1] + lhside = pieces[0].strip('\'\"') + rhsides = [[]] + for piece in pieces[2:]: + if piece == '|': + rhsides.append([]) + else: + rhsides[-1].append(piece.strip('\'\"')) + return [DependencyProduction(lhside, rhside) for rhside in rhsides] + + +################################################################# +# Demonstration +################################################################# + + +def cfg_demo(): + """ + A demonstration showing how ``CFGs`` can be created and used. 
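Illustration only, referring back to the directive handling in ``read_grammar`` above: a sketch of the ``%start`` directive, with a toy grammar chosen so the default start symbol would otherwise be wrong.

from nltk.grammar import CFG

grammar = CFG.fromstring("""
    # Comment lines and blank lines are skipped by read_grammar().
    % start S
    NP -> 'dogs'
    VP -> 'sleep'
    S -> NP VP
""")
print(grammar.start())   # S -- without the directive it would default to NP, the first LHS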
+ """ + + from nltk import nonterminals, Production, CFG + + # Create some nonterminals + S, NP, VP, PP = nonterminals('S, NP, VP, PP') + N, V, P, Det = nonterminals('N, V, P, Det') + VP_slash_NP = VP / NP + + print('Some nonterminals:', [S, NP, VP, PP, N, V, P, Det, VP / NP]) + print(' S.symbol() =>', repr(S.symbol())) + print() + + print(Production(S, [NP])) + + # Create some Grammar Productions + grammar = CFG.fromstring( + """ + S -> NP VP + PP -> P NP + NP -> Det N | NP PP + VP -> V NP | VP PP + Det -> 'a' | 'the' + N -> 'dog' | 'cat' + V -> 'chased' | 'sat' + P -> 'on' | 'in' + """ + ) + + print('A Grammar:', repr(grammar)) + print(' grammar.start() =>', repr(grammar.start())) + print(' grammar.productions() =>', end=' ') + # Use string.replace(...) is to line-wrap the output. + print(repr(grammar.productions()).replace(',', ',\n' + ' ' * 25)) + print() + + +toy_pcfg1 = PCFG.fromstring( + """ + S -> NP VP [1.0] + NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15] + Det -> 'the' [0.8] | 'my' [0.2] + N -> 'man' [0.5] | 'telescope' [0.5] + VP -> VP PP [0.1] | V NP [0.7] | V [0.2] + V -> 'ate' [0.35] | 'saw' [0.65] + PP -> P NP [1.0] + P -> 'with' [0.61] | 'under' [0.39] + """ +) + +toy_pcfg2 = PCFG.fromstring( + """ + S -> NP VP [1.0] + VP -> V NP [.59] + VP -> V [.40] + VP -> VP PP [.01] + NP -> Det N [.41] + NP -> Name [.28] + NP -> NP PP [.31] + PP -> P NP [1.0] + V -> 'saw' [.21] + V -> 'ate' [.51] + V -> 'ran' [.28] + N -> 'boy' [.11] + N -> 'cookie' [.12] + N -> 'table' [.13] + N -> 'telescope' [.14] + N -> 'hill' [.5] + Name -> 'Jack' [.52] + Name -> 'Bob' [.48] + P -> 'with' [.61] + P -> 'under' [.39] + Det -> 'the' [.41] + Det -> 'a' [.31] + Det -> 'my' [.28] + """ +) + + +def pcfg_demo(): + """ + A demonstration showing how a ``PCFG`` can be created and used. + """ + + from nltk.corpus import treebank + from nltk import treetransforms + from nltk import induce_pcfg + from nltk.parse import pchart + + pcfg_prods = toy_pcfg1.productions() + + pcfg_prod = pcfg_prods[2] + print('A PCFG production:', repr(pcfg_prod)) + print(' pcfg_prod.lhs() =>', repr(pcfg_prod.lhs())) + print(' pcfg_prod.rhs() =>', repr(pcfg_prod.rhs())) + print(' pcfg_prod.prob() =>', repr(pcfg_prod.prob())) + print() + + grammar = toy_pcfg2 + print('A PCFG grammar:', repr(grammar)) + print(' grammar.start() =>', repr(grammar.start())) + print(' grammar.productions() =>', end=' ') + # Use .replace(...) is to line-wrap the output. 
+ print(repr(grammar.productions()).replace(',', ',\n' + ' ' * 26)) + print() + + # extract productions from three trees and induce the PCFG + print("Induce PCFG grammar from treebank data:") + + productions = [] + item = treebank._fileids[0] + for tree in treebank.parsed_sents(item)[:3]: + # perform optional tree transformations, e.g.: + tree.collapse_unary(collapsePOS=False) + tree.chomsky_normal_form(horzMarkov=2) + + productions += tree.productions() + + S = Nonterminal('S') + grammar = induce_pcfg(S, productions) + print(grammar) + print() + + print("Parse sentence using induced grammar:") + + parser = pchart.InsideChartParser(grammar) + parser.trace(3) + + # doesn't work as tokens are different: + # sent = treebank.tokenized('wsj_0001.mrg')[0] + + sent = treebank.parsed_sents(item)[0].leaves() + print(sent) + for parse in parser.parse(sent): + print(parse) + + +def fcfg_demo(): + import nltk.data + + g = nltk.data.load('grammars/book_grammars/feat0.fcfg') + print(g) + print() + + +def dg_demo(): + """ + A demonstration showing the creation and inspection of a + ``DependencyGrammar``. + """ + grammar = DependencyGrammar.fromstring( + """ + 'scratch' -> 'cats' | 'walls' + 'walls' -> 'the' + 'cats' -> 'the' + """ + ) + print(grammar) + + +def sdg_demo(): + """ + A demonstration of how to read a string representation of + a CoNLL format dependency tree. + """ + from nltk.parse import DependencyGraph + + dg = DependencyGraph( + """ + 1 Ze ze Pron Pron per|3|evofmv|nom 2 su _ _ + 2 had heb V V trans|ovt|1of2of3|ev 0 ROOT _ _ + 3 met met Prep Prep voor 8 mod _ _ + 4 haar haar Pron Pron bez|3|ev|neut|attr 5 det _ _ + 5 moeder moeder N N soort|ev|neut 3 obj1 _ _ + 6 kunnen kan V V hulp|ott|1of2of3|mv 2 vc _ _ + 7 gaan ga V V hulp|inf 6 vc _ _ + 8 winkelen winkel V V intrans|inf 11 cnj _ _ + 9 , , Punc Punc komma 8 punct _ _ + 10 zwemmen zwem V V intrans|inf 11 cnj _ _ + 11 of of Conj Conj neven 7 vc _ _ + 12 terrassen terras N N soort|mv|neut 11 cnj _ _ + 13 . . Punc Punc punt 12 punct _ _ + """ + ) + tree = dg.tree() + print(tree.pprint()) + + +def demo(): + cfg_demo() + pcfg_demo() + fcfg_demo() + dg_demo() + sdg_demo() + + +if __name__ == '__main__': + demo() + +__all__ = [ + 'Nonterminal', + 'nonterminals', + 'CFG', + 'Production', + 'PCFG', + 'ProbabilisticProduction', + 'DependencyGrammar', + 'DependencyProduction', + 'ProbabilisticDependencyGrammar', + 'induce_pcfg', + 'read_grammar', +] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/help.py b/venv.bak/lib/python3.7/site-packages/nltk/help.py new file mode 100644 index 0000000..27671e8 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/help.py @@ -0,0 +1,65 @@ +# Natural Language Toolkit (NLTK) Help +# +# Copyright (C) 2001-2019 NLTK Project +# Authors: Steven Bird +# URL: +# For license information, see LICENSE.TXT + +""" +Provide structured access to documentation. 
+""" +from __future__ import print_function + +import re +from textwrap import wrap + +from nltk.data import load + + +def brown_tagset(tagpattern=None): + _format_tagset("brown_tagset", tagpattern) + + +def claws5_tagset(tagpattern=None): + _format_tagset("claws5_tagset", tagpattern) + + +def upenn_tagset(tagpattern=None): + _format_tagset("upenn_tagset", tagpattern) + + +##################################################################### +# UTILITIES +##################################################################### + + +def _print_entries(tags, tagdict): + for tag in tags: + entry = tagdict[tag] + defn = [tag + ": " + entry[0]] + examples = wrap( + entry[1], width=75, initial_indent=' ', subsequent_indent=' ' + ) + print("\n".join(defn + examples)) + + +def _format_tagset(tagset, tagpattern=None): + tagdict = load("help/tagsets/" + tagset + ".pickle") + if not tagpattern: + _print_entries(sorted(tagdict), tagdict) + elif tagpattern in tagdict: + _print_entries([tagpattern], tagdict) + else: + tagpattern = re.compile(tagpattern) + tags = [tag for tag in sorted(tagdict) if tagpattern.match(tag)] + if tags: + _print_entries(tags, tagdict) + else: + print("No matching tags found.") + + +if __name__ == '__main__': + brown_tagset(r'NN.*') + upenn_tagset(r'.*\$') + claws5_tagset('UNDEFINED') + brown_tagset(r'NN') diff --git a/venv.bak/lib/python3.7/site-packages/nltk/inference/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/inference/__init__.py new file mode 100644 index 0000000..d79c935 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/inference/__init__.py @@ -0,0 +1,24 @@ +# Natural Language Toolkit: Inference +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Dan Garrette +# Ewan Klein +# +# URL: +# For license information, see LICENSE.TXT + +""" +Classes and interfaces for theorem proving and model building. 
+""" + +from nltk.inference.api import ParallelProverBuilder, ParallelProverBuilderCommand +from nltk.inference.mace import Mace, MaceCommand +from nltk.inference.prover9 import Prover9, Prover9Command +from nltk.inference.resolution import ResolutionProver, ResolutionProverCommand +from nltk.inference.tableau import TableauProver, TableauProverCommand +from nltk.inference.discourse import ( + ReadingCommand, + CfgReadingCommand, + DrtGlueReadingCommand, + DiscourseTester, +) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..cbbc048 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/api.cpython-37.pyc new file mode 100644 index 0000000..97e847c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/api.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/discourse.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/discourse.cpython-37.pyc new file mode 100644 index 0000000..0ea61d6 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/discourse.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/mace.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/mace.cpython-37.pyc new file mode 100644 index 0000000..64ef27c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/mace.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/nonmonotonic.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/nonmonotonic.cpython-37.pyc new file mode 100644 index 0000000..838cf8e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/nonmonotonic.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/prover9.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/prover9.cpython-37.pyc new file mode 100644 index 0000000..6a51f50 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/prover9.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/resolution.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/resolution.cpython-37.pyc new file mode 100644 index 0000000..f995073 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/resolution.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/tableau.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/tableau.cpython-37.pyc new file mode 100644 index 0000000..1a5ede9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/inference/__pycache__/tableau.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/inference/api.py b/venv.bak/lib/python3.7/site-packages/nltk/inference/api.py new file mode 100644 index 0000000..3bc8ad3 --- /dev/null +++ 
b/venv.bak/lib/python3.7/site-packages/nltk/inference/api.py @@ -0,0 +1,620 @@ +# Natural Language Toolkit: Classifier Interface +# +# Author: Ewan Klein +# Dan Garrette +# +# URL: +# For license information, see LICENSE.TXT + +""" +Interfaces and base classes for theorem provers and model builders. + +``Prover`` is a standard interface for a theorem prover which tries to prove a goal from a +list of assumptions. + +``ModelBuilder`` is a standard interface for a model builder. Given just a set of assumptions. +the model builder tries to build a model for the assumptions. Given a set of assumptions and a +goal *G*, the model builder tries to find a counter-model, in the sense of a model that will satisfy +the assumptions plus the negation of *G*. +""" +from __future__ import print_function + +from abc import ABCMeta, abstractmethod +import threading +import time + +from six import add_metaclass + + +@add_metaclass(ABCMeta) +class Prover(object): + """ + Interface for trying to prove a goal from assumptions. Both the goal and + the assumptions are constrained to be formulas of ``logic.Expression``. + """ + + def prove(self, goal=None, assumptions=None, verbose=False): + """ + :return: Whether the proof was successful or not. + :rtype: bool + """ + return self._prove(goal, assumptions, verbose)[0] + + @abstractmethod + def _prove(self, goal=None, assumptions=None, verbose=False): + """ + :return: Whether the proof was successful or not, along with the proof + :rtype: tuple: (bool, str) + """ + + +@add_metaclass(ABCMeta) +class ModelBuilder(object): + """ + Interface for trying to build a model of set of formulas. + Open formulas are assumed to be universally quantified. + Both the goal and the assumptions are constrained to be formulas + of ``logic.Expression``. + """ + + def build_model(self, goal=None, assumptions=None, verbose=False): + """ + Perform the actual model building. + :return: Whether a model was generated + :rtype: bool + """ + return self._build_model(goal, assumptions, verbose)[0] + + @abstractmethod + def _build_model(self, goal=None, assumptions=None, verbose=False): + """ + Perform the actual model building. + :return: Whether a model was generated, and the model itself + :rtype: tuple(bool, sem.Valuation) + """ + + +@add_metaclass(ABCMeta) +class TheoremToolCommand(object): + """ + This class holds a goal and a list of assumptions to be used in proving + or model building. + """ + + @abstractmethod + def add_assumptions(self, new_assumptions): + """ + Add new assumptions to the assumption list. + + :param new_assumptions: new assumptions + :type new_assumptions: list(sem.Expression) + """ + + @abstractmethod + def retract_assumptions(self, retracted, debug=False): + """ + Retract assumptions from the assumption list. + + :param debug: If True, give warning when ``retracted`` is not present on + assumptions list. + :type debug: bool + :param retracted: assumptions to be retracted + :type retracted: list(sem.Expression) + """ + + @abstractmethod + def assumptions(self): + """ + List the current assumptions. + + :return: list of ``Expression`` + """ + + @abstractmethod + def goal(self): + """ + Return the goal + + :return: ``Expression`` + """ + + @abstractmethod + def print_assumptions(self): + """ + Print the list of the current assumptions. + """ + + +class ProverCommand(TheoremToolCommand): + """ + This class holds a ``Prover``, a goal, and a list of assumptions. When + prove() is called, the ``Prover`` is executed with the goal and assumptions. 
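Illustration only: a sketch of the ``Prover`` interface described above, assuming the pure-Python ``ResolutionProver`` exported from ``nltk.inference`` can settle this simple syllogism (an external prover such as Prover9 could equally be substituted); the formulas are invented for the example.

from nltk.inference import ResolutionProver
from nltk.sem.logic import Expression

read_expr = Expression.fromstring
assumptions = [read_expr('all x.(man(x) -> mortal(x))'),
               read_expr('man(socrates)')]
goal = read_expr('mortal(socrates)')
# prove() is the bool-returning entry point of the Prover interface above.
print(ResolutionProver().prove(goal, assumptions, verbose=False))  # True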
+ """ + + @abstractmethod + def prove(self, verbose=False): + """ + Perform the actual proof. + """ + + @abstractmethod + def proof(self, simplify=True): + """ + Return the proof string + :param simplify: bool simplify the proof? + :return: str + """ + + @abstractmethod + def get_prover(self): + """ + Return the prover object + :return: ``Prover`` + """ + + +class ModelBuilderCommand(TheoremToolCommand): + """ + This class holds a ``ModelBuilder``, a goal, and a list of assumptions. + When build_model() is called, the ``ModelBuilder`` is executed with the goal + and assumptions. + """ + + @abstractmethod + def build_model(self, verbose=False): + """ + Perform the actual model building. + :return: A model if one is generated; None otherwise. + :rtype: sem.Valuation + """ + + @abstractmethod + def model(self, format=None): + """ + Return a string representation of the model + + :param simplify: bool simplify the proof? + :return: str + """ + + @abstractmethod + def get_model_builder(self): + """ + Return the model builder object + :return: ``ModelBuilder`` + """ + + +class BaseTheoremToolCommand(TheoremToolCommand): + """ + This class holds a goal and a list of assumptions to be used in proving + or model building. + """ + + def __init__(self, goal=None, assumptions=None): + """ + :param goal: Input expression to prove + :type goal: sem.Expression + :param assumptions: Input expressions to use as assumptions in + the proof. + :type assumptions: list(sem.Expression) + """ + self._goal = goal + + if not assumptions: + self._assumptions = [] + else: + self._assumptions = list(assumptions) + + self._result = None + """A holder for the result, to prevent unnecessary re-proving""" + + def add_assumptions(self, new_assumptions): + """ + Add new assumptions to the assumption list. + + :param new_assumptions: new assumptions + :type new_assumptions: list(sem.Expression) + """ + self._assumptions.extend(new_assumptions) + self._result = None + + def retract_assumptions(self, retracted, debug=False): + """ + Retract assumptions from the assumption list. + + :param debug: If True, give warning when ``retracted`` is not present on + assumptions list. + :type debug: bool + :param retracted: assumptions to be retracted + :type retracted: list(sem.Expression) + """ + retracted = set(retracted) + result_list = list(filter(lambda a: a not in retracted, self._assumptions)) + if debug and result_list == self._assumptions: + print(Warning("Assumptions list has not been changed:")) + self.print_assumptions() + + self._assumptions = result_list + + self._result = None + + def assumptions(self): + """ + List the current assumptions. + + :return: list of ``Expression`` + """ + return self._assumptions + + def goal(self): + """ + Return the goal + + :return: ``Expression`` + """ + return self._goal + + def print_assumptions(self): + """ + Print the list of the current assumptions. + """ + for a in self.assumptions(): + print(a) + + +class BaseProverCommand(BaseTheoremToolCommand, ProverCommand): + """ + This class holds a ``Prover``, a goal, and a list of assumptions. When + prove() is called, the ``Prover`` is executed with the goal and assumptions. 
+ """ + + def __init__(self, prover, goal=None, assumptions=None): + """ + :param prover: The theorem tool to execute with the assumptions + :type prover: Prover + :see: ``BaseTheoremToolCommand`` + """ + self._prover = prover + """The theorem tool to execute with the assumptions""" + + BaseTheoremToolCommand.__init__(self, goal, assumptions) + + self._proof = None + + def prove(self, verbose=False): + """ + Perform the actual proof. Store the result to prevent unnecessary + re-proving. + """ + if self._result is None: + self._result, self._proof = self._prover._prove( + self.goal(), self.assumptions(), verbose + ) + return self._result + + def proof(self, simplify=True): + """ + Return the proof string + :param simplify: bool simplify the proof? + :return: str + """ + if self._result is None: + raise LookupError("You have to call prove() first to get a proof!") + else: + return self.decorate_proof(self._proof, simplify) + + def decorate_proof(self, proof_string, simplify=True): + """ + Modify and return the proof string + :param proof_string: str the proof to decorate + :param simplify: bool simplify the proof? + :return: str + """ + return proof_string + + def get_prover(self): + return self._prover + + +class BaseModelBuilderCommand(BaseTheoremToolCommand, ModelBuilderCommand): + """ + This class holds a ``ModelBuilder``, a goal, and a list of assumptions. When + build_model() is called, the ``ModelBuilder`` is executed with the goal and + assumptions. + """ + + def __init__(self, modelbuilder, goal=None, assumptions=None): + """ + :param modelbuilder: The theorem tool to execute with the assumptions + :type modelbuilder: ModelBuilder + :see: ``BaseTheoremToolCommand`` + """ + self._modelbuilder = modelbuilder + """The theorem tool to execute with the assumptions""" + + BaseTheoremToolCommand.__init__(self, goal, assumptions) + + self._model = None + + def build_model(self, verbose=False): + """ + Attempt to build a model. Store the result to prevent unnecessary + re-building. + """ + if self._result is None: + self._result, self._model = self._modelbuilder._build_model( + self.goal(), self.assumptions(), verbose + ) + return self._result + + def model(self, format=None): + """ + Return a string representation of the model + + :param simplify: bool simplify the proof? + :return: str + """ + if self._result is None: + raise LookupError('You have to call build_model() first to ' 'get a model!') + else: + return self._decorate_model(self._model, format) + + def _decorate_model(self, valuation_str, format=None): + """ + :param valuation_str: str with the model builder's output + :param format: str indicating the format for displaying + :return: str + """ + return valuation_str + + def get_model_builder(self): + return self._modelbuilder + + +class TheoremToolCommandDecorator(TheoremToolCommand): + """ + A base decorator for the ``ProverCommandDecorator`` and + ``ModelBuilderCommandDecorator`` classes from which decorators can extend. 
+ """ + + def __init__(self, command): + """ + :param command: ``TheoremToolCommand`` to decorate + """ + self._command = command + + # The decorator has its own versions of 'result' different from the + # underlying command + self._result = None + + def assumptions(self): + return self._command.assumptions() + + def goal(self): + return self._command.goal() + + def add_assumptions(self, new_assumptions): + self._command.add_assumptions(new_assumptions) + self._result = None + + def retract_assumptions(self, retracted, debug=False): + self._command.retract_assumptions(retracted, debug) + self._result = None + + def print_assumptions(self): + self._command.print_assumptions() + + +class ProverCommandDecorator(TheoremToolCommandDecorator, ProverCommand): + """ + A base decorator for the ``ProverCommand`` class from which other + prover command decorators can extend. + """ + + def __init__(self, proverCommand): + """ + :param proverCommand: ``ProverCommand`` to decorate + """ + TheoremToolCommandDecorator.__init__(self, proverCommand) + + # The decorator has its own versions of 'result' and 'proof' + # because they may be different from the underlying command + self._proof = None + + def prove(self, verbose=False): + if self._result is None: + prover = self.get_prover() + self._result, self._proof = prover._prove( + self.goal(), self.assumptions(), verbose + ) + return self._result + + def proof(self, simplify=True): + """ + Return the proof string + :param simplify: bool simplify the proof? + :return: str + """ + if self._result is None: + raise LookupError("You have to call prove() first to get a proof!") + else: + return self.decorate_proof(self._proof, simplify) + + def decorate_proof(self, proof_string, simplify=True): + """ + Modify and return the proof string + :param proof_string: str the proof to decorate + :param simplify: bool simplify the proof? + :return: str + """ + return self._command.decorate_proof(proof_string, simplify) + + def get_prover(self): + return self._command.get_prover() + + +class ModelBuilderCommandDecorator(TheoremToolCommandDecorator, ModelBuilderCommand): + """ + A base decorator for the ``ModelBuilderCommand`` class from which other + prover command decorators can extend. + """ + + def __init__(self, modelBuilderCommand): + """ + :param modelBuilderCommand: ``ModelBuilderCommand`` to decorate + """ + TheoremToolCommandDecorator.__init__(self, modelBuilderCommand) + + # The decorator has its own versions of 'result' and 'valuation' + # because they may be different from the underlying command + self._model = None + + def build_model(self, verbose=False): + """ + Attempt to build a model. Store the result to prevent unnecessary + re-building. + """ + if self._result is None: + modelbuilder = self.get_model_builder() + self._result, self._model = modelbuilder._build_model( + self.goal(), self.assumptions(), verbose + ) + return self._result + + def model(self, format=None): + """ + Return a string representation of the model + + :param simplify: bool simplify the proof? 
+ :return: str + """ + if self._result is None: + raise LookupError('You have to call build_model() first to ' 'get a model!') + else: + return self._decorate_model(self._model, format) + + def _decorate_model(self, valuation_str, format=None): + """ + Modify and return the proof string + :param valuation_str: str with the model builder's output + :param format: str indicating the format for displaying + :return: str + """ + return self._command._decorate_model(valuation_str, format) + + def get_model_builder(self): + return self._command.get_prover() + + +class ParallelProverBuilder(Prover, ModelBuilder): + """ + This class stores both a prover and a model builder and when either + prove() or build_model() is called, then both theorem tools are run in + parallel. Whichever finishes first, the prover or the model builder, is the + result that will be used. + """ + + def __init__(self, prover, modelbuilder): + self._prover = prover + self._modelbuilder = modelbuilder + + def _prove(self, goal=None, assumptions=None, verbose=False): + return self._run(goal, assumptions, verbose), '' + + def _build_model(self, goal=None, assumptions=None, verbose=False): + return not self._run(goal, assumptions, verbose), '' + + def _run(self, goal, assumptions, verbose): + # Set up two thread, Prover and ModelBuilder to run in parallel + tp_thread = TheoremToolThread( + lambda: self._prover.prove(goal, assumptions, verbose), verbose, 'TP' + ) + mb_thread = TheoremToolThread( + lambda: self._modelbuilder.build_model(goal, assumptions, verbose), + verbose, + 'MB', + ) + + tp_thread.start() + mb_thread.start() + + while tp_thread.isAlive() and mb_thread.isAlive(): + # wait until either the prover or the model builder is done + pass + + if tp_thread.result is not None: + return tp_thread.result + elif mb_thread.result is not None: + return not mb_thread.result + else: + return None + + +class ParallelProverBuilderCommand(BaseProverCommand, BaseModelBuilderCommand): + """ + This command stores both a prover and a model builder and when either + prove() or build_model() is called, then both theorem tools are run in + parallel. Whichever finishes first, the prover or the model builder, is the + result that will be used. + + Because the theorem prover result is the opposite of the model builder + result, we will treat self._result as meaning "proof found/no model found". 
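+
+    A rough usage sketch (assuming the external Prover9 and Mace4 binaries
+    are installed, since ``Prover9`` and ``Mace`` shell out to them)::
+
+        command = ParallelProverBuilderCommand(Prover9(), Mace(),
+                                               goal, assumptions)
+        command.prove()        # True iff "proof found / no model found"
+        command.build_model()  # the complementary answer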
+ """ + + def __init__(self, prover, modelbuilder, goal=None, assumptions=None): + BaseProverCommand.__init__(self, prover, goal, assumptions) + BaseModelBuilderCommand.__init__(self, modelbuilder, goal, assumptions) + + def prove(self, verbose=False): + return self._run(verbose) + + def build_model(self, verbose=False): + return not self._run(verbose) + + def _run(self, verbose): + # Set up two thread, Prover and ModelBuilder to run in parallel + tp_thread = TheoremToolThread( + lambda: BaseProverCommand.prove(self, verbose), verbose, 'TP' + ) + mb_thread = TheoremToolThread( + lambda: BaseModelBuilderCommand.build_model(self, verbose), verbose, 'MB' + ) + + tp_thread.start() + mb_thread.start() + + while tp_thread.isAlive() and mb_thread.isAlive(): + # wait until either the prover or the model builder is done + pass + + if tp_thread.result is not None: + self._result = tp_thread.result + elif mb_thread.result is not None: + self._result = not mb_thread.result + return self._result + + +class TheoremToolThread(threading.Thread): + def __init__(self, command, verbose, name=None): + threading.Thread.__init__(self) + self._command = command + self._result = None + self._verbose = verbose + self._name = name + + def run(self): + try: + self._result = self._command() + if self._verbose: + print( + 'Thread %s finished with result %s at %s' + % (self._name, self._result, time.localtime(time.time())) + ) + except Exception as e: + print(e) + print('Thread %s completed abnormally' % (self._name)) + + @property + def result(self): + return self._result diff --git a/venv.bak/lib/python3.7/site-packages/nltk/inference/discourse.py b/venv.bak/lib/python3.7/site-packages/nltk/inference/discourse.py new file mode 100644 index 0000000..7dad02d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/inference/discourse.py @@ -0,0 +1,670 @@ +# Natural Language Toolkit: Discourse Processing +# +# Author: Ewan Klein +# Dan Garrette +# +# URL: +# For license information, see LICENSE.TXT + +""" +Module for incrementally developing simple discourses, and checking for semantic ambiguity, +consistency and informativeness. + +Many of the ideas are based on the CURT family of programs of Blackburn and Bos +(see http://homepages.inf.ed.ac.uk/jbos/comsem/book1.html). + +Consistency checking is carried out by using the ``mace`` module to call the Mace4 model builder. +Informativeness checking is carried out with a call to ``Prover.prove()`` from +the ``inference`` module. + +``DiscourseTester`` is a constructor for discourses. +The basic data structure is a list of sentences, stored as ``self._sentences``. Each sentence in the list +is assigned a "sentence ID" (``sid``) of the form ``s``\ *i*. For example:: + + s0: A boxer walks + s1: Every boxer chases a girl + +Each sentence can be ambiguous between a number of readings, each of which receives a +"reading ID" (``rid``) of the form ``s``\ *i* -``r``\ *j*. For example:: + + s0 readings: + + s0-r1: some x.(boxer(x) & walk(x)) + s0-r0: some x.(boxerdog(x) & walk(x)) + +A "thread" is a list of readings, represented as a list of ``rid``\ s. +Each thread receives a "thread ID" (``tid``) of the form ``d``\ *i*. +For example:: + + d0: ['s0-r0', 's1-r0'] + +The set of all threads for a discourse is the Cartesian product of all the readings of the sequences of sentences. +(This is not intended to scale beyond very short discourses!) The method ``readings(filter=True)`` will only show +those threads which are consistent (taking into account any background assumptions). 
+""" +from __future__ import print_function + +import os +from abc import ABCMeta, abstractmethod +from operator import and_, add +from functools import reduce + +from six import add_metaclass + +from nltk.data import show_cfg +from nltk.tag import RegexpTagger +from nltk.parse import load_parser +from nltk.parse.malt import MaltParser +from nltk.sem.drt import resolve_anaphora, AnaphoraResolutionException +from nltk.sem.glue import DrtGlue +from nltk.sem.logic import Expression + +from nltk.inference.mace import MaceCommand +from nltk.inference.prover9 import Prover9Command + + +@add_metaclass(ABCMeta) +class ReadingCommand(object): + @abstractmethod + def parse_to_readings(self, sentence): + """ + :param sentence: the sentence to read + :type sentence: str + """ + + def process_thread(self, sentence_readings): + """ + This method should be used to handle dependencies between readings such + as resolving anaphora. + + :param sentence_readings: readings to process + :type sentence_readings: list(Expression) + :return: the list of readings after processing + :rtype: list(Expression) + """ + return sentence_readings + + @abstractmethod + def combine_readings(self, readings): + """ + :param readings: readings to combine + :type readings: list(Expression) + :return: one combined reading + :rtype: Expression + """ + + @abstractmethod + def to_fol(self, expression): + """ + Convert this expression into a First-Order Logic expression. + + :param expression: an expression + :type expression: Expression + :return: a FOL version of the input expression + :rtype: Expression + """ + + +class CfgReadingCommand(ReadingCommand): + def __init__(self, gramfile=None): + """ + :param gramfile: name of file where grammar can be loaded + :type gramfile: str + """ + self._gramfile = ( + gramfile if gramfile else 'grammars/book_grammars/discourse.fcfg' + ) + self._parser = load_parser(self._gramfile) + + def parse_to_readings(self, sentence): + """:see: ReadingCommand.parse_to_readings()""" + from nltk.sem import root_semrep + + tokens = sentence.split() + trees = self._parser.parse(tokens) + return [root_semrep(tree) for tree in trees] + + def combine_readings(self, readings): + """:see: ReadingCommand.combine_readings()""" + return reduce(and_, readings) + + def to_fol(self, expression): + """:see: ReadingCommand.to_fol()""" + return expression + + +class DrtGlueReadingCommand(ReadingCommand): + def __init__(self, semtype_file=None, remove_duplicates=False, depparser=None): + """ + :param semtype_file: name of file where grammar can be loaded + :param remove_duplicates: should duplicates be removed? 
+ :param depparser: the dependency parser + """ + if semtype_file is None: + semtype_file = os.path.join( + 'grammars', 'sample_grammars', 'drt_glue.semtype' + ) + self._glue = DrtGlue( + semtype_file=semtype_file, + remove_duplicates=remove_duplicates, + depparser=depparser, + ) + + def parse_to_readings(self, sentence): + """:see: ReadingCommand.parse_to_readings()""" + return self._glue.parse_to_meaning(sentence) + + def process_thread(self, sentence_readings): + """:see: ReadingCommand.process_thread()""" + try: + return [self.combine_readings(sentence_readings)] + except AnaphoraResolutionException: + return [] + + def combine_readings(self, readings): + """:see: ReadingCommand.combine_readings()""" + thread_reading = reduce(add, readings) + return resolve_anaphora(thread_reading.simplify()) + + def to_fol(self, expression): + """:see: ReadingCommand.to_fol()""" + return expression.fol() + + +class DiscourseTester(object): + """ + Check properties of an ongoing discourse. + """ + + def __init__(self, input, reading_command=None, background=None): + """ + Initialize a ``DiscourseTester``. + + :param input: the discourse sentences + :type input: list of str + :param background: Formulas which express background assumptions + :type background: list(Expression) + """ + self._input = input + self._sentences = dict([('s%s' % i, sent) for i, sent in enumerate(input)]) + self._models = None + self._readings = {} + self._reading_command = ( + reading_command if reading_command else CfgReadingCommand() + ) + self._threads = {} + self._filtered_threads = {} + if background is not None: + from nltk.sem.logic import Expression + + for e in background: + assert isinstance(e, Expression) + self._background = background + else: + self._background = [] + + ############################### + # Sentences + ############################### + + def sentences(self): + """ + Display the list of sentences in the current discourse. + """ + for id in sorted(self._sentences): + print("%s: %s" % (id, self._sentences[id])) + + def add_sentence(self, sentence, informchk=False, consistchk=False): + """ + Add a sentence to the current discourse. + + Updates ``self._input`` and ``self._sentences``. + :param sentence: An input sentence + :type sentence: str + :param informchk: if ``True``, check that the result of adding the sentence is thread-informative. Updates ``self._readings``. + :param consistchk: if ``True``, check that the result of adding the sentence is thread-consistent. Updates ``self._readings``. + + """ + # check whether the new sentence is informative (i.e. not entailed by the previous discourse) + if informchk: + self.readings(verbose=False) + for tid in sorted(self._threads): + assumptions = [reading for (rid, reading) in self.expand_threads(tid)] + assumptions += self._background + for sent_reading in self._get_readings(sentence): + tp = Prover9Command(goal=sent_reading, assumptions=assumptions) + if tp.prove(): + print( + "Sentence '%s' under reading '%s':" + % (sentence, str(sent_reading)) + ) + print("Not informative relative to thread '%s'" % tid) + + self._input.append(sentence) + self._sentences = dict( + [('s%s' % i, sent) for i, sent in enumerate(self._input)] + ) + # check whether adding the new sentence to the discourse preserves consistency (i.e. a model can be found for the combined set of + # of assumptions + if consistchk: + self.readings(verbose=False) + self.models(show=False) + + def retract_sentence(self, sentence, verbose=True): + """ + Remove a sentence from the current discourse. 
+ + Updates ``self._input``, ``self._sentences`` and ``self._readings``. + :param sentence: An input sentence + :type sentence: str + :param verbose: If ``True``, report on the updated list of sentences. + """ + try: + self._input.remove(sentence) + except ValueError: + print( + "Retraction failed. The sentence '%s' is not part of the current discourse:" + % sentence + ) + self.sentences() + return None + self._sentences = dict( + [('s%s' % i, sent) for i, sent in enumerate(self._input)] + ) + self.readings(verbose=False) + if verbose: + print("Current sentences are ") + self.sentences() + + def grammar(self): + """ + Print out the grammar in use for parsing input sentences + """ + show_cfg(self._reading_command._gramfile) + + ############################### + # Readings and Threads + ############################### + + def _get_readings(self, sentence): + """ + Build a list of semantic readings for a sentence. + + :rtype: list(Expression) + """ + return self._reading_command.parse_to_readings(sentence) + + def _construct_readings(self): + """ + Use ``self._sentences`` to construct a value for ``self._readings``. + """ + # re-initialize self._readings in case we have retracted a sentence + self._readings = {} + for sid in sorted(self._sentences): + sentence = self._sentences[sid] + readings = self._get_readings(sentence) + self._readings[sid] = dict( + [ + ("%s-r%s" % (sid, rid), reading.simplify()) + for rid, reading in enumerate(sorted(readings, key=str)) + ] + ) + + def _construct_threads(self): + """ + Use ``self._readings`` to construct a value for ``self._threads`` + and use the model builder to construct a value for ``self._filtered_threads`` + """ + thread_list = [[]] + for sid in sorted(self._readings): + thread_list = self.multiply(thread_list, sorted(self._readings[sid])) + self._threads = dict( + [("d%s" % tid, thread) for tid, thread in enumerate(thread_list)] + ) + # re-initialize the filtered threads + self._filtered_threads = {} + # keep the same ids, but only include threads which get models + consistency_checked = self._check_consistency(self._threads) + for (tid, thread) in self._threads.items(): + if (tid, True) in consistency_checked: + self._filtered_threads[tid] = thread + + def _show_readings(self, sentence=None): + """ + Print out the readings for the discourse (or a single sentence). 
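+
+        The per-sentence output looks roughly like the example in the
+        module docstring above, e.g.::
+
+            s0 readings:
+
+            s0-r0: some x.(boxerdog(x) & walk(x))
+            s0-r1: some x.(boxer(x) & walk(x))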
+ """ + if sentence is not None: + print("The sentence '%s' has these readings:" % sentence) + for r in [str(reading) for reading in (self._get_readings(sentence))]: + print(" %s" % r) + else: + for sid in sorted(self._readings): + print() + print('%s readings:' % sid) + print() #'-' * 30 + for rid in sorted(self._readings[sid]): + lf = self._readings[sid][rid] + print("%s: %s" % (rid, lf.normalize())) + + def _show_threads(self, filter=False, show_thread_readings=False): + """ + Print out the value of ``self._threads`` or ``self._filtered_hreads`` + """ + threads = self._filtered_threads if filter else self._threads + for tid in sorted(threads): + if show_thread_readings: + readings = [ + self._readings[rid.split('-')[0]][rid] for rid in self._threads[tid] + ] + try: + thread_reading = ( + ": %s" + % self._reading_command.combine_readings(readings).normalize() + ) + except Exception as e: + thread_reading = ': INVALID: %s' % e.__class__.__name__ + else: + thread_reading = '' + + print("%s:" % tid, self._threads[tid], thread_reading) + + def readings( + self, + sentence=None, + threaded=False, + verbose=True, + filter=False, + show_thread_readings=False, + ): + """ + Construct and show the readings of the discourse (or of a single sentence). + + :param sentence: test just this sentence + :type sentence: str + :param threaded: if ``True``, print out each thread ID and the corresponding thread. + :param filter: if ``True``, only print out consistent thread IDs and threads. + """ + self._construct_readings() + self._construct_threads() + + # if we are filtering or showing thread readings, show threads + if filter or show_thread_readings: + threaded = True + + if verbose: + if not threaded: + self._show_readings(sentence=sentence) + else: + self._show_threads( + filter=filter, show_thread_readings=show_thread_readings + ) + + def expand_threads(self, thread_id, threads=None): + """ + Given a thread ID, find the list of ``logic.Expression`` objects corresponding to the reading IDs in that thread. 
+ + :param thread_id: thread ID + :type thread_id: str + :param threads: a mapping from thread IDs to lists of reading IDs + :type threads: dict + :return: A list of pairs ``(rid, reading)`` where reading is the ``logic.Expression`` associated with a reading ID + :rtype: list of tuple + """ + if threads is None: + threads = self._threads + return [ + (rid, self._readings[sid][rid]) + for rid in threads[thread_id] + for sid in rid.split('-')[:1] + ] + + ############################### + # Models and Background + ############################### + + def _check_consistency(self, threads, show=False, verbose=False): + results = [] + for tid in sorted(threads): + assumptions = [ + reading for (rid, reading) in self.expand_threads(tid, threads=threads) + ] + assumptions = list( + map( + self._reading_command.to_fol, + self._reading_command.process_thread(assumptions), + ) + ) + if assumptions: + assumptions += self._background + # if Mace4 finds a model, it always seems to find it quickly + mb = MaceCommand(None, assumptions, max_models=20) + modelfound = mb.build_model() + else: + modelfound = False + results.append((tid, modelfound)) + if show: + spacer(80) + print("Model for Discourse Thread %s" % tid) + spacer(80) + if verbose: + for a in assumptions: + print(a) + spacer(80) + if modelfound: + print(mb.model(format='cooked')) + else: + print("No model found!\n") + return results + + def models(self, thread_id=None, show=True, verbose=False): + """ + Call Mace4 to build a model for each current discourse thread. + + :param thread_id: thread ID + :type thread_id: str + :param show: If ``True``, display the model that has been found. + """ + self._construct_readings() + self._construct_threads() + threads = {thread_id: self._threads[thread_id]} if thread_id else self._threads + + for (tid, modelfound) in self._check_consistency( + threads, show=show, verbose=verbose + ): + idlist = [rid for rid in threads[tid]] + + if not modelfound: + print("Inconsistent discourse: %s %s:" % (tid, idlist)) + for rid, reading in self.expand_threads(tid): + print(" %s: %s" % (rid, reading.normalize())) + print() + else: + print("Consistent discourse: %s %s:" % (tid, idlist)) + for rid, reading in self.expand_threads(tid): + print(" %s: %s" % (rid, reading.normalize())) + print() + + def add_background(self, background, verbose=False): + """ + Add a list of background assumptions for reasoning about the discourse. + + When called, this method also updates the discourse model's set of readings and threads. + :param background: Formulas which contain background information + :type background: list(Expression) + """ + from nltk.sem.logic import Expression + + for (count, e) in enumerate(background): + assert isinstance(e, Expression) + if verbose: + print("Adding assumption %s to background" % count) + self._background.append(e) + + # update the state + self._construct_readings() + self._construct_threads() + + def background(self): + """ + Show the current background assumptions. + """ + for e in self._background: + print(str(e)) + + ############################### + # Misc + ############################### + + @staticmethod + def multiply(discourse, readings): + """ + Multiply every thread in ``discourse`` by every reading in ``readings``. 
+ + Given discourse = [['A'], ['B']], readings = ['a', 'b', 'c'] , returns + [['A', 'a'], ['A', 'b'], ['A', 'c'], ['B', 'a'], ['B', 'b'], ['B', 'c']] + + :param discourse: the current list of readings + :type discourse: list of lists + :param readings: an additional list of readings + :type readings: list(Expression) + :rtype: A list of lists + """ + result = [] + for sublist in discourse: + for r in readings: + new = [] + new += sublist + new.append(r) + result.append(new) + return result + + +# multiply = DiscourseTester.multiply +# L1 = [['A'], ['B']] +# L2 = ['a', 'b', 'c'] +# print multiply(L1,L2) + + +def load_fol(s): + """ + Temporarily duplicated from ``nltk.sem.util``. + Convert a file of first order formulas into a list of ``Expression`` objects. + + :param s: the contents of the file + :type s: str + :return: a list of parsed formulas. + :rtype: list(Expression) + """ + statements = [] + for linenum, line in enumerate(s.splitlines()): + line = line.strip() + if line.startswith('#') or line == '': + continue + try: + statements.append(Expression.fromstring(line)) + except Exception: + raise ValueError('Unable to parse line %s: %s' % (linenum, line)) + return statements + + +############################### +# Demo +############################### +def discourse_demo(reading_command=None): + """ + Illustrate the various methods of ``DiscourseTester`` + """ + dt = DiscourseTester( + ['A boxer walks', 'Every boxer chases a girl'], reading_command + ) + dt.models() + print() + # dt.grammar() + print() + dt.sentences() + print() + dt.readings() + print() + dt.readings(threaded=True) + print() + dt.models('d1') + dt.add_sentence('John is a boxer') + print() + dt.sentences() + print() + dt.readings(threaded=True) + print() + dt = DiscourseTester( + ['A student dances', 'Every student is a person'], reading_command + ) + print() + dt.add_sentence('No person dances', consistchk=True) + print() + dt.readings() + print() + dt.retract_sentence('No person dances', verbose=True) + print() + dt.models() + print() + dt.readings('A person dances') + print() + dt.add_sentence('A person dances', informchk=True) + dt = DiscourseTester( + ['Vincent is a boxer', 'Fido is a boxer', 'Vincent is married', 'Fido barks'], + reading_command, + ) + dt.readings(filter=True) + import nltk.data + + background_file = os.path.join('grammars', 'book_grammars', 'background.fol') + background = nltk.data.load(background_file) + + print() + dt.add_background(background, verbose=False) + dt.background() + print() + dt.readings(filter=True) + print() + dt.models() + + +def drt_discourse_demo(reading_command=None): + """ + Illustrate the various methods of ``DiscourseTester`` + """ + dt = DiscourseTester(['every dog chases a boy', 'he runs'], reading_command) + dt.models() + print() + dt.sentences() + print() + dt.readings() + print() + dt.readings(show_thread_readings=True) + print() + dt.readings(filter=True, show_thread_readings=True) + + +def spacer(num=30): + print('-' * num) + + +def demo(): + discourse_demo() + + tagger = RegexpTagger( + [ + ('^(chases|runs)$', 'VB'), + ('^(a)$', 'ex_quant'), + ('^(every)$', 'univ_quant'), + ('^(dog|boy)$', 'NN'), + ('^(he)$', 'PRP'), + ] + ) + depparser = MaltParser(tagger=tagger) + drt_discourse_demo( + DrtGlueReadingCommand(remove_duplicates=False, depparser=depparser) + ) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/inference/mace.py b/venv.bak/lib/python3.7/site-packages/nltk/inference/mace.py new file mode 100644 index 
0000000..7763b75 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/inference/mace.py @@ -0,0 +1,385 @@ +# Natural Language Toolkit: Interface to the Mace4 Model Builder +# +# Author: Dan Garrette +# Ewan Klein + +# URL: +# For license information, see LICENSE.TXT + +""" +A model builder that makes use of the external 'Mace4' package. +""" +from __future__ import print_function + +import os +import tempfile + +from nltk.sem.logic import is_indvar +from nltk.sem import Valuation, Expression + +from nltk.inference.api import ModelBuilder, BaseModelBuilderCommand +from nltk.inference.prover9 import Prover9CommandParent, Prover9Parent + + +class MaceCommand(Prover9CommandParent, BaseModelBuilderCommand): + """ + A ``MaceCommand`` specific to the ``Mace`` model builder. It contains + a print_assumptions() method that is used to print the list + of assumptions in multiple formats. + """ + + _interpformat_bin = None + + def __init__(self, goal=None, assumptions=None, max_models=500, model_builder=None): + """ + :param goal: Input expression to prove + :type goal: sem.Expression + :param assumptions: Input expressions to use as assumptions in + the proof. + :type assumptions: list(sem.Expression) + :param max_models: The maximum number of models that Mace will try before + simply returning false. (Use 0 for no maximum.) + :type max_models: int + """ + if model_builder is not None: + assert isinstance(model_builder, Mace) + else: + model_builder = Mace(max_models) + + BaseModelBuilderCommand.__init__(self, model_builder, goal, assumptions) + + @property + def valuation(mbc): + return mbc.model('valuation') + + def _convert2val(self, valuation_str): + """ + Transform the output file into an NLTK-style Valuation. + + :return: A model if one is generated; None otherwise. + :rtype: sem.Valuation + """ + valuation_standard_format = self._transform_output(valuation_str, 'standard') + + val = [] + for line in valuation_standard_format.splitlines(False): + l = line.strip() + + if l.startswith('interpretation'): + # find the number of entities in the model + num_entities = int(l[l.index('(') + 1 : l.index(',')].strip()) + + elif l.startswith('function') and l.find('_') == -1: + # replace the integer identifier with a corresponding alphabetic character + name = l[l.index('(') + 1 : l.index(',')].strip() + if is_indvar(name): + name = name.upper() + value = int(l[l.index('[') + 1 : l.index(']')].strip()) + val.append((name, MaceCommand._make_model_var(value))) + + elif l.startswith('relation'): + l = l[l.index('(') + 1 :] + if '(' in l: + # relation is not nullary + name = l[: l.index('(')].strip() + values = [ + int(v.strip()) + for v in l[l.index('[') + 1 : l.index(']')].split(',') + ] + val.append( + (name, MaceCommand._make_relation_set(num_entities, values)) + ) + else: + # relation is nullary + name = l[: l.index(',')].strip() + value = int(l[l.index('[') + 1 : l.index(']')].strip()) + val.append((name, value == 1)) + + return Valuation(val) + + @staticmethod + def _make_relation_set(num_entities, values): + """ + Convert a Mace4-style relation table into a dictionary. + + :param num_entities: the number of entities in the model; determines the row length in the table. + :type num_entities: int + :param values: a list of 1's and 0's that represent whether a relation holds in a Mace4 model. 
+ :type values: list of int + """ + r = set() + for position in [pos for (pos, v) in enumerate(values) if v == 1]: + r.add( + tuple(MaceCommand._make_relation_tuple(position, values, num_entities)) + ) + return r + + @staticmethod + def _make_relation_tuple(position, values, num_entities): + if len(values) == 1: + return [] + else: + sublist_size = len(values) // num_entities + sublist_start = position // sublist_size + sublist_position = int(position % sublist_size) + + sublist = values[ + sublist_start * sublist_size : (sublist_start + 1) * sublist_size + ] + return [ + MaceCommand._make_model_var(sublist_start) + ] + MaceCommand._make_relation_tuple( + sublist_position, sublist, num_entities + ) + + @staticmethod + def _make_model_var(value): + """ + Pick an alphabetic character as identifier for an entity in the model. + + :param value: where to index into the list of characters + :type value: int + """ + letter = [ + 'a', + 'b', + 'c', + 'd', + 'e', + 'f', + 'g', + 'h', + 'i', + 'j', + 'k', + 'l', + 'm', + 'n', + 'o', + 'p', + 'q', + 'r', + 's', + 't', + 'u', + 'v', + 'w', + 'x', + 'y', + 'z', + ][value] + num = value // 26 + return letter + str(num) if num > 0 else letter + + def _decorate_model(self, valuation_str, format): + """ + Print out a Mace4 model using any Mace4 ``interpformat`` format. + See http://www.cs.unm.edu/~mccune/mace4/manual/ for details. + + :param valuation_str: str with the model builder's output + :param format: str indicating the format for displaying + models. Defaults to 'standard' format. + :return: str + """ + if not format: + return valuation_str + elif format == 'valuation': + return self._convert2val(valuation_str) + else: + return self._transform_output(valuation_str, format) + + def _transform_output(self, valuation_str, format): + """ + Transform the output file into any Mace4 ``interpformat`` format. + + :param format: Output format for displaying models. + :type format: str + """ + if format in [ + 'standard', + 'standard2', + 'portable', + 'tabular', + 'raw', + 'cooked', + 'xml', + 'tex', + ]: + return self._call_interpformat(valuation_str, [format])[0] + else: + raise LookupError("The specified format does not exist") + + def _call_interpformat(self, input_str, args=[], verbose=False): + """ + Call the ``interpformat`` binary with the given input. + + :param input_str: A string whose contents are used as stdin. + :param args: A list of command-line arguments. + :return: A tuple (stdout, returncode) + :see: ``config_prover9`` + """ + if self._interpformat_bin is None: + self._interpformat_bin = self._modelbuilder._find_binary( + 'interpformat', verbose + ) + + return self._modelbuilder._call( + input_str, self._interpformat_bin, args, verbose + ) + + +class Mace(Prover9Parent, ModelBuilder): + _mace4_bin = None + + def __init__(self, end_size=500): + self._end_size = end_size + """The maximum model size that Mace will try before + simply returning false. (Use -1 for no maximum.)""" + + def _build_model(self, goal=None, assumptions=None, verbose=False): + """ + Use Mace4 to build a first order model. + + :return: ``True`` if a model was found (i.e. Mace returns value of 0), + else ``False`` + """ + if not assumptions: + assumptions = [] + + stdout, returncode = self._call_mace4( + self.prover9_input(goal, assumptions), verbose=verbose + ) + return (returncode == 0, stdout) + + def _call_mace4(self, input_str, args=[], verbose=False): + """ + Call the ``mace4`` binary with the given input. 
+ + :param input_str: A string whose contents are used as stdin. + :param args: A list of command-line arguments. + :return: A tuple (stdout, returncode) + :see: ``config_prover9`` + """ + if self._mace4_bin is None: + self._mace4_bin = self._find_binary('mace4', verbose) + + updated_input_str = '' + if self._end_size > 0: + updated_input_str += 'assign(end_size, %d).\n\n' % self._end_size + updated_input_str += input_str + + return self._call(updated_input_str, self._mace4_bin, args, verbose) + + +def spacer(num=30): + print('-' * num) + + +def decode_result(found): + """ + Decode the result of model_found() + + :param found: The output of model_found() + :type found: bool + """ + return {True: 'Countermodel found', False: 'No countermodel found', None: 'None'}[ + found + ] + + +def test_model_found(arguments): + """ + Try some proofs and exhibit the results. + """ + for (goal, assumptions) in arguments: + g = Expression.fromstring(goal) + alist = [lp.parse(a) for a in assumptions] + m = MaceCommand(g, assumptions=alist, max_models=50) + found = m.build_model() + for a in alist: + print(' %s' % a) + print('|- %s: %s\n' % (g, decode_result(found))) + + +def test_build_model(arguments): + """ + Try to build a ``nltk.sem.Valuation``. + """ + g = Expression.fromstring('all x.man(x)') + alist = [ + Expression.fromstring(a) + for a in [ + 'man(John)', + 'man(Socrates)', + 'man(Bill)', + 'some x.(-(x = John) & man(x) & sees(John,x))', + 'some x.(-(x = Bill) & man(x))', + 'all x.some y.(man(x) -> gives(Socrates,x,y))', + ] + ] + + m = MaceCommand(g, assumptions=alist) + m.build_model() + spacer() + print("Assumptions and Goal") + spacer() + for a in alist: + print(' %s' % a) + print('|- %s: %s\n' % (g, decode_result(m.build_model()))) + spacer() + # print m.model('standard') + # print m.model('cooked') + print("Valuation") + spacer() + print(m.valuation, '\n') + + +def test_transform_output(argument_pair): + """ + Transform the model into various Mace4 ``interpformat`` formats. + """ + g = Expression.fromstring(argument_pair[0]) + alist = [lp.parse(a) for a in argument_pair[1]] + m = MaceCommand(g, assumptions=alist) + m.build_model() + for a in alist: + print(' %s' % a) + print('|- %s: %s\n' % (g, m.build_model())) + for format in ['standard', 'portable', 'xml', 'cooked']: + spacer() + print("Using '%s' format" % format) + spacer() + print(m.model(format=format)) + + +def test_make_relation_set(): + print( + MaceCommand._make_relation_set(num_entities=3, values=[1, 0, 1]) + == set([('c',), ('a',)]) + ) + print( + MaceCommand._make_relation_set( + num_entities=3, values=[0, 0, 0, 0, 0, 0, 1, 0, 0] + ) + == set([('c', 'a')]) + ) + print( + MaceCommand._make_relation_set(num_entities=2, values=[0, 0, 1, 0, 0, 0, 1, 0]) + == set([('a', 'b', 'a'), ('b', 'b', 'a')]) + ) + + +arguments = [ + ('mortal(Socrates)', ['all x.(man(x) -> mortal(x))', 'man(Socrates)']), + ('(not mortal(Socrates))', ['all x.(man(x) -> mortal(x))', 'man(Socrates)']), +] + + +def demo(): + test_model_found(arguments) + test_build_model(arguments) + test_transform_output(arguments[1]) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/inference/nonmonotonic.py b/venv.bak/lib/python3.7/site-packages/nltk/inference/nonmonotonic.py new file mode 100644 index 0000000..b9180f0 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/inference/nonmonotonic.py @@ -0,0 +1,563 @@ +# Natural Language Toolkit: Nonmonotonic Reasoning +# +# Author: Daniel H. 
Garrette +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT + +""" +A module to perform nonmonotonic reasoning. The ideas and demonstrations in +this module are based on "Logical Foundations of Artificial Intelligence" by +Michael R. Genesereth and Nils J. Nilsson. +""" +from __future__ import print_function, unicode_literals + +from collections import defaultdict +from functools import reduce + +from nltk.inference.prover9 import Prover9, Prover9Command +from nltk.sem.logic import ( + VariableExpression, + EqualityExpression, + ApplicationExpression, + Expression, + AbstractVariableExpression, + AllExpression, + BooleanExpression, + NegatedExpression, + ExistsExpression, + Variable, + ImpExpression, + AndExpression, + unique_variable, + operator, +) + +from nltk.inference.api import Prover, ProverCommandDecorator +from nltk.compat import python_2_unicode_compatible + + +class ProverParseError(Exception): + pass + + +def get_domain(goal, assumptions): + if goal is None: + all_expressions = assumptions + else: + all_expressions = assumptions + [-goal] + return reduce(operator.or_, (a.constants() for a in all_expressions), set()) + + +class ClosedDomainProver(ProverCommandDecorator): + """ + This is a prover decorator that adds domain closure assumptions before + proving. + """ + + def assumptions(self): + assumptions = [a for a in self._command.assumptions()] + goal = self._command.goal() + domain = get_domain(goal, assumptions) + return [self.replace_quants(ex, domain) for ex in assumptions] + + def goal(self): + goal = self._command.goal() + domain = get_domain(goal, self._command.assumptions()) + return self.replace_quants(goal, domain) + + def replace_quants(self, ex, domain): + """ + Apply the closed domain assumption to the expression + - Domain = union([e.free()|e.constants() for e in all_expressions]) + - translate "exists x.P" to "(z=d1 | z=d2 | ... ) & P.replace(x,z)" OR + "P.replace(x, d1) | P.replace(x, d2) | ..." + - translate "all x.P" to "P.replace(x, d1) & P.replace(x, d2) & ..." + :param ex: ``Expression`` + :param domain: set of {Variable}s + :return: ``Expression`` + """ + if isinstance(ex, AllExpression): + conjuncts = [ + ex.term.replace(ex.variable, VariableExpression(d)) for d in domain + ] + conjuncts = [self.replace_quants(c, domain) for c in conjuncts] + return reduce(lambda x, y: x & y, conjuncts) + elif isinstance(ex, BooleanExpression): + return ex.__class__( + self.replace_quants(ex.first, domain), + self.replace_quants(ex.second, domain), + ) + elif isinstance(ex, NegatedExpression): + return -self.replace_quants(ex.term, domain) + elif isinstance(ex, ExistsExpression): + disjuncts = [ + ex.term.replace(ex.variable, VariableExpression(d)) for d in domain + ] + disjuncts = [self.replace_quants(d, domain) for d in disjuncts] + return reduce(lambda x, y: x | y, disjuncts) + else: + return ex + + +class UniqueNamesProver(ProverCommandDecorator): + """ + This is a prover decorator that adds unique names assumptions before + proving. 
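+
+    For example (a sketch along the lines of ``unique_names_demo()`` below,
+    assuming the Prover9 binary is available)::
+
+        p1 = Expression.fromstring('man(Socrates)')
+        p2 = Expression.fromstring('man(Bill)')
+        c = Expression.fromstring('exists x.exists y.(x != y)')
+        prover = Prover9Command(c, [p1, p2])
+        prover.prove()                      # False: two entities not forced
+        UniqueNamesProver(prover).prove()   # True: 'Socrates != Bill' added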
+ """ + + def assumptions(self): + """ + - Domain = union([e.free()|e.constants() for e in all_expressions]) + - if "d1 = d2" cannot be proven from the premises, then add "d1 != d2" + """ + assumptions = self._command.assumptions() + + domain = list(get_domain(self._command.goal(), assumptions)) + + # build a dictionary of obvious equalities + eq_sets = SetHolder() + for a in assumptions: + if isinstance(a, EqualityExpression): + av = a.first.variable + bv = a.second.variable + # put 'a' and 'b' in the same set + eq_sets[av].add(bv) + + new_assumptions = [] + for i, a in enumerate(domain): + for b in domain[i + 1 :]: + # if a and b are not already in the same equality set + if b not in eq_sets[a]: + newEqEx = EqualityExpression( + VariableExpression(a), VariableExpression(b) + ) + if Prover9().prove(newEqEx, assumptions): + # we can prove that the names are the same entity. + # remember that they are equal so we don't re-check. + eq_sets[a].add(b) + else: + # we can't prove it, so assume unique names + new_assumptions.append(-newEqEx) + + return assumptions + new_assumptions + + +class SetHolder(list): + """ + A list of sets of Variables. + """ + + def __getitem__(self, item): + """ + :param item: ``Variable`` + :return: the set containing 'item' + """ + assert isinstance(item, Variable) + for s in self: + if item in s: + return s + # item is not found in any existing set. so create a new set + new = set([item]) + self.append(new) + return new + + +class ClosedWorldProver(ProverCommandDecorator): + """ + This is a prover decorator that completes predicates before proving. + + If the assumptions contain "P(A)", then "all x.(P(x) -> (x=A))" is the completion of "P". + If the assumptions contain "all x.(ostrich(x) -> bird(x))", then "all x.(bird(x) -> ostrich(x))" is the completion of "bird". + If the assumptions don't contain anything that are "P", then "all x.-P(x)" is the completion of "P". 
+ + walk(Socrates) + Socrates != Bill + + all x.(walk(x) -> (x=Socrates)) + ---------------- + -walk(Bill) + + see(Socrates, John) + see(John, Mary) + Socrates != John + John != Mary + + all x.all y.(see(x,y) -> ((x=Socrates & y=John) | (x=John & y=Mary))) + ---------------- + -see(Socrates, Mary) + + all x.(ostrich(x) -> bird(x)) + bird(Tweety) + -ostrich(Sam) + Sam != Tweety + + all x.(bird(x) -> (ostrich(x) | x=Tweety)) + + all x.-ostrich(x) + ------------------- + -bird(Sam) + """ + + def assumptions(self): + assumptions = self._command.assumptions() + + predicates = self._make_predicate_dict(assumptions) + + new_assumptions = [] + for p in predicates: + predHolder = predicates[p] + new_sig = self._make_unique_signature(predHolder) + new_sig_exs = [VariableExpression(v) for v in new_sig] + + disjuncts = [] + + # Turn the signatures into disjuncts + for sig in predHolder.signatures: + equality_exs = [] + for v1, v2 in zip(new_sig_exs, sig): + equality_exs.append(EqualityExpression(v1, v2)) + disjuncts.append(reduce(lambda x, y: x & y, equality_exs)) + + # Turn the properties into disjuncts + for prop in predHolder.properties: + # replace variables from the signature with new sig variables + bindings = {} + for v1, v2 in zip(new_sig_exs, prop[0]): + bindings[v2] = v1 + disjuncts.append(prop[1].substitute_bindings(bindings)) + + # make the assumption + if disjuncts: + # disjuncts exist, so make an implication + antecedent = self._make_antecedent(p, new_sig) + consequent = reduce(lambda x, y: x | y, disjuncts) + accum = ImpExpression(antecedent, consequent) + else: + # nothing has property 'p' + accum = NegatedExpression(self._make_antecedent(p, new_sig)) + + # quantify the implication + for new_sig_var in new_sig[::-1]: + accum = AllExpression(new_sig_var, accum) + new_assumptions.append(accum) + + return assumptions + new_assumptions + + def _make_unique_signature(self, predHolder): + """ + This method figures out how many arguments the predicate takes and + returns a tuple containing that number of unique variables. + """ + return tuple(unique_variable() for i in range(predHolder.signature_len)) + + def _make_antecedent(self, predicate, signature): + """ + Return an application expression with 'predicate' as the predicate + and 'signature' as the list of arguments. + """ + antecedent = predicate + for v in signature: + antecedent = antecedent(VariableExpression(v)) + return antecedent + + def _make_predicate_dict(self, assumptions): + """ + Create a dictionary of predicates from the assumptions. 
+ + :param assumptions: a list of ``Expression``s + :return: dict mapping ``AbstractVariableExpression`` to ``PredHolder`` + """ + predicates = defaultdict(PredHolder) + for a in assumptions: + self._map_predicates(a, predicates) + return predicates + + def _map_predicates(self, expression, predDict): + if isinstance(expression, ApplicationExpression): + func, args = expression.uncurry() + if isinstance(func, AbstractVariableExpression): + predDict[func].append_sig(tuple(args)) + elif isinstance(expression, AndExpression): + self._map_predicates(expression.first, predDict) + self._map_predicates(expression.second, predDict) + elif isinstance(expression, AllExpression): + # collect all the universally quantified variables + sig = [expression.variable] + term = expression.term + while isinstance(term, AllExpression): + sig.append(term.variable) + term = term.term + if isinstance(term, ImpExpression): + if isinstance(term.first, ApplicationExpression) and isinstance( + term.second, ApplicationExpression + ): + func1, args1 = term.first.uncurry() + func2, args2 = term.second.uncurry() + if ( + isinstance(func1, AbstractVariableExpression) + and isinstance(func2, AbstractVariableExpression) + and sig == [v.variable for v in args1] + and sig == [v.variable for v in args2] + ): + predDict[func2].append_prop((tuple(sig), term.first)) + predDict[func1].validate_sig_len(sig) + + +@python_2_unicode_compatible +class PredHolder(object): + """ + This class will be used by a dictionary that will store information + about predicates to be used by the ``ClosedWorldProver``. + + The 'signatures' property is a list of tuples defining signatures for + which the predicate is true. For instance, 'see(john, mary)' would be + result in the signature '(john,mary)' for 'see'. + + The second element of the pair is a list of pairs such that the first + element of the pair is a tuple of variables and the second element is an + expression of those variables that makes the predicate true. For instance, + 'all x.all y.(see(x,y) -> know(x,y))' would result in "((x,y),('see(x,y)'))" + for 'know'. 
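+
+    A hypothetical sketch of the resulting contents (shapes only, not a
+    literal doctest): for the single assumption ``see(john, mary)``, the
+    ``PredHolder`` built for ``see`` would look like::
+
+        holder.signatures     # [(john, mary)]
+        holder.properties     # []
+        holder.signature_len  # 2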
+ """ + + def __init__(self): + self.signatures = [] + self.properties = [] + self.signature_len = None + + def append_sig(self, new_sig): + self.validate_sig_len(new_sig) + self.signatures.append(new_sig) + + def append_prop(self, new_prop): + self.validate_sig_len(new_prop[0]) + self.properties.append(new_prop) + + def validate_sig_len(self, new_sig): + if self.signature_len is None: + self.signature_len = len(new_sig) + elif self.signature_len != len(new_sig): + raise Exception("Signature lengths do not match") + + def __str__(self): + return '(%s,%s,%s)' % (self.signatures, self.properties, self.signature_len) + + def __repr__(self): + return "%s" % self + + +def closed_domain_demo(): + lexpr = Expression.fromstring + + p1 = lexpr(r'exists x.walk(x)') + p2 = lexpr(r'man(Socrates)') + c = lexpr(r'walk(Socrates)') + prover = Prover9Command(c, [p1, p2]) + print(prover.prove()) + cdp = ClosedDomainProver(prover) + print('assumptions:') + for a in cdp.assumptions(): + print(' ', a) + print('goal:', cdp.goal()) + print(cdp.prove()) + + p1 = lexpr(r'exists x.walk(x)') + p2 = lexpr(r'man(Socrates)') + p3 = lexpr(r'-walk(Bill)') + c = lexpr(r'walk(Socrates)') + prover = Prover9Command(c, [p1, p2, p3]) + print(prover.prove()) + cdp = ClosedDomainProver(prover) + print('assumptions:') + for a in cdp.assumptions(): + print(' ', a) + print('goal:', cdp.goal()) + print(cdp.prove()) + + p1 = lexpr(r'exists x.walk(x)') + p2 = lexpr(r'man(Socrates)') + p3 = lexpr(r'-walk(Bill)') + c = lexpr(r'walk(Socrates)') + prover = Prover9Command(c, [p1, p2, p3]) + print(prover.prove()) + cdp = ClosedDomainProver(prover) + print('assumptions:') + for a in cdp.assumptions(): + print(' ', a) + print('goal:', cdp.goal()) + print(cdp.prove()) + + p1 = lexpr(r'walk(Socrates)') + p2 = lexpr(r'walk(Bill)') + c = lexpr(r'all x.walk(x)') + prover = Prover9Command(c, [p1, p2]) + print(prover.prove()) + cdp = ClosedDomainProver(prover) + print('assumptions:') + for a in cdp.assumptions(): + print(' ', a) + print('goal:', cdp.goal()) + print(cdp.prove()) + + p1 = lexpr(r'girl(mary)') + p2 = lexpr(r'dog(rover)') + p3 = lexpr(r'all x.(girl(x) -> -dog(x))') + p4 = lexpr(r'all x.(dog(x) -> -girl(x))') + p5 = lexpr(r'chase(mary, rover)') + c = lexpr(r'exists y.(dog(y) & all x.(girl(x) -> chase(x,y)))') + prover = Prover9Command(c, [p1, p2, p3, p4, p5]) + print(prover.prove()) + cdp = ClosedDomainProver(prover) + print('assumptions:') + for a in cdp.assumptions(): + print(' ', a) + print('goal:', cdp.goal()) + print(cdp.prove()) + + +def unique_names_demo(): + lexpr = Expression.fromstring + + p1 = lexpr(r'man(Socrates)') + p2 = lexpr(r'man(Bill)') + c = lexpr(r'exists x.exists y.(x != y)') + prover = Prover9Command(c, [p1, p2]) + print(prover.prove()) + unp = UniqueNamesProver(prover) + print('assumptions:') + for a in unp.assumptions(): + print(' ', a) + print('goal:', unp.goal()) + print(unp.prove()) + + p1 = lexpr(r'all x.(walk(x) -> (x = Socrates))') + p2 = lexpr(r'Bill = William') + p3 = lexpr(r'Bill = Billy') + c = lexpr(r'-walk(William)') + prover = Prover9Command(c, [p1, p2, p3]) + print(prover.prove()) + unp = UniqueNamesProver(prover) + print('assumptions:') + for a in unp.assumptions(): + print(' ', a) + print('goal:', unp.goal()) + print(unp.prove()) + + +def closed_world_demo(): + lexpr = Expression.fromstring + + p1 = lexpr(r'walk(Socrates)') + p2 = lexpr(r'(Socrates != Bill)') + c = lexpr(r'-walk(Bill)') + prover = Prover9Command(c, [p1, p2]) + print(prover.prove()) + cwp = ClosedWorldProver(prover) + 
print('assumptions:') + for a in cwp.assumptions(): + print(' ', a) + print('goal:', cwp.goal()) + print(cwp.prove()) + + p1 = lexpr(r'see(Socrates, John)') + p2 = lexpr(r'see(John, Mary)') + p3 = lexpr(r'(Socrates != John)') + p4 = lexpr(r'(John != Mary)') + c = lexpr(r'-see(Socrates, Mary)') + prover = Prover9Command(c, [p1, p2, p3, p4]) + print(prover.prove()) + cwp = ClosedWorldProver(prover) + print('assumptions:') + for a in cwp.assumptions(): + print(' ', a) + print('goal:', cwp.goal()) + print(cwp.prove()) + + p1 = lexpr(r'all x.(ostrich(x) -> bird(x))') + p2 = lexpr(r'bird(Tweety)') + p3 = lexpr(r'-ostrich(Sam)') + p4 = lexpr(r'Sam != Tweety') + c = lexpr(r'-bird(Sam)') + prover = Prover9Command(c, [p1, p2, p3, p4]) + print(prover.prove()) + cwp = ClosedWorldProver(prover) + print('assumptions:') + for a in cwp.assumptions(): + print(' ', a) + print('goal:', cwp.goal()) + print(cwp.prove()) + + +def combination_prover_demo(): + lexpr = Expression.fromstring + + p1 = lexpr(r'see(Socrates, John)') + p2 = lexpr(r'see(John, Mary)') + c = lexpr(r'-see(Socrates, Mary)') + prover = Prover9Command(c, [p1, p2]) + print(prover.prove()) + command = ClosedDomainProver(UniqueNamesProver(ClosedWorldProver(prover))) + for a in command.assumptions(): + print(a) + print(command.prove()) + + +def default_reasoning_demo(): + lexpr = Expression.fromstring + + premises = [] + + # define taxonomy + premises.append(lexpr(r'all x.(elephant(x) -> animal(x))')) + premises.append(lexpr(r'all x.(bird(x) -> animal(x))')) + premises.append(lexpr(r'all x.(dove(x) -> bird(x))')) + premises.append(lexpr(r'all x.(ostrich(x) -> bird(x))')) + premises.append(lexpr(r'all x.(flying_ostrich(x) -> ostrich(x))')) + + # default properties + premises.append( + lexpr(r'all x.((animal(x) & -Ab1(x)) -> -fly(x))') + ) # normal animals don't fly + premises.append( + lexpr(r'all x.((bird(x) & -Ab2(x)) -> fly(x))') + ) # normal birds fly + premises.append( + lexpr(r'all x.((ostrich(x) & -Ab3(x)) -> -fly(x))') + ) # normal ostriches don't fly + + # specify abnormal entities + premises.append(lexpr(r'all x.(bird(x) -> Ab1(x))')) # flight + premises.append(lexpr(r'all x.(ostrich(x) -> Ab2(x))')) # non-flying bird + premises.append(lexpr(r'all x.(flying_ostrich(x) -> Ab3(x))')) # flying ostrich + + # define entities + premises.append(lexpr(r'elephant(E)')) + premises.append(lexpr(r'dove(D)')) + premises.append(lexpr(r'ostrich(O)')) + + # print the assumptions + prover = Prover9Command(None, premises) + command = UniqueNamesProver(ClosedWorldProver(prover)) + for a in command.assumptions(): + print(a) + + print_proof('-fly(E)', premises) + print_proof('fly(D)', premises) + print_proof('-fly(O)', premises) + + +def print_proof(goal, premises): + lexpr = Expression.fromstring + prover = Prover9Command(lexpr(goal), premises) + command = UniqueNamesProver(ClosedWorldProver(prover)) + print(goal, prover.prove(), command.prove()) + + +def demo(): + closed_domain_demo() + unique_names_demo() + closed_world_demo() + combination_prover_demo() + default_reasoning_demo() + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/inference/prover9.py b/venv.bak/lib/python3.7/site-packages/nltk/inference/prover9.py new file mode 100644 index 0000000..3ac69fa --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/inference/prover9.py @@ -0,0 +1,509 @@ +# Natural Language Toolkit: Interface to the Prover9 Theorem Prover +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Dan Garrette +# Ewan Klein +# +# 
URL: +# For license information, see LICENSE.TXT +""" +A theorem prover that makes use of the external 'Prover9' package. +""" +from __future__ import print_function + +import os +import subprocess + +import nltk +from nltk.sem.logic import ( + Expression, + ExistsExpression, + AllExpression, + NegatedExpression, + AndExpression, + IffExpression, + OrExpression, + EqualityExpression, + ImpExpression, +) +from nltk.inference.api import BaseProverCommand, Prover + +# +# Following is not yet used. Return code for 2 actually realized as 512. +# +p9_return_codes = { + 0: True, + 1: "(FATAL)", # A fatal error occurred (user's syntax error). + 2: False, # (SOS_EMPTY) Prover9 ran out of things to do + # (sos list exhausted). + 3: "(MAX_MEGS)", # The max_megs (memory limit) parameter was exceeded. + 4: "(MAX_SECONDS)", # The max_seconds parameter was exceeded. + 5: "(MAX_GIVEN)", # The max_given parameter was exceeded. + 6: "(MAX_KEPT)", # The max_kept parameter was exceeded. + 7: "(ACTION)", # A Prover9 action terminated the search. + 101: "(SIGSEGV)", # Prover9 crashed, most probably due to a bug. +} + + +class Prover9CommandParent(object): + """ + A common base class used by both ``Prover9Command`` and ``MaceCommand``, + which is responsible for maintaining a goal and a set of assumptions, + and generating prover9-style input files from them. + """ + + def print_assumptions(self, output_format='nltk'): + """ + Print the list of the current assumptions. + """ + if output_format.lower() == 'nltk': + for a in self.assumptions(): + print(a) + elif output_format.lower() == 'prover9': + for a in convert_to_prover9(self.assumptions()): + print(a) + else: + raise NameError( + "Unrecognized value for 'output_format': %s" % output_format + ) + + +class Prover9Command(Prover9CommandParent, BaseProverCommand): + """ + A ``ProverCommand`` specific to the ``Prover9`` prover. It contains + the a print_assumptions() method that is used to print the list + of assumptions in multiple formats. + """ + + def __init__(self, goal=None, assumptions=None, timeout=60, prover=None): + """ + :param goal: Input expression to prove + :type goal: sem.Expression + :param assumptions: Input expressions to use as assumptions in + the proof. + :type assumptions: list(sem.Expression) + :param timeout: number of seconds before timeout; set to 0 for + no timeout. + :type timeout: int + :param prover: a prover. If not set, one will be created. + :type prover: Prover9 + """ + if not assumptions: + assumptions = [] + + if prover is not None: + assert isinstance(prover, Prover9) + else: + prover = Prover9(timeout) + + BaseProverCommand.__init__(self, prover, goal, assumptions) + + def decorate_proof(self, proof_string, simplify=True): + """ + :see BaseProverCommand.decorate_proof() + """ + if simplify: + return self._prover._call_prooftrans(proof_string, ['striplabels'])[ + 0 + ].rstrip() + else: + return proof_string.rstrip() + + +class Prover9Parent(object): + """ + A common class extended by both ``Prover9`` and ``Mace ``. + It contains the functionality required to convert NLTK-style + expressions into Prover9-style expressions. 
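+
+    As an illustration (a sketch, not a literal doctest), ``prover9_input``
+    renders a goal and its assumptions in Prover9's input syntax, roughly::
+
+        formulas(assumptions).
+            man(Socrates).
+            all x (man(x) -> mortal(x)).
+        end_of_list.
+
+        formulas(goals).
+            mortal(Socrates).
+        end_of_list.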
+ """ + + _binary_location = None + + def config_prover9(self, binary_location, verbose=False): + if binary_location is None: + self._binary_location = None + self._prover9_bin = None + else: + name = 'prover9' + self._prover9_bin = nltk.internals.find_binary( + name, + path_to_bin=binary_location, + env_vars=['PROVER9'], + url='http://www.cs.unm.edu/~mccune/prover9/', + binary_names=[name, name + '.exe'], + verbose=verbose, + ) + self._binary_location = self._prover9_bin.rsplit(os.path.sep, 1) + + def prover9_input(self, goal, assumptions): + """ + :return: The input string that should be provided to the + prover9 binary. This string is formed based on the goal, + assumptions, and timeout value of this object. + """ + s = '' + + if assumptions: + s += 'formulas(assumptions).\n' + for p9_assumption in convert_to_prover9(assumptions): + s += ' %s.\n' % p9_assumption + s += 'end_of_list.\n\n' + + if goal: + s += 'formulas(goals).\n' + s += ' %s.\n' % convert_to_prover9(goal) + s += 'end_of_list.\n\n' + + return s + + def binary_locations(self): + """ + A list of directories that should be searched for the prover9 + executables. This list is used by ``config_prover9`` when searching + for the prover9 executables. + """ + return [ + '/usr/local/bin/prover9', + '/usr/local/bin/prover9/bin', + '/usr/local/bin', + '/usr/bin', + '/usr/local/prover9', + '/usr/local/share/prover9', + ] + + def _find_binary(self, name, verbose=False): + binary_locations = self.binary_locations() + if self._binary_location is not None: + binary_locations += [self._binary_location] + return nltk.internals.find_binary( + name, + searchpath=binary_locations, + env_vars=['PROVER9'], + url='http://www.cs.unm.edu/~mccune/prover9/', + binary_names=[name, name + '.exe'], + verbose=verbose, + ) + + def _call(self, input_str, binary, args=[], verbose=False): + """ + Call the binary with the given input. + + :param input_str: A string whose contents are used as stdin. + :param binary: The location of the binary to call + :param args: A list of command-line arguments. + :return: A tuple (stdout, returncode) + :see: ``config_prover9`` + """ + if verbose: + print('Calling:', binary) + print('Args:', args) + print('Input:\n', input_str, '\n') + + # Call prover9 via a subprocess + cmd = [binary] + args + try: + input_str = input_str.encode("utf8") + except AttributeError: + pass + p = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=subprocess.PIPE + ) + (stdout, stderr) = p.communicate(input=input_str) + + if verbose: + print('Return code:', p.returncode) + if stdout: + print('stdout:\n', stdout, '\n') + if stderr: + print('stderr:\n', stderr, '\n') + + return (stdout.decode("utf-8"), p.returncode) + + +def convert_to_prover9(input): + """ + Convert a ``logic.Expression`` to Prover9 format. + """ + if isinstance(input, list): + result = [] + for s in input: + try: + result.append(_convert_to_prover9(s.simplify())) + except: + print('input %s cannot be converted to Prover9 input syntax' % input) + raise + return result + else: + try: + return _convert_to_prover9(input.simplify()) + except: + print('input %s cannot be converted to Prover9 input syntax' % input) + raise + + +def _convert_to_prover9(expression): + """ + Convert ``logic.Expression`` to Prover9 formatted string. 
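+
+    For example, ``all x.(man(x) -> mortal(x))`` is rendered as
+    ``all x (man(x) -> mortal(x))``, and ``-P(x)`` is rendered as ``-(P(x))``.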
+ """ + if isinstance(expression, ExistsExpression): + return ( + 'exists ' + + str(expression.variable) + + ' ' + + _convert_to_prover9(expression.term) + ) + elif isinstance(expression, AllExpression): + return ( + 'all ' + + str(expression.variable) + + ' ' + + _convert_to_prover9(expression.term) + ) + elif isinstance(expression, NegatedExpression): + return '-(' + _convert_to_prover9(expression.term) + ')' + elif isinstance(expression, AndExpression): + return ( + '(' + + _convert_to_prover9(expression.first) + + ' & ' + + _convert_to_prover9(expression.second) + + ')' + ) + elif isinstance(expression, OrExpression): + return ( + '(' + + _convert_to_prover9(expression.first) + + ' | ' + + _convert_to_prover9(expression.second) + + ')' + ) + elif isinstance(expression, ImpExpression): + return ( + '(' + + _convert_to_prover9(expression.first) + + ' -> ' + + _convert_to_prover9(expression.second) + + ')' + ) + elif isinstance(expression, IffExpression): + return ( + '(' + + _convert_to_prover9(expression.first) + + ' <-> ' + + _convert_to_prover9(expression.second) + + ')' + ) + elif isinstance(expression, EqualityExpression): + return ( + '(' + + _convert_to_prover9(expression.first) + + ' = ' + + _convert_to_prover9(expression.second) + + ')' + ) + else: + return str(expression) + + +class Prover9(Prover9Parent, Prover): + _prover9_bin = None + _prooftrans_bin = None + + def __init__(self, timeout=60): + self._timeout = timeout + """The timeout value for prover9. If a proof can not be found + in this amount of time, then prover9 will return false. + (Use 0 for no timeout.)""" + + def _prove(self, goal=None, assumptions=None, verbose=False): + """ + Use Prover9 to prove a theorem. + :return: A pair whose first element is a boolean indicating if the + proof was successful (i.e. returns value of 0) and whose second element + is the output of the prover. + """ + if not assumptions: + assumptions = [] + + stdout, returncode = self._call_prover9( + self.prover9_input(goal, assumptions), verbose=verbose + ) + return (returncode == 0, stdout) + + def prover9_input(self, goal, assumptions): + """ + :see: Prover9Parent.prover9_input + """ + s = 'clear(auto_denials).\n' # only one proof required + return s + Prover9Parent.prover9_input(self, goal, assumptions) + + def _call_prover9(self, input_str, args=[], verbose=False): + """ + Call the ``prover9`` binary with the given input. + + :param input_str: A string whose contents are used as stdin. + :param args: A list of command-line arguments. + :return: A tuple (stdout, returncode) + :see: ``config_prover9`` + """ + if self._prover9_bin is None: + self._prover9_bin = self._find_binary('prover9', verbose) + + updated_input_str = '' + if self._timeout > 0: + updated_input_str += 'assign(max_seconds, %d).\n\n' % self._timeout + updated_input_str += input_str + + stdout, returncode = self._call( + updated_input_str, self._prover9_bin, args, verbose + ) + + if returncode not in [0, 2]: + errormsgprefix = '%%ERROR:' + if errormsgprefix in stdout: + msgstart = stdout.index(errormsgprefix) + errormsg = stdout[msgstart:].strip() + else: + errormsg = None + if returncode in [3, 4, 5, 6]: + raise Prover9LimitExceededException(returncode, errormsg) + else: + raise Prover9FatalException(returncode, errormsg) + + return stdout, returncode + + def _call_prooftrans(self, input_str, args=[], verbose=False): + """ + Call the ``prooftrans`` binary with the given input. + + :param input_str: A string whose contents are used as stdin. 
+ :param args: A list of command-line arguments. + :return: A tuple (stdout, returncode) + :see: ``config_prover9`` + """ + if self._prooftrans_bin is None: + self._prooftrans_bin = self._find_binary('prooftrans', verbose) + + return self._call(input_str, self._prooftrans_bin, args, verbose) + + +class Prover9Exception(Exception): + def __init__(self, returncode, message): + msg = p9_return_codes[returncode] + if message: + msg += '\n%s' % message + Exception.__init__(self, msg) + + +class Prover9FatalException(Prover9Exception): + pass + + +class Prover9LimitExceededException(Prover9Exception): + pass + + +###################################################################### +# { Tests and Demos +###################################################################### + + +def test_config(): + + a = Expression.fromstring('(walk(j) & sing(j))') + g = Expression.fromstring('walk(j)') + p = Prover9Command(g, assumptions=[a]) + p._executable_path = None + p.prover9_search = [] + p.prove() + # config_prover9('/usr/local/bin') + print(p.prove()) + print(p.proof()) + + +def test_convert_to_prover9(expr): + """ + Test that parsing works OK. + """ + for t in expr: + e = Expression.fromstring(t) + print(convert_to_prover9(e)) + + +def test_prove(arguments): + """ + Try some proofs and exhibit the results. + """ + for (goal, assumptions) in arguments: + g = Expression.fromstring(goal) + alist = [Expression.fromstring(a) for a in assumptions] + p = Prover9Command(g, assumptions=alist).prove() + for a in alist: + print(' %s' % a) + print('|- %s: %s\n' % (g, p)) + + +arguments = [ + ('(man(x) <-> (not (not man(x))))', []), + ('(not (man(x) & (not man(x))))', []), + ('(man(x) | (not man(x)))', []), + ('(man(x) & (not man(x)))', []), + ('(man(x) -> man(x))', []), + ('(not (man(x) & (not man(x))))', []), + ('(man(x) | (not man(x)))', []), + ('(man(x) -> man(x))', []), + ('(man(x) <-> man(x))', []), + ('(not (man(x) <-> (not man(x))))', []), + ('mortal(Socrates)', ['all x.(man(x) -> mortal(x))', 'man(Socrates)']), + ('((all x.(man(x) -> walks(x)) & man(Socrates)) -> some y.walks(y))', []), + ('(all x.man(x) -> all x.man(x))', []), + ('some x.all y.sees(x,y)', []), + ( + 'some e3.(walk(e3) & subj(e3, mary))', + [ + 'some e1.(see(e1) & subj(e1, john) & some e2.(pred(e1, e2) & walk(e2) & subj(e2, mary)))' + ], + ), + ( + 'some x e1.(see(e1) & subj(e1, x) & some e2.(pred(e1, e2) & walk(e2) & subj(e2, mary)))', + [ + 'some e1.(see(e1) & subj(e1, john) & some e2.(pred(e1, e2) & walk(e2) & subj(e2, mary)))' + ], + ), +] + +expressions = [ + r'some x y.sees(x,y)', + r'some x.(man(x) & walks(x))', + r'\x.(man(x) & walks(x))', + r'\x y.sees(x,y)', + r'walks(john)', + r'\x.big(x, \y.mouse(y))', + r'(walks(x) & (runs(x) & (threes(x) & fours(x))))', + r'(walks(x) -> runs(x))', + r'some x.(PRO(x) & sees(John, x))', + r'some x.(man(x) & (not walks(x)))', + r'all x.(man(x) -> walks(x))', +] + + +def spacer(num=45): + print('-' * num) + + +def demo(): + print("Testing configuration") + spacer() + test_config() + print() + print("Testing conversion to Prover9 format") + spacer() + test_convert_to_prover9(expressions) + print() + print("Testing proofs") + spacer() + test_prove(arguments) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/inference/resolution.py b/venv.bak/lib/python3.7/site-packages/nltk/inference/resolution.py new file mode 100644 index 0000000..06761a9 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/inference/resolution.py @@ -0,0 +1,765 @@ +# 
Natural Language Toolkit: First-order Resolution-based Theorem Prover +# +# Author: Dan Garrette +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT + +""" +Module for a resolution-based First Order theorem prover. +""" +from __future__ import print_function, unicode_literals + +import operator +from collections import defaultdict +from functools import reduce + +from nltk.sem import skolemize +from nltk.sem.logic import ( + VariableExpression, + EqualityExpression, + ApplicationExpression, + Expression, + NegatedExpression, + Variable, + AndExpression, + unique_variable, + OrExpression, + is_indvar, + IndividualVariableExpression, + Expression, +) + +from nltk.inference.api import Prover, BaseProverCommand +from nltk.compat import python_2_unicode_compatible + + +class ProverParseError(Exception): + pass + + +class ResolutionProver(Prover): + ANSWER_KEY = 'ANSWER' + _assume_false = True + + def _prove(self, goal=None, assumptions=None, verbose=False): + """ + :param goal: Input expression to prove + :type goal: sem.Expression + :param assumptions: Input expressions to use as assumptions in the proof + :type assumptions: list(sem.Expression) + """ + if not assumptions: + assumptions = [] + + result = None + try: + clauses = [] + if goal: + clauses.extend(clausify(-goal)) + for a in assumptions: + clauses.extend(clausify(a)) + result, clauses = self._attempt_proof(clauses) + if verbose: + print(ResolutionProverCommand._decorate_clauses(clauses)) + except RuntimeError as e: + if self._assume_false and str(e).startswith( + 'maximum recursion depth exceeded' + ): + result = False + clauses = [] + else: + if verbose: + print(e) + else: + raise e + return (result, clauses) + + def _attempt_proof(self, clauses): + # map indices to lists of indices, to store attempted unifications + tried = defaultdict(list) + + i = 0 + while i < len(clauses): + if not clauses[i].is_tautology(): + # since we try clauses in order, we should start after the last + # index tried + if tried[i]: + j = tried[i][-1] + 1 + else: + j = i + 1 # nothing tried yet for 'i', so start with the next + + while j < len(clauses): + # don't: 1) unify a clause with itself, + # 2) use tautologies + if i != j and j and not clauses[j].is_tautology(): + tried[i].append(j) + newclauses = clauses[i].unify(clauses[j]) + if newclauses: + for newclause in newclauses: + newclause._parents = (i + 1, j + 1) + clauses.append(newclause) + if not len(newclause): # if there's an empty clause + return (True, clauses) + i = -1 # since we added a new clause, restart from the top + break + j += 1 + i += 1 + return (False, clauses) + + +class ResolutionProverCommand(BaseProverCommand): + def __init__(self, goal=None, assumptions=None, prover=None): + """ + :param goal: Input expression to prove + :type goal: sem.Expression + :param assumptions: Input expressions to use as assumptions in + the proof. + :type assumptions: list(sem.Expression) + """ + if prover is not None: + assert isinstance(prover, ResolutionProver) + else: + prover = ResolutionProver() + + BaseProverCommand.__init__(self, prover, goal, assumptions) + self._clauses = None + + def prove(self, verbose=False): + """ + Perform the actual proof. Store the result to prevent unnecessary + re-proving. 
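+
+        A minimal sketch, mirroring the Socrates example in this module's demo::
+
+            >>> from nltk.sem.logic import Expression
+            >>> p1 = Expression.fromstring('all x.(man(x) -> mortal(x))')
+            >>> p2 = Expression.fromstring('man(Socrates)')
+            >>> c = Expression.fromstring('mortal(Socrates)')
+            >>> ResolutionProverCommand(c, [p1, p2]).prove()
+            True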
+ """ + if self._result is None: + self._result, clauses = self._prover._prove( + self.goal(), self.assumptions(), verbose + ) + self._clauses = clauses + self._proof = ResolutionProverCommand._decorate_clauses(clauses) + return self._result + + def find_answers(self, verbose=False): + self.prove(verbose) + + answers = set() + answer_ex = VariableExpression(Variable(ResolutionProver.ANSWER_KEY)) + for clause in self._clauses: + for term in clause: + if ( + isinstance(term, ApplicationExpression) + and term.function == answer_ex + and not isinstance(term.argument, IndividualVariableExpression) + ): + answers.add(term.argument) + return answers + + @staticmethod + def _decorate_clauses(clauses): + """ + Decorate the proof output. + """ + out = '' + max_clause_len = max([len(str(clause)) for clause in clauses]) + max_seq_len = len(str(len(clauses))) + for i in range(len(clauses)): + parents = 'A' + taut = '' + if clauses[i].is_tautology(): + taut = 'Tautology' + if clauses[i]._parents: + parents = str(clauses[i]._parents) + parents = ' ' * (max_clause_len - len(str(clauses[i])) + 1) + parents + seq = ' ' * (max_seq_len - len(str(i + 1))) + str(i + 1) + out += '[%s] %s %s %s\n' % (seq, clauses[i], parents, taut) + return out + + +@python_2_unicode_compatible +class Clause(list): + def __init__(self, data): + list.__init__(self, data) + self._is_tautology = None + self._parents = None + + def unify(self, other, bindings=None, used=None, skipped=None, debug=False): + """ + Attempt to unify this Clause with the other, returning a list of + resulting, unified, Clauses. + + :param other: ``Clause`` with which to unify + :param bindings: ``BindingDict`` containing bindings that should be used + during the unification + :param used: tuple of two lists of atoms. The first lists the + atoms from 'self' that were successfully unified with atoms from + 'other'. The second lists the atoms from 'other' that were successfully + unified with atoms from 'self'. + :param skipped: tuple of two ``Clause`` objects. The first is a list of all + the atoms from the 'self' Clause that have not been unified with + anything on the path. The second is same thing for the 'other' Clause. + :param debug: bool indicating whether debug statements should print + :return: list containing all the resulting ``Clause`` objects that could be + obtained by unification + """ + if bindings is None: + bindings = BindingDict() + if used is None: + used = ([], []) + if skipped is None: + skipped = ([], []) + if isinstance(debug, bool): + debug = DebugObject(debug) + + newclauses = _iterate_first( + self, other, bindings, used, skipped, _complete_unify_path, debug + ) + + # remove subsumed clauses. make a list of all indices of subsumed + # clauses, and then remove them from the list + subsumed = [] + for i, c1 in enumerate(newclauses): + if i not in subsumed: + for j, c2 in enumerate(newclauses): + if i != j and j not in subsumed and c1.subsumes(c2): + subsumed.append(j) + result = [] + for i in range(len(newclauses)): + if i not in subsumed: + result.append(newclauses[i]) + + return result + + def isSubsetOf(self, other): + """ + Return True iff every term in 'self' is a term in 'other'. + + :param other: ``Clause`` + :return: bool + """ + for a in self: + if a not in other: + return False + return True + + def subsumes(self, other): + """ + Return True iff 'self' subsumes 'other', this is, if there is a + substitution such that every term in 'self' can be unified with a term + in 'other'. 
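+        For example, the clause {P(x)} subsumes {P(john), Q(john)}, since
+        binding 'x' to 'john' unifies P(x) with P(john).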
+ + :param other: ``Clause`` + :return: bool + """ + negatedother = [] + for atom in other: + if isinstance(atom, NegatedExpression): + negatedother.append(atom.term) + else: + negatedother.append(-atom) + + negatedotherClause = Clause(negatedother) + + bindings = BindingDict() + used = ([], []) + skipped = ([], []) + debug = DebugObject(False) + + return ( + len( + _iterate_first( + self, + negatedotherClause, + bindings, + used, + skipped, + _subsumes_finalize, + debug, + ) + ) + > 0 + ) + + def __getslice__(self, start, end): + return Clause(list.__getslice__(self, start, end)) + + def __sub__(self, other): + return Clause([a for a in self if a not in other]) + + def __add__(self, other): + return Clause(list.__add__(self, other)) + + def is_tautology(self): + """ + Self is a tautology if it contains ground terms P and -P. The ground + term, P, must be an exact match, ie, not using unification. + """ + if self._is_tautology is not None: + return self._is_tautology + for i, a in enumerate(self): + if not isinstance(a, EqualityExpression): + j = len(self) - 1 + while j > i: + b = self[j] + if isinstance(a, NegatedExpression): + if a.term == b: + self._is_tautology = True + return True + elif isinstance(b, NegatedExpression): + if a == b.term: + self._is_tautology = True + return True + j -= 1 + self._is_tautology = False + return False + + def free(self): + return reduce(operator.or_, ((atom.free() | atom.constants()) for atom in self)) + + def replace(self, variable, expression): + """ + Replace every instance of variable with expression across every atom + in the clause + + :param variable: ``Variable`` + :param expression: ``Expression`` + """ + return Clause([atom.replace(variable, expression) for atom in self]) + + def substitute_bindings(self, bindings): + """ + Replace every binding + + :param bindings: A list of tuples mapping Variable Expressions to the + Expressions to which they are bound + :return: ``Clause`` + """ + return Clause([atom.substitute_bindings(bindings) for atom in self]) + + def __str__(self): + return '{' + ', '.join("%s" % item for item in self) + '}' + + def __repr__(self): + return "%s" % self + + +def _iterate_first(first, second, bindings, used, skipped, finalize_method, debug): + """ + This method facilitates movement through the terms of 'self' + """ + debug.line('unify(%s,%s) %s' % (first, second, bindings)) + + if not len(first) or not len(second): # if no more recursions can be performed + return finalize_method(first, second, bindings, used, skipped, debug) + else: + # explore this 'self' atom + result = _iterate_second( + first, second, bindings, used, skipped, finalize_method, debug + 1 + ) + + # skip this possible 'self' atom + newskipped = (skipped[0] + [first[0]], skipped[1]) + result += _iterate_first( + first[1:], second, bindings, used, newskipped, finalize_method, debug + 1 + ) + + try: + newbindings, newused, unused = _unify_terms( + first[0], second[0], bindings, used + ) + # Unification found, so progress with this line of unification + # put skipped and unused terms back into play for later unification. 
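+            # the recursion then continues with the remainder of both clauses,
+            # plus any atoms skipped earlier or left unused by a demodulation step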
+ newfirst = first[1:] + skipped[0] + unused[0] + newsecond = second[1:] + skipped[1] + unused[1] + result += _iterate_first( + newfirst, + newsecond, + newbindings, + newused, + ([], []), + finalize_method, + debug + 1, + ) + except BindingException: + # the atoms could not be unified, + pass + + return result + + +def _iterate_second(first, second, bindings, used, skipped, finalize_method, debug): + """ + This method facilitates movement through the terms of 'other' + """ + debug.line('unify(%s,%s) %s' % (first, second, bindings)) + + if not len(first) or not len(second): # if no more recursions can be performed + return finalize_method(first, second, bindings, used, skipped, debug) + else: + # skip this possible pairing and move to the next + newskipped = (skipped[0], skipped[1] + [second[0]]) + result = _iterate_second( + first, second[1:], bindings, used, newskipped, finalize_method, debug + 1 + ) + + try: + newbindings, newused, unused = _unify_terms( + first[0], second[0], bindings, used + ) + # Unification found, so progress with this line of unification + # put skipped and unused terms back into play for later unification. + newfirst = first[1:] + skipped[0] + unused[0] + newsecond = second[1:] + skipped[1] + unused[1] + result += _iterate_second( + newfirst, + newsecond, + newbindings, + newused, + ([], []), + finalize_method, + debug + 1, + ) + except BindingException: + # the atoms could not be unified, + pass + + return result + + +def _unify_terms(a, b, bindings=None, used=None): + """ + This method attempts to unify two terms. Two expressions are unifiable + if there exists a substitution function S such that S(a) == S(-b). + + :param a: ``Expression`` + :param b: ``Expression`` + :param bindings: ``BindingDict`` a starting set of bindings with which + the unification must be consistent + :return: ``BindingDict`` A dictionary of the bindings required to unify + :raise ``BindingException``: If the terms cannot be unified + """ + assert isinstance(a, Expression) + assert isinstance(b, Expression) + + if bindings is None: + bindings = BindingDict() + if used is None: + used = ([], []) + + # Use resolution + if isinstance(a, NegatedExpression) and isinstance(b, ApplicationExpression): + newbindings = most_general_unification(a.term, b, bindings) + newused = (used[0] + [a], used[1] + [b]) + unused = ([], []) + elif isinstance(a, ApplicationExpression) and isinstance(b, NegatedExpression): + newbindings = most_general_unification(a, b.term, bindings) + newused = (used[0] + [a], used[1] + [b]) + unused = ([], []) + + # Use demodulation + elif isinstance(a, EqualityExpression): + newbindings = BindingDict([(a.first.variable, a.second)]) + newused = (used[0] + [a], used[1]) + unused = ([], [b]) + elif isinstance(b, EqualityExpression): + newbindings = BindingDict([(b.first.variable, b.second)]) + newused = (used[0], used[1] + [b]) + unused = ([a], []) + + else: + raise BindingException((a, b)) + + return newbindings, newused, unused + + +def _complete_unify_path(first, second, bindings, used, skipped, debug): + if used[0] or used[1]: # if bindings were made along the path + newclause = Clause(skipped[0] + skipped[1] + first + second) + debug.line(' -> New Clause: %s' % newclause) + return [newclause.substitute_bindings(bindings)] + else: # no bindings made means no unification occurred. 
so no result + debug.line(' -> End') + return [] + + +def _subsumes_finalize(first, second, bindings, used, skipped, debug): + if not len(skipped[0]) and not len(first): + # If there are no skipped terms and no terms left in 'first', then + # all of the terms in the original 'self' were unified with terms + # in 'other'. Therefore, there exists a binding (this one) such that + # every term in self can be unified with a term in other, which + # is the definition of subsumption. + return [True] + else: + return [] + + +def clausify(expression): + """ + Skolemize, clausify, and standardize the variables apart. + """ + clause_list = [] + for clause in _clausify(skolemize(expression)): + for free in clause.free(): + if is_indvar(free.name): + newvar = VariableExpression(unique_variable()) + clause = clause.replace(free, newvar) + clause_list.append(clause) + return clause_list + + +def _clausify(expression): + """ + :param expression: a skolemized expression in CNF + """ + if isinstance(expression, AndExpression): + return _clausify(expression.first) + _clausify(expression.second) + elif isinstance(expression, OrExpression): + first = _clausify(expression.first) + second = _clausify(expression.second) + assert len(first) == 1 + assert len(second) == 1 + return [first[0] + second[0]] + elif isinstance(expression, EqualityExpression): + return [Clause([expression])] + elif isinstance(expression, ApplicationExpression): + return [Clause([expression])] + elif isinstance(expression, NegatedExpression): + if isinstance(expression.term, ApplicationExpression): + return [Clause([expression])] + elif isinstance(expression.term, EqualityExpression): + return [Clause([expression])] + raise ProverParseError() + + +@python_2_unicode_compatible +class BindingDict(object): + def __init__(self, binding_list=None): + """ + :param binding_list: list of (``AbstractVariableExpression``, ``AtomicExpression``) to initialize the dictionary + """ + self.d = {} + + if binding_list: + for (v, b) in binding_list: + self[v] = b + + def __setitem__(self, variable, binding): + """ + A binding is consistent with the dict if its variable is not already bound, OR if its + variable is already bound to its argument. 
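+        Rebinding a variable to the value it already holds is a no-op, while
+        rebinding it to a conflicting value raises a ``BindingException``
+        (unless, as handled below, the new value is itself a variable that can
+        be bound in the reverse direction).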
+ + :param variable: ``Variable`` The variable to bind + :param binding: ``Expression`` The atomic to which 'variable' should be bound + :raise BindingException: If the variable cannot be bound in this dictionary + """ + assert isinstance(variable, Variable) + assert isinstance(binding, Expression) + + try: + existing = self[variable] + except KeyError: + existing = None + + if not existing or binding == existing: + self.d[variable] = binding + elif isinstance(binding, IndividualVariableExpression): + # Since variable is already bound, try to bind binding to variable + try: + existing = self[binding.variable] + except KeyError: + existing = None + + binding2 = VariableExpression(variable) + + if not existing or binding2 == existing: + self.d[binding.variable] = binding2 + else: + raise BindingException( + 'Variable %s already bound to another ' 'value' % (variable) + ) + else: + raise BindingException( + 'Variable %s already bound to another ' 'value' % (variable) + ) + + def __getitem__(self, variable): + """ + Return the expression to which 'variable' is bound + """ + assert isinstance(variable, Variable) + + intermediate = self.d[variable] + while intermediate: + try: + intermediate = self.d[intermediate] + except KeyError: + return intermediate + + def __contains__(self, item): + return item in self.d + + def __add__(self, other): + """ + :param other: ``BindingDict`` The dict with which to combine self + :return: ``BindingDict`` A new dict containing all the elements of both parameters + :raise BindingException: If the parameter dictionaries are not consistent with each other + """ + try: + combined = BindingDict() + for v in self.d: + combined[v] = self.d[v] + for v in other.d: + combined[v] = other.d[v] + return combined + except BindingException: + raise BindingException( + "Attempting to add two contradicting " + "BindingDicts: '%s' and '%s'" % (self, other) + ) + + def __len__(self): + return len(self.d) + + def __str__(self): + data_str = ', '.join('%s: %s' % (v, self.d[v]) for v in sorted(self.d.keys())) + return '{' + data_str + '}' + + def __repr__(self): + return "%s" % self + + +def most_general_unification(a, b, bindings=None): + """ + Find the most general unification of the two given expressions + + :param a: ``Expression`` + :param b: ``Expression`` + :param bindings: ``BindingDict`` a starting set of bindings with which the + unification must be consistent + :return: a list of bindings + :raise BindingException: if the Expressions cannot be unified + """ + if bindings is None: + bindings = BindingDict() + + if a == b: + return bindings + elif isinstance(a, IndividualVariableExpression): + return _mgu_var(a, b, bindings) + elif isinstance(b, IndividualVariableExpression): + return _mgu_var(b, a, bindings) + elif isinstance(a, ApplicationExpression) and isinstance(b, ApplicationExpression): + return most_general_unification( + a.function, b.function, bindings + ) + most_general_unification(a.argument, b.argument, bindings) + raise BindingException((a, b)) + + +def _mgu_var(var, expression, bindings): + if var.variable in expression.free() | expression.constants(): + raise BindingException((var, expression)) + else: + return BindingDict([(var.variable, expression)]) + bindings + + +class BindingException(Exception): + def __init__(self, arg): + if isinstance(arg, tuple): + Exception.__init__(self, "'%s' cannot be bound to '%s'" % arg) + else: + Exception.__init__(self, arg) + + +class UnificationException(Exception): + def __init__(self, a, b): + Exception.__init__(self, 
"'%s' cannot unify with '%s'" % (a, b)) + + +class DebugObject(object): + def __init__(self, enabled=True, indent=0): + self.enabled = enabled + self.indent = indent + + def __add__(self, i): + return DebugObject(self.enabled, self.indent + i) + + def line(self, line): + if self.enabled: + print(' ' * self.indent + line) + + +def testResolutionProver(): + resolution_test(r'man(x)') + resolution_test(r'(man(x) -> man(x))') + resolution_test(r'(man(x) -> --man(x))') + resolution_test(r'-(man(x) and -man(x))') + resolution_test(r'(man(x) or -man(x))') + resolution_test(r'(man(x) -> man(x))') + resolution_test(r'-(man(x) and -man(x))') + resolution_test(r'(man(x) or -man(x))') + resolution_test(r'(man(x) -> man(x))') + resolution_test(r'(man(x) iff man(x))') + resolution_test(r'-(man(x) iff -man(x))') + resolution_test('all x.man(x)') + resolution_test('-all x.some y.F(x,y) & some x.all y.(-F(x,y))') + resolution_test('some x.all y.sees(x,y)') + + p1 = Expression.fromstring(r'all x.(man(x) -> mortal(x))') + p2 = Expression.fromstring(r'man(Socrates)') + c = Expression.fromstring(r'mortal(Socrates)') + print('%s, %s |- %s: %s' % (p1, p2, c, ResolutionProver().prove(c, [p1, p2]))) + + p1 = Expression.fromstring(r'all x.(man(x) -> walks(x))') + p2 = Expression.fromstring(r'man(John)') + c = Expression.fromstring(r'some y.walks(y)') + print('%s, %s |- %s: %s' % (p1, p2, c, ResolutionProver().prove(c, [p1, p2]))) + + p = Expression.fromstring(r'some e1.some e2.(believe(e1,john,e2) & walk(e2,mary))') + c = Expression.fromstring(r'some e0.walk(e0,mary)') + print('%s |- %s: %s' % (p, c, ResolutionProver().prove(c, [p]))) + + +def resolution_test(e): + f = Expression.fromstring(e) + t = ResolutionProver().prove(f) + print('|- %s: %s' % (f, t)) + + +def test_clausify(): + lexpr = Expression.fromstring + + print(clausify(lexpr('P(x) | Q(x)'))) + print(clausify(lexpr('(P(x) & Q(x)) | R(x)'))) + print(clausify(lexpr('P(x) | (Q(x) & R(x))'))) + print(clausify(lexpr('(P(x) & Q(x)) | (R(x) & S(x))'))) + + print(clausify(lexpr('P(x) | Q(x) | R(x)'))) + print(clausify(lexpr('P(x) | (Q(x) & R(x)) | S(x)'))) + + print(clausify(lexpr('exists x.P(x) | Q(x)'))) + + print(clausify(lexpr('-(-P(x) & Q(x))'))) + print(clausify(lexpr('P(x) <-> Q(x)'))) + print(clausify(lexpr('-(P(x) <-> Q(x))'))) + print(clausify(lexpr('-(all x.P(x))'))) + print(clausify(lexpr('-(some x.P(x))'))) + + print(clausify(lexpr('some x.P(x)'))) + print(clausify(lexpr('some x.all y.P(x,y)'))) + print(clausify(lexpr('all y.some x.P(x,y)'))) + print(clausify(lexpr('all z.all y.some x.P(x,y,z)'))) + print(clausify(lexpr('all x.(all y.P(x,y) -> -all y.(Q(x,y) -> R(x,y)))'))) + + +def demo(): + test_clausify() + print() + testResolutionProver() + print() + + p = Expression.fromstring('man(x)') + print(ResolutionProverCommand(p, [p]).prove()) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/inference/tableau.py b/venv.bak/lib/python3.7/site-packages/nltk/inference/tableau.py new file mode 100644 index 0000000..e8cc840 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/inference/tableau.py @@ -0,0 +1,715 @@ +# Natural Language Toolkit: First-Order Tableau Theorem Prover +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Dan Garrette +# +# URL: +# For license information, see LICENSE.TXT + +""" +Module for a tableau-based First Order theorem prover. 
+""" +from __future__ import print_function, unicode_literals + +from nltk.internals import Counter + +from nltk.sem.logic import ( + VariableExpression, + EqualityExpression, + ApplicationExpression, + Expression, + AbstractVariableExpression, + AllExpression, + NegatedExpression, + ExistsExpression, + Variable, + ImpExpression, + AndExpression, + unique_variable, + LambdaExpression, + IffExpression, + OrExpression, + FunctionVariableExpression, +) + +from nltk.inference.api import Prover, BaseProverCommand + +_counter = Counter() + + +class ProverParseError(Exception): + pass + + +class TableauProver(Prover): + _assume_false = False + + def _prove(self, goal=None, assumptions=None, verbose=False): + if not assumptions: + assumptions = [] + + result = None + try: + agenda = Agenda() + if goal: + agenda.put(-goal) + agenda.put_all(assumptions) + debugger = Debug(verbose) + result = self._attempt_proof(agenda, set(), set(), debugger) + except RuntimeError as e: + if self._assume_false and str(e).startswith( + 'maximum recursion depth exceeded' + ): + result = False + else: + if verbose: + print(e) + else: + raise e + return (result, '\n'.join(debugger.lines)) + + def _attempt_proof(self, agenda, accessible_vars, atoms, debug): + (current, context), category = agenda.pop_first() + + # if there's nothing left in the agenda, and we haven't closed the path + if not current: + debug.line('AGENDA EMPTY') + return False + + proof_method = { + Categories.ATOM: self._attempt_proof_atom, + Categories.PROP: self._attempt_proof_prop, + Categories.N_ATOM: self._attempt_proof_n_atom, + Categories.N_PROP: self._attempt_proof_n_prop, + Categories.APP: self._attempt_proof_app, + Categories.N_APP: self._attempt_proof_n_app, + Categories.N_EQ: self._attempt_proof_n_eq, + Categories.D_NEG: self._attempt_proof_d_neg, + Categories.N_ALL: self._attempt_proof_n_all, + Categories.N_EXISTS: self._attempt_proof_n_some, + Categories.AND: self._attempt_proof_and, + Categories.N_OR: self._attempt_proof_n_or, + Categories.N_IMP: self._attempt_proof_n_imp, + Categories.OR: self._attempt_proof_or, + Categories.IMP: self._attempt_proof_imp, + Categories.N_AND: self._attempt_proof_n_and, + Categories.IFF: self._attempt_proof_iff, + Categories.N_IFF: self._attempt_proof_n_iff, + Categories.EQ: self._attempt_proof_eq, + Categories.EXISTS: self._attempt_proof_some, + Categories.ALL: self._attempt_proof_all, + }[category] + + debug.line((current, context)) + return proof_method(current, context, agenda, accessible_vars, atoms, debug) + + def _attempt_proof_atom( + self, current, context, agenda, accessible_vars, atoms, debug + ): + # Check if the branch is closed. Return 'True' if it is + if (current, True) in atoms: + debug.line('CLOSED', 1) + return True + + if context: + if isinstance(context.term, NegatedExpression): + current = current.negate() + agenda.put(context(current).simplify()) + return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1) + else: + # mark all AllExpressions as 'not exhausted' into the agenda since we are (potentially) adding new accessible vars + agenda.mark_alls_fresh() + return self._attempt_proof( + agenda, + accessible_vars | set(current.args), + atoms | set([(current, False)]), + debug + 1, + ) + + def _attempt_proof_n_atom( + self, current, context, agenda, accessible_vars, atoms, debug + ): + # Check if the branch is closed. 
Return 'True' if it is + if (current.term, False) in atoms: + debug.line('CLOSED', 1) + return True + + if context: + if isinstance(context.term, NegatedExpression): + current = current.negate() + agenda.put(context(current).simplify()) + return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1) + else: + # mark all AllExpressions as 'not exhausted' into the agenda since we are (potentially) adding new accessible vars + agenda.mark_alls_fresh() + return self._attempt_proof( + agenda, + accessible_vars | set(current.term.args), + atoms | set([(current.term, True)]), + debug + 1, + ) + + def _attempt_proof_prop( + self, current, context, agenda, accessible_vars, atoms, debug + ): + # Check if the branch is closed. Return 'True' if it is + if (current, True) in atoms: + debug.line('CLOSED', 1) + return True + + # mark all AllExpressions as 'not exhausted' into the agenda since we are (potentially) adding new accessible vars + agenda.mark_alls_fresh() + return self._attempt_proof( + agenda, accessible_vars, atoms | set([(current, False)]), debug + 1 + ) + + def _attempt_proof_n_prop( + self, current, context, agenda, accessible_vars, atoms, debug + ): + # Check if the branch is closed. Return 'True' if it is + if (current.term, False) in atoms: + debug.line('CLOSED', 1) + return True + + # mark all AllExpressions as 'not exhausted' into the agenda since we are (potentially) adding new accessible vars + agenda.mark_alls_fresh() + return self._attempt_proof( + agenda, accessible_vars, atoms | set([(current.term, True)]), debug + 1 + ) + + def _attempt_proof_app( + self, current, context, agenda, accessible_vars, atoms, debug + ): + f, args = current.uncurry() + for i, arg in enumerate(args): + if not TableauProver.is_atom(arg): + ctx = f + nv = Variable('X%s' % _counter.get()) + for j, a in enumerate(args): + ctx = ctx(VariableExpression(nv)) if i == j else ctx(a) + if context: + ctx = context(ctx).simplify() + ctx = LambdaExpression(nv, ctx) + agenda.put(arg, ctx) + return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1) + raise Exception('If this method is called, there must be a non-atomic argument') + + def _attempt_proof_n_app( + self, current, context, agenda, accessible_vars, atoms, debug + ): + f, args = current.term.uncurry() + for i, arg in enumerate(args): + if not TableauProver.is_atom(arg): + ctx = f + nv = Variable('X%s' % _counter.get()) + for j, a in enumerate(args): + ctx = ctx(VariableExpression(nv)) if i == j else ctx(a) + if context: + # combine new context with existing + ctx = context(ctx).simplify() + ctx = LambdaExpression(nv, -ctx) + agenda.put(-arg, ctx) + return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1) + raise Exception('If this method is called, there must be a non-atomic argument') + + def _attempt_proof_n_eq( + self, current, context, agenda, accessible_vars, atoms, debug + ): + ########################################################################### + # Since 'current' is of type '~(a=b)', the path is closed if 'a' == 'b' + ########################################################################### + if current.term.first == current.term.second: + debug.line('CLOSED', 1) + return True + + agenda[Categories.N_EQ].add((current, context)) + current._exhausted = True + return self._attempt_proof( + agenda, + accessible_vars | set([current.term.first, current.term.second]), + atoms, + debug + 1, + ) + + def _attempt_proof_d_neg( + self, current, context, agenda, accessible_vars, atoms, debug + ): + 
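+        # 'current' is a double negation '--P', so simply put the inner
+        # term 'P' back on the agenda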
agenda.put(current.term.term, context) + return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1) + + def _attempt_proof_n_all( + self, current, context, agenda, accessible_vars, atoms, debug + ): + agenda[Categories.EXISTS].add( + (ExistsExpression(current.term.variable, -current.term.term), context) + ) + return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1) + + def _attempt_proof_n_some( + self, current, context, agenda, accessible_vars, atoms, debug + ): + agenda[Categories.ALL].add( + (AllExpression(current.term.variable, -current.term.term), context) + ) + return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1) + + def _attempt_proof_and( + self, current, context, agenda, accessible_vars, atoms, debug + ): + agenda.put(current.first, context) + agenda.put(current.second, context) + return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1) + + def _attempt_proof_n_or( + self, current, context, agenda, accessible_vars, atoms, debug + ): + agenda.put(-current.term.first, context) + agenda.put(-current.term.second, context) + return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1) + + def _attempt_proof_n_imp( + self, current, context, agenda, accessible_vars, atoms, debug + ): + agenda.put(current.term.first, context) + agenda.put(-current.term.second, context) + return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1) + + def _attempt_proof_or( + self, current, context, agenda, accessible_vars, atoms, debug + ): + new_agenda = agenda.clone() + agenda.put(current.first, context) + new_agenda.put(current.second, context) + return self._attempt_proof( + agenda, accessible_vars, atoms, debug + 1 + ) and self._attempt_proof(new_agenda, accessible_vars, atoms, debug + 1) + + def _attempt_proof_imp( + self, current, context, agenda, accessible_vars, atoms, debug + ): + new_agenda = agenda.clone() + agenda.put(-current.first, context) + new_agenda.put(current.second, context) + return self._attempt_proof( + agenda, accessible_vars, atoms, debug + 1 + ) and self._attempt_proof(new_agenda, accessible_vars, atoms, debug + 1) + + def _attempt_proof_n_and( + self, current, context, agenda, accessible_vars, atoms, debug + ): + new_agenda = agenda.clone() + agenda.put(-current.term.first, context) + new_agenda.put(-current.term.second, context) + return self._attempt_proof( + agenda, accessible_vars, atoms, debug + 1 + ) and self._attempt_proof(new_agenda, accessible_vars, atoms, debug + 1) + + def _attempt_proof_iff( + self, current, context, agenda, accessible_vars, atoms, debug + ): + new_agenda = agenda.clone() + agenda.put(current.first, context) + agenda.put(current.second, context) + new_agenda.put(-current.first, context) + new_agenda.put(-current.second, context) + return self._attempt_proof( + agenda, accessible_vars, atoms, debug + 1 + ) and self._attempt_proof(new_agenda, accessible_vars, atoms, debug + 1) + + def _attempt_proof_n_iff( + self, current, context, agenda, accessible_vars, atoms, debug + ): + new_agenda = agenda.clone() + agenda.put(current.term.first, context) + agenda.put(-current.term.second, context) + new_agenda.put(-current.term.first, context) + new_agenda.put(current.term.second, context) + return self._attempt_proof( + agenda, accessible_vars, atoms, debug + 1 + ) and self._attempt_proof(new_agenda, accessible_vars, atoms, debug + 1) + + def _attempt_proof_eq( + self, current, context, agenda, accessible_vars, atoms, debug + ): + 
######################################################################### + # Since 'current' is of the form '(a = b)', replace ALL free instances + # of 'a' with 'b' + ######################################################################### + agenda.put_atoms(atoms) + agenda.replace_all(current.first, current.second) + accessible_vars.discard(current.first) + agenda.mark_neqs_fresh() + return self._attempt_proof(agenda, accessible_vars, set(), debug + 1) + + def _attempt_proof_some( + self, current, context, agenda, accessible_vars, atoms, debug + ): + new_unique_variable = VariableExpression(unique_variable()) + agenda.put(current.term.replace(current.variable, new_unique_variable), context) + agenda.mark_alls_fresh() + return self._attempt_proof( + agenda, accessible_vars | set([new_unique_variable]), atoms, debug + 1 + ) + + def _attempt_proof_all( + self, current, context, agenda, accessible_vars, atoms, debug + ): + try: + current._used_vars + except AttributeError: + current._used_vars = set() + + # if there are accessible_vars on the path + if accessible_vars: + # get the set of bound variables that have not be used by this AllExpression + bv_available = accessible_vars - current._used_vars + + if bv_available: + variable_to_use = list(bv_available)[0] + debug.line('--> Using \'%s\'' % variable_to_use, 2) + current._used_vars |= set([variable_to_use]) + agenda.put( + current.term.replace(current.variable, variable_to_use), context + ) + agenda[Categories.ALL].add((current, context)) + return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1) + + else: + # no more available variables to substitute + debug.line('--> Variables Exhausted', 2) + current._exhausted = True + agenda[Categories.ALL].add((current, context)) + return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1) + + else: + new_unique_variable = VariableExpression(unique_variable()) + debug.line('--> Using \'%s\'' % new_unique_variable, 2) + current._used_vars |= set([new_unique_variable]) + agenda.put( + current.term.replace(current.variable, new_unique_variable), context + ) + agenda[Categories.ALL].add((current, context)) + agenda.mark_alls_fresh() + return self._attempt_proof( + agenda, accessible_vars | set([new_unique_variable]), atoms, debug + 1 + ) + + @staticmethod + def is_atom(e): + if isinstance(e, NegatedExpression): + e = e.term + + if isinstance(e, ApplicationExpression): + for arg in e.args: + if not TableauProver.is_atom(arg): + return False + return True + elif isinstance(e, AbstractVariableExpression) or isinstance( + e, LambdaExpression + ): + return True + else: + return False + + +class TableauProverCommand(BaseProverCommand): + def __init__(self, goal=None, assumptions=None, prover=None): + """ + :param goal: Input expression to prove + :type goal: sem.Expression + :param assumptions: Input expressions to use as assumptions in + the proof. 
+ :type assumptions: list(sem.Expression) + """ + if prover is not None: + assert isinstance(prover, TableauProver) + else: + prover = TableauProver() + + BaseProverCommand.__init__(self, prover, goal, assumptions) + + +class Agenda(object): + def __init__(self): + self.sets = tuple(set() for i in range(21)) + + def clone(self): + new_agenda = Agenda() + set_list = [s.copy() for s in self.sets] + + new_allExs = set() + for allEx, _ in set_list[Categories.ALL]: + new_allEx = AllExpression(allEx.variable, allEx.term) + try: + new_allEx._used_vars = set(used for used in allEx._used_vars) + except AttributeError: + new_allEx._used_vars = set() + new_allExs.add((new_allEx, None)) + set_list[Categories.ALL] = new_allExs + + set_list[Categories.N_EQ] = set( + (NegatedExpression(n_eq.term), ctx) + for (n_eq, ctx) in set_list[Categories.N_EQ] + ) + + new_agenda.sets = tuple(set_list) + return new_agenda + + def __getitem__(self, index): + return self.sets[index] + + def put(self, expression, context=None): + if isinstance(expression, AllExpression): + ex_to_add = AllExpression(expression.variable, expression.term) + try: + ex_to_add._used_vars = set(used for used in expression._used_vars) + except AttributeError: + ex_to_add._used_vars = set() + else: + ex_to_add = expression + self.sets[self._categorize_expression(ex_to_add)].add((ex_to_add, context)) + + def put_all(self, expressions): + for expression in expressions: + self.put(expression) + + def put_atoms(self, atoms): + for atom, neg in atoms: + if neg: + self[Categories.N_ATOM].add((-atom, None)) + else: + self[Categories.ATOM].add((atom, None)) + + def pop_first(self): + """ Pop the first expression that appears in the agenda """ + for i, s in enumerate(self.sets): + if s: + if i in [Categories.N_EQ, Categories.ALL]: + for ex in s: + try: + if not ex[0]._exhausted: + s.remove(ex) + return (ex, i) + except AttributeError: + s.remove(ex) + return (ex, i) + else: + return (s.pop(), i) + return ((None, None), None) + + def replace_all(self, old, new): + for s in self.sets: + for ex, ctx in s: + ex.replace(old.variable, new) + if ctx is not None: + ctx.replace(old.variable, new) + + def mark_alls_fresh(self): + for u, _ in self.sets[Categories.ALL]: + u._exhausted = False + + def mark_neqs_fresh(self): + for neq, _ in self.sets[Categories.N_EQ]: + neq._exhausted = False + + def _categorize_expression(self, current): + if isinstance(current, NegatedExpression): + return self._categorize_NegatedExpression(current) + elif isinstance(current, FunctionVariableExpression): + return Categories.PROP + elif TableauProver.is_atom(current): + return Categories.ATOM + elif isinstance(current, AllExpression): + return Categories.ALL + elif isinstance(current, AndExpression): + return Categories.AND + elif isinstance(current, OrExpression): + return Categories.OR + elif isinstance(current, ImpExpression): + return Categories.IMP + elif isinstance(current, IffExpression): + return Categories.IFF + elif isinstance(current, EqualityExpression): + return Categories.EQ + elif isinstance(current, ExistsExpression): + return Categories.EXISTS + elif isinstance(current, ApplicationExpression): + return Categories.APP + else: + raise ProverParseError("cannot categorize %s" % current.__class__.__name__) + + def _categorize_NegatedExpression(self, current): + negated = current.term + + if isinstance(negated, NegatedExpression): + return Categories.D_NEG + elif isinstance(negated, FunctionVariableExpression): + return Categories.N_PROP + elif 
TableauProver.is_atom(negated): + return Categories.N_ATOM + elif isinstance(negated, AllExpression): + return Categories.N_ALL + elif isinstance(negated, AndExpression): + return Categories.N_AND + elif isinstance(negated, OrExpression): + return Categories.N_OR + elif isinstance(negated, ImpExpression): + return Categories.N_IMP + elif isinstance(negated, IffExpression): + return Categories.N_IFF + elif isinstance(negated, EqualityExpression): + return Categories.N_EQ + elif isinstance(negated, ExistsExpression): + return Categories.N_EXISTS + elif isinstance(negated, ApplicationExpression): + return Categories.N_APP + else: + raise ProverParseError("cannot categorize %s" % negated.__class__.__name__) + + +class Debug(object): + def __init__(self, verbose, indent=0, lines=None): + self.verbose = verbose + self.indent = indent + + if not lines: + lines = [] + self.lines = lines + + def __add__(self, increment): + return Debug(self.verbose, self.indent + 1, self.lines) + + def line(self, data, indent=0): + if isinstance(data, tuple): + ex, ctx = data + if ctx: + data = '%s, %s' % (ex, ctx) + else: + data = '%s' % ex + + if isinstance(ex, AllExpression): + try: + used_vars = "[%s]" % ( + ",".join("%s" % ve.variable.name for ve in ex._used_vars) + ) + data += ': %s' % used_vars + except AttributeError: + data += ': []' + + newline = '%s%s' % (' ' * (self.indent + indent), data) + self.lines.append(newline) + + if self.verbose: + print(newline) + + +class Categories(object): + ATOM = 0 + PROP = 1 + N_ATOM = 2 + N_PROP = 3 + APP = 4 + N_APP = 5 + N_EQ = 6 + D_NEG = 7 + N_ALL = 8 + N_EXISTS = 9 + AND = 10 + N_OR = 11 + N_IMP = 12 + OR = 13 + IMP = 14 + N_AND = 15 + IFF = 16 + N_IFF = 17 + EQ = 18 + EXISTS = 19 + ALL = 20 + + +def testTableauProver(): + tableau_test('P | -P') + tableau_test('P & -P') + tableau_test('Q', ['P', '(P -> Q)']) + tableau_test('man(x)') + tableau_test('(man(x) -> man(x))') + tableau_test('(man(x) -> --man(x))') + tableau_test('-(man(x) and -man(x))') + tableau_test('(man(x) or -man(x))') + tableau_test('(man(x) -> man(x))') + tableau_test('-(man(x) and -man(x))') + tableau_test('(man(x) or -man(x))') + tableau_test('(man(x) -> man(x))') + tableau_test('(man(x) iff man(x))') + tableau_test('-(man(x) iff -man(x))') + tableau_test('all x.man(x)') + tableau_test('all x.all y.((x = y) -> (y = x))') + tableau_test('all x.all y.all z.(((x = y) & (y = z)) -> (x = z))') + # tableau_test('-all x.some y.F(x,y) & some x.all y.(-F(x,y))') + # tableau_test('some x.all y.sees(x,y)') + + p1 = 'all x.(man(x) -> mortal(x))' + p2 = 'man(Socrates)' + c = 'mortal(Socrates)' + tableau_test(c, [p1, p2]) + + p1 = 'all x.(man(x) -> walks(x))' + p2 = 'man(John)' + c = 'some y.walks(y)' + tableau_test(c, [p1, p2]) + + p = '((x = y) & walks(y))' + c = 'walks(x)' + tableau_test(c, [p]) + + p = '((x = y) & ((y = z) & (z = w)))' + c = '(x = w)' + tableau_test(c, [p]) + + p = 'some e1.some e2.(believe(e1,john,e2) & walk(e2,mary))' + c = 'some e0.walk(e0,mary)' + tableau_test(c, [p]) + + c = '(exists x.exists z3.((x = Mary) & ((z3 = John) & sees(z3,x))) <-> exists x.exists z4.((x = John) & ((z4 = Mary) & sees(x,z4))))' + tableau_test(c) + + +# p = 'some e1.some e2.((believe e1 john e2) and (walk e2 mary))' +# c = 'some x.some e3.some e4.((believe e3 x e4) and (walk e4 mary))' +# tableau_test(c, [p]) + + +def testHigherOrderTableauProver(): + tableau_test('believe(j, -lie(b))', ['believe(j, -lie(b) & -cheat(b))']) + tableau_test('believe(j, lie(b) & cheat(b))', ['believe(j, lie(b))']) + tableau_test( + 
'believe(j, lie(b))', ['lie(b)'] + ) # how do we capture that John believes all things that are true + tableau_test( + 'believe(j, know(b, cheat(b)))', + ['believe(j, know(b, lie(b)) & know(b, steals(b) & cheat(b)))'], + ) + tableau_test('P(Q(y), R(y) & R(z))', ['P(Q(x) & Q(y), R(y) & R(z))']) + + tableau_test('believe(j, cheat(b) & lie(b))', ['believe(j, lie(b) & cheat(b))']) + tableau_test('believe(j, -cheat(b) & -lie(b))', ['believe(j, -lie(b) & -cheat(b))']) + + +def tableau_test(c, ps=None, verbose=False): + pc = Expression.fromstring(c) + pps = [Expression.fromstring(p) for p in ps] if ps else [] + if not ps: + ps = [] + print( + '%s |- %s: %s' + % (', '.join(ps), pc, TableauProver().prove(pc, pps, verbose=verbose)) + ) + + +def demo(): + testTableauProver() + testHigherOrderTableauProver() + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/internals.py b/venv.bak/lib/python3.7/site-packages/nltk/internals.py new file mode 100644 index 0000000..1af990d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/internals.py @@ -0,0 +1,1143 @@ +# Natural Language Toolkit: Internal utility functions +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# Nitin Madnani +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function + +import subprocess +import os +import fnmatch +import re +import warnings +import textwrap +import types +import sys +import stat +import locale + +# Use the c version of ElementTree, which is faster, if possible: +try: + from xml.etree import cElementTree as ElementTree +except ImportError: + from xml.etree import ElementTree + +from six import string_types + +from nltk import compat + +########################################################################## +# Java Via Command-Line +########################################################################## + +_java_bin = None +_java_options = [] +# [xx] add classpath option to config_java? +def config_java(bin=None, options=None, verbose=False): + """ + Configure nltk's java interface, by letting nltk know where it can + find the Java binary, and what extra options (if any) should be + passed to Java when it is run. + + :param bin: The full path to the Java binary. If not specified, + then nltk will search the system for a Java binary; and if + one is not found, it will raise a ``LookupError`` exception. + :type bin: str + :param options: A list of options that should be passed to the + Java binary when it is called. A common value is + ``'-Xmx512m'``, which tells Java binary to increase + the maximum heap size to 512 megabytes. If no options are + specified, then do not modify the options list. + :type options: list(str) + """ + global _java_bin, _java_options + _java_bin = find_binary( + 'java', + bin, + env_vars=['JAVAHOME', 'JAVA_HOME'], + verbose=verbose, + binary_names=['java.exe'], + ) + + if options is not None: + if isinstance(options, string_types): + options = options.split() + _java_options = list(options) + + +def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None, blocking=True): + """ + Execute the given java command, by opening a subprocess that calls + Java. If java has not yet been configured, it will be configured + by calling ``config_java()`` with no arguments. + + :param cmd: The java command that should be called, formatted as + a list of strings. 
Typically, the first string will be the name + of the java class; and the remaining strings will be arguments + for that java class. + :type cmd: list(str) + + :param classpath: A ``':'`` separated list of directories, JAR + archives, and ZIP archives to search for class files. + :type classpath: str + + :param stdin, stdout, stderr: Specify the executed programs' + standard input, standard output and standard error file + handles, respectively. Valid values are ``subprocess.PIPE``, + an existing file descriptor (a positive integer), an existing + file object, 'pipe', 'stdout', 'devnull' and None. ``subprocess.PIPE`` indicates that a + new pipe to the child should be created. With None, no + redirection will occur; the child's file handles will be + inherited from the parent. Additionally, stderr can be + ``subprocess.STDOUT``, which indicates that the stderr data + from the applications should be captured into the same file + handle as for stdout. + + :param blocking: If ``false``, then return immediately after + spawning the subprocess. In this case, the return value is + the ``Popen`` object, and not a ``(stdout, stderr)`` tuple. + + :return: If ``blocking=True``, then return a tuple ``(stdout, + stderr)``, containing the stdout and stderr outputs generated + by the java command if the ``stdout`` and ``stderr`` parameters + were set to ``subprocess.PIPE``; or None otherwise. If + ``blocking=False``, then return a ``subprocess.Popen`` object. + + :raise OSError: If the java command returns a nonzero return code. + """ + + subprocess_output_dict = {'pipe': subprocess.PIPE, 'stdout': subprocess.STDOUT, 'devnull': subprocess.DEVNULL} + + stdin = subprocess_output_dict.get(stdin, stdin) + stdout = subprocess_output_dict.get(stdout, stdout) + stderr = subprocess_output_dict.get(stderr, stderr) + + if isinstance(cmd, string_types): + raise TypeError('cmd should be a list of strings') + + # Make sure we know where a java binary is. + if _java_bin is None: + config_java() + + # Set up the classpath. + if isinstance(classpath, string_types): + classpaths = [classpath] + else: + classpaths = list(classpath) + classpath = os.path.pathsep.join(classpaths) + + # Construct the full command string. + cmd = list(cmd) + cmd = ['-cp', classpath] + cmd + cmd = [_java_bin] + _java_options + cmd + + # Call java via a subprocess + p = subprocess.Popen(cmd, stdin=stdin, stdout=stdout, stderr=stderr) + if not blocking: + return p + (stdout, stderr) = p.communicate() + + # Check the return code. + if p.returncode != 0: + print(_decode_stdoutdata(stderr)) + raise OSError('Java command failed : ' + str(cmd)) + + return (stdout, stderr) + + +if 0: + # config_java(options='-Xmx512m') + # Write: + # java('weka.classifiers.bayes.NaiveBayes', + # ['-d', '/tmp/names.model', '-t', '/tmp/train.arff'], + # classpath='/Users/edloper/Desktop/weka/weka.jar') + # Read: + (a, b) = java( + [ + 'weka.classifiers.bayes.NaiveBayes', + '-l', + '/tmp/names.model', + '-T', + '/tmp/test.arff', + '-p', + '0', + ], # , '-distribution'], + classpath='/Users/edloper/Desktop/weka/weka.jar', + ) + + +###################################################################### +# Parsing +###################################################################### + + +class ReadError(ValueError): + """ + Exception raised by read_* functions when they fail. + :param position: The index in the input string where an error occurred. + :param expected: What was expected when an error occurred. 
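+
+    For example, ``str(ReadError('open quote', 0))`` yields
+    ``'Expected open quote at 0'``.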
+ """ + + def __init__(self, expected, position): + ValueError.__init__(self, expected, position) + self.expected = expected + self.position = position + + def __str__(self): + return 'Expected %s at %s' % (self.expected, self.position) + + +_STRING_START_RE = re.compile(r"[uU]?[rR]?(\"\"\"|\'\'\'|\"|\')") + + +def read_str(s, start_position): + """ + If a Python string literal begins at the specified position in the + given string, then return a tuple ``(val, end_position)`` + containing the value of the string literal and the position where + it ends. Otherwise, raise a ``ReadError``. + + :param s: A string that will be checked to see if within which a + Python string literal exists. + :type s: str + + :param start_position: The specified beginning position of the string ``s`` + to begin regex matching. + :type start_position: int + + :return: A tuple containing the matched string literal evaluated as a + string and the end position of the string literal. + :rtype: tuple(str, int) + + :raise ReadError: If the ``_STRING_START_RE`` regex doesn't return a + match in ``s`` at ``start_position``, i.e., open quote. If the + ``_STRING_END_RE`` regex doesn't return a match in ``s`` at the + end of the first match, i.e., close quote. + :raise ValueError: If an invalid string (i.e., contains an invalid + escape sequence) is passed into the ``eval``. + + :Example: + >>> from nltk.internals import read_str + >>> read_str('"Hello", World!', 0) + ('Hello', 7) + + """ + # Read the open quote, and any modifiers. + m = _STRING_START_RE.match(s, start_position) + if not m: + raise ReadError('open quote', start_position) + quotemark = m.group(1) + + # Find the close quote. + _STRING_END_RE = re.compile(r'\\|%s' % quotemark) + position = m.end() + while True: + match = _STRING_END_RE.search(s, position) + if not match: + raise ReadError('close quote', position) + if match.group(0) == '\\': + position = match.end() + 1 + else: + break + + # Process it, using eval. Strings with invalid escape sequences + # might raise ValueEerror. + try: + return eval(s[start_position : match.end()]), match.end() + except ValueError as e: + raise ReadError('invalid string (%s)' % e) + + +_READ_INT_RE = re.compile(r'-?\d+') + + +def read_int(s, start_position): + """ + If an integer begins at the specified position in the given + string, then return a tuple ``(val, end_position)`` containing the + value of the integer and the position where it ends. Otherwise, + raise a ``ReadError``. + + :param s: A string that will be checked to see if within which a + Python integer exists. + :type s: str + + :param start_position: The specified beginning position of the string ``s`` + to begin regex matching. + :type start_position: int + + :return: A tuple containing the matched integer casted to an int, + and the end position of the int in ``s``. + :rtype: tuple(int, int) + + :raise ReadError: If the ``_READ_INT_RE`` regex doesn't return a + match in ``s`` at ``start_position``. + + :Example: + >>> from nltk.internals import read_int + >>> read_int('42 is the answer', 0) + (42, 2) + + """ + m = _READ_INT_RE.match(s, start_position) + if not m: + raise ReadError('integer', start_position) + return int(m.group()), m.end() + + +_READ_NUMBER_VALUE = re.compile(r'-?(\d*)([.]?\d*)?') + + +def read_number(s, start_position): + """ + If an integer or float begins at the specified position in the + given string, then return a tuple ``(val, end_position)`` + containing the value of the number and the position where it ends. 
+ Otherwise, raise a ``ReadError``. + + :param s: A string that will be checked to see if within which a + Python number exists. + :type s: str + + :param start_position: The specified beginning position of the string ``s`` + to begin regex matching. + :type start_position: int + + :return: A tuple containing the matched number casted to a ``float``, + and the end position of the number in ``s``. + :rtype: tuple(float, int) + + :raise ReadError: If the ``_READ_NUMBER_VALUE`` regex doesn't return a + match in ``s`` at ``start_position``. + + :Example: + >>> from nltk.internals import read_number + >>> read_number('Pi is 3.14159', 6) + (3.14159, 13) + + """ + m = _READ_NUMBER_VALUE.match(s, start_position) + if not m or not (m.group(1) or m.group(2)): + raise ReadError('number', start_position) + if m.group(2): + return float(m.group()), m.end() + else: + return int(m.group()), m.end() + + +###################################################################### +# Check if a method has been overridden +###################################################################### + + +def overridden(method): + """ + :return: True if ``method`` overrides some method with the same + name in a base class. This is typically used when defining + abstract base classes or interfaces, to allow subclasses to define + either of two related methods: + + >>> class EaterI: + ... '''Subclass must define eat() or batch_eat().''' + ... def eat(self, food): + ... if overridden(self.batch_eat): + ... return self.batch_eat([food])[0] + ... else: + ... raise NotImplementedError() + ... def batch_eat(self, foods): + ... return [self.eat(food) for food in foods] + + :type method: instance method + """ + # [xx] breaks on classic classes! + if isinstance(method, types.MethodType) and compat.get_im_class(method) is not None: + name = method.__name__ + funcs = [ + cls.__dict__[name] + for cls in _mro(compat.get_im_class(method)) + if name in cls.__dict__ + ] + return len(funcs) > 1 + else: + raise TypeError('Expected an instance method.') + + +def _mro(cls): + """ + Return the method resolution order for ``cls`` -- i.e., a list + containing ``cls`` and all its base classes, in the order in which + they would be checked by ``getattr``. For new-style classes, this + is just cls.__mro__. For classic classes, this can be obtained by + a depth-first left-to-right traversal of ``__bases__``. + """ + if isinstance(cls, type): + return cls.__mro__ + else: + mro = [cls] + for base in cls.__bases__: + mro.extend(_mro(base)) + return mro + + +###################################################################### +# Deprecation decorator & base class +###################################################################### +# [xx] dedent msg first if it comes from a docstring. + + +def _add_epytext_field(obj, field, message): + """Add an epytext @field to a given object's docstring.""" + indent = '' + # If we already have a docstring, then add a blank line to separate + # it from the new field, and check its indentation. + if obj.__doc__: + obj.__doc__ = obj.__doc__.rstrip() + '\n\n' + indents = re.findall(r'(?<=\n)[ ]+(?!\s)', obj.__doc__.expandtabs()) + if indents: + indent = min(indents) + # If we don't have a docstring, add an empty one. + else: + obj.__doc__ = '' + + obj.__doc__ += textwrap.fill( + '@%s: %s' % (field, message), + initial_indent=indent, + subsequent_indent=indent + ' ', + ) + + +def deprecated(message): + """ + A decorator used to mark functions as deprecated. 
This will cause + a warning to be printed the when the function is used. Usage: + + >>> from nltk.internals import deprecated + >>> @deprecated('Use foo() instead') + ... def bar(x): + ... print(x/10) + + """ + + def decorator(func): + msg = "Function %s() has been deprecated. %s" % (func.__name__, message) + msg = '\n' + textwrap.fill(msg, initial_indent=' ', subsequent_indent=' ') + + def newFunc(*args, **kwargs): + warnings.warn(msg, category=DeprecationWarning, stacklevel=2) + return func(*args, **kwargs) + + # Copy the old function's name, docstring, & dict + newFunc.__dict__.update(func.__dict__) + newFunc.__name__ = func.__name__ + newFunc.__doc__ = func.__doc__ + newFunc.__deprecated__ = True + # Add a @deprecated field to the docstring. + _add_epytext_field(newFunc, 'deprecated', message) + return newFunc + + return decorator + + +class Deprecated(object): + """ + A base class used to mark deprecated classes. A typical usage is to + alert users that the name of a class has changed: + + >>> from nltk.internals import Deprecated + >>> class NewClassName(object): + ... pass # All logic goes here. + ... + >>> class OldClassName(Deprecated, NewClassName): + ... "Use NewClassName instead." + + The docstring of the deprecated class will be used in the + deprecation warning message. + """ + + def __new__(cls, *args, **kwargs): + # Figure out which class is the deprecated one. + dep_cls = None + for base in _mro(cls): + if Deprecated in base.__bases__: + dep_cls = base + break + assert dep_cls, 'Unable to determine which base is deprecated.' + + # Construct an appropriate warning. + doc = dep_cls.__doc__ or ''.strip() + # If there's a @deprecated field, strip off the field marker. + doc = re.sub(r'\A\s*@deprecated:', r'', doc) + # Strip off any indentation. + doc = re.sub(r'(?m)^\s*', '', doc) + # Construct a 'name' string. + name = 'Class %s' % dep_cls.__name__ + if cls != dep_cls: + name += ' (base class for %s)' % cls.__name__ + # Put it all together. + msg = '%s has been deprecated. %s' % (name, doc) + # Wrap it. + msg = '\n' + textwrap.fill(msg, initial_indent=' ', subsequent_indent=' ') + warnings.warn(msg, category=DeprecationWarning, stacklevel=2) + # Do the actual work of __new__. + return object.__new__(cls) + + +########################################################################## +# COUNTER, FOR UNIQUE NAMING +########################################################################## + + +class Counter: + """ + A counter that auto-increments each time its value is read. + """ + + def __init__(self, initial_value=0): + self._value = initial_value + + def get(self): + self._value += 1 + return self._value + + +########################################################################## +# Search for files/binaries +########################################################################## + + +def find_file_iter( + filename, + env_vars=(), + searchpath=(), + file_names=None, + url=None, + verbose=False, + finding_dir=False, +): + """ + Search for a file to be used by nltk. + + :param filename: The name or path of the file. + :param env_vars: A list of environment variable names to check. + :param file_names: A list of alternative file names to check. + :param searchpath: List of directories to search. + :param url: URL presented to user for download help. + :param verbose: Whether or not to print path when a file is found. 
+ """ + file_names = [filename] + (file_names or []) + assert isinstance(filename, string_types) + assert not isinstance(file_names, string_types) + assert not isinstance(searchpath, string_types) + if isinstance(env_vars, string_types): + env_vars = env_vars.split() + yielded = False + + # File exists, no magic + for alternative in file_names: + path_to_file = os.path.join(filename, alternative) + if os.path.isfile(path_to_file): + if verbose: + print('[Found %s: %s]' % (filename, path_to_file)) + yielded = True + yield path_to_file + # Check the bare alternatives + if os.path.isfile(alternative): + if verbose: + print('[Found %s: %s]' % (filename, alternative)) + yielded = True + yield alternative + # Check if the alternative is inside a 'file' directory + path_to_file = os.path.join(filename, 'file', alternative) + if os.path.isfile(path_to_file): + if verbose: + print('[Found %s: %s]' % (filename, path_to_file)) + yielded = True + yield path_to_file + + # Check environment variables + for env_var in env_vars: + if env_var in os.environ: + if finding_dir: # This is to file a directory instead of file + yielded = True + yield os.environ[env_var] + + for env_dir in os.environ[env_var].split(os.pathsep): + # Check if the environment variable contains a direct path to the bin + if os.path.isfile(env_dir): + if verbose: + print('[Found %s: %s]' % (filename, env_dir)) + yielded = True + yield env_dir + # Check if the possible bin names exist inside the environment variable directories + for alternative in file_names: + path_to_file = os.path.join(env_dir, alternative) + if os.path.isfile(path_to_file): + if verbose: + print('[Found %s: %s]' % (filename, path_to_file)) + yielded = True + yield path_to_file + # Check if the alternative is inside a 'file' directory + # path_to_file = os.path.join(env_dir, 'file', alternative) + + # Check if the alternative is inside a 'bin' directory + path_to_file = os.path.join(env_dir, 'bin', alternative) + + if os.path.isfile(path_to_file): + if verbose: + print('[Found %s: %s]' % (filename, path_to_file)) + yielded = True + yield path_to_file + + # Check the path list. + for directory in searchpath: + for alternative in file_names: + path_to_file = os.path.join(directory, alternative) + if os.path.isfile(path_to_file): + yielded = True + yield path_to_file + + # If we're on a POSIX system, then try using the 'which' command + # to find the file. + if os.name == 'posix': + for alternative in file_names: + try: + p = subprocess.Popen( + ['which', alternative], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + stdout, stderr = p.communicate() + path = _decode_stdoutdata(stdout).strip() + if path.endswith(alternative) and os.path.exists(path): + if verbose: + print('[Found %s: %s]' % (filename, path)) + yielded = True + yield path + except (KeyboardInterrupt, SystemExit, OSError): + raise + finally: + pass + + if not yielded: + msg = ( + "NLTK was unable to find the %s file!" + "\nUse software specific " + "configuration paramaters" % filename + ) + if env_vars: + msg += ' or set the %s environment variable' % env_vars[0] + msg += '.' 
+ if searchpath: + msg += '\n\n Searched in:' + msg += ''.join('\n - %s' % d for d in searchpath) + if url: + msg += '\n\n For more information on %s, see:\n <%s>' % (filename, url) + div = '=' * 75 + raise LookupError('\n\n%s\n%s\n%s' % (div, msg, div)) + + +def find_file( + filename, env_vars=(), searchpath=(), file_names=None, url=None, verbose=False +): + return next( + find_file_iter(filename, env_vars, searchpath, file_names, url, verbose) + ) + + +def find_dir( + filename, env_vars=(), searchpath=(), file_names=None, url=None, verbose=False +): + return next( + find_file_iter( + filename, env_vars, searchpath, file_names, url, verbose, finding_dir=True + ) + ) + + +def find_binary_iter( + name, + path_to_bin=None, + env_vars=(), + searchpath=(), + binary_names=None, + url=None, + verbose=False, +): + """ + Search for a file to be used by nltk. + + :param name: The name or path of the file. + :param path_to_bin: The user-supplied binary location (deprecated) + :param env_vars: A list of environment variable names to check. + :param file_names: A list of alternative file names to check. + :param searchpath: List of directories to search. + :param url: URL presented to user for download help. + :param verbose: Whether or not to print path when a file is found. + """ + for file in find_file_iter( + path_to_bin or name, env_vars, searchpath, binary_names, url, verbose + ): + yield file + + +def find_binary( + name, + path_to_bin=None, + env_vars=(), + searchpath=(), + binary_names=None, + url=None, + verbose=False, +): + return next( + find_binary_iter( + name, path_to_bin, env_vars, searchpath, binary_names, url, verbose + ) + ) + + +def find_jar_iter( + name_pattern, + path_to_jar=None, + env_vars=(), + searchpath=(), + url=None, + verbose=False, + is_regex=False, +): + """ + Search for a jar that is used by nltk. + + :param name_pattern: The name of the jar file + :param path_to_jar: The user-supplied jar location, or None. + :param env_vars: A list of environment variable names to check + in addition to the CLASSPATH variable which is + checked by default. + :param searchpath: List of directories to search. + :param is_regex: Whether name is a regular expression. + """ + + assert isinstance(name_pattern, string_types) + assert not isinstance(searchpath, string_types) + if isinstance(env_vars, string_types): + env_vars = env_vars.split() + yielded = False + + # Make sure we check the CLASSPATH first + env_vars = ['CLASSPATH'] + list(env_vars) + + # If an explicit location was given, then check it, and yield it if + # it's present; otherwise, complain. 
+ if path_to_jar is not None: + if os.path.isfile(path_to_jar): + yielded = True + yield path_to_jar + else: + raise LookupError( + 'Could not find %s jar file at %s' % (name_pattern, path_to_jar) + ) + + # Check environment variables + for env_var in env_vars: + if env_var in os.environ: + if env_var == 'CLASSPATH': + classpath = os.environ['CLASSPATH'] + for cp in classpath.split(os.path.pathsep): + if os.path.isfile(cp): + filename = os.path.basename(cp) + if ( + is_regex + and re.match(name_pattern, filename) + or (not is_regex and filename == name_pattern) + ): + if verbose: + print('[Found %s: %s]' % (name_pattern, cp)) + yielded = True + yield cp + # The case where user put directory containing the jar file in the classpath + if os.path.isdir(cp): + if not is_regex: + if os.path.isfile(os.path.join(cp, name_pattern)): + if verbose: + print('[Found %s: %s]' % (name_pattern, cp)) + yielded = True + yield os.path.join(cp, name_pattern) + else: + # Look for file using regular expression + for file_name in os.listdir(cp): + if re.match(name_pattern, file_name): + if verbose: + print( + '[Found %s: %s]' + % ( + name_pattern, + os.path.join(cp, file_name), + ) + ) + yielded = True + yield os.path.join(cp, file_name) + + else: + jar_env = os.environ[env_var] + jar_iter = ( + ( + os.path.join(jar_env, path_to_jar) + for path_to_jar in os.listdir(jar_env) + ) + if os.path.isdir(jar_env) + else (jar_env,) + ) + for path_to_jar in jar_iter: + if os.path.isfile(path_to_jar): + filename = os.path.basename(path_to_jar) + if ( + is_regex + and re.match(name_pattern, filename) + or (not is_regex and filename == name_pattern) + ): + if verbose: + print('[Found %s: %s]' % (name_pattern, path_to_jar)) + yielded = True + yield path_to_jar + + # Check the path list. + for directory in searchpath: + if is_regex: + for filename in os.listdir(directory): + path_to_jar = os.path.join(directory, filename) + if os.path.isfile(path_to_jar): + if re.match(name_pattern, filename): + if verbose: + print('[Found %s: %s]' % (filename, path_to_jar)) + yielded = True + yield path_to_jar + else: + path_to_jar = os.path.join(directory, name_pattern) + if os.path.isfile(path_to_jar): + if verbose: + print('[Found %s: %s]' % (name_pattern, path_to_jar)) + yielded = True + yield path_to_jar + + if not yielded: + # If nothing was found, raise an error + msg = "NLTK was unable to find %s!" 
% name_pattern + if env_vars: + msg += ' Set the %s environment variable' % env_vars[0] + msg = textwrap.fill(msg + '.', initial_indent=' ', subsequent_indent=' ') + if searchpath: + msg += '\n\n Searched in:' + msg += ''.join('\n - %s' % d for d in searchpath) + if url: + msg += '\n\n For more information, on %s, see:\n <%s>' % ( + name_pattern, + url, + ) + div = '=' * 75 + raise LookupError('\n\n%s\n%s\n%s' % (div, msg, div)) + + +def find_jar( + name_pattern, + path_to_jar=None, + env_vars=(), + searchpath=(), + url=None, + verbose=False, + is_regex=False, +): + return next( + find_jar_iter( + name_pattern, path_to_jar, env_vars, searchpath, url, verbose, is_regex + ) + ) + + +def find_jars_within_path(path_to_jars): + return [ + os.path.join(root, filename) + for root, dirnames, filenames in os.walk(path_to_jars) + for filename in fnmatch.filter(filenames, '*.jar') + ] + + +def _decode_stdoutdata(stdoutdata): + """ Convert data read from stdout/stderr to unicode """ + if not isinstance(stdoutdata, bytes): + return stdoutdata + + encoding = getattr(sys.__stdout__, "encoding", locale.getpreferredencoding()) + if encoding is None: + return stdoutdata.decode() + return stdoutdata.decode(encoding) + + +########################################################################## +# Import Stdlib Module +########################################################################## + + +def import_from_stdlib(module): + """ + When python is run from within the nltk/ directory tree, the + current directory is included at the beginning of the search path. + Unfortunately, that means that modules within nltk can sometimes + shadow standard library modules. As an example, the stdlib + 'inspect' module will attempt to import the stdlib 'tokenize' + module, but will instead end up importing NLTK's 'tokenize' module + instead (causing the import to fail). + """ + old_path = sys.path + sys.path = [d for d in sys.path if d not in ('', '.')] + m = __import__(module) + sys.path = old_path + return m + + +########################################################################## +# Wrapper for ElementTree Elements +########################################################################## + + +@compat.python_2_unicode_compatible +class ElementWrapper(object): + """ + A wrapper around ElementTree Element objects whose main purpose is + to provide nicer __repr__ and __str__ methods. In addition, any + of the wrapped Element's methods that return other Element objects + are overridden to wrap those values before returning them. + + This makes Elements more convenient to work with in + interactive sessions and doctests, at the expense of some + efficiency. + """ + + # Prevent double-wrapping: + def __new__(cls, etree): + """ + Create and return a wrapper around a given Element object. + If ``etree`` is an ``ElementWrapper``, then ``etree`` is + returned as-is. + """ + if isinstance(etree, ElementWrapper): + return etree + else: + return object.__new__(ElementWrapper) + + def __init__(self, etree): + r""" + Initialize a new Element wrapper for ``etree``. + + If ``etree`` is a string, then it will be converted to an + Element object using ``ElementTree.fromstring()`` first: + + >>> ElementWrapper("") + \n"> + + """ + if isinstance(etree, string_types): + etree = ElementTree.fromstring(etree) + self.__dict__['_etree'] = etree + + def unwrap(self): + """ + Return the Element object wrapped by this wrapper. 
+ """ + return self._etree + + ##//////////////////////////////////////////////////////////// + # { String Representation + ##//////////////////////////////////////////////////////////// + + def __repr__(self): + s = ElementTree.tostring(self._etree, encoding='utf8').decode('utf8') + if len(s) > 60: + e = s.rfind('<') + if (len(s) - e) > 30: + e = -20 + s = '%s...%s' % (s[:30], s[e:]) + return '' % s + + def __str__(self): + """ + :return: the result of applying ``ElementTree.tostring()`` to + the wrapped Element object. + """ + return ( + ElementTree.tostring(self._etree, encoding='utf8').decode('utf8').rstrip() + ) + + ##//////////////////////////////////////////////////////////// + # { Element interface Delegation (pass-through) + ##//////////////////////////////////////////////////////////// + + def __getattr__(self, attrib): + return getattr(self._etree, attrib) + + def __setattr__(self, attr, value): + return setattr(self._etree, attr, value) + + def __delattr__(self, attr): + return delattr(self._etree, attr) + + def __setitem__(self, index, element): + self._etree[index] = element + + def __delitem__(self, index): + del self._etree[index] + + def __setslice__(self, start, stop, elements): + self._etree[start:stop] = elements + + def __delslice__(self, start, stop): + del self._etree[start:stop] + + def __len__(self): + return len(self._etree) + + ##//////////////////////////////////////////////////////////// + # { Element interface Delegation (wrap result) + ##//////////////////////////////////////////////////////////// + + def __getitem__(self, index): + return ElementWrapper(self._etree[index]) + + def __getslice__(self, start, stop): + return [ElementWrapper(elt) for elt in self._etree[start:stop]] + + def getchildren(self): + return [ElementWrapper(elt) for elt in self._etree] + + def getiterator(self, tag=None): + return (ElementWrapper(elt) for elt in self._etree.getiterator(tag)) + + def makeelement(self, tag, attrib): + return ElementWrapper(self._etree.makeelement(tag, attrib)) + + def find(self, path): + elt = self._etree.find(path) + if elt is None: + return elt + else: + return ElementWrapper(elt) + + def findall(self, path): + return [ElementWrapper(elt) for elt in self._etree.findall(path)] + + +###################################################################### +# Helper for Handling Slicing +###################################################################### + + +def slice_bounds(sequence, slice_obj, allow_step=False): + """ + Given a slice, return the corresponding (start, stop) bounds, + taking into account None indices and negative indices. The + following guarantees are made for the returned start and stop values: + + - 0 <= start <= len(sequence) + - 0 <= stop <= len(sequence) + - start <= stop + + :raise ValueError: If ``slice_obj.step`` is not None. + :param allow_step: If true, then the slice object may have a + non-None step. If it does, then return a tuple + (start, stop, step). + """ + start, stop = (slice_obj.start, slice_obj.stop) + + # If allow_step is true, then include the step in our return + # value tuple. + if allow_step: + step = slice_obj.step + if step is None: + step = 1 + # Use a recursive call without allow_step to find the slice + # bounds. If step is negative, then the roles of start and + # stop (in terms of default values, etc), are swapped. 
+ if step < 0: + start, stop = slice_bounds(sequence, slice(stop, start)) + else: + start, stop = slice_bounds(sequence, slice(start, stop)) + return start, stop, step + + # Otherwise, make sure that no non-default step value is used. + elif slice_obj.step not in (None, 1): + raise ValueError( + 'slices with steps are not supported by %s' % sequence.__class__.__name__ + ) + + # Supply default offsets. + if start is None: + start = 0 + if stop is None: + stop = len(sequence) + + # Handle negative indices. + if start < 0: + start = max(0, len(sequence) + start) + if stop < 0: + stop = max(0, len(sequence) + stop) + + # Make sure stop doesn't go past the end of the list. Note that + # we avoid calculating len(sequence) if possible, because for lazy + # sequences, calculating the length of a sequence can be expensive. + if stop > 0: + try: + sequence[stop - 1] + except IndexError: + stop = len(sequence) + + # Make sure start isn't past stop. + start = min(start, stop) + + # That's all folks! + return start, stop + + +###################################################################### +# Permission Checking +###################################################################### + + +def is_writable(path): + # Ensure that it exists. + if not os.path.exists(path): + return False + + # If we're on a posix system, check its permissions. + if hasattr(os, 'getuid'): + statdata = os.stat(path) + perm = stat.S_IMODE(statdata.st_mode) + # is it world-writable? + if perm & 0o002: + return True + # do we own it? + elif statdata.st_uid == os.getuid() and (perm & 0o200): + return True + # are we in a group that can write to it? + elif (statdata.st_gid in [os.getgid()] + os.getgroups()) and (perm & 0o020): + return True + # otherwise, we can't write to it. + else: + return False + + # Otherwise, we'll assume it's writable. + # [xx] should we do other checks on other platforms? + return True + + +###################################################################### +# NLTK Error reporting +###################################################################### + + +def raise_unorderable_types(ordering, a, b): + raise TypeError( + "unorderable types: %s() %s %s()" + % (type(a).__name__, ordering, type(b).__name__) + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/jsontags.py b/venv.bak/lib/python3.7/site-packages/nltk/jsontags.py new file mode 100644 index 0000000..f85f67b --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/jsontags.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: JSON Encoder/Decoder Helpers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Xu +# +# URL: +# For license information, see LICENSE.TXT + +""" +Register JSON tags, so the nltk data loader knows what module and class to look for. + +NLTK uses simple '!' tags to mark the types of objects, but the fully-qualified +"tag:nltk.org,2011:" prefix is also accepted in case anyone ends up +using it. +""" + +import json + +json_tags = {} + +TAG_PREFIX = '!' + + +def register_tag(cls): + """ + Decorates a class to register it's json tag. 
+ """ + json_tags[TAG_PREFIX + getattr(cls, 'json_tag')] = cls + return cls + + +class JSONTaggedEncoder(json.JSONEncoder): + def default(self, obj): + obj_tag = getattr(obj, 'json_tag', None) + if obj_tag is None: + return super(JSONTaggedEncoder, self).default(obj) + obj_tag = TAG_PREFIX + obj_tag + obj = obj.encode_json_obj() + return {obj_tag: obj} + + +class JSONTaggedDecoder(json.JSONDecoder): + def decode(self, s): + return self.decode_obj(super(JSONTaggedDecoder, self).decode(s)) + + @classmethod + def decode_obj(cls, obj): + # Decode nested objects first. + if isinstance(obj, dict): + obj = dict((key, cls.decode_obj(val)) for (key, val) in obj.items()) + elif isinstance(obj, list): + obj = list(cls.decode_obj(val) for val in obj) + # Check if we have a tagged object. + if not isinstance(obj, dict) or len(obj) != 1: + return obj + obj_tag = next(iter(obj.keys())) + if not obj_tag.startswith('!'): + return obj + if obj_tag not in json_tags: + raise ValueError('Unknown tag', obj_tag) + obj_cls = json_tags[obj_tag] + return obj_cls.decode_json_obj(obj[obj_tag]) + + +__all__ = ['register_tag', 'json_tags', 'JSONTaggedEncoder', 'JSONTaggedDecoder'] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/lazyimport.py b/venv.bak/lib/python3.7/site-packages/nltk/lazyimport.py new file mode 100644 index 0000000..e51f2c2 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/lazyimport.py @@ -0,0 +1,145 @@ +# This module is from mx/DateTime/LazyModule.py and is +# distributed under the terms of the eGenix.com Public License Agreement +# http://www.egenix.com/products/eGenix.com-Public-License-1.1.0.pdf + +""" Helper to enable simple lazy module import. + + 'Lazy' means the actual import is deferred until an attribute is + requested from the module's namespace. This has the advantage of + allowing all imports to be done at the top of a script (in a + prominent and visible place) without having a great impact + on startup time. + + Copyright (c) 1999-2005, Marc-Andre Lemburg; mailto:mal@lemburg.com + See the documentation for further information on copyrights, + or contact the author. All Rights Reserved. +""" +from __future__ import print_function + +### Constants + +_debug = 0 + +### + + +class LazyModule: + + """ Lazy module class. + + Lazy modules are imported into the given namespaces whenever a + non-special attribute (there are some attributes like __doc__ + that class instances handle without calling __getattr__) is + requested. The module is then registered under the given name + in locals usually replacing the import wrapper instance. The + import itself is done using globals as global namespace. + + Example of creating a lazy load module: + + ISO = LazyModule('ISO',locals(),globals()) + + Later, requesting an attribute from ISO will load the module + automatically into the locals() namespace, overriding the + LazyModule instance: + + t = ISO.Week(1998,1,1) + + """ + + # Flag which inidicates whether the LazyModule is initialized or not + __lazymodule_init = 0 + + # Name of the module to load + __lazymodule_name = '' + + # Flag which indicates whether the module was loaded or not + __lazymodule_loaded = 0 + + # Locals dictionary where to register the module + __lazymodule_locals = None + + # Globals dictionary to use for the module import + __lazymodule_globals = None + + def __init__(self, name, locals, globals=None): + + """ Create a LazyModule instance wrapping module name. + + The module will later on be registered in locals under the + given module name. 
+ + globals is optional and defaults to locals. + + """ + self.__lazymodule_locals = locals + if globals is None: + globals = locals + self.__lazymodule_globals = globals + mainname = globals.get('__name__', '') + if mainname: + self.__name__ = mainname + '.' + name + self.__lazymodule_name = name + else: + self.__name__ = self.__lazymodule_name = name + self.__lazymodule_init = 1 + + def __lazymodule_import(self): + + """ Import the module now. + """ + # Load and register module + name = self.__lazymodule_name + if self.__lazymodule_loaded: + return self.__lazymodule_locals[name] + if _debug: + print('LazyModule: Loading module %r' % name) + self.__lazymodule_locals[name] = module = __import__( + name, self.__lazymodule_locals, self.__lazymodule_globals, '*' + ) + + # Fill namespace with all symbols from original module to + # provide faster access. + self.__dict__.update(module.__dict__) + + # Set import flag + self.__dict__['__lazymodule_loaded'] = 1 + + if _debug: + print('LazyModule: Module %r loaded' % name) + return module + + def __getattr__(self, name): + + """ Import the module on demand and get the attribute. + """ + if self.__lazymodule_loaded: + raise AttributeError(name) + if _debug: + print( + 'LazyModule: ' + 'Module load triggered by attribute %r read access' % name + ) + module = self.__lazymodule_import() + return getattr(module, name) + + def __setattr__(self, name, value): + + """ Import the module on demand and set the attribute. + """ + if not self.__lazymodule_init: + self.__dict__[name] = value + return + if self.__lazymodule_loaded: + self.__lazymodule_locals[self.__lazymodule_name] = value + self.__dict__[name] = value + return + if _debug: + print( + 'LazyModule: ' + 'Module load triggered by attribute %r write access' % name + ) + module = self.__lazymodule_import() + setattr(module, name, value) + + def __repr__(self): + return "" % self.__name__ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/lm/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/lm/__init__.py new file mode 100644 index 0000000..8a729c6 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/lm/__init__.py @@ -0,0 +1,240 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Language Models +# +# Copyright (C) 2001-2019 NLTK Project +# Authors: Ilia Kurenkov +# URL: >> text = [['a', 'b', 'c'], ['a', 'c', 'd', 'c', 'e', 'f']] + +If we want to train a bigram model, we need to turn this text into bigrams. +Here's what the first sentence of our text would look like if we use a function +from NLTK for this. + + >>> from nltk.util import bigrams + >>> list(bigrams(text[0])) + [('a', 'b'), ('b', 'c')] + +Notice how "b" occurs both as the first and second member of different bigrams +but "a" and "c" don't? Wouldn't it be nice to somehow indicate how often sentences +start with "a" and end with "c"? +A standard way to deal with this is to add special "padding" symbols to the +sentence before splitting it into ngrams. +Fortunately, NLTK also has a function for that, let's see what it does to the +first sentence. + + >>> from nltk.util import pad_sequence + >>> list(pad_sequence(text[0], + ... pad_left=True, + ... left_pad_symbol="", + ... pad_right=True, + ... right_pad_symbol="", + ... n=2)) + ['', 'a', 'b', 'c', ''] + +Note the `n` argument, that tells the function we need padding for bigrams. +Now, passing all these parameters every time is tedious and in most cases they +can be safely assumed as defaults anyway. 
+Thus our module provides a convenience function that has all these arguments +already set while the other arguments remain the same as for `pad_sequence`. + + >>> from nltk.lm.preprocessing import pad_both_ends + >>> list(pad_both_ends(text[0], n=2)) + ['', 'a', 'b', 'c', ''] + +Combining the two parts discussed so far we get the following preparation steps +for one sentence. + + >>> list(bigrams(pad_both_ends(text[0], n=2))) + [('', 'a'), ('a', 'b'), ('b', 'c'), ('c', '')] + +To make our model more robust we could also train it on unigrams (single words) +as well as bigrams, its main source of information. +NLTK once again helpfully provides a function called `everygrams`. +While not the most efficient, it is conceptually simple. + + + >>> from nltk.util import everygrams + >>> padded_bigrams = list(pad_both_ends(text[0], n=2)) + >>> list(everygrams(padded_bigrams, max_len=2)) + [('',), + ('a',), + ('b',), + ('c',), + ('',), + ('', 'a'), + ('a', 'b'), + ('b', 'c'), + ('c', '')] + +We are almost ready to start counting ngrams, just one more step left. +During training and evaluation our model will rely on a vocabulary that +defines which words are "known" to the model. +To create this vocabulary we need to pad our sentences (just like for counting +ngrams) and then combine the sentences into one flat stream of words. + + >>> from nltk.lm.preprocessing import flatten + >>> list(flatten(pad_both_ends(sent, n=2) for sent in text)) + ['', 'a', 'b', 'c', '', '', 'a', 'c', 'd', 'c', 'e', 'f', ''] + +In most cases we want to use the same text as the source for both vocabulary +and ngram counts. +Now that we understand what this means for our preprocessing, we can simply import +a function that does everything for us. + + >>> from nltk.lm.preprocessing import padded_everygram_pipeline + >>> train, vocab = padded_everygram_pipeline(2, text) + +So as to avoid re-creating the text in memory, both `train` and `vocab` are lazy +iterators. They are evaluated on demand at training time. + + +Training +======== +Having prepared our data we are ready to start training a model. +As a simple example, let us train a Maximum Likelihood Estimator (MLE). +We only need to specify the highest ngram order to instantiate it. + + >>> from nltk.lm import MLE + >>> lm = MLE(2) + +This automatically creates an empty vocabulary... + + >>> len(lm.vocab) + 0 + +... which gets filled as we fit the model. + + >>> lm.fit(train, vocab) + >>> print(lm.vocab) + + >>> len(lm.vocab) + 9 + +The vocabulary helps us handle words that have not occurred during training. + + >>> lm.vocab.lookup(text[0]) + ('a', 'b', 'c') + >>> lm.vocab.lookup(["aliens", "from", "Mars"]) + ('', '', '') + +Moreover, in some cases we want to ignore words that we did see during training +but that didn't occur frequently enough, to provide us useful information. +You can tell the vocabulary to ignore such words. +To find out how that works, check out the docs for the `Vocabulary` class. + + +Using a Trained Model +===================== +When it comes to ngram models the training boils down to counting up the ngrams +from the training corpus. + + >>> print(lm.counts) + + +This provides a convenient interface to access counts for unigrams... + + >>> lm.counts['a'] + 2 + +...and bigrams (in this case "a b") + + >>> lm.counts[['a']]['b'] + 1 + +And so on. However, the real purpose of training a language model is to have it +score how probable words are in certain contexts. +This being MLE, the model returns the item's relative frequency as its score. 
+ + >>> lm.score("a") + 0.15384615384615385 + +Items that are not seen during training are mapped to the vocabulary's +"unknown label" token. This is "" by default. + + >>> lm.score("") == lm.score("aliens") + True + +Here's how you get the score for a word given some preceding context. +For example we want to know what is the chance that "b" is preceded by "a". + + >>> lm.score("b", ["a"]) + 0.5 + +To avoid underflow when working with many small score values it makes sense to +take their logarithm. +For convenience this can be done with the `logscore` method. + + >>> lm.logscore("a") + -2.700439718141092 + +Building on this method, we can also evaluate our model's cross-entropy and +perplexity with respect to sequences of ngrams. + + >>> test = [('a', 'b'), ('c', 'd')] + >>> lm.entropy(test) + 1.292481250360578 + >>> lm.perplexity(test) + 2.449489742783178 + +It is advisable to preprocess your test text exactly the same way as you did +the training text. + +One cool feature of ngram models is that they can be used to generate text. + + >>> lm.generate(1, random_seed=3) + '' + >>> lm.generate(5, random_seed=3) + ['', 'a', 'b', 'c', 'd'] + +Provide `random_seed` if you want to consistently reproduce the same text all +other things being equal. Here we are using it to test the examples. + +You can also condition your generation on some preceding text with the `context` +argument. + + >>> lm.generate(5, text_seed=['c'], random_seed=3) + ['', 'c', 'd', 'c', 'd'] + +Note that an ngram model is restricted in how much preceding context it can +take into account. For example, a trigram model can only condition its output +on 2 preceding words. If you pass in a 4-word context, the first two words +will be ignored. +""" + +from nltk.lm.models import ( + MLE, + Lidstone, + Laplace, + WittenBellInterpolated, + KneserNeyInterpolated, +) +from nltk.lm.counter import NgramCounter +from nltk.lm.vocabulary import Vocabulary + +__all__ = [ + "Vocabulary", + "NgramCounter", + "MLE", + "Lidstone", + "Laplace", + "WittenBellInterpolated", + "KneserNeyInterpolated", +] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..0d85a59 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/api.cpython-37.pyc new file mode 100644 index 0000000..f3115ac Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/api.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/counter.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/counter.cpython-37.pyc new file mode 100644 index 0000000..b5297ba Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/counter.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/models.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/models.cpython-37.pyc new file mode 100644 index 0000000..81eb218 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/models.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/preprocessing.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/preprocessing.cpython-37.pyc new file mode 100644 index 0000000..13d93c7 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/preprocessing.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/smoothing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/smoothing.cpython-37.pyc new file mode 100644 index 0000000..4d40649 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/smoothing.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000..8274713 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/util.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/vocabulary.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/vocabulary.cpython-37.pyc new file mode 100644 index 0000000..08dc92c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/lm/__pycache__/vocabulary.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/lm/api.py b/venv.bak/lib/python3.7/site-packages/nltk/lm/api.py new file mode 100644 index 0000000..e276dba --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/lm/api.py @@ -0,0 +1,252 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Language Models +# +# Copyright (C) 2001-2019 NLTK Project +# Authors: Ilia Kurenkov +# URL: +# For license information, see LICENSE.TXT +"""Language Model Interface.""" +from __future__ import division, unicode_literals + +import random +from abc import ABCMeta, abstractmethod +from bisect import bisect + +from six import add_metaclass + +from nltk.lm.counter import NgramCounter +from nltk.lm.util import log_base2 +from nltk.lm.vocabulary import Vocabulary + +try: + from itertools import accumulate +except ImportError: + import operator + + def accumulate(iterable, func=operator.add): + """Return running totals""" + # accumulate([1,2,3,4,5]) --> 1 3 6 10 15 + # accumulate([1,2,3,4,5], operator.mul) --> 1 2 6 24 120 + it = iter(iterable) + try: + total = next(it) + except StopIteration: + return + yield total + for element in it: + total = func(total, element) + yield total + + +@add_metaclass(ABCMeta) +class Smoothing(object): + """Ngram Smoothing Interface + + Implements Chen & Goodman 1995's idea that all smoothing algorithms have + certain features in common. This should ideally allow smoothing algoritms to + work both with Backoff and Interpolation. + """ + + def __init__(self, vocabulary, counter): + """ + :param vocabulary: The Ngram vocabulary object. + :type vocabulary: nltk.lm.vocab.Vocabulary + :param counter: The counts of the vocabulary items. 
+ :type counter: nltk.lm.counter.NgramCounter + """ + self.vocab = vocabulary + self.counts = counter + + @abstractmethod + def unigram_score(self, word): + raise NotImplementedError() + + @abstractmethod + def alpha_gamma(self, word, context): + raise NotImplementedError() + + +def _mean(items): + """Return average (aka mean) for sequence of items.""" + return sum(items) / len(items) + + +def _random_generator(seed_or_generator): + if isinstance(seed_or_generator, random.Random): + return seed_or_generator + return random.Random(seed_or_generator) + + +def _weighted_choice(population, weights, random_generator=None): + """Like random.choice, but with weights. + + Heavily inspired by python 3.6 `random.choices`. + """ + if not population: + raise ValueError("Can't choose from empty population") + if len(population) != len(weights): + raise ValueError("The number of weights does not match the population") + cum_weights = list(accumulate(weights)) + total = cum_weights[-1] + threshold = random_generator.random() + return population[bisect(cum_weights, total * threshold)] + + +@add_metaclass(ABCMeta) +class LanguageModel(object): + """ABC for Language Models. + + Cannot be directly instantiated itself. + + """ + + def __init__(self, order, vocabulary=None, counter=None): + """Creates new LanguageModel. + + :param vocabulary: If provided, this vocabulary will be used instead + of creating a new one when training. + :type vocabulary: `nltk.lm.Vocabulary` or None + :param counter: If provided, use this object to count ngrams. + :type vocabulary: `nltk.lm.NgramCounter` or None + :param ngrams_fn: If given, defines how sentences in training text are turned to ngram + sequences. + :type ngrams_fn: function or None + :param pad_fn: If given, defines how senteces in training text are padded. + :type pad_fn: function or None + + """ + self.order = order + self.vocab = Vocabulary() if vocabulary is None else vocabulary + self.counts = NgramCounter() if counter is None else counter + + def fit(self, text, vocabulary_text=None): + """Trains the model on a text. + + :param text: Training text as a sequence of sentences. + + """ + if not self.vocab: + if vocabulary_text is None: + raise ValueError( + "Cannot fit without a vocabulary or text to " "create it from." + ) + self.vocab.update(vocabulary_text) + self.counts.update(self.vocab.lookup(sent) for sent in text) + + def score(self, word, context=None): + """Masks out of vocab (OOV) words and computes their model score. + + For model-specific logic of calculating scores, see the `unmasked_score` + method. + """ + return self.unmasked_score( + self.vocab.lookup(word), self.vocab.lookup(context) if context else None + ) + + @abstractmethod + def unmasked_score(self, word, context=None): + """Score a word given some optional context. + + Concrete models are expected to provide an implementation. + Note that this method does not mask its arguments with the OOV label. + Use the `score` method for that. + + :param str word: Word for which we want the score + :param tuple(str) context: Context the word is in. + If `None`, compute unigram score. + :param context: tuple(str) or None + :rtype: float + + """ + raise NotImplementedError() + + def logscore(self, word, context=None): + """Evaluate the log score of this word in this context. + + The arguments are the same as for `score` and `unmasked_score`. + + """ + return log_base2(self.score(word, context)) + + def context_counts(self, context): + """Helper method for retrieving counts for a given context. 
+ + Assumes context has been checked and oov words in it masked. + :type context: tuple(str) or None + + """ + return ( + self.counts[len(context) + 1][context] if context else self.counts.unigrams + ) + + def entropy(self, text_ngrams): + """Calculate cross-entropy of model for given evaluation text. + + :param Iterable(tuple(str)) text_ngrams: A sequence of ngram tuples. + :rtype: float + + """ + return -1 * _mean( + [self.logscore(ngram[-1], ngram[:-1]) for ngram in text_ngrams] + ) + + def perplexity(self, text_ngrams): + """Calculates the perplexity of the given text. + + This is simply 2 ** cross-entropy for the text, so the arguments are the same. + + """ + return pow(2.0, self.entropy(text_ngrams)) + + def generate(self, num_words=1, text_seed=None, random_seed=None): + """Generate words from the model. + + :param int num_words: How many words to generate. By default 1. + :param text_seed: Generation can be conditioned on preceding context. + :param random_seed: A random seed or an instance of `random.Random`. If provided, + makes the random sampling part of generation reproducible. + :return: One (str) word or a list of words generated from model. + + Examples: + + >>> from nltk.lm import MLE + >>> lm = MLE(2) + >>> lm.fit([[("a", "b"), ("b", "c")]], vocabulary_text=['a', 'b', 'c']) + >>> lm.fit([[("a",), ("b",), ("c",)]]) + >>> lm.generate(random_seed=3) + 'a' + >>> lm.generate(text_seed=['a']) + 'b' + + """ + text_seed = [] if text_seed is None else list(text_seed) + random_generator = _random_generator(random_seed) + # base recursion case + if num_words == 1: + context = ( + text_seed[-self.order + 1 :] + if len(text_seed) >= self.order + else text_seed + ) + samples = self.context_counts(self.vocab.lookup(context)) + while context and not samples: + context = context[1:] if len(context) > 1 else [] + samples = self.context_counts(self.vocab.lookup(context)) + # sorting achieves two things: + # - reproducible randomness when sampling + # - turning Mapping into Sequence which _weighted_choice expects + samples = sorted(samples) + return _weighted_choice( + samples, tuple(self.score(w, context) for w in samples), random_generator + ) + # build up text one word at a time + generated = [] + for _ in range(num_words): + generated.append( + self.generate( + num_words=1, + text_seed=text_seed + generated, + random_seed=random_generator, + ) + ) + return generated diff --git a/venv.bak/lib/python3.7/site-packages/nltk/lm/counter.py b/venv.bak/lib/python3.7/site-packages/nltk/lm/counter.py new file mode 100644 index 0000000..1ceaa42 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/lm/counter.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ilia Kurenkov +# URL: +# For license information, see LICENSE.TXT +""" +Language Model Counter +---------------------- +""" + +from __future__ import unicode_literals + +from collections import Sequence, defaultdict + +from six import string_types +from nltk import compat +from nltk.probability import ConditionalFreqDist, FreqDist + + +@compat.python_2_unicode_compatible +class NgramCounter(object): + """Class for counting ngrams. + + Will count any ngram sequence you give it ;) + + First we need to make sure we are feeding the counter sentences of ngrams. 
+ + >>> text = [["a", "b", "c", "d"], ["a", "c", "d", "c"]] + >>> from nltk.util import ngrams + >>> text_bigrams = [ngrams(sent, 2) for sent in text] + >>> text_unigrams = [ngrams(sent, 1) for sent in text] + + The counting itself is very simple. + + >>> from nltk.lm import NgramCounter + >>> ngram_counts = NgramCounter(text_bigrams + text_unigrams) + + You can conveniently access ngram counts using standard python dictionary notation. + String keys will give you unigram counts. + + >>> ngram_counts['a'] + 2 + >>> ngram_counts['aliens'] + 0 + + If you want to access counts for higher order ngrams, use a list or a tuple. + These are treated as "context" keys, so what you get is a frequency distribution + over all continuations after the given context. + + >>> sorted(ngram_counts[['a']].items()) + [('b', 1), ('c', 1)] + >>> sorted(ngram_counts[('a',)].items()) + [('b', 1), ('c', 1)] + + This is equivalent to specifying explicitly the order of the ngram (in this case + 2 for bigram) and indexing on the context. + >>> ngram_counts[2][('a',)] is ngram_counts[['a']] + True + + Note that the keys in `ConditionalFreqDist` cannot be lists, only tuples! + It is generally advisable to use the less verbose and more flexible square + bracket notation. + + To get the count of the full ngram "a b", do this: + + >>> ngram_counts[['a']]['b'] + 1 + + Specifying the ngram order as a number can be useful for accessing all ngrams + in that order. + + >>> ngram_counts[2] + + + The keys of this `ConditionalFreqDist` are the contexts we discussed earlier. + Unigrams can also be accessed with a human-friendly alias. + + >>> ngram_counts.unigrams is ngram_counts[1] + True + + Similarly to `collections.Counter`, you can update counts after initialization. + + >>> ngram_counts['e'] + 0 + >>> ngram_counts.update([ngrams(["d", "e", "f"], 1)]) + >>> ngram_counts['e'] + 1 + + """ + + def __init__(self, ngram_text=None): + """Creates a new NgramCounter. + + If `ngram_text` is specified, counts ngrams from it, otherwise waits for + `update` method to be called explicitly. + + :param ngram_text: Optional text containing senteces of ngrams, as for `update` method. + :type ngram_text: Iterable(Iterable(tuple(str))) or None + + """ + self._counts = defaultdict(ConditionalFreqDist) + self._counts[1] = self.unigrams = FreqDist() + + if ngram_text: + self.update(ngram_text) + + def update(self, ngram_text): + """Updates ngram counts from `ngram_text`. + + Expects `ngram_text` to be a sequence of sentences (sequences). + Each sentence consists of ngrams as tuples of strings. + + :param Iterable(Iterable(tuple(str))) ngram_text: Text containing senteces of ngrams. + :raises TypeError: if the ngrams are not tuples. + + """ + + for sent in ngram_text: + for ngram in sent: + if not isinstance(ngram, tuple): + raise TypeError( + "Ngram <{0}> isn't a tuple, " + "but {1}".format(ngram, type(ngram)) + ) + + ngram_order = len(ngram) + if ngram_order == 1: + self.unigrams[ngram[0]] += 1 + continue + + context, word = ngram[:-1], ngram[-1] + self[ngram_order][context][word] += 1 + + def N(self): + """Returns grand total number of ngrams stored. + + This includes ngrams from all orders, so some duplication is expected. 
+ :rtype: int + + >>> from nltk.lm import NgramCounter + >>> counts = NgramCounter([[("a", "b"), ("c",), ("d", "e")]]) + >>> counts.N() + 3 + + """ + return sum(val.N() for val in self._counts.values()) + + def __getitem__(self, item): + """User-friendly access to ngram counts.""" + if isinstance(item, int): + return self._counts[item] + elif isinstance(item, string_types): + return self._counts.__getitem__(1)[item] + elif isinstance(item, Sequence): + return self._counts.__getitem__(len(item) + 1)[tuple(item)] + + def __str__(self): + return "<{0} with {1} ngram orders and {2} ngrams>".format( + self.__class__.__name__, len(self._counts), self.N() + ) + + def __len__(self): + return self._counts.__len__() + + def __contains__(self, item): + return item in self._counts diff --git a/venv.bak/lib/python3.7/site-packages/nltk/lm/models.py b/venv.bak/lib/python3.7/site-packages/nltk/lm/models.py new file mode 100644 index 0000000..639a1ce --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/lm/models.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Language Models +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ilia Kurenkov +# URL: +# For license information, see LICENSE.TXT +"""Language Models""" +from __future__ import division, unicode_literals + +from nltk import compat +from nltk.lm.api import LanguageModel, Smoothing +from nltk.lm.smoothing import KneserNey, WittenBell + + +@compat.python_2_unicode_compatible +class MLE(LanguageModel): + """Class for providing MLE ngram model scores. + + Inherits initialization from BaseNgramModel. + """ + + def unmasked_score(self, word, context=None): + """Returns the MLE score for a word given a context. + + Args: + - word is expcected to be a string + - context is expected to be something reasonably convertible to a tuple + """ + return self.context_counts(context).freq(word) + + +@compat.python_2_unicode_compatible +class Lidstone(LanguageModel): + """Provides Lidstone-smoothed scores. + + In addition to initialization arguments from BaseNgramModel also requires + a number by which to increase the counts, gamma. + """ + + def __init__(self, gamma, *args, **kwargs): + super(Lidstone, self).__init__(*args, **kwargs) + self.gamma = gamma + + def unmasked_score(self, word, context=None): + """Add-one smoothing: Lidstone or Laplace. + + To see what kind, look at `gamma` attribute on the class. + + """ + counts = self.context_counts(context) + word_count = counts[word] + norm_count = counts.N() + return (word_count + self.gamma) / (norm_count + len(self.vocab) * self.gamma) + + +@compat.python_2_unicode_compatible +class Laplace(Lidstone): + """Implements Laplace (add one) smoothing. + + Initialization identical to BaseNgramModel because gamma is always 1. + """ + + def __init__(self, *args, **kwargs): + super(Laplace, self).__init__(1, *args, **kwargs) + + +class InterpolatedLanguageModel(LanguageModel): + """Logic common to all interpolated language models. + + The idea to abstract this comes from Chen & Goodman 1995. 
+ """ + + def __init__(self, smoothing_cls, order, **kwargs): + assert issubclass(smoothing_cls, Smoothing) + params = kwargs.pop("params", {}) + super(InterpolatedLanguageModel, self).__init__(order, **kwargs) + self.estimator = smoothing_cls(self.vocab, self.counts, **params) + + def unmasked_score(self, word, context=None): + if not context: + return self.estimator.unigram_score(word) + alpha, gamma = self.estimator.alpha_gamma(word, context) + return alpha + gamma * self.unmasked_score(word, context[1:]) + + +class WittenBellInterpolated(InterpolatedLanguageModel): + """Interpolated version of Witten-Bell smoothing.""" + + def __init__(self, order, **kwargs): + super(WittenBellInterpolated, self).__init__(WittenBell, order, **kwargs) + + +class KneserNeyInterpolated(InterpolatedLanguageModel): + """Interpolated version of Kneser-Ney smoothing.""" + + def __init__(self, order, discount=0.1, **kwargs): + super(KneserNeyInterpolated, self).__init__( + KneserNey, order, params={"discount": discount}, **kwargs + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/lm/preprocessing.py b/venv.bak/lib/python3.7/site-packages/nltk/lm/preprocessing.py new file mode 100644 index 0000000..c9d695a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/lm/preprocessing.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Language Model Unit Tests +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ilia Kurenkov +# URL: +# For license information, see LICENSE.TXT +from functools import partial +from itertools import chain + +from nltk.util import everygrams, pad_sequence + +flatten = chain.from_iterable +pad_both_ends = partial( + pad_sequence, + pad_left=True, + left_pad_symbol="", + pad_right=True, + right_pad_symbol="", +) +pad_both_ends.__doc__ = """Pads both ends of a sentence to length specified by ngram order. + + Following convention pads the start of sentence pads its end. + """ + + +def padded_everygrams(order, sentence): + """Helper with some useful defaults. + + Applies pad_both_ends to sentence and follows it up with everygrams. + """ + return everygrams(list(pad_both_ends(sentence, n=order)), max_len=order) + + +def padded_everygram_pipeline(order, text): + """Default preprocessing for a sequence of sentences. + + Creates two iterators: + - sentences padded and turned into sequences of `nltk.util.everygrams` + - sentences padded as above and chained together for a flat stream of words + + :param order: Largest ngram length produced by `everygrams`. + :param text: Text to iterate over. Expected to be an iterable of sentences: + Iterable[Iterable[str]] + :return: iterator over text as ngrams, iterator over text as vocabulary data + """ + padding_fn = partial(pad_both_ends, n=order) + return ( + (everygrams(list(padding_fn(sent)), max_len=order) for sent in text), + flatten(map(padding_fn, text)), + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/lm/smoothing.py b/venv.bak/lib/python3.7/site-packages/nltk/lm/smoothing.py new file mode 100644 index 0000000..f8d39c7 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/lm/smoothing.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Language Model Unit Tests +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ilia Kurenkov +# URL: +# For license information, see LICENSE.TXT +"""Smoothing algorithms for language modeling. + +According to Chen & Goodman 1995 these should work with both Backoff and +Interpolation. 
+""" + +from nltk.lm.api import Smoothing + + +def _count_non_zero_vals(dictionary): + return sum(1.0 for c in dictionary.values() if c > 0) + + +class WittenBell(Smoothing): + """Witten-Bell smoothing.""" + + def __init__(self, vocabulary, counter, discount=0.1, **kwargs): + super(WittenBell, self).__init__(vocabulary, counter, *kwargs) + + def alpha_gamma(self, word, context): + gamma = self.gamma(context) + return (1.0 - gamma) * self.alpha(word, context), gamma + + def unigram_score(self, word): + return self.counts.unigrams.freq(word) + + def alpha(self, word, context): + return self.counts[context].freq(word) + + def gamma(self, context): + n_plus = _count_non_zero_vals(self.counts[context]) + return n_plus / (n_plus + self.counts[len(context) + 1].N()) + + +class KneserNey(Smoothing): + """Kneser-Ney Smoothing.""" + + def __init__(self, vocabulary, counter, discount=0.1, **kwargs): + super(KneserNey, self).__init__(vocabulary, counter, *kwargs) + self.discount = discount + + def unigram_score(self, word): + return 1.0 / len(self.vocab) + + def alpha_gamma(self, word, context): + prefix_counts = self.counts[context] + return self.alpha(word, prefix_counts), self.gamma(prefix_counts) + + def alpha(self, word, prefix_counts): + return max(prefix_counts[word] - self.discount, 0.0) / prefix_counts.N() + + def gamma(self, prefix_counts): + return self.discount * _count_non_zero_vals(prefix_counts) / prefix_counts.N() + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/lm/util.py b/venv.bak/lib/python3.7/site-packages/nltk/lm/util.py new file mode 100644 index 0000000..62457e3 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/lm/util.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ilia Kurenkov +# URL: +# For license information, see LICENSE.TXT +"""Language Model Utilities""" + +from math import log + +NEG_INF = float("-inf") +POS_INF = float("inf") + + +def log_base2(score): + """Convenience function for computing logarithms with base 2.""" + if score == 0.0: + return NEG_INF + return log(score, 2) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/lm/vocabulary.py b/venv.bak/lib/python3.7/site-packages/nltk/lm/vocabulary.py new file mode 100644 index 0000000..3c7439d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/lm/vocabulary.py @@ -0,0 +1,248 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ilia Kurenkov +# URL: +# For license information, see LICENSE.TXT +"""Language Model Vocabulary""" + +from __future__ import unicode_literals + +import sys +from collections import Counter, Iterable +from itertools import chain + +from nltk import compat + +try: + # Python >= 3.4 + from functools import singledispatch +except ImportError: + # Python < 3.4 + from singledispatch import singledispatch + + +@singledispatch +def _dispatched_lookup(words, vocab): + raise TypeError( + "Unsupported type for looking up in vocabulary: {0}".format(type(words)) + ) + + +@_dispatched_lookup.register(Iterable) +def _(words, vocab): + """Look up a sequence of words in the vocabulary. + + Returns an iterator over looked up words. 
+ + """ + return tuple(_dispatched_lookup(w, vocab) for w in words) + + +try: + # Python 2 unicode + str type + basestring +except NameError: + # Python 3 unicode + str type + basestring = str + + +@_dispatched_lookup.register(basestring) +def _string_lookup(word, vocab): + """Looks up one word in the vocabulary.""" + return word if word in vocab else vocab.unk_label + + +@compat.python_2_unicode_compatible +class Vocabulary(object): + """Stores language model vocabulary. + + Satisfies two common language modeling requirements for a vocabulary: + - When checking membership and calculating its size, filters items + by comparing their counts to a cutoff value. + - Adds a special "unknown" token which unseen words are mapped to. + + >>> words = ['a', 'c', '-', 'd', 'c', 'a', 'b', 'r', 'a', 'c', 'd'] + >>> from nltk.lm import Vocabulary + >>> vocab = Vocabulary(words, unk_cutoff=2) + + Tokens with counts greater than or equal to the cutoff value will + be considered part of the vocabulary. + + >>> vocab['c'] + 3 + >>> 'c' in vocab + True + >>> vocab['d'] + 2 + >>> 'd' in vocab + True + + Tokens with frequency counts less than the cutoff value will be considered not + part of the vocabulary even though their entries in the count dictionary are + preserved. + + >>> vocab['b'] + 1 + >>> 'b' in vocab + False + >>> vocab['aliens'] + 0 + >>> 'aliens' in vocab + False + + Keeping the count entries for seen words allows us to change the cutoff value + without having to recalculate the counts. + + >>> vocab2 = Vocabulary(vocab.counts, unk_cutoff=1) + >>> "b" in vocab2 + True + + The cutoff value influences not only membership checking but also the result of + getting the size of the vocabulary using the built-in `len`. + Note that while the number of keys in the vocabulary's counter stays the same, + the items in the vocabulary differ depending on the cutoff. + We use `sorted` to demonstrate because it keeps the order consistent. + + >>> sorted(vocab2.counts) + ['-', 'a', 'b', 'c', 'd', 'r'] + >>> sorted(vocab2) + ['-', '', 'a', 'b', 'c', 'd', 'r'] + >>> sorted(vocab.counts) + ['-', 'a', 'b', 'c', 'd', 'r'] + >>> sorted(vocab) + ['', 'a', 'c', 'd'] + + In addition to items it gets populated with, the vocabulary stores a special + token that stands in for so-called "unknown" items. By default it's "". + + >>> "" in vocab + True + + We can look up words in a vocabulary using its `lookup` method. + "Unseen" words (with counts less than cutoff) are looked up as the unknown label. + If given one word (a string) as an input, this method will return a string. + + >>> vocab.lookup("a") + 'a' + >>> vocab.lookup("aliens") + '' + + If given a sequence, it will return an tuple of the looked up words. + + >>> vocab.lookup(["p", 'a', 'r', 'd', 'b', 'c']) + ('', 'a', '', 'd', '', 'c') + + It's possible to update the counts after the vocabulary has been created. + The interface follows that of `collections.Counter`. + + >>> vocab['b'] + 1 + >>> vocab.update(["b", "b", "c"]) + >>> vocab['b'] + 3 + """ + + def __init__(self, counts=None, unk_cutoff=1, unk_label=""): + """Create a new Vocabulary. + + :param counts: Optional iterable or `collections.Counter` instance to + pre-seed the Vocabulary. In case it is iterable, counts + are calculated. + :param int unk_cutoff: Words that occur less frequently than this value + are not considered part of the vocabulary. + :param unk_label: Label for marking words not part of vocabulary. 
+ + """ + if isinstance(counts, Counter): + self.counts = counts + else: + self.counts = Counter() + if isinstance(counts, Iterable): + self.counts.update(counts) + self.unk_label = unk_label + if unk_cutoff < 1: + raise ValueError( + "Cutoff value cannot be less than 1. Got: {0}".format(unk_cutoff) + ) + self._cutoff = unk_cutoff + + @property + def cutoff(self): + """Cutoff value. + + Items with count below this value are not considered part of vocabulary. + + """ + return self._cutoff + + def update(self, *counter_args, **counter_kwargs): + """Update vocabulary counts. + + Wraps `collections.Counter.update` method. + + """ + self.counts.update(*counter_args, **counter_kwargs) + + def lookup(self, words): + """Look up one or more words in the vocabulary. + + If passed one word as a string will return that word or `self.unk_label`. + Otherwise will assume it was passed a sequence of words, will try to look + each of them up and return an iterator over the looked up words. + + :param words: Word(s) to look up. + :type words: Iterable(str) or str + :rtype: generator(str) or str + :raises: TypeError for types other than strings or iterables + + >>> from nltk.lm import Vocabulary + >>> vocab = Vocabulary(["a", "b", "c", "a", "b"], unk_cutoff=2) + >>> vocab.lookup("a") + 'a' + >>> vocab.lookup("aliens") + '' + >>> vocab.lookup(["a", "b", "c", ["x", "b"]]) + ('a', 'b', '', ('', 'b')) + + """ + return _dispatched_lookup(words, self) + + def __getitem__(self, item): + return self._cutoff if item == self.unk_label else self.counts[item] + + def __contains__(self, item): + """Only consider items with counts GE to cutoff as being in the + vocabulary.""" + return self[item] >= self.cutoff + + def __iter__(self): + """Building on membership check define how to iterate over + vocabulary.""" + return chain( + (item for item in self.counts if item in self), + [self.unk_label] if self.counts else [], + ) + + def __len__(self): + """Computing size of vocabulary reflects the cutoff.""" + return sum(1 for _ in self) + + def __eq__(self, other): + return ( + self.unk_label == other.unk_label + and self.cutoff == other.cutoff + and self.counts == other.counts + ) + + if sys.version_info[0] == 2: + # see https://stackoverflow.com/a/35781654/4501212 + def __ne__(self, other): + equal = self.__eq__(other) + return equal if equal is NotImplemented else not equal + + def __str__(self): + return "<{0} with cutoff={1} unk_label='{2}' and {3} items>".format( + self.__class__.__name__, self.cutoff, self.unk_label, len(self) + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__init__.py new file mode 100644 index 0000000..bb1f7cf --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__init__.py @@ -0,0 +1,51 @@ +# Natural Language Toolkit: Metrics +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT +# + +""" +NLTK Metrics + +Classes and methods for scoring processing modules. 
+""" + +from nltk.metrics.scores import ( + accuracy, + precision, + recall, + f_measure, + log_likelihood, + approxrand, +) +from nltk.metrics.confusionmatrix import ConfusionMatrix +from nltk.metrics.distance import ( + edit_distance, + edit_distance_align, + binary_distance, + jaccard_distance, + masi_distance, + interval_distance, + custom_distance, + presence, + fractional_presence, +) +from nltk.metrics.paice import Paice +from nltk.metrics.segmentation import windowdiff, ghd, pk +from nltk.metrics.agreement import AnnotationTask +from nltk.metrics.association import ( + NgramAssocMeasures, + BigramAssocMeasures, + TrigramAssocMeasures, + QuadgramAssocMeasures, + ContingencyMeasures, +) +from nltk.metrics.spearman import ( + spearman_correlation, + ranks_from_sequence, + ranks_from_scores, +) +from nltk.metrics.aline import align diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..b8344b7 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/agreement.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/agreement.cpython-37.pyc new file mode 100644 index 0000000..3e80bfb Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/agreement.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/aline.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/aline.cpython-37.pyc new file mode 100644 index 0000000..e2e69cb Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/aline.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/association.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/association.cpython-37.pyc new file mode 100644 index 0000000..3d1d368 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/association.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/confusionmatrix.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/confusionmatrix.cpython-37.pyc new file mode 100644 index 0000000..fdc1bb6 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/confusionmatrix.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/distance.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/distance.cpython-37.pyc new file mode 100644 index 0000000..cd49cd2 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/distance.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/paice.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/paice.cpython-37.pyc new file mode 100644 index 0000000..437362b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/paice.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/scores.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/scores.cpython-37.pyc new file mode 100644 index 
0000000..acdf332 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/scores.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/segmentation.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/segmentation.cpython-37.pyc new file mode 100644 index 0000000..50c22e0 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/segmentation.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/spearman.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/spearman.cpython-37.pyc new file mode 100644 index 0000000..362b189 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/metrics/__pycache__/spearman.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/agreement.py b/venv.bak/lib/python3.7/site-packages/nltk/metrics/agreement.py new file mode 100644 index 0000000..445a87f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/metrics/agreement.py @@ -0,0 +1,495 @@ +# Natural Language Toolkit: Agreement Metrics +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Tom Lippincott +# URL: +# For license information, see LICENSE.TXT +# + +""" +Implementations of inter-annotator agreement coefficients surveyed by Artstein +and Poesio (2007), Inter-Coder Agreement for Computational Linguistics. + +An agreement coefficient calculates the amount that annotators agreed on label +assignments beyond what is expected by chance. + +In defining the AnnotationTask class, we use naming conventions similar to the +paper's terminology. There are three types of objects in an annotation task: + + the coders (variables "c" and "C") + the items to be annotated (variables "i" and "I") + the potential categories to be assigned (variables "k" and "K") + +Additionally, it is often the case that we don't want to treat two different +labels as complete disagreement, and so the AnnotationTask constructor can also +take a distance metric as a final argument. Distance metrics are simply +functions that take two arguments, and return a value between 0.0 and 1.0 +indicating the distance between them. If not supplied, the default is binary +comparison between the arguments. + +The simplest way to initialize an AnnotationTask is with a list of triples, +each containing a coder's assignment for one object in the task: + + task = AnnotationTask(data=[('c1', '1', 'v1'),('c2', '1', 'v1'),...]) + +Note that the data list needs to contain the same number of triples for each +individual coder, containing category values for the same set of items. + +Alpha (Krippendorff 1980) +Kappa (Cohen 1960) +S (Bennet, Albert and Goldstein 1954) +Pi (Scott 1955) + + +TODO: Describe handling of multiple coders and missing data + +Expected results from the Artstein and Poesio survey paper: + + >>> from nltk.metrics.agreement import AnnotationTask + >>> import os.path + >>> t = AnnotationTask(data=[x.split() for x in open(os.path.join(os.path.dirname(__file__), "artstein_poesio_example.txt"))]) + >>> t.avg_Ao() + 0.88 + >>> t.pi() + 0.7995322418977615... + >>> t.S() + 0.8199999999999998... + + This would have returned a wrong value (0.0) in @785fb79 as coders are in + the wrong order. Subsequently, all values for pi(), S(), and kappa() would + have been wrong as they are computed with avg_Ao(). 
+ >>> t2 = AnnotationTask(data=[('b','1','stat'),('a','1','stat')]) + >>> t2.avg_Ao() + 1.0 + + The following, of course, also works. + >>> t3 = AnnotationTask(data=[('a','1','othr'),('b','1','othr')]) + >>> t3.avg_Ao() + 1.0 + +""" +from __future__ import print_function, unicode_literals, division + +import logging +from itertools import groupby +from operator import itemgetter + +from six import iteritems + +from nltk.probability import FreqDist, ConditionalFreqDist +from nltk.internals import deprecated +from nltk.compat import python_2_unicode_compatible + +from nltk.metrics.distance import binary_distance + +log = logging.getLogger(__name__) + + +@python_2_unicode_compatible +class AnnotationTask(object): + """Represents an annotation task, i.e. people assign labels to items. + + Notation tries to match notation in Artstein and Poesio (2007). + + In general, coders and items can be represented as any hashable object. + Integers, for example, are fine, though strings are more readable. + Labels must support the distance functions applied to them, so e.g. + a string-edit-distance makes no sense if your labels are integers, + whereas interval distance needs numeric values. A notable case of this + is the MASI metric, which requires Python sets. + """ + + def __init__(self, data=None, distance=binary_distance): + """Initialize an annotation task. + + The data argument can be None (to create an empty annotation task) or a sequence of 3-tuples, + each representing a coder's labeling of an item: + (coder,item,label) + + The distance argument is a function taking two arguments (labels) and producing a numerical distance. + The distance from a label to itself should be zero: + distance(l,l) = 0 + """ + self.distance = distance + self.I = set() + self.K = set() + self.C = set() + self.data = [] + if data is not None: + self.load_array(data) + + def __str__(self): + return "\r\n".join( + map( + lambda x: "%s\t%s\t%s" + % (x['coder'], x['item'].replace('_', "\t"), ",".join(x['labels'])), + self.data, + ) + ) + + def load_array(self, array): + """Load an sequence of annotation results, appending to any data already loaded. + + The argument is a sequence of 3-tuples, each representing a coder's labeling of an item: + (coder,item,label) + """ + for coder, item, labels in array: + self.C.add(coder) + self.K.add(labels) + self.I.add(item) + self.data.append({'coder': coder, 'labels': labels, 'item': item}) + + def agr(self, cA, cB, i, data=None): + """Agreement between two coders on a given item + + """ + data = data or self.data + # cfedermann: we don't know what combination of coder/item will come + # first in x; to avoid StopIteration problems due to assuming an order + # cA,cB, we allow either for k1 and then look up the missing as k2. 
+ k1 = next((x for x in data if x['coder'] in (cA, cB) and x['item'] == i)) + if k1['coder'] == cA: + k2 = next((x for x in data if x['coder'] == cB and x['item'] == i)) + else: + k2 = next((x for x in data if x['coder'] == cA and x['item'] == i)) + + ret = 1.0 - float(self.distance(k1['labels'], k2['labels'])) + log.debug("Observed agreement between %s and %s on %s: %f", cA, cB, i, ret) + log.debug( + "Distance between \"%r\" and \"%r\": %f", + k1['labels'], + k2['labels'], + 1.0 - ret, + ) + return ret + + def Nk(self, k): + return float(sum(1 for x in self.data if x['labels'] == k)) + + def Nik(self, i, k): + return float(sum(1 for x in self.data if x['item'] == i and x['labels'] == k)) + + def Nck(self, c, k): + return float(sum(1 for x in self.data if x['coder'] == c and x['labels'] == k)) + + @deprecated('Use Nk, Nik or Nck instead') + def N(self, k=None, i=None, c=None): + """Implements the "n-notation" used in Artstein and Poesio (2007) + + """ + if k is not None and i is None and c is None: + ret = self.Nk(k) + elif k is not None and i is not None and c is None: + ret = self.Nik(i, k) + elif k is not None and c is not None and i is None: + ret = self.Nck(c, k) + else: + raise ValueError( + "You must pass either i or c, not both! (k=%r,i=%r,c=%r)" % (k, i, c) + ) + log.debug("Count on N[%s,%s,%s]: %d", k, i, c, ret) + return ret + + def _grouped_data(self, field, data=None): + data = data or self.data + return groupby(sorted(data, key=itemgetter(field)), itemgetter(field)) + + def Ao(self, cA, cB): + """Observed agreement between two coders on all items. + + """ + data = self._grouped_data( + 'item', (x for x in self.data if x['coder'] in (cA, cB)) + ) + ret = sum(self.agr(cA, cB, item, item_data) for item, item_data in data) / len( + self.I + ) + log.debug("Observed agreement between %s and %s: %f", cA, cB, ret) + return ret + + def _pairwise_average(self, function): + """ + Calculates the average of function results for each coder pair + """ + total = 0 + n = 0 + s = self.C.copy() + for cA in self.C: + s.remove(cA) + for cB in s: + total += function(cA, cB) + n += 1 + ret = total / n + return ret + + def avg_Ao(self): + """Average observed agreement across all coders and items. + + """ + ret = self._pairwise_average(self.Ao) + log.debug("Average observed agreement: %f", ret) + return ret + + def Do_Kw_pairwise(self, cA, cB, max_distance=1.0): + """The observed disagreement for the weighted kappa coefficient. + + """ + total = 0.0 + data = (x for x in self.data if x['coder'] in (cA, cB)) + for i, itemdata in self._grouped_data('item', data): + # we should have two items; distance doesn't care which comes first + total += self.distance(next(itemdata)['labels'], next(itemdata)['labels']) + + ret = total / (len(self.I) * max_distance) + log.debug("Observed disagreement between %s and %s: %f", cA, cB, ret) + return ret + + def Do_Kw(self, max_distance=1.0): + """Averaged over all labelers + + """ + ret = self._pairwise_average( + lambda cA, cB: self.Do_Kw_pairwise(cA, cB, max_distance) + ) + log.debug("Observed disagreement: %f", ret) + return ret + + # Agreement Coefficients + def S(self): + """Bennett, Albert and Goldstein 1954 + + """ + Ae = 1.0 / len(self.K) + ret = (self.avg_Ao() - Ae) / (1.0 - Ae) + return ret + + def pi(self): + """Scott 1955; here, multi-pi. + Equivalent to K from Siegel and Castellan (1988). 
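    A hedged toy example (made-up labels, not from the survey data): two coders
    label two items and disagree on one, so observed agreement is 0.5; with two
    labels in play, the uniform chance level assumed by S() above is also 0.5.

    >>> toy = AnnotationTask(data=[('c1', 'i1', 'pos'), ('c2', 'i1', 'pos'),
    ...     ('c1', 'i2', 'pos'), ('c2', 'i2', 'neg')])
    >>> toy.avg_Ao()
    0.5
    >>> toy.S()
    0.0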
+ + """ + total = 0.0 + label_freqs = FreqDist(x['labels'] for x in self.data) + for k, f in iteritems(label_freqs): + total += f ** 2 + Ae = total / ((len(self.I) * len(self.C)) ** 2) + return (self.avg_Ao() - Ae) / (1 - Ae) + + def Ae_kappa(self, cA, cB): + Ae = 0.0 + nitems = float(len(self.I)) + label_freqs = ConditionalFreqDist((x['labels'], x['coder']) for x in self.data) + for k in label_freqs.conditions(): + Ae += (label_freqs[k][cA] / nitems) * (label_freqs[k][cB] / nitems) + return Ae + + def kappa_pairwise(self, cA, cB): + """ + + """ + Ae = self.Ae_kappa(cA, cB) + ret = (self.Ao(cA, cB) - Ae) / (1.0 - Ae) + log.debug("Expected agreement between %s and %s: %f", cA, cB, Ae) + return ret + + def kappa(self): + """Cohen 1960 + Averages naively over kappas for each coder pair. + + """ + return self._pairwise_average(self.kappa_pairwise) + + def multi_kappa(self): + """Davies and Fleiss 1982 + Averages over observed and expected agreements for each coder pair. + + """ + Ae = self._pairwise_average(self.Ae_kappa) + return (self.avg_Ao() - Ae) / (1.0 - Ae) + + def Disagreement(self, label_freqs): + total_labels = sum(label_freqs.values()) + pairs = 0.0 + for j, nj in iteritems(label_freqs): + for l, nl in iteritems(label_freqs): + pairs += float(nj * nl) * self.distance(l, j) + return 1.0 * pairs / (total_labels * (total_labels - 1)) + + def alpha(self): + """Krippendorff 1980 + + """ + # check for degenerate cases + if len(self.K) == 0: + raise ValueError("Cannot calculate alpha, no data present!") + if len(self.K) == 1: + log.debug("Only one annotation value, allpha returning 1.") + return 1 + if len(self.C) == 1 and len(self.I) == 1: + raise ValueError("Cannot calculate alpha, only one coder and item present!") + + total_disagreement = 0.0 + total_ratings = 0 + all_valid_labels_freq = FreqDist([]) + + total_do = 0.0 # Total observed disagreement for all items. + for i, itemdata in self._grouped_data('item'): + label_freqs = FreqDist(x['labels'] for x in itemdata) + labels_count = sum(label_freqs.values()) + if labels_count < 2: + # Ignore the item. + continue + all_valid_labels_freq += label_freqs + total_do += self.Disagreement(label_freqs) * labels_count + + do = total_do / sum(all_valid_labels_freq.values()) + + de = self.Disagreement(all_valid_labels_freq) # Expected disagreement. 
+ k_alpha = 1.0 - do / de + + return k_alpha + + def weighted_kappa_pairwise(self, cA, cB, max_distance=1.0): + """Cohen 1968 + + """ + total = 0.0 + label_freqs = ConditionalFreqDist( + (x['coder'], x['labels']) for x in self.data if x['coder'] in (cA, cB) + ) + for j in self.K: + for l in self.K: + total += label_freqs[cA][j] * label_freqs[cB][l] * self.distance(j, l) + De = total / (max_distance * pow(len(self.I), 2)) + log.debug("Expected disagreement between %s and %s: %f", cA, cB, De) + Do = self.Do_Kw_pairwise(cA, cB) + ret = 1.0 - (Do / De) + return ret + + def weighted_kappa(self, max_distance=1.0): + """Cohen 1968 + + """ + return self._pairwise_average( + lambda cA, cB: self.weighted_kappa_pairwise(cA, cB, max_distance) + ) + + +if __name__ == '__main__': + + import re + import optparse + from nltk.metrics import distance + + # process command-line arguments + parser = optparse.OptionParser() + parser.add_option( + "-d", + "--distance", + dest="distance", + default="binary_distance", + help="distance metric to use", + ) + parser.add_option( + "-a", + "--agreement", + dest="agreement", + default="kappa", + help="agreement coefficient to calculate", + ) + parser.add_option( + "-e", + "--exclude", + dest="exclude", + action="append", + default=[], + help="coder names to exclude (may be specified multiple times)", + ) + parser.add_option( + "-i", + "--include", + dest="include", + action="append", + default=[], + help="coder names to include, same format as exclude", + ) + parser.add_option( + "-f", + "--file", + dest="file", + help="file to read labelings from, each line with three columns: 'labeler item labels'", + ) + parser.add_option( + "-v", + "--verbose", + dest="verbose", + default='0', + help="how much debugging to print on stderr (0-4)", + ) + parser.add_option( + "-c", + "--columnsep", + dest="columnsep", + default="\t", + help="char/string that separates the three columns in the file, defaults to tab", + ) + parser.add_option( + "-l", + "--labelsep", + dest="labelsep", + default=",", + help="char/string that separates labels (if labelers can assign more than one), defaults to comma", + ) + parser.add_option( + "-p", + "--presence", + dest="presence", + default=None, + help="convert each labeling into 1 or 0, based on presence of LABEL", + ) + parser.add_option( + "-T", + "--thorough", + dest="thorough", + default=False, + action="store_true", + help="calculate agreement for every subset of the annotators", + ) + (options, remainder) = parser.parse_args() + + if not options.file: + parser.print_help() + exit() + + logging.basicConfig(level=50 - 10 * int(options.verbose)) + + # read in data from the specified file + data = [] + with open(options.file, 'r') as infile: + for l in infile: + toks = l.split(options.columnsep) + coder, object_, labels = ( + toks[0], + str(toks[1:-1]), + frozenset(toks[-1].strip().split(options.labelsep)), + ) + if ( + (options.include == options.exclude) + or (len(options.include) > 0 and coder in options.include) + or (len(options.exclude) > 0 and coder not in options.exclude) + ): + data.append((coder, object_, labels)) + + if options.presence: + task = AnnotationTask( + data, getattr(distance, options.distance)(options.presence) + ) + else: + task = AnnotationTask(data, getattr(distance, options.distance)) + + if options.thorough: + pass + else: + print(getattr(task, options.agreement)()) + + logging.shutdown() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/aline.py b/venv.bak/lib/python3.7/site-packages/nltk/metrics/aline.py 
new file mode 100644 index 0000000..3b8cba5 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/metrics/aline.py @@ -0,0 +1,1356 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: ALINE +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Greg Kondrak +# Geoff Bacon (Python port) +# URL: +# For license information, see LICENSE.TXT + +""" +ALINE +http://webdocs.cs.ualberta.ca/~kondrak/ +Copyright 2002 by Grzegorz Kondrak. + +ALINE is an algorithm for aligning phonetic sequences, described in [1]. +This module is a port of Kondrak's (2002) ALINE. It provides functions for +phonetic sequence alignment and similarity analysis. These are useful in +historical linguistics, sociolinguistics and synchronic phonology. + +ALINE has parameters that can be tuned for desired output. These parameters are: +- C_skip, C_sub, C_exp, C_vwl +- Salience weights +- Segmental features + +In this implementation, some parameters have been changed from their default +values as described in [1], in order to replicate published results. All changes +are noted in comments. + +Example usage +------------- + +# Get optimal alignment of two phonetic sequences + +>>> align('θin', 'tenwis') # doctest: +SKIP +[[('θ', 't'), ('i', 'e'), ('n', 'n'), ('-', 'w'), ('-', 'i'), ('-', 's')]] + +[1] G. Kondrak. Algorithms for Language Reconstruction. PhD dissertation, +University of Toronto. +""" + +from __future__ import unicode_literals + +try: + import numpy as np +except ImportError: + np = None + +# === Constants === + +inf = float('inf') + +# Default values for maximum similarity scores (Kondrak 2002: 54) +C_skip = 10 # Indels +C_sub = 35 # Substitutions +C_exp = 45 # Expansions/compressions +C_vwl = 5 # Vowel/consonant relative weight (decreased from 10) + +consonants = [ + 'B', + 'N', + 'R', + 'b', + 'c', + 'd', + 'f', + 'g', + 'h', + 'j', + 'k', + 'l', + 'm', + 'n', + 'p', + 'q', + 'r', + 's', + 't', + 'v', + 'x', + 'z', + 'ç', + 'ð', + 'ħ', + 'ŋ', + 'ɖ', + 'ɟ', + 'ɢ', + 'ɣ', + 'ɦ', + 'ɬ', + 'ɮ', + 'ɰ', + 'ɱ', + 'ɲ', + 'ɳ', + 'ɴ', + 'ɸ', + 'ɹ', + 'ɻ', + 'ɽ', + 'ɾ', + 'ʀ', + 'ʁ', + 'ʂ', + 'ʃ', + 'ʈ', + 'ʋ', + 'ʐ ', + 'ʒ', + 'ʔ', + 'ʕ', + 'ʙ', + 'ʝ', + 'β', + 'θ', + 'χ', + 'ʐ', + 'w', +] + +# Relevant features for comparing consonants and vowels +R_c = [ + 'aspirated', + 'lateral', + 'manner', + 'nasal', + 'place', + 'retroflex', + 'syllabic', + 'voice', +] +# 'high' taken out of R_v because same as manner +R_v = [ + 'back', + 'lateral', + 'long', + 'manner', + 'nasal', + 'place', + 'retroflex', + 'round', + 'syllabic', + 'voice', +] + +# Flattened feature matrix (Kondrak 2002: 56) +similarity_matrix = { + # place + 'bilabial': 1.0, + 'labiodental': 0.95, + 'dental': 0.9, + 'alveolar': 0.85, + 'retroflex': 0.8, + 'palato-alveolar': 0.75, + 'palatal': 0.7, + 'velar': 0.6, + 'uvular': 0.5, + 'pharyngeal': 0.3, + 'glottal': 0.1, + 'labiovelar': 1.0, + 'vowel': -1.0, # added 'vowel' + # manner + 'stop': 1.0, + 'affricate': 0.9, + 'fricative': 0.85, # increased fricative from 0.8 + 'trill': 0.7, + 'tap': 0.65, + 'approximant': 0.6, + 'high vowel': 0.4, + 'mid vowel': 0.2, + 'low vowel': 0.0, + 'vowel2': 0.5, # added vowel + # high + 'high': 1.0, + 'mid': 0.5, + 'low': 0.0, + # back + 'front': 1.0, + 'central': 0.5, + 'back': 0.0, + # binary features + 'plus': 1.0, + 'minus': 0.0, +} + +# Relative weights of phonetic features (Kondrak 2002: 55) +salience = { + 'syllabic': 5, + 'place': 40, + 'manner': 50, + 'voice': 5, # decreased from 10 + 'nasal': 20, # increased from 10 + 'retroflex': 10, + 'lateral': 10, + 
'aspirated': 5, + 'long': 0, # decreased from 1 + 'high': 3, # decreased from 5 + 'back': 2, # decreased from 5 + 'round': 2, # decreased from 5 +} + +# (Kondrak 2002: 59-60) +feature_matrix = { + # Consonants + 'p': { + 'place': 'bilabial', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'b': { + 'place': 'bilabial', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 't': { + 'place': 'alveolar', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'd': { + 'place': 'alveolar', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʈ': { + 'place': 'retroflex', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'plus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɖ': { + 'place': 'retroflex', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'plus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'c': { + 'place': 'palatal', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɟ': { + 'place': 'palatal', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'k': { + 'place': 'velar', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'g': { + 'place': 'velar', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'q': { + 'place': 'uvular', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɢ': { + 'place': 'uvular', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʔ': { + 'place': 'glottal', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'm': { + 'place': 'bilabial', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'plus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɱ': { + 'place': 'labiodental', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'plus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'n': { + 'place': 'alveolar', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'plus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɳ': { + 'place': 'retroflex', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'plus', + 'retroflex': 'plus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɲ': { + 'place': 'palatal', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'plus', + 'retroflex': 'minus', + 
'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ŋ': { + 'place': 'velar', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'plus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɴ': { + 'place': 'uvular', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'plus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'N': { + 'place': 'uvular', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'plus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʙ': { + 'place': 'bilabial', + 'manner': 'trill', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'B': { + 'place': 'bilabial', + 'manner': 'trill', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'r': { + 'place': 'alveolar', + 'manner': 'trill', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'plus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʀ': { + 'place': 'uvular', + 'manner': 'trill', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'R': { + 'place': 'uvular', + 'manner': 'trill', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɾ': { + 'place': 'alveolar', + 'manner': 'tap', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɽ': { + 'place': 'retroflex', + 'manner': 'tap', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'plus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɸ': { + 'place': 'bilabial', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'β': { + 'place': 'bilabial', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'f': { + 'place': 'labiodental', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'v': { + 'place': 'labiodental', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'θ': { + 'place': 'dental', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ð': { + 'place': 'dental', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 's': { + 'place': 'alveolar', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'z': { + 'place': 'alveolar', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʃ': { + 'place': 'palato-alveolar', + 'manner': 'fricative', + 
'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʒ': { + 'place': 'palato-alveolar', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʂ': { + 'place': 'retroflex', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'plus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʐ': { + 'place': 'retroflex', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'plus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ç': { + 'place': 'palatal', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʝ': { + 'place': 'palatal', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'x': { + 'place': 'velar', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɣ': { + 'place': 'velar', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'χ': { + 'place': 'uvular', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʁ': { + 'place': 'uvular', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ħ': { + 'place': 'pharyngeal', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʕ': { + 'place': 'pharyngeal', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'h': { + 'place': 'glottal', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɦ': { + 'place': 'glottal', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɬ': { + 'place': 'alveolar', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'plus', + 'aspirated': 'minus', + }, + 'ɮ': { + 'place': 'alveolar', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'plus', + 'aspirated': 'minus', + }, + 'ʋ': { + 'place': 'labiodental', + 'manner': 'approximant', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɹ': { + 'place': 'alveolar', + 'manner': 'approximant', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɻ': { + 'place': 'retroflex', + 'manner': 'approximant', + 'syllabic': 'minus', + 'voice': 'plus', + 
'nasal': 'minus', + 'retroflex': 'plus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'j': { + 'place': 'palatal', + 'manner': 'approximant', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɰ': { + 'place': 'velar', + 'manner': 'approximant', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'l': { + 'place': 'alveolar', + 'manner': 'approximant', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'plus', + 'aspirated': 'minus', + }, + 'w': { + 'place': 'labiovelar', + 'manner': 'approximant', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + # Vowels + 'i': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'high', + 'back': 'front', + 'round': 'minus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'y': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'high', + 'back': 'front', + 'round': 'plus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'e': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'front', + 'round': 'minus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'E': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'front', + 'round': 'minus', + 'long': 'plus', + 'aspirated': 'minus', + }, + 'ø': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'front', + 'round': 'plus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'ɛ': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'front', + 'round': 'minus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'œ': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'front', + 'round': 'plus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'æ': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'low', + 'back': 'front', + 'round': 'minus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'a': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'low', + 'back': 'front', + 'round': 'minus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'A': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'low', + 'back': 'front', + 'round': 'minus', + 'long': 'plus', + 'aspirated': 'minus', + }, + 'ɨ': { + 'place': 'vowel', + 'manner': 'vowel2', + 
'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'high', + 'back': 'central', + 'round': 'minus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'ʉ': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'high', + 'back': 'central', + 'round': 'plus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'ə': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'central', + 'round': 'minus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'u': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'high', + 'back': 'back', + 'round': 'plus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'U': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'high', + 'back': 'back', + 'round': 'plus', + 'long': 'plus', + 'aspirated': 'minus', + }, + 'o': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'back', + 'round': 'plus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'O': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'back', + 'round': 'plus', + 'long': 'plus', + 'aspirated': 'minus', + }, + 'ɔ': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'back', + 'round': 'plus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'ɒ': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'low', + 'back': 'back', + 'round': 'minus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'I': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'high', + 'back': 'front', + 'round': 'minus', + 'long': 'plus', + 'aspirated': 'minus', + }, +} + +# === Algorithm === + + +def align(str1, str2, epsilon=0): + """ + Compute the alignment of two phonetic strings. + + :type str1, str2: str + :param str1, str2: Two strings to be aligned + :type epsilon: float (0.0 to 1.0) + :param epsilon: Adjusts threshold similarity score for near-optimal alignments + + :rtpye: list(list(tuple(str, str))) + :return: Alignment(s) of str1 and str2 + + (Kondrak 2002: 51) + """ + if np is None: + raise ImportError('You need numpy in order to use the align function') + + assert 0.0 <= epsilon <= 1.0, "Epsilon must be between 0.0 and 1.0." + m = len(str1) + n = len(str2) + # This includes Kondrak's initialization of row 0 and column 0 to all 0s. + S = np.zeros((m + 1, n + 1), dtype=float) + + # If i <= 1 or j <= 1, don't allow expansions as it doesn't make sense, + # and breaks array and string indices. Make sure they never get chosen + # by setting them to -inf. 
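    # Dynamic-programming fill: each cell keeps the best of five local operations --
    # skipping a segment of either string (edit1/edit2), aligning one segment against
    # one (edit3), or expanding/compressing one segment against two (edit4/edit5) --
    # and is floored at 0, which is what makes the alignment local.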
+ for i in range(1, m + 1): + for j in range(1, n + 1): + edit1 = S[i - 1, j] + sigma_skip(str1[i - 1]) + edit2 = S[i, j - 1] + sigma_skip(str2[j - 1]) + edit3 = S[i - 1, j - 1] + sigma_sub(str1[i - 1], str2[j - 1]) + if i > 1: + edit4 = S[i - 2, j - 1] + sigma_exp(str2[j - 1], str1[i - 2 : i]) + else: + edit4 = -inf + if j > 1: + edit5 = S[i - 1, j - 2] + sigma_exp(str1[i - 1], str2[j - 2 : j]) + else: + edit5 = -inf + S[i, j] = max(edit1, edit2, edit3, edit4, edit5, 0) + + T = (1 - epsilon) * np.amax(S) # Threshold score for near-optimal alignments + + alignments = [] + for i in range(1, m + 1): + for j in range(1, n + 1): + if S[i, j] >= T: + alignments.append(_retrieve(i, j, 0, S, T, str1, str2, [])) + return alignments + + +def _retrieve(i, j, s, S, T, str1, str2, out): + """ + Retrieve the path through the similarity matrix S starting at (i, j). + + :rtype: list(tuple(str, str)) + :return: Alignment of str1 and str2 + """ + if S[i, j] == 0: + return out + else: + if j > 1 and S[i - 1, j - 2] + sigma_exp(str1[i - 1], str2[j - 2 : j]) + s >= T: + out.insert(0, (str1[i - 1], str2[j - 2 : j])) + _retrieve( + i - 1, + j - 2, + s + sigma_exp(str1[i - 1], str2[j - 2 : j]), + S, + T, + str1, + str2, + out, + ) + elif ( + i > 1 and S[i - 2, j - 1] + sigma_exp(str2[j - 1], str1[i - 2 : i]) + s >= T + ): + out.insert(0, (str1[i - 2 : i], str2[j - 1])) + _retrieve( + i - 2, + j - 1, + s + sigma_exp(str2[j - 1], str1[i - 2 : i]), + S, + T, + str1, + str2, + out, + ) + elif S[i, j - 1] + sigma_skip(str2[j - 1]) + s >= T: + out.insert(0, ('-', str2[j - 1])) + _retrieve(i, j - 1, s + sigma_skip(str2[j - 1]), S, T, str1, str2, out) + elif S[i - 1, j] + sigma_skip(str1[i - 1]) + s >= T: + out.insert(0, (str1[i - 1], '-')) + _retrieve(i - 1, j, s + sigma_skip(str1[i - 1]), S, T, str1, str2, out) + elif S[i - 1, j - 1] + sigma_sub(str1[i - 1], str2[j - 1]) + s >= T: + out.insert(0, (str1[i - 1], str2[j - 1])) + _retrieve( + i - 1, + j - 1, + s + sigma_sub(str1[i - 1], str2[j - 1]), + S, + T, + str1, + str2, + out, + ) + return out + + +def sigma_skip(p): + """ + Returns score of an indel of P. + + (Kondrak 2002: 54) + """ + return C_skip + + +def sigma_sub(p, q): + """ + Returns score of a substitution of P with Q. + + (Kondrak 2002: 54) + """ + return C_sub - delta(p, q) - V(p) - V(q) + + +def sigma_exp(p, q): + """ + Returns score of an expansion/compression. + + (Kondrak 2002: 54) + """ + q1 = q[0] + q2 = q[1] + return C_exp - delta(p, q1) - delta(p, q2) - V(p) - max(V(q1), V(q2)) + + +def delta(p, q): + """ + Return weighted sum of difference between P and Q. + + (Kondrak 2002: 54) + """ + features = R(p, q) + total = 0 + for f in features: + total += diff(p, q, f) * salience[f] + return total + + +def diff(p, q, f): + """ + Returns difference between phonetic segments P and Q for feature F. + + (Kondrak 2002: 52, 54) + """ + p_features, q_features = feature_matrix[p], feature_matrix[q] + return abs(similarity_matrix[p_features[f]] - similarity_matrix[q_features[f]]) + + +def R(p, q): + """ + Return relevant features for segment comparsion. + + (Kondrak 2002: 54) + """ + if p in consonants or q in consonants: + return R_c + return R_v + + +def V(p): + """ + Return vowel weight if P is vowel. + + (Kondrak 2002: 54) + """ + if p in consonants: + return 0 + return C_vwl + + +# === Test === + + +def demo(): + """ + A demonstration of the result of aligning phonetic sequences + used in Kondrak's (2002) dissertation. 
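# Hedged worked example (values read off the tables above, checked by hand): 'p' and
# 'b' are both consonants and differ only in voicing, salience['voice'] is 5, so the
# substitution score is C_sub - delta - V(p) - V(q) = 35 - 5 - 0 - 0 = 30, while any
# indel costs the flat C_skip.
from nltk.metrics import aline

assert aline.delta('p', 'b') == 5
assert aline.sigma_sub('p', 'b') == 30
assert aline.sigma_skip('p') == aline.C_skip == 10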
+ """ + data = [pair.split(',') for pair in cognate_data.split('\n')] + for pair in data: + alignment = align(pair[0], pair[1])[0] + alignment = ['({}, {})'.format(a[0], a[1]) for a in alignment] + alignment = ' '.join(alignment) + print('{} ~ {} : {}'.format(pair[0], pair[1], alignment)) + + +cognate_data = """jo,ʒə +tu,ty +nosotros,nu +kjen,ki +ke,kwa +todos,tu +una,ən +dos,dø +tres,trwa +ombre,om +arbol,arbrə +pluma,plym +kabeθa,kap +boka,buʃ +pje,pje +koraθon,kœr +ber,vwar +benir,vənir +deθir,dir +pobre,povrə +ðis,dIzes +ðæt,das +wat,vas +nat,nixt +loŋ,laŋ +mæn,man +fleʃ,flajʃ +bləd,blyt +feðər,fEdər +hær,hAr +ir,Or +aj,awgə +nowz,nAzə +mawθ,munt +təŋ,tsuŋə +fut,fys +nij,knI +hænd,hant +hart,herts +livər,lEbər +ænd,ante +æt,ad +blow,flAre +ir,awris +ijt,edere +fiʃ,piʃkis +flow,fluere +staɾ,stella +ful,plenus +græs,gramen +hart,kordis +horn,korny +aj,ego +nij,genU +məðər,mAter +mawntən,mons +nejm,nomen +njuw,nowus +wən,unus +rawnd,rotundus +sow,suere +sit,sedere +θrij,tres +tuwθ,dentis +θin,tenwis +kinwawa,kenuaʔ +nina,nenah +napewa,napɛw +wapimini,wapemen +namesa,namɛʔs +okimawa,okemaw +ʃiʃipa,seʔsep +ahkohkwa,ahkɛh +pematesiweni,pematesewen +asenja,aʔsɛn""" + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/association.py b/venv.bak/lib/python3.7/site-packages/nltk/metrics/association.py new file mode 100644 index 0000000..4994f1f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/metrics/association.py @@ -0,0 +1,465 @@ +# Natural Language Toolkit: Ngram Association Measures +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Joel Nothman +# URL: +# For license information, see LICENSE.TXT + +""" +Provides scoring functions for a number of association measures through a +generic, abstract implementation in ``NgramAssocMeasures``, and n-specific +``BigramAssocMeasures`` and ``TrigramAssocMeasures``. +""" + +from __future__ import division + +import math as _math +from abc import ABCMeta, abstractmethod +from functools import reduce + +from six import add_metaclass + +_log2 = lambda x: _math.log(x, 2.0) +_ln = _math.log + +_product = lambda s: reduce(lambda x, y: x * y, s) + +_SMALL = 1e-20 + +try: + from scipy.stats import fisher_exact +except ImportError: + + def fisher_exact(*_args, **_kwargs): + raise NotImplementedError + + +### Indices to marginals arguments: + +NGRAM = 0 +"""Marginals index for the ngram count""" + +UNIGRAMS = -2 +"""Marginals index for a tuple of each unigram count""" + +TOTAL = -1 +"""Marginals index for the number of words in the data""" + + +@add_metaclass(ABCMeta) +class NgramAssocMeasures(object): + """ + An abstract class defining a collection of generic association measures. + Each public method returns a score, taking the following arguments:: + + score_fn(count_of_ngram, + (count_of_n-1gram_1, ..., count_of_n-1gram_j), + (count_of_n-2gram_1, ..., count_of_n-2gram_k), + ..., + (count_of_1gram_1, ..., count_of_1gram_n), + count_of_total_words) + + See ``BigramAssocMeasures`` and ``TrigramAssocMeasures`` + + Inheriting classes should define a property _n, and a method _contingency + which calculates contingency values from marginals in order for all + association measures defined here to be usable. 
+ """ + + _n = 0 + + @staticmethod + @abstractmethod + def _contingency(*marginals): + """Calculates values of a contingency table from marginal values.""" + raise NotImplementedError( + "The contingency table is not available" "in the general ngram case" + ) + + @staticmethod + @abstractmethod + def _marginals(*contingency): + """Calculates values of contingency table marginals from its values.""" + raise NotImplementedError( + "The contingency table is not available" "in the general ngram case" + ) + + @classmethod + def _expected_values(cls, cont): + """Calculates expected values for a contingency table.""" + n_all = sum(cont) + bits = [1 << i for i in range(cls._n)] + + # For each contingency table cell + for i in range(len(cont)): + # Yield the expected value + yield ( + _product( + sum(cont[x] for x in range(2 ** cls._n) if (x & j) == (i & j)) + for j in bits + ) + / (n_all ** (cls._n - 1)) + ) + + @staticmethod + def raw_freq(*marginals): + """Scores ngrams by their frequency""" + return marginals[NGRAM] / marginals[TOTAL] + + @classmethod + def student_t(cls, *marginals): + """Scores ngrams using Student's t test with independence hypothesis + for unigrams, as in Manning and Schutze 5.3.1. + """ + return ( + marginals[NGRAM] + - _product(marginals[UNIGRAMS]) / (marginals[TOTAL] ** (cls._n - 1)) + ) / (marginals[NGRAM] + _SMALL) ** 0.5 + + @classmethod + def chi_sq(cls, *marginals): + """Scores ngrams using Pearson's chi-square as in Manning and Schutze + 5.3.3. + """ + cont = cls._contingency(*marginals) + exps = cls._expected_values(cont) + return sum((obs - exp) ** 2 / (exp + _SMALL) for obs, exp in zip(cont, exps)) + + @staticmethod + def mi_like(*marginals, **kwargs): + """Scores ngrams using a variant of mutual information. The keyword + argument power sets an exponent (default 3) for the numerator. No + logarithm of the result is calculated. + """ + return marginals[NGRAM] ** kwargs.get('power', 3) / _product( + marginals[UNIGRAMS] + ) + + @classmethod + def pmi(cls, *marginals): + """Scores ngrams by pointwise mutual information, as in Manning and + Schutze 5.4. + """ + return _log2(marginals[NGRAM] * marginals[TOTAL] ** (cls._n - 1)) - _log2( + _product(marginals[UNIGRAMS]) + ) + + @classmethod + def likelihood_ratio(cls, *marginals): + """Scores ngrams using likelihood ratios as in Manning and Schutze 5.3.4. + """ + cont = cls._contingency(*marginals) + return cls._n * sum( + obs * _ln(obs / (exp + _SMALL) + _SMALL) + for obs, exp in zip(cont, cls._expected_values(cont)) + ) + + @classmethod + def poisson_stirling(cls, *marginals): + """Scores ngrams using the Poisson-Stirling measure.""" + exp = _product(marginals[UNIGRAMS]) / (marginals[TOTAL] ** (cls._n - 1)) + return marginals[NGRAM] * (_log2(marginals[NGRAM] / exp) - 1) + + @classmethod + def jaccard(cls, *marginals): + """Scores ngrams using the Jaccard index.""" + cont = cls._contingency(*marginals) + return cont[0] / sum(cont[:-1]) + + +class BigramAssocMeasures(NgramAssocMeasures): + """ + A collection of bigram association measures. Each association measure + is provided as a function with three arguments:: + + bigram_score_fn(n_ii, (n_ix, n_xi), n_xx) + + The arguments constitute the marginals of a contingency table, counting + the occurrences of particular events in a corpus. The letter i in the + suffix refers to the appearance of the word in question, while x indicates + the appearance of any word. Thus, for example: + + n_ii counts (w1, w2), i.e. 
the bigram being scored + n_ix counts (w1, *) + n_xi counts (*, w2) + n_xx counts (*, *), i.e. any bigram + + This may be shown with respect to a contingency table:: + + w1 ~w1 + ------ ------ + w2 | n_ii | n_oi | = n_xi + ------ ------ + ~w2 | n_io | n_oo | + ------ ------ + = n_ix TOTAL = n_xx + """ + + _n = 2 + + @staticmethod + def _contingency(n_ii, n_ix_xi_tuple, n_xx): + """Calculates values of a bigram contingency table from marginal values.""" + (n_ix, n_xi) = n_ix_xi_tuple + n_oi = n_xi - n_ii + n_io = n_ix - n_ii + return (n_ii, n_oi, n_io, n_xx - n_ii - n_oi - n_io) + + @staticmethod + def _marginals(n_ii, n_oi, n_io, n_oo): + """Calculates values of contingency table marginals from its values.""" + return (n_ii, (n_oi + n_ii, n_io + n_ii), n_oo + n_oi + n_io + n_ii) + + @staticmethod + def _expected_values(cont): + """Calculates expected values for a contingency table.""" + n_xx = sum(cont) + # For each contingency table cell + for i in range(4): + yield (cont[i] + cont[i ^ 1]) * (cont[i] + cont[i ^ 2]) / n_xx + + @classmethod + def phi_sq(cls, *marginals): + """Scores bigrams using phi-square, the square of the Pearson correlation + coefficient. + """ + n_ii, n_io, n_oi, n_oo = cls._contingency(*marginals) + + return (n_ii * n_oo - n_io * n_oi) ** 2 / ( + (n_ii + n_io) * (n_ii + n_oi) * (n_io + n_oo) * (n_oi + n_oo) + ) + + @classmethod + def chi_sq(cls, n_ii, n_ix_xi_tuple, n_xx): + """Scores bigrams using chi-square, i.e. phi-sq multiplied by the number + of bigrams, as in Manning and Schutze 5.3.3. + """ + (n_ix, n_xi) = n_ix_xi_tuple + return n_xx * cls.phi_sq(n_ii, (n_ix, n_xi), n_xx) + + @classmethod + def fisher(cls, *marginals): + """Scores bigrams using Fisher's Exact Test (Pedersen 1996). Less + sensitive to small counts than PMI or Chi Sq, but also more expensive + to compute. Requires scipy. + """ + + n_ii, n_io, n_oi, n_oo = cls._contingency(*marginals) + + (odds, pvalue) = fisher_exact([[n_ii, n_io], [n_oi, n_oo]], alternative='less') + return pvalue + + @staticmethod + def dice(n_ii, n_ix_xi_tuple, n_xx): + """Scores bigrams using Dice's coefficient.""" + (n_ix, n_xi) = n_ix_xi_tuple + return 2 * n_ii / (n_ix + n_xi) + + +class TrigramAssocMeasures(NgramAssocMeasures): + """ + A collection of trigram association measures. Each association measure + is provided as a function with four arguments:: + + trigram_score_fn(n_iii, + (n_iix, n_ixi, n_xii), + (n_ixx, n_xix, n_xxi), + n_xxx) + + The arguments constitute the marginals of a contingency table, counting + the occurrences of particular events in a corpus. The letter i in the + suffix refers to the appearance of the word in question, while x indicates + the appearance of any word. Thus, for example: + n_iii counts (w1, w2, w3), i.e. the trigram being scored + n_ixx counts (w1, *, *) + n_xxx counts (*, *, *), i.e. any trigram + """ + + _n = 3 + + @staticmethod + def _contingency(n_iii, n_iix_tuple, n_ixx_tuple, n_xxx): + """Calculates values of a trigram contingency table (or cube) from + marginal values. 
+ >>> TrigramAssocMeasures._contingency(1, (1, 1, 1), (1, 73, 1), 2000) + (1, 0, 0, 0, 0, 72, 0, 1927) + """ + (n_iix, n_ixi, n_xii) = n_iix_tuple + (n_ixx, n_xix, n_xxi) = n_ixx_tuple + n_oii = n_xii - n_iii + n_ioi = n_ixi - n_iii + n_iio = n_iix - n_iii + n_ooi = n_xxi - n_iii - n_oii - n_ioi + n_oio = n_xix - n_iii - n_oii - n_iio + n_ioo = n_ixx - n_iii - n_ioi - n_iio + n_ooo = n_xxx - n_iii - n_oii - n_ioi - n_iio - n_ooi - n_oio - n_ioo + + return (n_iii, n_oii, n_ioi, n_ooi, n_iio, n_oio, n_ioo, n_ooo) + + @staticmethod + def _marginals(*contingency): + """Calculates values of contingency table marginals from its values. + >>> TrigramAssocMeasures._marginals(1, 0, 0, 0, 0, 72, 0, 1927) + (1, (1, 1, 1), (1, 73, 1), 2000) + """ + n_iii, n_oii, n_ioi, n_ooi, n_iio, n_oio, n_ioo, n_ooo = contingency + return ( + n_iii, + (n_iii + n_iio, n_iii + n_ioi, n_iii + n_oii), + ( + n_iii + n_ioi + n_iio + n_ioo, + n_iii + n_oii + n_iio + n_oio, + n_iii + n_oii + n_ioi + n_ooi, + ), + sum(contingency), + ) + + +class QuadgramAssocMeasures(NgramAssocMeasures): + """ + A collection of quadgram association measures. Each association measure + is provided as a function with five arguments:: + + trigram_score_fn(n_iiii, + (n_iiix, n_iixi, n_ixii, n_xiii), + (n_iixx, n_ixix, n_ixxi, n_xixi, n_xxii, n_xiix), + (n_ixxx, n_xixx, n_xxix, n_xxxi), + n_all) + + The arguments constitute the marginals of a contingency table, counting + the occurrences of particular events in a corpus. The letter i in the + suffix refers to the appearance of the word in question, while x indicates + the appearance of any word. Thus, for example: + n_iiii counts (w1, w2, w3, w4), i.e. the quadgram being scored + n_ixxi counts (w1, *, *, w4) + n_xxxx counts (*, *, *, *), i.e. any quadgram + """ + + _n = 4 + + @staticmethod + def _contingency(n_iiii, n_iiix_tuple, n_iixx_tuple, n_ixxx_tuple, n_xxxx): + """Calculates values of a quadgram contingency table from + marginal values. + """ + (n_iiix, n_iixi, n_ixii, n_xiii) = n_iiix_tuple + (n_iixx, n_ixix, n_ixxi, n_xixi, n_xxii, n_xiix) = n_iixx_tuple + (n_ixxx, n_xixx, n_xxix, n_xxxi) = n_ixxx_tuple + n_oiii = n_xiii - n_iiii + n_ioii = n_ixii - n_iiii + n_iioi = n_iixi - n_iiii + n_ooii = n_xxii - n_iiii - n_oiii - n_ioii + n_oioi = n_xixi - n_iiii - n_oiii - n_iioi + n_iooi = n_ixxi - n_iiii - n_ioii - n_iioi + n_oooi = n_xxxi - n_iiii - n_oiii - n_ioii - n_iioi - n_ooii - n_iooi - n_oioi + n_iiio = n_iiix - n_iiii + n_oiio = n_xiix - n_iiii - n_oiii - n_iiio + n_ioio = n_ixix - n_iiii - n_ioii - n_iiio + n_ooio = n_xxix - n_iiii - n_oiii - n_ioii - n_iiio - n_ooii - n_ioio - n_oiio + n_iioo = n_iixx - n_iiii - n_iioi - n_iiio + n_oioo = n_xixx - n_iiii - n_oiii - n_iioi - n_iiio - n_oioi - n_oiio - n_iioo + n_iooo = n_ixxx - n_iiii - n_ioii - n_iioi - n_iiio - n_iooi - n_iioo - n_ioio + n_oooo = ( + n_xxxx + - n_iiii + - n_oiii + - n_ioii + - n_iioi + - n_ooii + - n_oioi + - n_iooi + - n_oooi + - n_iiio + - n_oiio + - n_ioio + - n_ooio + - n_iioo + - n_oioo + - n_iooo + ) + + return ( + n_iiii, + n_oiii, + n_ioii, + n_ooii, + n_iioi, + n_oioi, + n_iooi, + n_oooi, + n_iiio, + n_oiio, + n_ioio, + n_ooio, + n_iioo, + n_oioo, + n_iooo, + n_oooo, + ) + + @staticmethod + def _marginals(*contingency): + """Calculates values of contingency table marginals from its values. 
+ QuadgramAssocMeasures._marginals(1, 0, 2, 46, 552, 825, 2577, 34967, 1, 0, 2, 48, 7250, 9031, 28585, 356653) + (1, (2, 553, 3, 1), (7804, 6, 3132, 1378, 49, 2), (38970, 17660, 100, 38970), 440540) + """ + n_iiii, n_oiii, n_ioii, n_ooii, n_iioi, n_oioi, n_iooi, n_oooi, n_iiio, n_oiio, n_ioio, n_ooio, n_iioo, n_oioo, n_iooo, n_oooo = ( + contingency + ) + + n_iiix = n_iiii + n_iiio + n_iixi = n_iiii + n_iioi + n_ixii = n_iiii + n_ioii + n_xiii = n_iiii + n_oiii + + n_iixx = n_iiii + n_iioi + n_iiio + n_iioo + n_ixix = n_iiii + n_ioii + n_iiio + n_ioio + n_ixxi = n_iiii + n_ioii + n_iioi + n_iooi + n_xixi = n_iiii + n_oiii + n_iioi + n_oioi + n_xxii = n_iiii + n_oiii + n_ioii + n_ooii + n_xiix = n_iiii + n_oiii + n_iiio + n_oiio + + n_ixxx = n_iiii + n_ioii + n_iioi + n_iiio + n_iooi + n_iioo + n_ioio + n_iooo + n_xixx = n_iiii + n_oiii + n_iioi + n_iiio + n_oioi + n_oiio + n_iioo + n_oioo + n_xxix = n_iiii + n_oiii + n_ioii + n_iiio + n_ooii + n_ioio + n_oiio + n_ooio + n_xxxi = n_iiii + n_oiii + n_ioii + n_iioi + n_ooii + n_iooi + n_oioi + n_oooi + + n_all = sum(contingency) + + return ( + n_iiii, + (n_iiix, n_iixi, n_ixii, n_xiii), + (n_iixx, n_ixix, n_ixxi, n_xixi, n_xxii, n_xiix), + (n_ixxx, n_xixx, n_xxix, n_xxxi), + n_all, + ) + + +class ContingencyMeasures(object): + """Wraps NgramAssocMeasures classes such that the arguments of association + measures are contingency table values rather than marginals. + """ + + def __init__(self, measures): + """Constructs a ContingencyMeasures given a NgramAssocMeasures class""" + self.__class__.__name__ = 'Contingency' + measures.__class__.__name__ + for k in dir(measures): + if k.startswith('__'): + continue + v = getattr(measures, k) + if not k.startswith('_'): + v = self._make_contingency_fn(measures, v) + setattr(self, k, v) + + @staticmethod + def _make_contingency_fn(measures, old_fn): + """From an association measure function, produces a new function which + accepts contingency table values as its arguments. + """ + + def res(*contingency): + return old_fn(*measures._marginals(*contingency)) + + res.__doc__ = old_fn.__doc__ + res.__name__ = old_fn.__name__ + return res diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/confusionmatrix.py b/venv.bak/lib/python3.7/site-packages/nltk/metrics/confusionmatrix.py new file mode 100644 index 0000000..3f82f29 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/metrics/confusionmatrix.py @@ -0,0 +1,218 @@ +# Natural Language Toolkit: Confusion Matrices +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals +from nltk.probability import FreqDist +from nltk.compat import python_2_unicode_compatible + + +@python_2_unicode_compatible +class ConfusionMatrix(object): + """ + The confusion matrix between a list of reference values and a + corresponding list of test values. Entry *[r,t]* of this + matrix is a count of the number of times that the reference value + *r* corresponds to the test value *t*. E.g.: + + >>> from nltk.metrics import ConfusionMatrix + >>> ref = 'DET NN VB DET JJ NN NN IN DET NN'.split() + >>> test = 'DET VB VB DET NN NN NN IN DET NN'.split() + >>> cm = ConfusionMatrix(ref, test) + >>> print(cm['NN', 'NN']) + 3 + + Note that the diagonal entries *Ri=Tj* of this matrix + corresponds to correct values; and the off-diagonal entries + correspond to incorrect values. 
+ """ + + def __init__(self, reference, test, sort_by_count=False): + """ + Construct a new confusion matrix from a list of reference + values and a corresponding list of test values. + + :type reference: list + :param reference: An ordered list of reference values. + :type test: list + :param test: A list of values to compare against the + corresponding reference values. + :raise ValueError: If ``reference`` and ``length`` do not have + the same length. + """ + if len(reference) != len(test): + raise ValueError('Lists must have the same length.') + + # Get a list of all values. + if sort_by_count: + ref_fdist = FreqDist(reference) + test_fdist = FreqDist(test) + + def key(v): + return -(ref_fdist[v] + test_fdist[v]) + + values = sorted(set(reference + test), key=key) + else: + values = sorted(set(reference + test)) + + # Construct a value->index dictionary + indices = dict((val, i) for (i, val) in enumerate(values)) + + # Make a confusion matrix table. + confusion = [[0 for val in values] for val in values] + max_conf = 0 # Maximum confusion + for w, g in zip(reference, test): + confusion[indices[w]][indices[g]] += 1 + max_conf = max(max_conf, confusion[indices[w]][indices[g]]) + + #: A list of all values in ``reference`` or ``test``. + self._values = values + #: A dictionary mapping values in ``self._values`` to their indices. + self._indices = indices + #: The confusion matrix itself (as a list of lists of counts). + self._confusion = confusion + #: The greatest count in ``self._confusion`` (used for printing). + self._max_conf = max_conf + #: The total number of values in the confusion matrix. + self._total = len(reference) + #: The number of correct (on-diagonal) values in the matrix. + self._correct = sum(confusion[i][i] for i in range(len(values))) + + def __getitem__(self, li_lj_tuple): + """ + :return: The number of times that value ``li`` was expected and + value ``lj`` was given. + :rtype: int + """ + (li, lj) = li_lj_tuple + i = self._indices[li] + j = self._indices[lj] + return self._confusion[i][j] + + def __repr__(self): + return '' % (self._correct, self._total) + + def __str__(self): + return self.pretty_format() + + def pretty_format( + self, + show_percents=False, + values_in_chart=True, + truncate=None, + sort_by_count=False, + ): + """ + :return: A multi-line string representation of this confusion matrix. + :type truncate: int + :param truncate: If specified, then only show the specified + number of values. Any sorting (e.g., sort_by_count) + will be performed before truncation. + :param sort_by_count: If true, then sort by the count of each + label in the reference data. I.e., labels that occur more + frequently in the reference label will be towards the left + edge of the matrix, and labels that occur less frequently + will be towards the right edge. + + @todo: add marginals? + """ + confusion = self._confusion + + values = self._values + if sort_by_count: + values = sorted( + values, key=lambda v: -sum(self._confusion[self._indices[v]]) + ) + + if truncate: + values = values[:truncate] + + if values_in_chart: + value_strings = ["%s" % val for val in values] + else: + value_strings = [str(n + 1) for n in range(len(values))] + + # Construct a format string for row values + valuelen = max(len(val) for val in value_strings) + value_format = '%' + repr(valuelen) + 's | ' + # Construct a format string for matrix entries + if show_percents: + entrylen = 6 + entry_format = '%5.1f%%' + zerostr = ' .' 
+ else: + entrylen = len(repr(self._max_conf)) + entry_format = '%' + repr(entrylen) + 'd' + zerostr = ' ' * (entrylen - 1) + '.' + + # Write the column values. + s = '' + for i in range(valuelen): + s += (' ' * valuelen) + ' |' + for val in value_strings: + if i >= valuelen - len(val): + s += val[i - valuelen + len(val)].rjust(entrylen + 1) + else: + s += ' ' * (entrylen + 1) + s += ' |\n' + + # Write a dividing line + s += '%s-+-%s+\n' % ('-' * valuelen, '-' * ((entrylen + 1) * len(values))) + + # Write the entries. + for val, li in zip(value_strings, values): + i = self._indices[li] + s += value_format % val + for lj in values: + j = self._indices[lj] + if confusion[i][j] == 0: + s += zerostr + elif show_percents: + s += entry_format % (100.0 * confusion[i][j] / self._total) + else: + s += entry_format % confusion[i][j] + if i == j: + prevspace = s.rfind(' ') + s = s[:prevspace] + '<' + s[prevspace + 1 :] + '>' + else: + s += ' ' + s += '|\n' + + # Write a dividing line + s += '%s-+-%s+\n' % ('-' * valuelen, '-' * ((entrylen + 1) * len(values))) + + # Write a key + s += '(row = reference; col = test)\n' + if not values_in_chart: + s += 'Value key:\n' + for i, value in enumerate(values): + s += '%6d: %s\n' % (i + 1, value) + + return s + + def key(self): + values = self._values + str = 'Value key:\n' + indexlen = len(repr(len(values) - 1)) + key_format = ' %' + repr(indexlen) + 'd: %s\n' + for i in range(len(values)): + str += key_format % (i, values[i]) + + return str + + +def demo(): + reference = 'DET NN VB DET JJ NN NN IN DET NN'.split() + test = 'DET VB VB DET NN NN NN IN DET NN'.split() + print('Reference =', reference) + print('Test =', test) + print('Confusion matrix:') + print(ConfusionMatrix(reference, test)) + print(ConfusionMatrix(reference, test).pretty_format(sort_by_count=True)) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/distance.py b/venv.bak/lib/python3.7/site-packages/nltk/metrics/distance.py new file mode 100644 index 0000000..3817d90 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/metrics/distance.py @@ -0,0 +1,487 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Distance Metrics +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird +# Tom Lippincott +# URL: +# For license information, see LICENSE.TXT +# + +""" +Distance Metrics. + +Compute the distance between two items (usually strings). +As metrics, they must satisfy the following three requirements: + +1. d(a, a) = 0 +2. d(a, b) >= 0 +3. d(a, c) <= d(a, b) + d(b, c) +""" + +from __future__ import print_function +from __future__ import division + +import warnings +import operator + +def _edit_dist_init(len1, len2): + lev = [] + for i in range(len1): + lev.append([0] * len2) # initialize 2D array to zero + for i in range(len1): + lev[i][0] = i # column 0: 0,1,2,3,4,... + for j in range(len2): + lev[0][j] = j # row 0: 0,1,2,3,4,... 
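+ # For example, _edit_dist_init(3, 4) returns
+ # [[0, 1, 2, 3], [1, 0, 0, 0], [2, 0, 0, 0]]: only row 0 and column 0 are
+ # filled in here; the remaining cells are computed later by repeated calls
+ # to _edit_dist_step.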
+ return lev + + +def _edit_dist_step(lev, i, j, s1, s2, substitution_cost=1, transpositions=False): + c1 = s1[i - 1] + c2 = s2[j - 1] + + # skipping a character in s1 + a = lev[i - 1][j] + 1 + # skipping a character in s2 + b = lev[i][j - 1] + 1 + # substitution + c = lev[i - 1][j - 1] + (substitution_cost if c1 != c2 else 0) + + # transposition + d = c + 1 # never picked by default + if transpositions and i > 1 and j > 1: + if s1[i - 2] == c2 and s2[j - 2] == c1: + d = lev[i - 2][j - 2] + 1 + + # pick the cheapest + lev[i][j] = min(a, b, c, d) + + +def edit_distance(s1, s2, substitution_cost=1, transpositions=False): + """ + Calculate the Levenshtein edit-distance between two strings. + The edit distance is the number of characters that need to be + substituted, inserted, or deleted, to transform s1 into s2. For + example, transforming "rain" to "shine" requires three steps, + consisting of two substitutions and one insertion: + "rain" -> "sain" -> "shin" -> "shine". These operations could have + been done in other orders, but at least three steps are needed. + + Allows specifying the cost of substitution edits (e.g., "a" -> "b"), + because sometimes it makes sense to assign greater penalties to + substitutions. + + This also optionally allows transposition edits (e.g., "ab" -> "ba"), + though this is disabled by default. + + :param s1, s2: The strings to be analysed + :param transpositions: Whether to allow transposition edits + :type s1: str + :type s2: str + :type substitution_cost: int + :type transpositions: bool + :rtype int + """ + # set up a 2-D array + len1 = len(s1) + len2 = len(s2) + lev = _edit_dist_init(len1 + 1, len2 + 1) + + # iterate over the array + for i in range(len1): + for j in range(len2): + _edit_dist_step( + lev, + i + 1, + j + 1, + s1, + s2, + substitution_cost=substitution_cost, + transpositions=transpositions, + ) + return lev[len1][len2] + + +def _edit_dist_backtrace(lev): + i, j = len(lev) - 1, len(lev[0]) - 1 + alignment = [(i, j)] + + while (i, j) != (0, 0): + directions = [ + (i - 1, j), # skip s1 + (i, j - 1), # skip s2 + (i - 1, j - 1), # substitution + ] + + direction_costs = ( + (lev[i][j] if (i >= 0 and j >= 0) else float('inf'), (i, j)) for i, j in directions + ) + _, (i, j) = min(direction_costs, key=operator.itemgetter(0)) + + alignment.append((i, j)) + return list(reversed(alignment)) + + +def edit_distance_align(s1, s2, substitution_cost=1): + """ + Calculate the minimum Levenshtein edit-distance based alignment + mapping between two strings. The alignment finds the mapping + from string s1 to s2 that minimizes the edit distance cost. + For example, mapping "rain" to "shine" would involve 2 + substitutions, 2 matches and an insertion resulting in + the following mapping: + [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (4, 5)] + NB: (0, 0) is the start state without any letters associated + See more: https://web.stanford.edu/class/cs124/lec/med.pdf + + In case of multiple valid minimum-distance alignments, the + backtrace has the following operation precedence: + 1. Skip s1 character + 2. Skip s2 character + 3. Substitute s1 and s2 characters + The backtrace is carried out in reverse string order. + + This function does not support transposition. 
+ + :param s1, s2: The strings to be aligned + :type s1: str + :type s2: str + :type substitution_cost: int + :rtype List[Tuple(int, int)] + """ + # set up a 2-D array + len1 = len(s1) + len2 = len(s2) + lev = _edit_dist_init(len1 + 1, len2 + 1) + + # iterate over the array + for i in range(len1): + for j in range(len2): + _edit_dist_step( + lev, + i + 1, + j + 1, + s1, + s2, + substitution_cost=substitution_cost, + transpositions=False, + ) + + # backtrace to find alignment + alignment = _edit_dist_backtrace(lev) + return alignment + + +def binary_distance(label1, label2): + """Simple equality test. + + 0.0 if the labels are identical, 1.0 if they are different. + + >>> from nltk.metrics import binary_distance + >>> binary_distance(1,1) + 0.0 + + >>> binary_distance(1,3) + 1.0 + """ + + return 0.0 if label1 == label2 else 1.0 + + +def jaccard_distance(label1, label2): + """Distance metric comparing set-similarity. + + """ + return (len(label1.union(label2)) - len(label1.intersection(label2))) / len( + label1.union(label2) + ) + + +def masi_distance(label1, label2): + """Distance metric that takes into account partial agreement when multiple + labels are assigned. + + >>> from nltk.metrics import masi_distance + >>> masi_distance(set([1, 2]), set([1, 2, 3, 4])) + 0.665 + + Passonneau 2006, Measuring Agreement on Set-Valued Items (MASI) + for Semantic and Pragmatic Annotation. + """ + + len_intersection = len(label1.intersection(label2)) + len_union = len(label1.union(label2)) + len_label1 = len(label1) + len_label2 = len(label2) + if len_label1 == len_label2 and len_label1 == len_intersection: + m = 1 + elif len_intersection == min(len_label1, len_label2): + m = 0.67 + elif len_intersection > 0: + m = 0.33 + else: + m = 0 + + return 1 - len_intersection / len_union * m + + +def interval_distance(label1, label2): + """Krippendorff's interval distance metric + + >>> from nltk.metrics import interval_distance + >>> interval_distance(1,10) + 81 + + Krippendorff 1980, Content Analysis: An Introduction to its Methodology + """ + + try: + return pow(label1 - label2, 2) + # return pow(list(label1)[0]-list(label2)[0],2) + except: + print("non-numeric labels not supported with interval distance") + + +def presence(label): + """Higher-order function to test presence of a given label + """ + + return lambda x, y: 1.0 * ((label in x) == (label in y)) + + +def fractional_presence(label): + return ( + lambda x, y: abs(((1.0 / len(x)) - (1.0 / len(y)))) + * (label in x and label in y) + or 0.0 * (label not in x and label not in y) + or abs((1.0 / len(x))) * (label in x and label not in y) + or ((1.0 / len(y))) * (label not in x and label in y) + ) + + +def custom_distance(file): + data = {} + with open(file, 'r') as infile: + for l in infile: + labelA, labelB, dist = l.strip().split("\t") + labelA = frozenset([labelA]) + labelB = frozenset([labelB]) + data[frozenset([labelA, labelB])] = float(dist) + return lambda x, y: data[frozenset([x, y])] + + +def jaro_similarity(s1, s2): + """ + Computes the Jaro similarity between 2 sequences from: + + Matthew A. Jaro (1989). Advances in record linkage methodology + as applied to the 1985 census of Tampa Florida. Journal of the + American Statistical Association. 84 (406): 414-20. + + The Jaro distance between is the min no. of single-character transpositions + required to change one word into another. 
The Jaro similarity formula from + https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance : + + jaro_sim = 0 if m = 0 else 1/3 * (m/|s_1| + m/s_2 + (m-t)/m) + + where: + - |s_i| is the length of string s_i + - m is the no. of matching characters + - t is the half no. of possible transpositions. + + """ + # First, store the length of the strings + # because they will be re-used several times. + len_s1, len_s2 = len(s1), len(s2) + + # The upper bound of the distance for being a matched character. + match_bound = max(len_s1, len_s2) // 2 - 1 + + # Initialize the counts for matches and transpositions. + matches = 0 # no.of matched characters in s1 and s2 + transpositions = 0 # no. of transpositions between s1 and s2 + flagged_1 = [] # positions in s1 which are matches to some character in s2 + flagged_2 = [] # positions in s2 which are matches to some character in s1 + + # Iterate through sequences, check for matches and compute transpositions. + for i in range(len_s1): # Iterate through each character. + upperbound = min(i + match_bound, len_s2 - 1) + lowerbound = max(0, i - match_bound) + for j in range(lowerbound, upperbound + 1): + if s1[i] == s2[j] and j not in flagged_2: + matches += 1 + flagged_1.append(i) + flagged_2.append(j) + break + flagged_2.sort() + for i, j in zip(flagged_1, flagged_2): + if s1[i] != s2[j]: + transpositions += 1 + + if matches == 0: + return 0 + else: + return ( + 1 + / 3 + * ( + matches / len_s1 + + matches / len_s2 + + (matches - transpositions // 2) / matches + ) + ) + + +def jaro_winkler_similarity(s1, s2, p=0.1, max_l=4): + """ + The Jaro Winkler distance is an extension of the Jaro similarity in: + + William E. Winkler. 1990. String Comparator Metrics and Enhanced + Decision Rules in the Fellegi-Sunter Model of Record Linkage. + Proceedings of the Section on Survey Research Methods. + American Statistical Association: 354-359. + such that: + + jaro_winkler_sim = jaro_sim + ( l * p * (1 - jaro_sim) ) + + where, + + - jaro_sim is the output from the Jaro Similarity, + see jaro_similarity() + - l is the length of common prefix at the start of the string + - this implementation provides an upperbound for the l value + to keep the prefixes.A common value of this upperbound is 4. + - p is the constant scaling factor to overweigh common prefixes. + The Jaro-Winkler similarity will fall within the [0, 1] bound, + given that max(p)<=0.25 , default is p=0.1 in Winkler (1990) + + + Test using outputs from https://www.census.gov/srd/papers/pdf/rr93-8.pdf + from "Table 5 Comparison of String Comparators Rescaled between 0 and 1" + + >>> winkler_examples = [("billy", "billy"), ("billy", "bill"), ("billy", "blily"), + ... ("massie", "massey"), ("yvette", "yevett"), ("billy", "bolly"), ("dwayne", "duane"), + ... ("dixon", "dickson"), ("billy", "susan")] + + >>> winkler_scores = [1.000, 0.967, 0.947, 0.944, 0.911, 0.893, 0.858, 0.853, 0.000] + >>> jaro_scores = [1.000, 0.933, 0.933, 0.889, 0.889, 0.867, 0.822, 0.790, 0.000] + + # One way to match the values on the Winkler's paper is to provide a different + # p scaling factor for different pairs of strings, e.g. + >>> p_factors = [0.1, 0.125, 0.20, 0.125, 0.20, 0.20, 0.20, 0.15, 0.1] + + >>> for (s1, s2), jscore, wscore, p in zip(winkler_examples, jaro_scores, winkler_scores, p_factors): + ... assert round(jaro_similarity(s1, s2), 3) == jscore + ... assert round(jaro_winkler_similarity(s1, s2, p=p), 3) == wscore + + + Test using outputs from https://www.census.gov/srd/papers/pdf/rr94-5.pdf from + "Table 2.1. 
Comparison of String Comparators Using Last Names, First Names, and Street Names" + + >>> winkler_examples = [('SHACKLEFORD', 'SHACKELFORD'), ('DUNNINGHAM', 'CUNNIGHAM'), + ... ('NICHLESON', 'NICHULSON'), ('JONES', 'JOHNSON'), ('MASSEY', 'MASSIE'), + ... ('ABROMS', 'ABRAMS'), ('HARDIN', 'MARTINEZ'), ('ITMAN', 'SMITH'), + ... ('JERALDINE', 'GERALDINE'), ('MARHTA', 'MARTHA'), ('MICHELLE', 'MICHAEL'), + ... ('JULIES', 'JULIUS'), ('TANYA', 'TONYA'), ('DWAYNE', 'DUANE'), ('SEAN', 'SUSAN'), + ... ('JON', 'JOHN'), ('JON', 'JAN'), ('BROOKHAVEN', 'BRROKHAVEN'), + ... ('BROOK HALLOW', 'BROOK HLLW'), ('DECATUR', 'DECATIR'), ('FITZRUREITER', 'FITZENREITER'), + ... ('HIGBEE', 'HIGHEE'), ('HIGBEE', 'HIGVEE'), ('LACURA', 'LOCURA'), ('IOWA', 'IONA'), ('1ST', 'IST')] + + >>> jaro_scores = [0.970, 0.896, 0.926, 0.790, 0.889, 0.889, 0.722, 0.467, 0.926, + ... 0.944, 0.869, 0.889, 0.867, 0.822, 0.783, 0.917, 0.000, 0.933, 0.944, 0.905, + ... 0.856, 0.889, 0.889, 0.889, 0.833, 0.000] + + >>> winkler_scores = [0.982, 0.896, 0.956, 0.832, 0.944, 0.922, 0.722, 0.467, 0.926, + ... 0.961, 0.921, 0.933, 0.880, 0.858, 0.805, 0.933, 0.000, 0.947, 0.967, 0.943, + ... 0.913, 0.922, 0.922, 0.900, 0.867, 0.000] + + # One way to match the values on the Winkler's paper is to provide a different + # p scaling factor for different pairs of strings, e.g. + >>> p_factors = [0.1, 0.1, 0.1, 0.1, 0.125, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.20, + ... 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1] + + + >>> for (s1, s2), jscore, wscore, p in zip(winkler_examples, jaro_scores, winkler_scores, p_factors): + ... if (s1, s2) in [('JON', 'JAN'), ('1ST', 'IST')]: + ... continue # Skip bad examples from the paper. + ... assert round(jaro_similarity(s1, s2), 3) == jscore + ... assert round(jaro_winkler_similarity(s1, s2, p=p), 3) == wscore + + + + This test-case proves that the output of Jaro-Winkler similarity depends on + the product l * p and not on the product max_l * p. Here the product max_l * p > 1 + however the product l * p <= 1 + + >>> round(jaro_winkler_similarity('TANYA', 'TONYA', p=0.1, max_l=100), 3) + 0.88 + + + """ + # To ensure that the output of the Jaro-Winkler's similarity + # falls between [0,1], the product of l * p needs to be + # also fall between [0,1]. + if not 0 <= max_l * p <= 1: + warnings.warn( + str( + "The product `max_l * p` might not fall between [0,1]." + "Jaro-Winkler similarity might not be between 0 and 1." + ) + ) + + # Compute the Jaro similarity + jaro_sim = jaro_similarity(s1, s2) + + # Initialize the upper bound for the no. of prefixes. + # if user did not pre-define the upperbound, + # use shorter length between s1 and s2 + + # Compute the prefix matches. + l = 0 + # zip() will automatically loop until the end of shorter string. + for s1_i, s2_i in zip(s1, s2): + if s1_i == s2_i: + l += 1 + else: + break + if l == max_l: + break + # Return the similarity value as described in docstring. 
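+ # For example, for ('MARHTA', 'MARTHA') from the table above, jaro_sim is
+ # 0.944, the common prefix 'MAR' gives l = 3, and with p = 0.1 the result is
+ # 0.944 + 3 * 0.1 * (1 - 0.944), approximately 0.961, matching the listed
+ # Winkler score.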
+ return jaro_sim + (l * p * (1 - jaro_sim)) + + +def demo(): + string_distance_examples = [ + ("rain", "shine"), + ("abcdef", "acbdef"), + ("language", "lnaguaeg"), + ("language", "lnaugage"), + ("language", "lngauage"), + ] + for s1, s2 in string_distance_examples: + print("Edit distance btwn '%s' and '%s':" % (s1, s2), edit_distance(s1, s2)) + print( + "Edit dist with transpositions btwn '%s' and '%s':" % (s1, s2), + edit_distance(s1, s2, transpositions=True), + ) + print("Jaro similarity btwn '%s' and '%s':" % (s1, s2), jaro_similarity(s1, s2)) + print( + "Jaro-Winkler similarity btwn '%s' and '%s':" % (s1, s2), + jaro_winkler_similarity(s1, s2), + ) + print( + "Jaro-Winkler distance btwn '%s' and '%s':" % (s1, s2), + 1 - jaro_winkler_similarity(s1, s2), + ) + s1 = set([1, 2, 3, 4]) + s2 = set([3, 4, 5]) + print("s1:", s1) + print("s2:", s2) + print("Binary distance:", binary_distance(s1, s2)) + print("Jaccard distance:", jaccard_distance(s1, s2)) + print("MASI distance:", masi_distance(s1, s2)) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/paice.py b/venv.bak/lib/python3.7/site-packages/nltk/metrics/paice.py new file mode 100644 index 0000000..b26069b --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/metrics/paice.py @@ -0,0 +1,389 @@ +# Natural Language Toolkit: Agreement Metrics +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Lauri Hallila +# URL: +# For license information, see LICENSE.TXT +# + +"""Counts Paice's performance statistics for evaluating stemming algorithms. + +What is required: + - A dictionary of words grouped by their real lemmas + - A dictionary of words grouped by stems from a stemming algorithm + +When these are given, Understemming Index (UI), Overstemming Index (OI), +Stemming Weight (SW) and Error-rate relative to truncation (ERRT) are counted. + +References: +Chris D. Paice (1994). An evaluation method for stemming algorithms. +In Proceedings of SIGIR, 42--50. +""" + +from math import sqrt + + +def get_words_from_dictionary(lemmas): + ''' + Get original set of words used for analysis. + + :param lemmas: A dictionary where keys are lemmas and values are sets + or lists of words corresponding to that lemma. + :type lemmas: dict(str): list(str) + :return: Set of words that exist as values in the dictionary + :rtype: set(str) + ''' + words = set() + for lemma in lemmas: + words.update(set(lemmas[lemma])) + return words + + +def _truncate(words, cutlength): + '''Group words by stems defined by truncating them at given length. + + :param words: Set of words used for analysis + :param cutlength: Words are stemmed by cutting at this length. + :type words: set(str) or list(str) + :type cutlength: int + :return: Dictionary where keys are stems and values are sets of words + corresponding to that stem. + :rtype: dict(str): set(str) + ''' + stems = {} + for word in words: + stem = word[:cutlength] + try: + stems[stem].update([word]) + except KeyError: + stems[stem] = set([word]) + return stems + + +# Reference: http://en.wikipedia.org/wiki/Line-line_intersection +def _count_intersection(l1, l2): + '''Count intersection between two line segments defined by coordinate pairs. 
+ + :param l1: Tuple of two coordinate pairs defining the first line segment + :param l2: Tuple of two coordinate pairs defining the second line segment + :type l1: tuple(float, float) + :type l2: tuple(float, float) + :return: Coordinates of the intersection + :rtype: tuple(float, float) + ''' + x1, y1 = l1[0] + x2, y2 = l1[1] + x3, y3 = l2[0] + x4, y4 = l2[1] + + denominator = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4) + + if denominator == 0.0: # lines are parallel + if x1 == x2 == x3 == x4 == 0.0: + # When lines are parallel, they must be on the y-axis. + # We can ignore x-axis because we stop counting the + # truncation line when we get there. + # There are no other options as UI (x-axis) grows and + # OI (y-axis) diminishes when we go along the truncation line. + return (0.0, y4) + + x = ( + (x1 * y2 - y1 * x2) * (x3 - x4) - (x1 - x2) * (x3 * y4 - y3 * x4) + ) / denominator + y = ( + (x1 * y2 - y1 * x2) * (y3 - y4) - (y1 - y2) * (x3 * y4 - y3 * x4) + ) / denominator + return (x, y) + + +def _get_derivative(coordinates): + '''Get derivative of the line from (0,0) to given coordinates. + + :param coordinates: A coordinate pair + :type coordinates: tuple(float, float) + :return: Derivative; inf if x is zero + :rtype: float + ''' + try: + return coordinates[1] / coordinates[0] + except ZeroDivisionError: + return float('inf') + + +def _calculate_cut(lemmawords, stems): + '''Count understemmed and overstemmed pairs for (lemma, stem) pair with common words. + + :param lemmawords: Set or list of words corresponding to certain lemma. + :param stems: A dictionary where keys are stems and values are sets + or lists of words corresponding to that stem. + :type lemmawords: set(str) or list(str) + :type stems: dict(str): set(str) + :return: Amount of understemmed and overstemmed pairs contributed by words + existing in both lemmawords and stems. + :rtype: tuple(float, float) + ''' + umt, wmt = 0.0, 0.0 + for stem in stems: + cut = set(lemmawords) & set(stems[stem]) + if cut: + cutcount = len(cut) + stemcount = len(stems[stem]) + # Unachieved merge total + umt += cutcount * (len(lemmawords) - cutcount) + # Wrongly merged total + wmt += cutcount * (stemcount - cutcount) + return (umt, wmt) + + +def _calculate(lemmas, stems): + '''Calculate actual and maximum possible amounts of understemmed and overstemmed word pairs. + + :param lemmas: A dictionary where keys are lemmas and values are sets + or lists of words corresponding to that lemma. + :param stems: A dictionary where keys are stems and values are sets + or lists of words corresponding to that stem. + :type lemmas: dict(str): list(str) + :type stems: dict(str): set(str) + :return: Global unachieved merge total (gumt), + global desired merge total (gdmt), + global wrongly merged total (gwmt) and + global desired non-merge total (gdnt). + :rtype: tuple(float, float, float, float) + ''' + + n = sum(len(lemmas[word]) for word in lemmas) + + gdmt, gdnt, gumt, gwmt = (0.0, 0.0, 0.0, 0.0) + + for lemma in lemmas: + lemmacount = len(lemmas[lemma]) + + # Desired merge total + gdmt += lemmacount * (lemmacount - 1) + + # Desired non-merge total + gdnt += lemmacount * (n - lemmacount) + + # For each (lemma, stem) pair with common words, count how many + # pairs are understemmed and overstemmed. 
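+ # gdmt and gdnt above count ordered word pairs, and _calculate_cut likewise
+ # counts each unordered pair once from each side, which is why all four
+ # totals are halved in the return statement below.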
+ umt, wmt = _calculate_cut(lemmas[lemma], stems) + + # Add to total undesired and wrongly-merged totals + gumt += umt + gwmt += wmt + + # Each object is counted twice, so divide by two + return (gumt / 2, gdmt / 2, gwmt / 2, gdnt / 2) + + +def _indexes(gumt, gdmt, gwmt, gdnt): + '''Count Understemming Index (UI), Overstemming Index (OI) and Stemming Weight (SW). + + :param gumt, gdmt, gwmt, gdnt: Global unachieved merge total (gumt), + global desired merge total (gdmt), + global wrongly merged total (gwmt) and + global desired non-merge total (gdnt). + :type gumt, gdmt, gwmt, gdnt: float + :return: Understemming Index (UI), + Overstemming Index (OI) and + Stemming Weight (SW). + :rtype: tuple(float, float, float) + ''' + # Calculate Understemming Index (UI), + # Overstemming Index (OI) and Stemming Weight (SW) + try: + ui = gumt / gdmt + except ZeroDivisionError: + # If GDMT (max merge total) is 0, define UI as 0 + ui = 0.0 + try: + oi = gwmt / gdnt + except ZeroDivisionError: + # IF GDNT (max non-merge total) is 0, define OI as 0 + oi = 0.0 + try: + sw = oi / ui + except ZeroDivisionError: + if oi == 0.0: + # OI and UI are 0, define SW as 'not a number' + sw = float('nan') + else: + # UI is 0, define SW as infinity + sw = float('inf') + return (ui, oi, sw) + + +class Paice(object): + '''Class for storing lemmas, stems and evaluation metrics.''' + + def __init__(self, lemmas, stems): + ''' + :param lemmas: A dictionary where keys are lemmas and values are sets + or lists of words corresponding to that lemma. + :param stems: A dictionary where keys are stems and values are sets + or lists of words corresponding to that stem. + :type lemmas: dict(str): list(str) + :type stems: dict(str): set(str) + ''' + self.lemmas = lemmas + self.stems = stems + self.coords = [] + self.gumt, self.gdmt, self.gwmt, self.gdnt = (None, None, None, None) + self.ui, self.oi, self.sw = (None, None, None) + self.errt = None + self.update() + + def __str__(self): + text = ['Global Unachieved Merge Total (GUMT): %s\n' % self.gumt] + text.append('Global Desired Merge Total (GDMT): %s\n' % self.gdmt) + text.append('Global Wrongly-Merged Total (GWMT): %s\n' % self.gwmt) + text.append('Global Desired Non-merge Total (GDNT): %s\n' % self.gdnt) + text.append('Understemming Index (GUMT / GDMT): %s\n' % self.ui) + text.append('Overstemming Index (GWMT / GDNT): %s\n' % self.oi) + text.append('Stemming Weight (OI / UI): %s\n' % self.sw) + text.append('Error-Rate Relative to Truncation (ERRT): %s\r\n' % self.errt) + coordinates = ' '.join(['(%s, %s)' % item for item in self.coords]) + text.append('Truncation line: %s' % coordinates) + return ''.join(text) + + def _get_truncation_indexes(self, words, cutlength): + '''Count (UI, OI) when stemming is done by truncating words at \'cutlength\'. + + :param words: Words used for the analysis + :param cutlength: Words are stemmed by cutting them at this length + :type words: set(str) or list(str) + :type cutlength: int + :return: Understemming and overstemming indexes + :rtype: tuple(int, int) + ''' + + truncated = _truncate(words, cutlength) + gumt, gdmt, gwmt, gdnt = _calculate(self.lemmas, truncated) + ui, oi = _indexes(gumt, gdmt, gwmt, gdnt)[:2] + return (ui, oi) + + def _get_truncation_coordinates(self, cutlength=0): + '''Count (UI, OI) pairs for truncation points until we find the segment where (ui, oi) crosses the truncation line. + + :param cutlength: Optional parameter to start counting from (ui, oi) + coordinates gotten by stemming at this length. 
Useful for speeding up + the calculations when you know the approximate location of the + intersection. + :type cutlength: int + :return: List of coordinate pairs that define the truncation line + :rtype: list(tuple(float, float)) + ''' + words = get_words_from_dictionary(self.lemmas) + maxlength = max(len(word) for word in words) + + # Truncate words from different points until (0, 0) - (ui, oi) segment crosses the truncation line + coords = [] + while cutlength <= maxlength: + # Get (UI, OI) pair of current truncation point + pair = self._get_truncation_indexes(words, cutlength) + + # Store only new coordinates so we'll have an actual + # line segment when counting the intersection point + if pair not in coords: + coords.append(pair) + if pair == (0.0, 0.0): + # Stop counting if truncation line goes through origo; + # length from origo to truncation line is 0 + return coords + if len(coords) >= 2 and pair[0] > 0.0: + derivative1 = _get_derivative(coords[-2]) + derivative2 = _get_derivative(coords[-1]) + # Derivative of the truncation line is a decreasing value; + # when it passes Stemming Weight, we've found the segment + # of truncation line intersecting with (0, 0) - (ui, oi) segment + if derivative1 >= self.sw >= derivative2: + return coords + cutlength += 1 + return coords + + def _errt(self): + '''Count Error-Rate Relative to Truncation (ERRT). + + :return: ERRT, length of the line from origo to (UI, OI) divided by + the length of the line from origo to the point defined by the same + line when extended until the truncation line. + :rtype: float + ''' + # Count (UI, OI) pairs for truncation points until we find the segment where (ui, oi) crosses the truncation line + self.coords = self._get_truncation_coordinates() + if (0.0, 0.0) in self.coords: + # Truncation line goes through origo, so ERRT cannot be counted + if (self.ui, self.oi) != (0.0, 0.0): + return float('inf') + else: + return float('nan') + if (self.ui, self.oi) == (0.0, 0.0): + # (ui, oi) is origo; define errt as 0.0 + return 0.0 + # Count the intersection point + # Note that (self.ui, self.oi) cannot be (0.0, 0.0) and self.coords has different coordinates + # so we have actual line segments instead of a line segment and a point + intersection = _count_intersection( + ((0, 0), (self.ui, self.oi)), self.coords[-2:] + ) + # Count OP (length of the line from origo to (ui, oi)) + op = sqrt(self.ui ** 2 + self.oi ** 2) + # Count OT (length of the line from origo to truncation line that goes through (ui, oi)) + ot = sqrt(intersection[0] ** 2 + intersection[1] ** 2) + # OP / OT tells how well the stemming algorithm works compared to just truncating words + return op / ot + + def update(self): + '''Update statistics after lemmas and stems have been set.''' + self.gumt, self.gdmt, self.gwmt, self.gdnt = _calculate(self.lemmas, self.stems) + self.ui, self.oi, self.sw = _indexes(self.gumt, self.gdmt, self.gwmt, self.gdnt) + self.errt = self._errt() + + +def demo(): + '''Demonstration of the module.''' + # Some words with their real lemmas + lemmas = { + 'kneel': ['kneel', 'knelt'], + 'range': ['range', 'ranged'], + 'ring': ['ring', 'rang', 'rung'], + } + # Same words with stems from a stemming algorithm + stems = { + 'kneel': ['kneel'], + 'knelt': ['knelt'], + 'rang': ['rang', 'range', 'ranged'], + 'ring': ['ring'], + 'rung': ['rung'], + } + print('Words grouped by their lemmas:') + for lemma in sorted(lemmas): + print('%s => %s' % (lemma, ' '.join(lemmas[lemma]))) + print() + print('Same words grouped by a stemming algorithm:') + 
for stem in sorted(stems): + print('%s => %s' % (stem, ' '.join(stems[stem]))) + print() + p = Paice(lemmas, stems) + print(p) + print() + # Let's "change" results from a stemming algorithm + stems = { + 'kneel': ['kneel'], + 'knelt': ['knelt'], + 'rang': ['rang'], + 'range': ['range', 'ranged'], + 'ring': ['ring'], + 'rung': ['rung'], + } + print('Counting stats after changing stemming results:') + for stem in sorted(stems): + print('%s => %s' % (stem, ' '.join(stems[stem]))) + print() + p.stems = stems + p.update() + print(p) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/scores.py b/venv.bak/lib/python3.7/site-packages/nltk/metrics/scores.py new file mode 100644 index 0000000..9e6a516 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/metrics/scores.py @@ -0,0 +1,231 @@ +# Natural Language Toolkit: Evaluation +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function, division + +from math import fabs +import operator +from random import shuffle +from functools import reduce + +from six.moves import range, zip + +try: + from scipy.stats.stats import betai +except ImportError: + betai = None + +from nltk.util import LazyConcatenation, LazyMap + + +def accuracy(reference, test): + """ + Given a list of reference values and a corresponding list of test + values, return the fraction of corresponding values that are + equal. In particular, return the fraction of indices + ``0= actual_stat: + c += 1 + + if verbose and i % 10 == 0: + print('pseudo-statistic: %f' % pseudo_stat) + print('significance: %f' % ((c + 1) / (i + 1))) + print('-' * 60) + + significance = (c + 1) / (shuffles + 1) + + if verbose: + print('significance: %f' % significance) + if betai: + for phi in [0.01, 0.05, 0.10, 0.15, 0.25, 0.50]: + print("prob(phi<=%f): %f" % (phi, betai(c, shuffles, phi))) + + return (significance, c, shuffles) + + +def demo(): + print('-' * 75) + reference = 'DET NN VB DET JJ NN NN IN DET NN'.split() + test = 'DET VB VB DET NN NN NN IN DET NN'.split() + print('Reference =', reference) + print('Test =', test) + print('Accuracy:', accuracy(reference, test)) + + print('-' * 75) + reference_set = set(reference) + test_set = set(test) + print('Reference =', reference_set) + print('Test = ', test_set) + print('Precision:', precision(reference_set, test_set)) + print(' Recall:', recall(reference_set, test_set)) + print('F-Measure:', f_measure(reference_set, test_set)) + print('-' * 75) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/segmentation.py b/venv.bak/lib/python3.7/site-packages/nltk/metrics/segmentation.py new file mode 100644 index 0000000..9a96c15 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/metrics/segmentation.py @@ -0,0 +1,234 @@ +# Natural Language Toolkit: Text Segmentation Metrics +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird +# David Doukhan +# URL: +# For license information, see LICENSE.TXT + + +""" +Text Segmentation Metrics + +1. Windowdiff + +Pevzner, L., and Hearst, M., A Critique and Improvement of + an Evaluation Metric for Text Segmentation, +Computational Linguistics 28, 19-36 + + +2. Generalized Hamming Distance + +Bookstein A., Kulyukin V.A., Raita T. 
+Generalized Hamming Distance +Information Retrieval 5, 2002, pp 353-375 + +Baseline implementation in C++ +http://digital.cs.usu.edu/~vkulyukin/vkweb/software/ghd/ghd.html + +Study describing benefits of Generalized Hamming Distance Versus +WindowDiff for evaluating text segmentation tasks +Begsten, Y. Quel indice pour mesurer l'efficacite en segmentation de textes ? +TALN 2009 + + +3. Pk text segmentation metric + +Beeferman D., Berger A., Lafferty J. (1999) +Statistical Models for Text Segmentation +Machine Learning, 34, 177-210 +""" + +try: + import numpy as np +except ImportError: + pass + +from six.moves import range + + +def windowdiff(seg1, seg2, k, boundary="1", weighted=False): + """ + Compute the windowdiff score for a pair of segmentations. A + segmentation is any sequence over a vocabulary of two items + (e.g. "0", "1"), where the specified boundary value is used to + mark the edge of a segmentation. + + >>> s1 = "000100000010" + >>> s2 = "000010000100" + >>> s3 = "100000010000" + >>> '%.2f' % windowdiff(s1, s1, 3) + '0.00' + >>> '%.2f' % windowdiff(s1, s2, 3) + '0.30' + >>> '%.2f' % windowdiff(s2, s3, 3) + '0.80' + + :param seg1: a segmentation + :type seg1: str or list + :param seg2: a segmentation + :type seg2: str or list + :param k: window width + :type k: int + :param boundary: boundary value + :type boundary: str or int or bool + :param weighted: use the weighted variant of windowdiff + :type weighted: boolean + :rtype: float + """ + + if len(seg1) != len(seg2): + raise ValueError("Segmentations have unequal length") + if k > len(seg1): + raise ValueError( + "Window width k should be smaller or equal than segmentation lengths" + ) + wd = 0 + for i in range(len(seg1) - k + 1): + ndiff = abs(seg1[i : i + k].count(boundary) - seg2[i : i + k].count(boundary)) + if weighted: + wd += ndiff + else: + wd += min(1, ndiff) + return wd / (len(seg1) - k + 1.0) + + +# Generalized Hamming Distance + + +def _init_mat(nrows, ncols, ins_cost, del_cost): + mat = np.empty((nrows, ncols)) + mat[0, :] = ins_cost * np.arange(ncols) + mat[:, 0] = del_cost * np.arange(nrows) + return mat + + +def _ghd_aux(mat, rowv, colv, ins_cost, del_cost, shift_cost_coeff): + for i, rowi in enumerate(rowv): + for j, colj in enumerate(colv): + shift_cost = shift_cost_coeff * abs(rowi - colj) + mat[i, j] + if rowi == colj: + # boundaries are at the same location, no transformation required + tcost = mat[i, j] + elif rowi > colj: + # boundary match through a deletion + tcost = del_cost + mat[i, j + 1] + else: + # boundary match through an insertion + tcost = ins_cost + mat[i + 1, j] + mat[i + 1, j + 1] = min(tcost, shift_cost) + + +def ghd(ref, hyp, ins_cost=2.0, del_cost=2.0, shift_cost_coeff=1.0, boundary='1'): + """ + Compute the Generalized Hamming Distance for a reference and a hypothetical + segmentation, corresponding to the cost related to the transformation + of the hypothetical segmentation into the reference segmentation + through boundary insertion, deletion and shift operations. + + A segmentation is any sequence over a vocabulary of two items + (e.g. "0", "1"), where the specified boundary value is used to + mark the edge of a segmentation. + + Recommended parameter values are a shift_cost_coeff of 2. + Associated with a ins_cost, and del_cost equal to the mean segment + length in the reference segmentation. 
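+ For instance, in the first example below the two segmentations differ only
+ in a single boundary that is shifted by one position, so the total cost is
+ shift_cost_coeff * 1 = 0.5.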
+ + >>> # Same examples as Kulyukin C++ implementation + >>> ghd('1100100000', '1100010000', 1.0, 1.0, 0.5) + 0.5 + >>> ghd('1100100000', '1100000001', 1.0, 1.0, 0.5) + 2.0 + >>> ghd('011', '110', 1.0, 1.0, 0.5) + 1.0 + >>> ghd('1', '0', 1.0, 1.0, 0.5) + 1.0 + >>> ghd('111', '000', 1.0, 1.0, 0.5) + 3.0 + >>> ghd('000', '111', 1.0, 2.0, 0.5) + 6.0 + + :param ref: the reference segmentation + :type ref: str or list + :param hyp: the hypothetical segmentation + :type hyp: str or list + :param ins_cost: insertion cost + :type ins_cost: float + :param del_cost: deletion cost + :type del_cost: float + :param shift_cost_coeff: constant used to compute the cost of a shift. + shift cost = shift_cost_coeff * |i - j| where i and j are + the positions indicating the shift + :type shift_cost_coeff: float + :param boundary: boundary value + :type boundary: str or int or bool + :rtype: float + """ + + ref_idx = [i for (i, val) in enumerate(ref) if val == boundary] + hyp_idx = [i for (i, val) in enumerate(hyp) if val == boundary] + + nref_bound = len(ref_idx) + nhyp_bound = len(hyp_idx) + + if nref_bound == 0 and nhyp_bound == 0: + return 0.0 + elif nref_bound > 0 and nhyp_bound == 0: + return nref_bound * ins_cost + elif nref_bound == 0 and nhyp_bound > 0: + return nhyp_bound * del_cost + + mat = _init_mat(nhyp_bound + 1, nref_bound + 1, ins_cost, del_cost) + _ghd_aux(mat, hyp_idx, ref_idx, ins_cost, del_cost, shift_cost_coeff) + return mat[-1, -1] + + +# Beeferman's Pk text segmentation evaluation metric + + +def pk(ref, hyp, k=None, boundary='1'): + """ + Compute the Pk metric for a pair of segmentations A segmentation + is any sequence over a vocabulary of two items (e.g. "0", "1"), + where the specified boundary value is used to mark the edge of a + segmentation. + + >>> '%.2f' % pk('0100'*100, '1'*400, 2) + '0.50' + >>> '%.2f' % pk('0100'*100, '0'*400, 2) + '0.50' + >>> '%.2f' % pk('0100'*100, '0100'*100, 2) + '0.00' + + :param ref: the reference segmentation + :type ref: str or list + :param hyp: the segmentation to evaluate + :type hyp: str or list + :param k: window size, if None, set to half of the average reference segment length + :type boundary: str or int or bool + :param boundary: boundary value + :type boundary: str or int or bool + :rtype: float + """ + + if k is None: + k = int(round(len(ref) / (ref.count(boundary) * 2.0))) + + err = 0 + for i in range(len(ref) - k + 1): + r = ref[i : i + k].count(boundary) > 0 + h = hyp[i : i + k].count(boundary) > 0 + if r != h: + err += 1 + return err / (len(ref) - k + 1.0) + + +# skip doctests if numpy is not installed +def setup_module(module): + from nose import SkipTest + + try: + import numpy + except ImportError: + raise SkipTest("numpy is required for nltk.metrics.segmentation") diff --git a/venv.bak/lib/python3.7/site-packages/nltk/metrics/spearman.py b/venv.bak/lib/python3.7/site-packages/nltk/metrics/spearman.py new file mode 100644 index 0000000..3736b8f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/metrics/spearman.py @@ -0,0 +1,69 @@ +# Natural Language Toolkit: Spearman Rank Correlation +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Joel Nothman +# URL: +# For license information, see LICENSE.TXT +from __future__ import division + +""" +Tools for comparing ranked lists. +""" + + +def _rank_dists(ranks1, ranks2): + """Finds the difference between the values in ranks1 and ranks2 for keys + present in both dicts. If the arguments are not dicts, they are converted + from (key, rank) sequences. 
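+ For example, _rank_dists({'a': 1, 'b': 2}, {'a': 2, 'b': 2}) yields
+ ('a', -1) and ('b', 0).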
+ """ + ranks1 = dict(ranks1) + ranks2 = dict(ranks2) + for k in ranks1: + try: + yield k, ranks1[k] - ranks2[k] + except KeyError: + pass + + +def spearman_correlation(ranks1, ranks2): + """Returns the Spearman correlation coefficient for two rankings, which + should be dicts or sequences of (key, rank). The coefficient ranges from + -1.0 (ranks are opposite) to 1.0 (ranks are identical), and is only + calculated for keys in both rankings (for meaningful results, remove keys + present in only one list before ranking).""" + n = 0 + res = 0 + for k, d in _rank_dists(ranks1, ranks2): + res += d * d + n += 1 + try: + return 1 - (6 * res / (n * (n * n - 1))) + except ZeroDivisionError: + # Result is undefined if only one item is ranked + return 0.0 + + +def ranks_from_sequence(seq): + """Given a sequence, yields each element with an increasing rank, suitable + for use as an argument to ``spearman_correlation``. + """ + return ((k, i) for i, k in enumerate(seq)) + + +def ranks_from_scores(scores, rank_gap=1e-15): + """Given a sequence of (key, score) tuples, yields each key with an + increasing rank, tying with previous key's rank if the difference between + their scores is less than rank_gap. Suitable for use as an argument to + ``spearman_correlation``. + """ + prev_score = None + rank = 0 + for i, (key, score) in enumerate(scores): + try: + if abs(score - prev_score) > rank_gap: + rank = i + except TypeError: + pass + + yield key, rank + prev_score = score diff --git a/venv.bak/lib/python3.7/site-packages/nltk/misc/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/misc/__init__.py new file mode 100644 index 0000000..63c1da9 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/misc/__init__.py @@ -0,0 +1,11 @@ +# Natural Language Toolkit: Miscellaneous modules +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# URL: +# For license information, see LICENSE.TXT + +from nltk.misc.chomsky import generate_chomsky +from nltk.misc.wordfinder import word_finder +from nltk.misc.minimalset import MinimalSet +from nltk.misc.babelfish import babelize_shell diff --git a/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..5854a40 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/babelfish.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/babelfish.cpython-37.pyc new file mode 100644 index 0000000..4ecb58c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/babelfish.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/chomsky.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/chomsky.cpython-37.pyc new file mode 100644 index 0000000..5deade6 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/chomsky.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/minimalset.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/minimalset.cpython-37.pyc new file mode 100644 index 0000000..b89fa52 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/minimalset.cpython-37.pyc differ diff --git 
a/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/sort.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/sort.cpython-37.pyc new file mode 100644 index 0000000..f043957 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/sort.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/wordfinder.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/wordfinder.cpython-37.pyc new file mode 100644 index 0000000..e60037c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/misc/__pycache__/wordfinder.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/misc/babelfish.py b/venv.bak/lib/python3.7/site-packages/nltk/misc/babelfish.py new file mode 100644 index 0000000..fb00bf5 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/misc/babelfish.py @@ -0,0 +1,11 @@ +""" +This module previously provided an interface to Babelfish online +translation service; this service is no longer available; this +module is kept in NLTK source code in order to provide better error +messages for people following the NLTK Book 2.0. +""" +from __future__ import print_function + + +def babelize_shell(): + print("Babelfish online translation service is no longer available.") diff --git a/venv.bak/lib/python3.7/site-packages/nltk/misc/chomsky.py b/venv.bak/lib/python3.7/site-packages/nltk/misc/chomsky.py new file mode 100644 index 0000000..d910024 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/misc/chomsky.py @@ -0,0 +1,136 @@ +# Chomsky random text generator, version 1.1, Raymond Hettinger, 2005/09/13 +# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/440546 + +""" +CHOMSKY is an aid to writing linguistic papers in the style +of the great master. It is based on selected phrases taken +from actual books and articles written by Noam Chomsky. +Upon request, it assembles the phrases in the elegant +stylistic patterns that Chomsky is noted for. +To generate n sentences of linguistic wisdom, type + + (CHOMSKY n) -- for example + (CHOMSKY 5) generates half a screen of linguistic truth. +""" +from __future__ import print_function + +leadins = """To characterize a linguistic level L, + On the other hand, + This suggests that + It appears that + Furthermore, + We will bring evidence in favor of the following thesis: + To provide a constituent structure for T(Z,K), + From C1, it follows that + For any transformation which is sufficiently diversified in \ +application to be of any interest, + Analogously, + Clearly, + Note that + Of course, + Suppose, for instance, that + Thus + With this clarification, + Conversely, + We have already seen that + By combining adjunctions and certain deformations, + I suggested that these results would follow from the assumption that + If the position of the trace in (99c) were only relatively \ +inaccessible to movement, + However, this assumption is not correct, since + Comparing these examples with their parasitic gap counterparts in \ +(96) and (97), we see that + In the discussion of resumptive pronouns following (81), + So far, + Nevertheless, + For one thing, + Summarizing, then, we assume that + A consequence of the approach just outlined is that + Presumably, + On our assumptions, + It may be, then, that + It must be emphasized, once again, that + Let us continue to suppose that + Notice, incidentally, that """ +# List of LEADINs to buy time. 
+ +subjects = """ the notion of level of grammaticalness + a case of semigrammaticalness of a different sort + most of the methodological work in modern linguistics + a subset of English sentences interesting on quite independent grounds + the natural general principle that will subsume this case + an important property of these three types of EC + any associated supporting element + the appearance of parasitic gaps in domains relatively inaccessible \ +to ordinary extraction + the speaker-hearer's linguistic intuition + the descriptive power of the base component + the earlier discussion of deviance + this analysis of a formative as a pair of sets of features + this selectionally introduced contextual feature + a descriptively adequate grammar + the fundamental error of regarding functional notions as categorial + relational information + the systematic use of complex symbols + the theory of syntactic features developed earlier""" +# List of SUBJECTs chosen for maximum professorial macho. + +verbs = """can be defined in such a way as to impose + delimits + suffices to account for + cannot be arbitrary in + is not subject to + does not readily tolerate + raises serious doubts about + is not quite equivalent to + does not affect the structure of + may remedy and, at the same time, eliminate + is not to be considered in determining + is to be regarded as + is unspecified with respect to + is, apparently, determined by + is necessary to impose an interpretation on + appears to correlate rather closely with + is rather different from""" +# List of VERBs chosen for autorecursive obfuscation. + +objects = """ problems of phonemic and morphological analysis. + a corpus of utterance tokens upon which conformity has been defined \ +by the paired utterance test. + the traditional practice of grammarians. + the levels of acceptability from fairly high (e.g. (99a)) to virtual \ +gibberish (e.g. (98d)). + a stipulation to place the constructions into these various categories. + a descriptive fact. + a parasitic gap construction. + the extended c-command discussed in connection with (34). + the ultimate standard that determines the accuracy of any proposed grammar. + the system of base rules exclusive of the lexicon. + irrelevant intervening contexts in selectional rules. + nondistinctness in the sense of distinctive feature theory. + a general convention regarding the forms of the grammar. + an abstract underlying order. + an important distinction in language use. + the requirement that branching is not tolerated within the dominance \ +scope of a complex symbol. + the strong generative capacity of the theory.""" +# List of OBJECTs selected for profound sententiousness. 
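+
+# Illustrative usage sketch: each generated sentence draws one phrase from
+# each of the four banks above, in the order leadins, subjects, verbs,
+# objects. For example:
+#
+#     >>> from nltk.misc.chomsky import generate_chomsky
+#     >>> generate_chomsky(times=2)   # prints two wrapped pseudo-sentences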
+ +import textwrap, random +from itertools import chain, islice + +from six.moves import zip + + +def generate_chomsky(times=5, line_length=72): + parts = [] + for part in (leadins, subjects, verbs, objects): + phraselist = list(map(str.strip, part.splitlines())) + random.shuffle(phraselist) + parts.append(phraselist) + output = chain(*islice(zip(*parts), 0, times)) + print(textwrap.fill(" ".join(output), line_length)) + + +if __name__ == '__main__': + generate_chomsky() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/misc/minimalset.py b/venv.bak/lib/python3.7/site-packages/nltk/misc/minimalset.py new file mode 100644 index 0000000..ca298e8 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/misc/minimalset.py @@ -0,0 +1,85 @@ +# Natural Language Toolkit: Minimal Sets +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# URL: +# For license information, see LICENSE.TXT + +from collections import defaultdict + + +class MinimalSet(object): + """ + Find contexts where more than one possible target value can + appear. E.g. if targets are word-initial letters, and contexts + are the remainders of words, then we would like to find cases like + "fat" vs "cat", and "training" vs "draining". If targets are + parts-of-speech and contexts are words, then we would like to find + cases like wind (noun) 'air in rapid motion', vs wind (verb) + 'coil, wrap'. + """ + + def __init__(self, parameters=None): + """ + Create a new minimal set. + + :param parameters: The (context, target, display) tuples for the item + :type parameters: list(tuple(str, str, str)) + """ + self._targets = set() # the contrastive information + self._contexts = set() # what we are controlling for + self._seen = defaultdict(set) # to record what we have seen + self._displays = {} # what we will display + + if parameters: + for context, target, display in parameters: + self.add(context, target, display) + + def add(self, context, target, display): + """ + Add a new item to the minimal set, having the specified + context, target, and display form. + + :param context: The context in which the item of interest appears + :type context: str + :param target: The item of interest + :type target: str + :param display: The information to be reported for each item + :type display: str + """ + # Store the set of targets that occurred in this context + self._seen[context].add(target) + + # Keep track of which contexts and targets we have seen + self._contexts.add(context) + self._targets.add(target) + + # For a given context and target, store the display form + self._displays[(context, target)] = display + + def contexts(self, minimum=2): + """ + Determine which contexts occurred with enough distinct targets. 
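+        For example (illustrative), after ``add('at', 'f', 'fat')`` and
+        ``add('at', 'c', 'cat')``, the context ``'at'`` has two distinct
+        targets and would be included in the result.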
+ + :param minimum: the minimum number of distinct target forms + :type minimum: int + :rtype list + """ + return [c for c in self._contexts if len(self._seen[c]) >= minimum] + + def display(self, context, target, default=""): + if (context, target) in self._displays: + return self._displays[(context, target)] + else: + return default + + def display_all(self, context): + result = [] + for target in self._targets: + x = self.display(context, target) + if x: + result.append(x) + return result + + def targets(self): + return self._targets diff --git a/venv.bak/lib/python3.7/site-packages/nltk/misc/sort.py b/venv.bak/lib/python3.7/site-packages/nltk/misc/sort.py new file mode 100644 index 0000000..cef988e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/misc/sort.py @@ -0,0 +1,180 @@ +# Natural Language Toolkit: List Sorting +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# URL: +# For license information, see LICENSE.TXT + +""" +This module provides a variety of list sorting algorithms, to +illustrate the many different algorithms (recipes) for solving a +problem, and how to analyze algorithms experimentally. +""" +from __future__ import print_function, division + +# These algorithms are taken from: +# Levitin (2004) The Design and Analysis of Algorithms + +################################################################## +# Selection Sort +################################################################## + + +def selection(a): + """ + Selection Sort: scan the list to find its smallest element, then + swap it with the first element. The remainder of the list is one + element smaller; apply the same method to this list, and so on. + """ + count = 0 + + for i in range(len(a) - 1): + min = i + + for j in range(i + 1, len(a)): + if a[j] < a[min]: + min = j + + count += 1 + + a[min], a[i] = a[i], a[min] + + return count + + +################################################################## +# Bubble Sort +################################################################## + + +def bubble(a): + """ + Bubble Sort: compare adjacent elements of the list left-to-right, + and swap them if they are out of order. After one pass through + the list swapping adjacent items, the largest item will be in + the rightmost position. The remainder is one element smaller; + apply the same method to this list, and so on. + """ + count = 0 + for i in range(len(a) - 1): + for j in range(len(a) - i - 1): + if a[j + 1] < a[j]: + a[j], a[j + 1] = a[j + 1], a[j] + count += 1 + return count + + +################################################################## +# Merge Sort +################################################################## + + +def _merge_lists(b, c): + count = 0 + i = j = 0 + a = [] + while i < len(b) and j < len(c): + count += 1 + if b[i] <= c[j]: + a.append(b[i]) + i += 1 + else: + a.append(c[j]) + j += 1 + if i == len(b): + a += c[j:] + else: + a += b[i:] + return a, count + + +def merge(a): + """ + Merge Sort: split the list in half, and sort each half, then + combine the sorted halves. + """ + count = 0 + if len(a) > 1: + midpoint = len(a) // 2 + b = a[:midpoint] + c = a[midpoint:] + count_b = merge(b) + count_c = merge(c) + result, count_a = _merge_lists(b, c) + a[:] = result # copy the result back into a. 
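+        # count_b and count_c are the comparison counts of the two recursive
+        # sorts; count_a is the number of comparisons made while merging.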
+ count = count_a + count_b + count_c + return count + + +################################################################## +# Quick Sort +################################################################## + + +def _partition(a, l, r): + p = a[l] + i = l + j = r + 1 + count = 0 + while True: + while i < r: + i += 1 + if a[i] >= p: + break + while j > l: + j -= 1 + if j < l or a[j] <= p: + break + a[i], a[j] = a[j], a[i] # swap + count += 1 + if i >= j: + break + a[i], a[j] = a[j], a[i] # undo last swap + a[l], a[j] = a[j], a[l] + return j, count + + +def _quick(a, l, r): + count = 0 + if l < r: + s, count = _partition(a, l, r) + count += _quick(a, l, s - 1) + count += _quick(a, s + 1, r) + return count + + +def quick(a): + return _quick(a, 0, len(a) - 1) + + +################################################################## +# Demonstration +################################################################## + + +def demo(): + from random import shuffle + + for size in (10, 20, 50, 100, 200, 500, 1000): + a = list(range(size)) + + # various sort methods + shuffle(a) + count_selection = selection(a) + shuffle(a) + count_bubble = bubble(a) + shuffle(a) + count_merge = merge(a) + shuffle(a) + count_quick = quick(a) + + print( + ( + ("size=%5d: selection=%8d, bubble=%8d, " "merge=%6d, quick=%6d") + % (size, count_selection, count_bubble, count_merge, count_quick) + ) + ) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/misc/wordfinder.py b/venv.bak/lib/python3.7/site-packages/nltk/misc/wordfinder.py new file mode 100644 index 0000000..a0b8ae7 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/misc/wordfinder.py @@ -0,0 +1,140 @@ +# Natural Language Toolkit: Word Finder +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# URL: +# For license information, see LICENSE.TXT + +# Simplified from PHP version by Robert Klein +# http://fswordfinder.sourceforge.net/ +from __future__ import print_function + +import random + + +# reverse a word with probability 0.5 +def revword(word): + if random.randint(1, 2) == 1: + return word[::-1] + return word + + +# try to insert word at position x,y; direction encoded in xf,yf +def step(word, x, xf, y, yf, grid): + for i in range(len(word)): + if grid[xf(i)][yf(i)] != "" and grid[xf(i)][yf(i)] != word[i]: + return False + for i in range(len(word)): + grid[xf(i)][yf(i)] = word[i] + return True + + +# try to insert word at position x,y, in direction dir +def check(word, dir, x, y, grid, rows, cols): + if dir == 1: + if x - len(word) < 0 or y - len(word) < 0: + return False + return step(word, x, lambda i: x - i, y, lambda i: y - i, grid) + elif dir == 2: + if x - len(word) < 0: + return False + return step(word, x, lambda i: x - i, y, lambda i: y, grid) + elif dir == 3: + if x - len(word) < 0 or y + (len(word) - 1) >= cols: + return False + return step(word, x, lambda i: x - i, y, lambda i: y + i, grid) + elif dir == 4: + if y - len(word) < 0: + return False + return step(word, x, lambda i: x, y, lambda i: y - i, grid) + + +def wordfinder(words, rows=20, cols=20, attempts=50, alph='ABCDEFGHIJKLMNOPQRSTUVWXYZ'): + """ + Attempt to arrange words into a letter-grid with the specified + number of rows and columns. Try each word in several positions + and directions, until it can be fitted into the grid, or the + maximum number of allowable attempts is exceeded. Returns a tuple + consisting of the grid and the words that were successfully + placed. 
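+    Words that cannot be fitted within the allowed number of attempts are
+    simply omitted from the returned word list; an illustrative call would be
+    ``wordfinder(['PYTHON', 'PARSE'], rows=10, cols=10)``.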
+ + :param words: the list of words to be put into the grid + :type words: list + :param rows: the number of rows in the grid + :type rows: int + :param cols: the number of columns in the grid + :type cols: int + :param attempts: the number of times to attempt placing a word + :type attempts: int + :param alph: the alphabet, to be used for filling blank cells + :type alph: list + :rtype: tuple + """ + + # place longer words first + words = sorted(words, key=len, reverse=True) + + grid = [] # the letter grid + used = [] # the words we used + + # initialize the grid + for i in range(rows): + grid.append([""] * cols) + + # try to place each word + for word in words: + word = word.strip().upper() # normalize + save = word # keep a record of the word + word = revword(word) + for attempt in range(attempts): + r = random.randint(0, len(word)) + dir = random.choice([1, 2, 3, 4]) + x = random.randint(0, rows) + y = random.randint(0, cols) + if dir == 1: + x += r + y += r + elif dir == 2: + x += r + elif dir == 3: + x += r + y -= r + elif dir == 4: + y += r + if 0 <= x < rows and 0 <= y < cols: + if check(word, dir, x, y, grid, rows, cols): + # used.append((save, dir, x, y, word)) + used.append(save) + break + + # Fill up the remaining spaces + for i in range(rows): + for j in range(cols): + if grid[i][j] == '': + grid[i][j] = random.choice(alph) + + return grid, used + + +def word_finder(): + from nltk.corpus import words + + wordlist = words.words() + random.shuffle(wordlist) + wordlist = wordlist[:200] + wordlist = [w for w in wordlist if 3 <= len(w) <= 12] + grid, used = wordfinder(wordlist) + + print("Word Finder\n") + for i in range(len(grid)): + for j in range(len(grid[i])): + print(grid[i][j], end=' ') + print() + print() + + for i in range(len(used)): + print("%d:" % (i + 1), used[i]) + + +if __name__ == '__main__': + word_finder() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/__init__.py new file mode 100644 index 0000000..52cd4f1 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/__init__.py @@ -0,0 +1,102 @@ +# Natural Language Toolkit: Parsers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT +# + +""" +NLTK Parsers + +Classes and interfaces for producing tree structures that represent +the internal organization of a text. This task is known as "parsing" +the text, and the resulting tree structures are called the text's +"parses". Typically, the text is a single sentence, and the tree +structure represents the syntactic structure of the sentence. +However, parsers can also be used in other domains. For example, +parsers can be used to derive the morphological structure of the +morphemes that make up a word, or to derive the discourse structure +for a set of utterances. + +Sometimes, a single piece of text can be represented by more than one +tree structure. Texts represented by more than one tree structure are +called "ambiguous" texts. Note that there are actually two ways in +which a text can be ambiguous: + + - The text has multiple correct parses. + - There is not enough information to decide which of several + candidate parses is correct. + +However, the parser module does *not* distinguish these two types of +ambiguity. + +The parser module defines ``ParserI``, a standard interface for parsing +texts; and two simple implementations of that interface, +``ShiftReduceParser`` and ``RecursiveDescentParser``. 
It also contains +three sub-modules for specialized kinds of parsing: + + - ``nltk.parser.chart`` defines chart parsing, which uses dynamic + programming to efficiently parse texts. + - ``nltk.parser.probabilistic`` defines probabilistic parsing, which + associates a probability with each parse. +""" + +from nltk.parse.api import ParserI +from nltk.parse.chart import ( + ChartParser, + SteppingChartParser, + TopDownChartParser, + BottomUpChartParser, + BottomUpLeftCornerChartParser, + LeftCornerChartParser, +) +from nltk.parse.featurechart import ( + FeatureChartParser, + FeatureTopDownChartParser, + FeatureBottomUpChartParser, + FeatureBottomUpLeftCornerChartParser, +) +from nltk.parse.earleychart import ( + IncrementalChartParser, + EarleyChartParser, + IncrementalTopDownChartParser, + IncrementalBottomUpChartParser, + IncrementalBottomUpLeftCornerChartParser, + IncrementalLeftCornerChartParser, + FeatureIncrementalChartParser, + FeatureEarleyChartParser, + FeatureIncrementalTopDownChartParser, + FeatureIncrementalBottomUpChartParser, + FeatureIncrementalBottomUpLeftCornerChartParser, +) +from nltk.parse.pchart import ( + BottomUpProbabilisticChartParser, + InsideChartParser, + RandomChartParser, + UnsortedChartParser, + LongestChartParser, +) +from nltk.parse.recursivedescent import ( + RecursiveDescentParser, + SteppingRecursiveDescentParser, +) +from nltk.parse.shiftreduce import ShiftReduceParser, SteppingShiftReduceParser +from nltk.parse.util import load_parser, TestGrammar, extract_test_sentences +from nltk.parse.viterbi import ViterbiParser +from nltk.parse.dependencygraph import DependencyGraph +from nltk.parse.projectivedependencyparser import ( + ProjectiveDependencyParser, + ProbabilisticProjectiveDependencyParser, +) +from nltk.parse.nonprojectivedependencyparser import ( + NonprojectiveDependencyParser, + NaiveBayesDependencyScorer, + ProbabilisticNonprojectiveParser, +) +from nltk.parse.malt import MaltParser +from nltk.parse.evaluate import DependencyEvaluator +from nltk.parse.transitionparser import TransitionParser +from nltk.parse.bllip import BllipParser +from nltk.parse.corenlp import CoreNLPParser, CoreNLPDependencyParser diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..25471bc Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/api.cpython-37.pyc new file mode 100644 index 0000000..a4f6d28 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/api.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/bllip.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/bllip.cpython-37.pyc new file mode 100644 index 0000000..a4b9a2a Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/bllip.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/chart.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/chart.cpython-37.pyc new file mode 100644 index 0000000..dc7fecc Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/chart.cpython-37.pyc differ diff --git 
a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/corenlp.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/corenlp.cpython-37.pyc new file mode 100644 index 0000000..6e15d47 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/corenlp.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/dependencygraph.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/dependencygraph.cpython-37.pyc new file mode 100644 index 0000000..4971db0 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/dependencygraph.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/earleychart.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/earleychart.cpython-37.pyc new file mode 100644 index 0000000..8cea257 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/earleychart.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/evaluate.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/evaluate.cpython-37.pyc new file mode 100644 index 0000000..45502af Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/evaluate.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/featurechart.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/featurechart.cpython-37.pyc new file mode 100644 index 0000000..0d642e7 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/featurechart.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/generate.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/generate.cpython-37.pyc new file mode 100644 index 0000000..a4c9664 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/generate.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/malt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/malt.cpython-37.pyc new file mode 100644 index 0000000..c93f6a5 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/malt.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/nonprojectivedependencyparser.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/nonprojectivedependencyparser.cpython-37.pyc new file mode 100644 index 0000000..d24cfc5 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/nonprojectivedependencyparser.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/pchart.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/pchart.cpython-37.pyc new file mode 100644 index 0000000..23e965a Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/pchart.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/projectivedependencyparser.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/projectivedependencyparser.cpython-37.pyc new file mode 100644 index 0000000..f8ef496 Binary files /dev/null and 
b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/projectivedependencyparser.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/recursivedescent.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/recursivedescent.cpython-37.pyc new file mode 100644 index 0000000..2d121b9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/recursivedescent.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/shiftreduce.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/shiftreduce.cpython-37.pyc new file mode 100644 index 0000000..297ded2 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/shiftreduce.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/stanford.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/stanford.cpython-37.pyc new file mode 100644 index 0000000..3ad20f1 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/stanford.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/transitionparser.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/transitionparser.cpython-37.pyc new file mode 100644 index 0000000..33a235e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/transitionparser.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000..cefbab6 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/util.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/viterbi.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/viterbi.cpython-37.pyc new file mode 100644 index 0000000..fa67cc9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/parse/__pycache__/viterbi.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/api.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/api.py new file mode 100644 index 0000000..5372b10 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/api.py @@ -0,0 +1,72 @@ +# Natural Language Toolkit: Parser API +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT +# + +import itertools + +from nltk.internals import overridden + + +class ParserI(object): + """ + A processing class for deriving trees that represent possible + structures for a sequence of tokens. These tree structures are + known as "parses". Typically, parsers are used to derive syntax + trees for sentences. But parsers can also be used to derive other + kinds of tree structure, such as morphological trees and discourse + structures. + + Subclasses must define: + - at least one of: ``parse()``, ``parse_sents()``. + + Subclasses may define: + - ``grammar()`` + """ + + def grammar(self): + """ + :return: The grammar used by this parser. + """ + raise NotImplementedError() + + def parse(self, sent, *args, **kwargs): + """ + :return: An iterator that generates parse trees for the sentence. + When possible this list is sorted from most likely to least likely. 
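+        A typical call on a concrete ``ParserI`` implementation ``parser``
+        (illustrative sketch) is ``for tree in parser.parse(sent): print(tree)``.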
+ + :param sent: The sentence to be parsed + :type sent: list(str) + :rtype: iter(Tree) + """ + if overridden(self.parse_sents): + return next(self.parse_sents([sent], *args, **kwargs)) + elif overridden(self.parse_one): + return ( + tree + for tree in [self.parse_one(sent, *args, **kwargs)] + if tree is not None + ) + elif overridden(self.parse_all): + return iter(self.parse_all(sent, *args, **kwargs)) + else: + raise NotImplementedError() + + def parse_sents(self, sents, *args, **kwargs): + """ + Apply ``self.parse()`` to each element of ``sents``. + :rtype: iter(iter(Tree)) + """ + return (self.parse(sent, *args, **kwargs) for sent in sents) + + def parse_all(self, sent, *args, **kwargs): + """:rtype: list(Tree)""" + return list(self.parse(sent, *args, **kwargs)) + + def parse_one(self, sent, *args, **kwargs): + """:rtype: Tree or None""" + return next(self.parse(sent, *args, **kwargs), None) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/bllip.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/bllip.py new file mode 100644 index 0000000..01934b8 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/bllip.py @@ -0,0 +1,314 @@ +# Natural Language Toolkit: Interface to BLLIP Parser +# +# Author: David McClosky +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT + +from __future__ import print_function + +from nltk.parse.api import ParserI +from nltk.tree import Tree + +""" +Interface for parsing with BLLIP Parser. Requires the Python +bllipparser module. BllipParser objects can be constructed with the +``BllipParser.from_unified_model_dir`` class method or manually using the +``BllipParser`` constructor. The former is generally easier if you have +a BLLIP Parser unified model directory -- a basic model can be obtained +from NLTK's downloader. More unified parsing models can be obtained with +BLLIP Parser's ModelFetcher (run ``python -m bllipparser.ModelFetcher`` +or see docs for ``bllipparser.ModelFetcher.download_and_install_model``). + +Basic usage:: + + # download and install a basic unified parsing model (Wall Street Journal) + # sudo python -m nltk.downloader bllip_wsj_no_aux + + >>> from nltk.data import find + >>> model_dir = find('models/bllip_wsj_no_aux').path + >>> bllip = BllipParser.from_unified_model_dir(model_dir) + + # 1-best parsing + >>> sentence1 = 'British left waffles on Falklands .'.split() + >>> top_parse = bllip.parse_one(sentence1) + >>> print(top_parse) + (S1 + (S + (NP (JJ British) (NN left)) + (VP (VBZ waffles) (PP (IN on) (NP (NNP Falklands)))) + (. .))) + + # n-best parsing + >>> sentence2 = 'Time flies'.split() + >>> all_parses = bllip.parse_all(sentence2) + >>> print(len(all_parses)) + 50 + >>> print(all_parses[0]) + (S1 (S (NP (NNP Time)) (VP (VBZ flies)))) + + # incorporating external tagging constraints (None means unconstrained tag) + >>> constrained1 = bllip.tagged_parse([('Time', 'VB'), ('flies', 'NNS')]) + >>> print(next(constrained1)) + (S1 (NP (VB Time) (NNS flies))) + >>> constrained2 = bllip.tagged_parse([('Time', 'NN'), ('flies', None)]) + >>> print(next(constrained2)) + (S1 (NP (NN Time) (VBZ flies))) + +References +---------- + +- Charniak, Eugene. "A maximum-entropy-inspired parser." Proceedings of + the 1st North American chapter of the Association for Computational + Linguistics conference. Association for Computational Linguistics, + 2000. + +- Charniak, Eugene, and Mark Johnson. "Coarse-to-fine n-best parsing + and MaxEnt discriminative reranking." 
Proceedings of the 43rd Annual + Meeting on Association for Computational Linguistics. Association + for Computational Linguistics, 2005. + +Known issues +------------ + +Note that BLLIP Parser is not currently threadsafe. Since this module +uses a SWIG interface, it is potentially unsafe to create multiple +``BllipParser`` objects in the same process. BLLIP Parser currently +has issues with non-ASCII text and will raise an error if given any. + +See http://pypi.python.org/pypi/bllipparser/ for more information +on BLLIP Parser's Python interface. +""" + +__all__ = ['BllipParser'] + +# this block allows this module to be imported even if bllipparser isn't +# available +try: + from bllipparser import RerankingParser + from bllipparser.RerankingParser import get_unified_model_parameters + + def _ensure_bllip_import_or_error(): + pass + + +except ImportError as ie: + + def _ensure_bllip_import_or_error(ie=ie): + raise ImportError("Couldn't import bllipparser module: %s" % ie) + + +def _ensure_ascii(words): + try: + for i, word in enumerate(words): + word.decode('ascii') + except UnicodeDecodeError: + raise ValueError( + "Token %d (%r) is non-ASCII. BLLIP Parser " + "currently doesn't support non-ASCII inputs." % (i, word) + ) + + +def _scored_parse_to_nltk_tree(scored_parse): + return Tree.fromstring(str(scored_parse.ptb_parse)) + + +class BllipParser(ParserI): + """ + Interface for parsing with BLLIP Parser. BllipParser objects can be + constructed with the ``BllipParser.from_unified_model_dir`` class + method or manually using the ``BllipParser`` constructor. + """ + + def __init__( + self, + parser_model=None, + reranker_features=None, + reranker_weights=None, + parser_options=None, + reranker_options=None, + ): + """ + Load a BLLIP Parser model from scratch. You'll typically want to + use the ``from_unified_model_dir()`` class method to construct + this object. + + :param parser_model: Path to parser model directory + :type parser_model: str + + :param reranker_features: Path the reranker model's features file + :type reranker_features: str + + :param reranker_weights: Path the reranker model's weights file + :type reranker_weights: str + + :param parser_options: optional dictionary of parser options, see + ``bllipparser.RerankingParser.RerankingParser.load_parser_options()`` + for more information. + :type parser_options: dict(str) + + :param reranker_options: optional + dictionary of reranker options, see + ``bllipparser.RerankingParser.RerankingParser.load_reranker_model()`` + for more information. + :type reranker_options: dict(str) + """ + _ensure_bllip_import_or_error() + + parser_options = parser_options or {} + reranker_options = reranker_options or {} + + self.rrp = RerankingParser() + self.rrp.load_parser_model(parser_model, **parser_options) + if reranker_features and reranker_weights: + self.rrp.load_reranker_model( + features_filename=reranker_features, + weights_filename=reranker_weights, + **reranker_options + ) + + def parse(self, sentence): + """ + Use BLLIP Parser to parse a sentence. Takes a sentence as a list + of words; it will be automatically tagged with this BLLIP Parser + instance's tagger. + + :return: An iterator that generates parse trees for the sentence + from most likely to least likely. 
+ + :param sentence: The sentence to be parsed + :type sentence: list(str) + :rtype: iter(Tree) + """ + _ensure_ascii(sentence) + nbest_list = self.rrp.parse(sentence) + for scored_parse in nbest_list: + yield _scored_parse_to_nltk_tree(scored_parse) + + def tagged_parse(self, word_and_tag_pairs): + """ + Use BLLIP to parse a sentence. Takes a sentence as a list of + (word, tag) tuples; the sentence must have already been tokenized + and tagged. BLLIP will attempt to use the tags provided but may + use others if it can't come up with a complete parse subject + to those constraints. You may also specify a tag as ``None`` + to leave a token's tag unconstrained. + + :return: An iterator that generates parse trees for the sentence + from most likely to least likely. + + :param sentence: Input sentence to parse as (word, tag) pairs + :type sentence: list(tuple(str, str)) + :rtype: iter(Tree) + """ + words = [] + tag_map = {} + for i, (word, tag) in enumerate(word_and_tag_pairs): + words.append(word) + if tag is not None: + tag_map[i] = tag + + _ensure_ascii(words) + nbest_list = self.rrp.parse_tagged(words, tag_map) + for scored_parse in nbest_list: + yield _scored_parse_to_nltk_tree(scored_parse) + + @classmethod + def from_unified_model_dir( + cls, model_dir, parser_options=None, reranker_options=None + ): + """ + Create a ``BllipParser`` object from a unified parsing model + directory. Unified parsing model directories are a standardized + way of storing BLLIP parser and reranker models together on disk. + See ``bllipparser.RerankingParser.get_unified_model_parameters()`` + for more information about unified model directories. + + :return: A ``BllipParser`` object using the parser and reranker + models in the model directory. + + :param model_dir: Path to the unified model directory. + :type model_dir: str + :param parser_options: optional dictionary of parser options, see + ``bllipparser.RerankingParser.RerankingParser.load_parser_options()`` + for more information. + :type parser_options: dict(str) + :param reranker_options: optional dictionary of reranker options, see + ``bllipparser.RerankingParser.RerankingParser.load_reranker_model()`` + for more information. + :type reranker_options: dict(str) + :rtype: BllipParser + """ + ( + parser_model_dir, + reranker_features_filename, + reranker_weights_filename, + ) = get_unified_model_parameters(model_dir) + return cls( + parser_model_dir, + reranker_features_filename, + reranker_weights_filename, + parser_options, + reranker_options, + ) + + +def demo(): + """This assumes the Python module bllipparser is installed.""" + + # download and install a basic unified parsing model (Wall Street Journal) + # sudo python -m nltk.downloader bllip_wsj_no_aux + + from nltk.data import find + + model_dir = find('models/bllip_wsj_no_aux').path + + print('Loading BLLIP Parsing models...') + # the easiest way to get started is to use a unified model + bllip = BllipParser.from_unified_model_dir(model_dir) + print('Done.') + + sentence1 = 'British left waffles on Falklands .'.split() + sentence2 = 'I saw the man with the telescope .'.split() + # this sentence is known to fail under the WSJ parsing model + fail1 = '# ! ? 
: -'.split() + for sentence in (sentence1, sentence2, fail1): + print('Sentence: %r' % ' '.join(sentence)) + try: + tree = next(bllip.parse(sentence)) + print(tree) + except StopIteration: + print("(parse failed)") + + # n-best parsing demo + for i, parse in enumerate(bllip.parse(sentence1)): + print('parse %d:\n%s' % (i, parse)) + + # using external POS tag constraints + print( + "forcing 'tree' to be 'NN':", + next(bllip.tagged_parse([('A', None), ('tree', 'NN')])), + ) + print( + "forcing 'A' to be 'DT' and 'tree' to be 'NNP':", + next(bllip.tagged_parse([('A', 'DT'), ('tree', 'NNP')])), + ) + # constraints don't have to make sense... (though on more complicated + # sentences, they may cause the parse to fail) + print( + "forcing 'A' to be 'NNP':", + next(bllip.tagged_parse([('A', 'NNP'), ('tree', None)])), + ) + + +def setup_module(module): + from nose import SkipTest + + try: + _ensure_bllip_import_or_error() + except ImportError: + raise SkipTest( + 'doctests from nltk.parse.bllip are skipped because ' + 'the bllipparser module is not installed' + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/chart.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/chart.py new file mode 100644 index 0000000..f1f68b4 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/chart.py @@ -0,0 +1,1863 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: A Chart Parser +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird +# Jean Mark Gawron +# Peter Ljunglöf +# URL: +# For license information, see LICENSE.TXT + +""" +Data classes and parser implementations for "chart parsers", which +use dynamic programming to efficiently parse a text. A chart +parser derives parse trees for a text by iteratively adding "edges" +to a "chart." Each edge represents a hypothesis about the tree +structure for a subsequence of the text. The chart is a +"blackboard" for composing and combining these hypotheses. + +When a chart parser begins parsing a text, it creates a new (empty) +chart, spanning the text. It then incrementally adds new edges to the +chart. A set of "chart rules" specifies the conditions under which +new edges should be added to the chart. Once the chart reaches a +stage where none of the chart rules adds any new edges, parsing is +complete. + +Charts are encoded with the ``Chart`` class, and edges are encoded with +the ``TreeEdge`` and ``LeafEdge`` classes. The chart parser module +defines three chart parsers: + + - ``ChartParser`` is a simple and flexible chart parser. Given a + set of chart rules, it will apply those rules to the chart until + no more edges are added. + + - ``SteppingChartParser`` is a subclass of ``ChartParser`` that can + be used to step through the parsing process. +""" +from __future__ import print_function, division, unicode_literals + +import itertools +import re +import warnings +from functools import total_ordering + +from six.moves import range + +from nltk.tree import Tree +from nltk.grammar import PCFG, is_nonterminal, is_terminal +from nltk.util import OrderedDict +from nltk.internals import raise_unorderable_types +from nltk.compat import python_2_unicode_compatible, unicode_repr + +from nltk.parse.api import ParserI + + +######################################################################## +## Edges +######################################################################## + + +@total_ordering +class EdgeI(object): + """ + A hypothesis about the structure of part of a sentence. 
+ Each edge records the fact that a structure is (partially) + consistent with the sentence. An edge contains: + + - A span, indicating what part of the sentence is + consistent with the hypothesized structure. + - A left-hand side, specifying what kind of structure is + hypothesized. + - A right-hand side, specifying the contents of the + hypothesized structure. + - A dot position, indicating how much of the hypothesized + structure is consistent with the sentence. + + Every edge is either complete or incomplete: + + - An edge is complete if its structure is fully consistent + with the sentence. + - An edge is incomplete if its structure is partially + consistent with the sentence. For every incomplete edge, the + span specifies a possible prefix for the edge's structure. + + There are two kinds of edge: + + - A ``TreeEdge`` records which trees have been found to + be (partially) consistent with the text. + - A ``LeafEdge`` records the tokens occurring in the text. + + The ``EdgeI`` interface provides a common interface to both types + of edge, allowing chart parsers to treat them in a uniform manner. + """ + + def __init__(self): + if self.__class__ == EdgeI: + raise TypeError('Edge is an abstract interface') + + # //////////////////////////////////////////////////////////// + # Span + # //////////////////////////////////////////////////////////// + + def span(self): + """ + Return a tuple ``(s, e)``, where ``tokens[s:e]`` is the + portion of the sentence that is consistent with this + edge's structure. + + :rtype: tuple(int, int) + """ + raise NotImplementedError() + + def start(self): + """ + Return the start index of this edge's span. + + :rtype: int + """ + raise NotImplementedError() + + def end(self): + """ + Return the end index of this edge's span. + + :rtype: int + """ + raise NotImplementedError() + + def length(self): + """ + Return the length of this edge's span. + + :rtype: int + """ + raise NotImplementedError() + + # //////////////////////////////////////////////////////////// + # Left Hand Side + # //////////////////////////////////////////////////////////// + + def lhs(self): + """ + Return this edge's left-hand side, which specifies what kind + of structure is hypothesized by this edge. + + :see: ``TreeEdge`` and ``LeafEdge`` for a description of + the left-hand side values for each edge type. + """ + raise NotImplementedError() + + # //////////////////////////////////////////////////////////// + # Right Hand Side + # //////////////////////////////////////////////////////////// + + def rhs(self): + """ + Return this edge's right-hand side, which specifies + the content of the structure hypothesized by this edge. + + :see: ``TreeEdge`` and ``LeafEdge`` for a description of + the right-hand side values for each edge type. + """ + raise NotImplementedError() + + def dot(self): + """ + Return this edge's dot position, which indicates how much of + the hypothesized structure is consistent with the + sentence. In particular, ``self.rhs[:dot]`` is consistent + with ``tokens[self.start():self.end()]``. + + :rtype: int + """ + raise NotImplementedError() + + def nextsym(self): + """ + Return the element of this edge's right-hand side that + immediately follows its dot. + + :rtype: Nonterminal or terminal or None + """ + raise NotImplementedError() + + def is_complete(self): + """ + Return True if this edge's structure is fully consistent + with the text. 
+ + :rtype: bool + """ + raise NotImplementedError() + + def is_incomplete(self): + """ + Return True if this edge's structure is partially consistent + with the text. + + :rtype: bool + """ + raise NotImplementedError() + + # //////////////////////////////////////////////////////////// + # Comparisons & hashing + # //////////////////////////////////////////////////////////// + + def __eq__(self, other): + return ( + self.__class__ is other.__class__ + and self._comparison_key == other._comparison_key + ) + + def __ne__(self, other): + return not self == other + + def __lt__(self, other): + if not isinstance(other, EdgeI): + raise_unorderable_types("<", self, other) + if self.__class__ is other.__class__: + return self._comparison_key < other._comparison_key + else: + return self.__class__.__name__ < other.__class__.__name__ + + def __hash__(self): + try: + return self._hash + except AttributeError: + self._hash = hash(self._comparison_key) + return self._hash + + +@python_2_unicode_compatible +class TreeEdge(EdgeI): + """ + An edge that records the fact that a tree is (partially) + consistent with the sentence. A tree edge consists of: + + - A span, indicating what part of the sentence is + consistent with the hypothesized tree. + - A left-hand side, specifying the hypothesized tree's node + value. + - A right-hand side, specifying the hypothesized tree's + children. Each element of the right-hand side is either a + terminal, specifying a token with that terminal as its leaf + value; or a nonterminal, specifying a subtree with that + nonterminal's symbol as its node value. + - A dot position, indicating which children are consistent + with part of the sentence. In particular, if ``dot`` is the + dot position, ``rhs`` is the right-hand size, ``(start,end)`` + is the span, and ``sentence`` is the list of tokens in the + sentence, then ``tokens[start:end]`` can be spanned by the + children specified by ``rhs[:dot]``. + + For more information about edges, see the ``EdgeI`` interface. + """ + + def __init__(self, span, lhs, rhs, dot=0): + """ + Construct a new ``TreeEdge``. + + :type span: tuple(int, int) + :param span: A tuple ``(s, e)``, where ``tokens[s:e]`` is the + portion of the sentence that is consistent with the new + edge's structure. + :type lhs: Nonterminal + :param lhs: The new edge's left-hand side, specifying the + hypothesized tree's node value. + :type rhs: list(Nonterminal and str) + :param rhs: The new edge's right-hand side, specifying the + hypothesized tree's children. + :type dot: int + :param dot: The position of the new edge's dot. This position + specifies what prefix of the production's right hand side + is consistent with the text. In particular, if + ``sentence`` is the list of tokens in the sentence, then + ``okens[span[0]:span[1]]`` can be spanned by the + children specified by ``rhs[:dot]``. + """ + self._span = span + self._lhs = lhs + rhs = tuple(rhs) + self._rhs = rhs + self._dot = dot + self._comparison_key = (span, lhs, rhs, dot) + + @staticmethod + def from_production(production, index): + """ + Return a new ``TreeEdge`` formed from the given production. + The new edge's left-hand side and right-hand side will + be taken from ``production``; its span will be + ``(index,index)``; and its dot position will be ``0``. + + :rtype: TreeEdge + """ + return TreeEdge( + span=(index, index), lhs=production.lhs(), rhs=production.rhs(), dot=0 + ) + + def move_dot_forward(self, new_end): + """ + Return a new ``TreeEdge`` formed from this edge. 
+ The new edge's dot position is increased by ``1``, + and its end index will be replaced by ``new_end``. + + :param new_end: The new end index. + :type new_end: int + :rtype: TreeEdge + """ + return TreeEdge( + span=(self._span[0], new_end), + lhs=self._lhs, + rhs=self._rhs, + dot=self._dot + 1, + ) + + # Accessors + def lhs(self): + return self._lhs + + def span(self): + return self._span + + def start(self): + return self._span[0] + + def end(self): + return self._span[1] + + def length(self): + return self._span[1] - self._span[0] + + def rhs(self): + return self._rhs + + def dot(self): + return self._dot + + def is_complete(self): + return self._dot == len(self._rhs) + + def is_incomplete(self): + return self._dot != len(self._rhs) + + def nextsym(self): + if self._dot >= len(self._rhs): + return None + else: + return self._rhs[self._dot] + + # String representation + def __str__(self): + str = '[%s:%s] ' % (self._span[0], self._span[1]) + str += '%-2r ->' % (self._lhs,) + + for i in range(len(self._rhs)): + if i == self._dot: + str += ' *' + str += ' %s' % unicode_repr(self._rhs[i]) + if len(self._rhs) == self._dot: + str += ' *' + return str + + def __repr__(self): + return '[Edge: %s]' % self + + +@python_2_unicode_compatible +class LeafEdge(EdgeI): + """ + An edge that records the fact that a leaf value is consistent with + a word in the sentence. A leaf edge consists of: + + - An index, indicating the position of the word. + - A leaf, specifying the word's content. + + A leaf edge's left-hand side is its leaf value, and its right hand + side is ``()``. Its span is ``[index, index+1]``, and its dot + position is ``0``. + """ + + def __init__(self, leaf, index): + """ + Construct a new ``LeafEdge``. + + :param leaf: The new edge's leaf value, specifying the word + that is recorded by this edge. + :param index: The new edge's index, specifying the position of + the word that is recorded by this edge. + """ + self._leaf = leaf + self._index = index + self._comparison_key = (leaf, index) + + # Accessors + def lhs(self): + return self._leaf + + def span(self): + return (self._index, self._index + 1) + + def start(self): + return self._index + + def end(self): + return self._index + 1 + + def length(self): + return 1 + + def rhs(self): + return () + + def dot(self): + return 0 + + def is_complete(self): + return True + + def is_incomplete(self): + return False + + def nextsym(self): + return None + + # String representations + def __str__(self): + return '[%s:%s] %s' % (self._index, self._index + 1, unicode_repr(self._leaf)) + + def __repr__(self): + return '[Edge: %s]' % (self) + + +######################################################################## +## Chart +######################################################################## + + +class Chart(object): + """ + A blackboard for hypotheses about the syntactic constituents of a + sentence. A chart contains a set of edges, and each edge encodes + a single hypothesis about the structure of some portion of the + sentence. + + The ``select`` method can be used to select a specific collection + of edges. For example ``chart.select(is_complete=True, start=0)`` + yields all complete edges whose start indices are 0. To ensure + the efficiency of these selection operations, ``Chart`` dynamically + creates and maintains an index for each set of attributes that + have been selected on. + + In order to reconstruct the trees that are represented by an edge, + the chart associates each edge with a set of child pointer lists. 
+ A child pointer list is a list of the edges that license an + edge's right-hand side. + + :ivar _tokens: The sentence that the chart covers. + :ivar _num_leaves: The number of tokens. + :ivar _edges: A list of the edges in the chart + :ivar _edge_to_cpls: A dictionary mapping each edge to a set + of child pointer lists that are associated with that edge. + :ivar _indexes: A dictionary mapping tuples of edge attributes + to indices, where each index maps the corresponding edge + attribute values to lists of edges. + """ + + def __init__(self, tokens): + """ + Construct a new chart. The chart is initialized with the + leaf edges corresponding to the terminal leaves. + + :type tokens: list + :param tokens: The sentence that this chart will be used to parse. + """ + # Record the sentence token and the sentence length. + self._tokens = tuple(tokens) + self._num_leaves = len(self._tokens) + + # Initialise the chart. + self.initialize() + + def initialize(self): + """ + Clear the chart. + """ + # A list of edges contained in this chart. + self._edges = [] + + # The set of child pointer lists associated with each edge. + self._edge_to_cpls = {} + + # Indexes mapping attribute values to lists of edges + # (used by select()). + self._indexes = {} + + # //////////////////////////////////////////////////////////// + # Sentence Access + # //////////////////////////////////////////////////////////// + + def num_leaves(self): + """ + Return the number of words in this chart's sentence. + + :rtype: int + """ + return self._num_leaves + + def leaf(self, index): + """ + Return the leaf value of the word at the given index. + + :rtype: str + """ + return self._tokens[index] + + def leaves(self): + """ + Return a list of the leaf values of each word in the + chart's sentence. + + :rtype: list(str) + """ + return self._tokens + + # //////////////////////////////////////////////////////////// + # Edge access + # //////////////////////////////////////////////////////////// + + def edges(self): + """ + Return a list of all edges in this chart. New edges + that are added to the chart after the call to edges() + will *not* be contained in this list. + + :rtype: list(EdgeI) + :see: ``iteredges``, ``select`` + """ + return self._edges[:] + + def iteredges(self): + """ + Return an iterator over the edges in this chart. It is + not guaranteed that new edges which are added to the + chart before the iterator is exhausted will also be generated. + + :rtype: iter(EdgeI) + :see: ``edges``, ``select`` + """ + return iter(self._edges) + + # Iterating over the chart yields its edges. + __iter__ = iteredges + + def num_edges(self): + """ + Return the number of edges contained in this chart. + + :rtype: int + """ + return len(self._edge_to_cpls) + + def select(self, **restrictions): + """ + Return an iterator over the edges in this chart. Any + new edges that are added to the chart before the iterator + is exahusted will also be generated. ``restrictions`` + can be used to restrict the set of edges that will be + generated. 
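
# Illustrative sketch (not from the patched file) of edge selection with
# restrictions, assuming nltk is installed; the keyword names mirror the
# parameters documented just below.
from nltk.grammar import Nonterminal, Production
from nltk.parse.chart import Chart, LeafEdge, TreeEdge

S, NP, VP = Nonterminal('S'), Nonterminal('NP'), Nonterminal('VP')
chart = Chart(['John', 'runs'])
chart.insert(LeafEdge('John', 0), ())
chart.insert(LeafEdge('runs', 1), ())
chart.insert(TreeEdge.from_production(Production(S, [NP, VP]), 0), ())

# Complete edges starting at index 0 (here: just the LeafEdge for 'John').
print(list(chart.select(is_complete=True, start=0)))

# Incomplete edges, i.e. hypotheses whose dot has not reached the end.
print(list(chart.select(is_complete=False)))
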
+ + :param span: Only generate edges ``e`` where ``e.span()==span`` + :param start: Only generate edges ``e`` where ``e.start()==start`` + :param end: Only generate edges ``e`` where ``e.end()==end`` + :param length: Only generate edges ``e`` where ``e.length()==length`` + :param lhs: Only generate edges ``e`` where ``e.lhs()==lhs`` + :param rhs: Only generate edges ``e`` where ``e.rhs()==rhs`` + :param nextsym: Only generate edges ``e`` where + ``e.nextsym()==nextsym`` + :param dot: Only generate edges ``e`` where ``e.dot()==dot`` + :param is_complete: Only generate edges ``e`` where + ``e.is_complete()==is_complete`` + :param is_incomplete: Only generate edges ``e`` where + ``e.is_incomplete()==is_incomplete`` + :rtype: iter(EdgeI) + """ + # If there are no restrictions, then return all edges. + if restrictions == {}: + return iter(self._edges) + + # Find the index corresponding to the given restrictions. + restr_keys = sorted(restrictions.keys()) + restr_keys = tuple(restr_keys) + + # If it doesn't exist, then create it. + if restr_keys not in self._indexes: + self._add_index(restr_keys) + + vals = tuple(restrictions[key] for key in restr_keys) + return iter(self._indexes[restr_keys].get(vals, [])) + + def _add_index(self, restr_keys): + """ + A helper function for ``select``, which creates a new index for + a given set of attributes (aka restriction keys). + """ + # Make sure it's a valid index. + for key in restr_keys: + if not hasattr(EdgeI, key): + raise ValueError('Bad restriction: %s' % key) + + # Create the index. + index = self._indexes[restr_keys] = {} + + # Add all existing edges to the index. + for edge in self._edges: + vals = tuple(getattr(edge, key)() for key in restr_keys) + index.setdefault(vals, []).append(edge) + + def _register_with_indexes(self, edge): + """ + A helper function for ``insert``, which registers the new + edge with all existing indexes. + """ + for (restr_keys, index) in self._indexes.items(): + vals = tuple(getattr(edge, key)() for key in restr_keys) + index.setdefault(vals, []).append(edge) + + # //////////////////////////////////////////////////////////// + # Edge Insertion + # //////////////////////////////////////////////////////////// + + def insert_with_backpointer(self, new_edge, previous_edge, child_edge): + """ + Add a new edge to the chart, using a pointer to the previous edge. + """ + cpls = self.child_pointer_lists(previous_edge) + new_cpls = [cpl + (child_edge,) for cpl in cpls] + return self.insert(new_edge, *new_cpls) + + def insert(self, edge, *child_pointer_lists): + """ + Add a new edge to the chart, and return True if this operation + modified the chart. In particular, return true iff the chart + did not already contain ``edge``, or if it did not already associate + ``child_pointer_lists`` with ``edge``. + + :type edge: EdgeI + :param edge: The new edge + :type child_pointer_lists: sequence of tuple(EdgeI) + :param child_pointer_lists: A sequence of lists of the edges that + were used to form this edge. This list is used to reconstruct + the trees (or partial trees) that are associated with ``edge``. + :rtype: bool + """ + # Is it a new edge? + if edge not in self._edge_to_cpls: + # Add it to the list of edges. + self._append_edge(edge) + # Register with indexes. + self._register_with_indexes(edge) + + # Get the set of child pointer lists for this edge. 
+ cpls = self._edge_to_cpls.setdefault(edge, OrderedDict()) + chart_was_modified = False + for child_pointer_list in child_pointer_lists: + child_pointer_list = tuple(child_pointer_list) + if child_pointer_list not in cpls: + # It's a new CPL; register it, and return true. + cpls[child_pointer_list] = True + chart_was_modified = True + return chart_was_modified + + def _append_edge(self, edge): + self._edges.append(edge) + + # //////////////////////////////////////////////////////////// + # Tree extraction & child pointer lists + # //////////////////////////////////////////////////////////// + + def parses(self, root, tree_class=Tree): + """ + Return an iterator of the complete tree structures that span + the entire chart, and whose root node is ``root``. + """ + for edge in self.select(start=0, end=self._num_leaves, lhs=root): + for tree in self.trees(edge, tree_class=tree_class, complete=True): + yield tree + + def trees(self, edge, tree_class=Tree, complete=False): + """ + Return an iterator of the tree structures that are associated + with ``edge``. + + If ``edge`` is incomplete, then the unexpanded children will be + encoded as childless subtrees, whose node value is the + corresponding terminal or nonterminal. + + :rtype: list(Tree) + :note: If two trees share a common subtree, then the same + Tree may be used to encode that subtree in + both trees. If you need to eliminate this subtree + sharing, then create a deep copy of each tree. + """ + return iter(self._trees(edge, complete, memo={}, tree_class=tree_class)) + + def _trees(self, edge, complete, memo, tree_class): + """ + A helper function for ``trees``. + + :param memo: A dictionary used to record the trees that we've + generated for each edge, so that when we see an edge more + than once, we can reuse the same trees. + """ + # If we've seen this edge before, then reuse our old answer. + if edge in memo: + return memo[edge] + + # when we're reading trees off the chart, don't use incomplete edges + if complete and edge.is_incomplete(): + return [] + + # Leaf edges. + if isinstance(edge, LeafEdge): + leaf = self._tokens[edge.start()] + memo[edge] = [leaf] + return [leaf] + + # Until we're done computing the trees for edge, set + # memo[edge] to be empty. This has the effect of filtering + # out any cyclic trees (i.e., trees that contain themselves as + # descendants), because if we reach this edge via a cycle, + # then it will appear that the edge doesn't generate any trees. + memo[edge] = [] + trees = [] + lhs = edge.lhs().symbol() + + # Each child pointer list can be used to form trees. + for cpl in self.child_pointer_lists(edge): + # Get the set of child choices for each child pointer. + # child_choices[i] is the set of choices for the tree's + # ith child. + child_choices = [self._trees(cp, complete, memo, tree_class) for cp in cpl] + + # For each combination of children, add a tree. + for children in itertools.product(*child_choices): + trees.append(tree_class(lhs, children)) + + # If the edge is incomplete, then extend it with "partial trees": + if edge.is_incomplete(): + unexpanded = [tree_class(elt, []) for elt in edge.rhs()[edge.dot() :]] + for tree in trees: + tree.extend(unexpanded) + + # Update the memoization dictionary. + memo[edge] = trees + + # Return the list of trees. + return trees + + def child_pointer_lists(self, edge): + """ + Return the set of child pointer lists for the given edge. + Each child pointer list is a list of edges that have + been used to form this edge. 
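
# Illustrative sketch (not from the patched file): how child pointer lists
# let trees be read back off the chart, assuming nltk is installed. A real
# parse would be produced by the chart rules below; here the edges are
# inserted by hand for a two-word sentence.
from nltk.grammar import Nonterminal, Production
from nltk.parse.chart import Chart, LeafEdge, TreeEdge

S, NP, VP = Nonterminal('S'), Nonterminal('NP'), Nonterminal('VP')
chart = Chart(['John', 'runs'])

john, runs = LeafEdge('John', 0), LeafEdge('runs', 1)
chart.insert(john, ())
chart.insert(runs, ())

# NP -> 'John' and VP -> 'runs', completed over their single tokens.
np0 = TreeEdge.from_production(Production(NP, ['John']), 0)
chart.insert(np0, ())
np_edge = np0.move_dot_forward(1)
chart.insert_with_backpointer(np_edge, np0, john)

vp1 = TreeEdge.from_production(Production(VP, ['runs']), 1)
chart.insert(vp1, ())
vp_edge = vp1.move_dot_forward(2)
chart.insert_with_backpointer(vp_edge, vp1, runs)

# S -> NP VP, advanced over the completed NP and VP edges.
s0 = TreeEdge.from_production(Production(S, [NP, VP]), 0)
chart.insert(s0, ())
s1 = s0.move_dot_forward(1)
chart.insert_with_backpointer(s1, s0, np_edge)
s2 = s1.move_dot_forward(2)
chart.insert_with_backpointer(s2, s1, vp_edge)

for tree in chart.parses(S):
    print(tree)                # (S (NP John) (VP runs))
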
+ + :rtype: list(list(EdgeI)) + """ + # Make a copy, in case they modify it. + return self._edge_to_cpls.get(edge, {}).keys() + + # //////////////////////////////////////////////////////////// + # Display + # //////////////////////////////////////////////////////////// + def pretty_format_edge(self, edge, width=None): + """ + Return a pretty-printed string representation of a given edge + in this chart. + + :rtype: str + :param width: The number of characters allotted to each + index in the sentence. + """ + if width is None: + width = 50 // (self.num_leaves() + 1) + (start, end) = (edge.start(), edge.end()) + + str = '|' + ('.' + ' ' * (width - 1)) * start + + # Zero-width edges are "#" if complete, ">" if incomplete + if start == end: + if edge.is_complete(): + str += '#' + else: + str += '>' + + # Spanning complete edges are "[===]"; Other edges are + # "[---]" if complete, "[--->" if incomplete + elif edge.is_complete() and edge.span() == (0, self._num_leaves): + str += '[' + ('=' * width) * (end - start - 1) + '=' * (width - 1) + ']' + elif edge.is_complete(): + str += '[' + ('-' * width) * (end - start - 1) + '-' * (width - 1) + ']' + else: + str += '[' + ('-' * width) * (end - start - 1) + '-' * (width - 1) + '>' + + str += (' ' * (width - 1) + '.') * (self._num_leaves - end) + return str + '| %s' % edge + + def pretty_format_leaves(self, width=None): + """ + Return a pretty-printed string representation of this + chart's leaves. This string can be used as a header + for calls to ``pretty_format_edge``. + """ + if width is None: + width = 50 // (self.num_leaves() + 1) + + if self._tokens is not None and width > 1: + header = '|.' + for tok in self._tokens: + header += tok[: width - 1].center(width - 1) + '.' + header += '|' + else: + header = '' + + return header + + def pretty_format(self, width=None): + """ + Return a pretty-printed string representation of this chart. + + :param width: The number of characters allotted to each + index in the sentence. + :rtype: str + """ + if width is None: + width = 50 // (self.num_leaves() + 1) + # sort edges: primary key=length, secondary key=start index. + # (and filter out the token edges) + edges = sorted([(e.length(), e.start(), e) for e in self]) + edges = [e for (_, _, e) in edges] + + return ( + self.pretty_format_leaves(width) + + '\n' + + '\n'.join(self.pretty_format_edge(edge, width) for edge in edges) + ) + + # //////////////////////////////////////////////////////////// + # Display: Dot (AT&T Graphviz) + # //////////////////////////////////////////////////////////// + + def dot_digraph(self): + # Header + s = 'digraph nltk_chart {\n' + # s += ' size="5,5";\n' + s += ' rankdir=LR;\n' + s += ' node [height=0.1,width=0.1];\n' + s += ' node [style=filled, color="lightgray"];\n' + + # Set up the nodes + for y in range(self.num_edges(), -1, -1): + if y == 0: + s += ' node [style=filled, color="black"];\n' + for x in range(self.num_leaves() + 1): + if y == 0 or ( + x <= self._edges[y - 1].start() or x >= self._edges[y - 1].end() + ): + s += ' %04d.%04d [label=""];\n' % (x, y) + + # Add a spacer + s += ' x [style=invis]; x->0000.0000 [style=invis];\n' + + # Declare ranks. 
+ for x in range(self.num_leaves() + 1): + s += ' {rank=same;' + for y in range(self.num_edges() + 1): + if y == 0 or ( + x <= self._edges[y - 1].start() or x >= self._edges[y - 1].end() + ): + s += ' %04d.%04d' % (x, y) + s += '}\n' + + # Add the leaves + s += ' edge [style=invis, weight=100];\n' + s += ' node [shape=plaintext]\n' + s += ' 0000.0000' + for x in range(self.num_leaves()): + s += '->%s->%04d.0000' % (self.leaf(x), x + 1) + s += ';\n\n' + + # Add the edges + s += ' edge [style=solid, weight=1];\n' + for y, edge in enumerate(self): + for x in range(edge.start()): + s += ' %04d.%04d -> %04d.%04d [style="invis"];\n' % ( + x, + y + 1, + x + 1, + y + 1, + ) + s += ' %04d.%04d -> %04d.%04d [label="%s"];\n' % ( + edge.start(), + y + 1, + edge.end(), + y + 1, + edge, + ) + for x in range(edge.end(), self.num_leaves()): + s += ' %04d.%04d -> %04d.%04d [style="invis"];\n' % ( + x, + y + 1, + x + 1, + y + 1, + ) + s += '}\n' + return s + + +######################################################################## +## Chart Rules +######################################################################## + + +class ChartRuleI(object): + """ + A rule that specifies what new edges are licensed by any given set + of existing edges. Each chart rule expects a fixed number of + edges, as indicated by the class variable ``NUM_EDGES``. In + particular: + + - A chart rule with ``NUM_EDGES=0`` specifies what new edges are + licensed, regardless of existing edges. + - A chart rule with ``NUM_EDGES=1`` specifies what new edges are + licensed by a single existing edge. + - A chart rule with ``NUM_EDGES=2`` specifies what new edges are + licensed by a pair of existing edges. + + :type NUM_EDGES: int + :cvar NUM_EDGES: The number of existing edges that this rule uses + to license new edges. Typically, this number ranges from zero + to two. + """ + + def apply(self, chart, grammar, *edges): + """ + Return a generator that will add edges licensed by this rule + and the given edges to the chart, one at a time. Each + time the generator is resumed, it will either add a new + edge and yield that edge; or return. + + :type edges: list(EdgeI) + :param edges: A set of existing edges. The number of edges + that should be passed to ``apply()`` is specified by the + ``NUM_EDGES`` class variable. + :rtype: iter(EdgeI) + """ + raise NotImplementedError() + + def apply_everywhere(self, chart, grammar): + """ + Return a generator that will add all edges licensed by + this rule, given the edges that are currently in the + chart, one at a time. Each time the generator is resumed, + it will either add a new edge and yield that edge; or return. + + :rtype: iter(EdgeI) + """ + raise NotImplementedError() + + +@python_2_unicode_compatible +class AbstractChartRule(ChartRuleI): + """ + An abstract base class for chart rules. ``AbstractChartRule`` + provides: + + - A default implementation for ``apply``. + - A default implementation for ``apply_everywhere``, + (Currently, this implementation assumes that ``NUM_EDGES``<=3.) + - A default implementation for ``__str__``, which returns a + name based on the rule's class name. + """ + + # Subclasses must define apply. + def apply(self, chart, grammar, *edges): + raise NotImplementedError() + + # Default: loop through the given number of edges, and call + # self.apply() for each set of edges. 
+ def apply_everywhere(self, chart, grammar): + if self.NUM_EDGES == 0: + for new_edge in self.apply(chart, grammar): + yield new_edge + + elif self.NUM_EDGES == 1: + for e1 in chart: + for new_edge in self.apply(chart, grammar, e1): + yield new_edge + + elif self.NUM_EDGES == 2: + for e1 in chart: + for e2 in chart: + for new_edge in self.apply(chart, grammar, e1, e2): + yield new_edge + + elif self.NUM_EDGES == 3: + for e1 in chart: + for e2 in chart: + for e3 in chart: + for new_edge in self.apply(chart, grammar, e1, e2, e3): + yield new_edge + + else: + raise AssertionError('NUM_EDGES>3 is not currently supported') + + # Default: return a name based on the class name. + def __str__(self): + # Add spaces between InitialCapsWords. + return re.sub('([a-z])([A-Z])', r'\1 \2', self.__class__.__name__) + + +# //////////////////////////////////////////////////////////// +# Fundamental Rule +# //////////////////////////////////////////////////////////// + + +class FundamentalRule(AbstractChartRule): + """ + A rule that joins two adjacent edges to form a single combined + edge. In particular, this rule specifies that any pair of edges + + - ``[A -> alpha \* B beta][i:j]`` + - ``[B -> gamma \*][j:k]`` + + licenses the edge: + + - ``[A -> alpha B * beta][i:j]`` + """ + + NUM_EDGES = 2 + + def apply(self, chart, grammar, left_edge, right_edge): + # Make sure the rule is applicable. + if not ( + left_edge.is_incomplete() + and right_edge.is_complete() + and left_edge.end() == right_edge.start() + and left_edge.nextsym() == right_edge.lhs() + ): + return + + # Construct the new edge. + new_edge = left_edge.move_dot_forward(right_edge.end()) + + # Insert it into the chart. + if chart.insert_with_backpointer(new_edge, left_edge, right_edge): + yield new_edge + + +class SingleEdgeFundamentalRule(FundamentalRule): + """ + A rule that joins a given edge with adjacent edges in the chart, + to form combined edges. In particular, this rule specifies that + either of the edges: + + - ``[A -> alpha \* B beta][i:j]`` + - ``[B -> gamma \*][j:k]`` + + licenses the edge: + + - ``[A -> alpha B * beta][i:j]`` + + if the other edge is already in the chart. + + :note: This is basically ``FundamentalRule``, with one edge left + unspecified. 
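
# Illustrative sketch (not from the patched file): the fundamental rule in
# isolation, assuming nltk is installed. An incomplete S -> * NP VP edge is
# combined with an adjacent, completed NP edge.
from nltk.grammar import CFG, Nonterminal, Production
from nltk.parse.chart import Chart, FundamentalRule, TreeEdge

S, NP, VP = Nonterminal('S'), Nonterminal('NP'), Nonterminal('VP')
grammar = CFG.fromstring("S -> NP VP\nNP -> 'John'\nVP -> 'runs'")

chart = Chart(['John', 'runs'])
left = TreeEdge.from_production(Production(S, [NP, VP]), 0)   # [0:0] S -> * NP VP
chart.insert(left, ())

right = TreeEdge(span=(0, 1), lhs=NP, rhs=['John'], dot=1)    # completed NP edge
chart.insert(right, ())

for new_edge in FundamentalRule().apply(chart, grammar, left, right):
    print(new_edge)            # the dot has moved over NP: [0:1] S -> NP * VP
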
+ """ + + NUM_EDGES = 1 + + def apply(self, chart, grammar, edge): + if edge.is_incomplete(): + for new_edge in self._apply_incomplete(chart, grammar, edge): + yield new_edge + else: + for new_edge in self._apply_complete(chart, grammar, edge): + yield new_edge + + def _apply_complete(self, chart, grammar, right_edge): + for left_edge in chart.select( + end=right_edge.start(), is_complete=False, nextsym=right_edge.lhs() + ): + new_edge = left_edge.move_dot_forward(right_edge.end()) + if chart.insert_with_backpointer(new_edge, left_edge, right_edge): + yield new_edge + + def _apply_incomplete(self, chart, grammar, left_edge): + for right_edge in chart.select( + start=left_edge.end(), is_complete=True, lhs=left_edge.nextsym() + ): + new_edge = left_edge.move_dot_forward(right_edge.end()) + if chart.insert_with_backpointer(new_edge, left_edge, right_edge): + yield new_edge + + +# //////////////////////////////////////////////////////////// +# Inserting Terminal Leafs +# //////////////////////////////////////////////////////////// + + +class LeafInitRule(AbstractChartRule): + NUM_EDGES = 0 + + def apply(self, chart, grammar): + for index in range(chart.num_leaves()): + new_edge = LeafEdge(chart.leaf(index), index) + if chart.insert(new_edge, ()): + yield new_edge + + +# //////////////////////////////////////////////////////////// +# Top-Down Prediction +# //////////////////////////////////////////////////////////// + + +class TopDownInitRule(AbstractChartRule): + """ + A rule licensing edges corresponding to the grammar productions for + the grammar's start symbol. In particular, this rule specifies that + ``[S -> \* alpha][0:i]`` is licensed for each grammar production + ``S -> alpha``, where ``S`` is the grammar's start symbol. + """ + + NUM_EDGES = 0 + + def apply(self, chart, grammar): + for prod in grammar.productions(lhs=grammar.start()): + new_edge = TreeEdge.from_production(prod, 0) + if chart.insert(new_edge, ()): + yield new_edge + + +class TopDownPredictRule(AbstractChartRule): + """ + A rule licensing edges corresponding to the grammar productions + for the nonterminal following an incomplete edge's dot. In + particular, this rule specifies that + ``[A -> alpha \* B beta][i:j]`` licenses the edge + ``[B -> \* gamma][j:j]`` for each grammar production ``B -> gamma``. + + :note: This rule corresponds to the Predictor Rule in Earley parsing. + """ + + NUM_EDGES = 1 + + def apply(self, chart, grammar, edge): + if edge.is_complete(): + return + for prod in grammar.productions(lhs=edge.nextsym()): + new_edge = TreeEdge.from_production(prod, edge.end()) + if chart.insert(new_edge, ()): + yield new_edge + + +class CachedTopDownPredictRule(TopDownPredictRule): + """ + A cached version of ``TopDownPredictRule``. After the first time + this rule is applied to an edge with a given ``end`` and ``next``, + it will not generate any more edges for edges with that ``end`` and + ``next``. + + If ``chart`` or ``grammar`` are changed, then the cache is flushed. + """ + + def __init__(self): + TopDownPredictRule.__init__(self) + self._done = {} + + def apply(self, chart, grammar, edge): + if edge.is_complete(): + return + nextsym, index = edge.nextsym(), edge.end() + if not is_nonterminal(nextsym): + return + + # If we've already applied this rule to an edge with the same + # next & end, and the chart & grammar have not changed, then + # just return (no new edges to add). 
+ done = self._done.get((nextsym, index), (None, None)) + if done[0] is chart and done[1] is grammar: + return + + # Add all the edges indicated by the top down expand rule. + for prod in grammar.productions(lhs=nextsym): + # If the left corner in the predicted production is + # leaf, it must match with the input. + if prod.rhs(): + first = prod.rhs()[0] + if is_terminal(first): + if index >= chart.num_leaves() or first != chart.leaf(index): + continue + + new_edge = TreeEdge.from_production(prod, index) + if chart.insert(new_edge, ()): + yield new_edge + + # Record the fact that we've applied this rule. + self._done[nextsym, index] = (chart, grammar) + + +# //////////////////////////////////////////////////////////// +# Bottom-Up Prediction +# //////////////////////////////////////////////////////////// + + +class BottomUpPredictRule(AbstractChartRule): + """ + A rule licensing any edge corresponding to a production whose + right-hand side begins with a complete edge's left-hand side. In + particular, this rule specifies that ``[A -> alpha \*]`` licenses + the edge ``[B -> \* A beta]`` for each grammar production ``B -> A beta``. + """ + + NUM_EDGES = 1 + + def apply(self, chart, grammar, edge): + if edge.is_incomplete(): + return + for prod in grammar.productions(rhs=edge.lhs()): + new_edge = TreeEdge.from_production(prod, edge.start()) + if chart.insert(new_edge, ()): + yield new_edge + + +class BottomUpPredictCombineRule(BottomUpPredictRule): + """ + A rule licensing any edge corresponding to a production whose + right-hand side begins with a complete edge's left-hand side. In + particular, this rule specifies that ``[A -> alpha \*]`` + licenses the edge ``[B -> A \* beta]`` for each grammar + production ``B -> A beta``. + + :note: This is like ``BottomUpPredictRule``, but it also applies + the ``FundamentalRule`` to the resulting edge. + """ + + NUM_EDGES = 1 + + def apply(self, chart, grammar, edge): + if edge.is_incomplete(): + return + for prod in grammar.productions(rhs=edge.lhs()): + new_edge = TreeEdge(edge.span(), prod.lhs(), prod.rhs(), 1) + if chart.insert(new_edge, (edge,)): + yield new_edge + + +class EmptyPredictRule(AbstractChartRule): + """ + A rule that inserts all empty productions as passive edges, + in every position in the chart. 
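
# Illustrative sketch (not from the patched file): bottom-up prediction,
# assuming nltk is installed. A completed NP edge licenses a new
# S -> NP * VP edge via BottomUpPredictCombineRule.
from nltk.grammar import CFG, Nonterminal
from nltk.parse.chart import BottomUpPredictCombineRule, Chart, TreeEdge

grammar = CFG.fromstring("""
    S -> NP VP
    NP -> 'John'
    VP -> 'runs'
""")
NP = Nonterminal('NP')

chart = Chart(['John', 'runs'])
np_edge = TreeEdge(span=(0, 1), lhs=NP, rhs=['John'], dot=1)  # complete NP
chart.insert(np_edge, ())

for new_edge in BottomUpPredictCombineRule().apply(chart, grammar, np_edge):
    print(new_edge)            # [0:1] S -> NP * VP
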
+ """ + + NUM_EDGES = 0 + + def apply(self, chart, grammar): + for prod in grammar.productions(empty=True): + for index in range(chart.num_leaves() + 1): + new_edge = TreeEdge.from_production(prod, index) + if chart.insert(new_edge, ()): + yield new_edge + + +######################################################################## +## Filtered Bottom Up +######################################################################## + + +class FilteredSingleEdgeFundamentalRule(SingleEdgeFundamentalRule): + def _apply_complete(self, chart, grammar, right_edge): + end = right_edge.end() + nexttoken = end < chart.num_leaves() and chart.leaf(end) + for left_edge in chart.select( + end=right_edge.start(), is_complete=False, nextsym=right_edge.lhs() + ): + if _bottomup_filter(grammar, nexttoken, left_edge.rhs(), left_edge.dot()): + new_edge = left_edge.move_dot_forward(right_edge.end()) + if chart.insert_with_backpointer(new_edge, left_edge, right_edge): + yield new_edge + + def _apply_incomplete(self, chart, grammar, left_edge): + for right_edge in chart.select( + start=left_edge.end(), is_complete=True, lhs=left_edge.nextsym() + ): + end = right_edge.end() + nexttoken = end < chart.num_leaves() and chart.leaf(end) + if _bottomup_filter(grammar, nexttoken, left_edge.rhs(), left_edge.dot()): + new_edge = left_edge.move_dot_forward(right_edge.end()) + if chart.insert_with_backpointer(new_edge, left_edge, right_edge): + yield new_edge + + +class FilteredBottomUpPredictCombineRule(BottomUpPredictCombineRule): + def apply(self, chart, grammar, edge): + if edge.is_incomplete(): + return + + end = edge.end() + nexttoken = end < chart.num_leaves() and chart.leaf(end) + for prod in grammar.productions(rhs=edge.lhs()): + if _bottomup_filter(grammar, nexttoken, prod.rhs()): + new_edge = TreeEdge(edge.span(), prod.lhs(), prod.rhs(), 1) + if chart.insert(new_edge, (edge,)): + yield new_edge + + +def _bottomup_filter(grammar, nexttoken, rhs, dot=0): + if len(rhs) <= dot + 1: + return True + _next = rhs[dot + 1] + if is_terminal(_next): + return nexttoken == _next + else: + return grammar.is_leftcorner(_next, nexttoken) + + +######################################################################## +## Generic Chart Parser +######################################################################## + +TD_STRATEGY = [ + LeafInitRule(), + TopDownInitRule(), + CachedTopDownPredictRule(), + SingleEdgeFundamentalRule(), +] +BU_STRATEGY = [ + LeafInitRule(), + EmptyPredictRule(), + BottomUpPredictRule(), + SingleEdgeFundamentalRule(), +] +BU_LC_STRATEGY = [ + LeafInitRule(), + EmptyPredictRule(), + BottomUpPredictCombineRule(), + SingleEdgeFundamentalRule(), +] + +LC_STRATEGY = [ + LeafInitRule(), + FilteredBottomUpPredictCombineRule(), + FilteredSingleEdgeFundamentalRule(), +] + + +class ChartParser(ParserI): + """ + A generic chart parser. A "strategy", or list of + ``ChartRuleI`` instances, is used to decide what edges to add to + the chart. In particular, ``ChartParser`` uses the following + algorithm to parse texts: + + | Until no new edges are added: + | For each *rule* in *strategy*: + | Apply *rule* to any applicable edges in the chart. + | Return any complete parses in the chart + """ + + def __init__( + self, + grammar, + strategy=BU_LC_STRATEGY, + trace=0, + trace_chart_width=50, + use_agenda=True, + chart_class=Chart, + ): + """ + Create a new chart parser, that uses ``grammar`` to parse + texts. + + :type grammar: CFG + :param grammar: The grammar used to parse texts. 
+ :type strategy: list(ChartRuleI) + :param strategy: A list of rules that should be used to decide + what edges to add to the chart (top-down strategy by default). + :type trace: int + :param trace: The level of tracing that should be used when + parsing a text. ``0`` will generate no tracing output; + and higher numbers will produce more verbose tracing + output. + :type trace_chart_width: int + :param trace_chart_width: The default total width reserved for + the chart in trace output. The remainder of each line will + be used to display edges. + :type use_agenda: bool + :param use_agenda: Use an optimized agenda-based algorithm, + if possible. + :param chart_class: The class that should be used to create + the parse charts. + """ + self._grammar = grammar + self._strategy = strategy + self._trace = trace + self._trace_chart_width = trace_chart_width + # If the strategy only consists of axioms (NUM_EDGES==0) and + # inference rules (NUM_EDGES==1), we can use an agenda-based algorithm: + self._use_agenda = use_agenda + self._chart_class = chart_class + + self._axioms = [] + self._inference_rules = [] + for rule in strategy: + if rule.NUM_EDGES == 0: + self._axioms.append(rule) + elif rule.NUM_EDGES == 1: + self._inference_rules.append(rule) + else: + self._use_agenda = False + + def grammar(self): + return self._grammar + + def _trace_new_edges(self, chart, rule, new_edges, trace, edge_width): + if not trace: + return + print_rule_header = trace > 1 + for edge in new_edges: + if print_rule_header: + print('%s:' % rule) + print_rule_header = False + print(chart.pretty_format_edge(edge, edge_width)) + + def chart_parse(self, tokens, trace=None): + """ + Return the final parse ``Chart`` from which all possible + parse trees can be extracted. + + :param tokens: The sentence to be parsed + :type tokens: list(str) + :rtype: Chart + """ + if trace is None: + trace = self._trace + trace_new_edges = self._trace_new_edges + + tokens = list(tokens) + self._grammar.check_coverage(tokens) + chart = self._chart_class(tokens) + grammar = self._grammar + + # Width, for printing trace edges. + trace_edge_width = self._trace_chart_width // (chart.num_leaves() + 1) + if trace: + print(chart.pretty_format_leaves(trace_edge_width)) + + if self._use_agenda: + # Use an agenda-based algorithm. + for axiom in self._axioms: + new_edges = list(axiom.apply(chart, grammar)) + trace_new_edges(chart, axiom, new_edges, trace, trace_edge_width) + + inference_rules = self._inference_rules + agenda = chart.edges() + # We reverse the initial agenda, since it is a stack + # but chart.edges() functions as a queue. + agenda.reverse() + while agenda: + edge = agenda.pop() + for rule in inference_rules: + new_edges = list(rule.apply(chart, grammar, edge)) + if trace: + trace_new_edges(chart, rule, new_edges, trace, trace_edge_width) + agenda += new_edges + + else: + # Do not use an agenda-based algorithm. + edges_added = True + while edges_added: + edges_added = False + for rule in self._strategy: + new_edges = list(rule.apply_everywhere(chart, grammar)) + edges_added = len(new_edges) + trace_new_edges(chart, rule, new_edges, trace, trace_edge_width) + + # Return the final chart. + return chart + + def parse(self, tokens, tree_class=Tree): + chart = self.chart_parse(tokens) + return iter(chart.parses(self._grammar.start(), tree_class=tree_class)) + + +class TopDownChartParser(ChartParser): + """ + A ``ChartParser`` using a top-down parsing strategy. + See ``ChartParser`` for more information. 
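
# Illustrative sketch (not from the patched file): minimal end-to-end use of
# ChartParser with its default bottom-up left-corner strategy (BU_LC_STRATEGY),
# assuming nltk is installed. The toy grammar is chosen here only for
# illustration.
from nltk import CFG
from nltk.parse.chart import ChartParser

grammar = CFG.fromstring("""
    S -> NP VP
    NP -> 'John' | Det N
    VP -> V NP | V
    Det -> 'the'
    N -> 'dog'
    V -> 'saw' | 'barked'
""")

parser = ChartParser(grammar)
for tree in parser.parse('John saw the dog'.split()):
    print(tree)    # (S (NP John) (VP (V saw) (NP (Det the) (N dog))))
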
+ """ + + def __init__(self, grammar, **parser_args): + ChartParser.__init__(self, grammar, TD_STRATEGY, **parser_args) + + +class BottomUpChartParser(ChartParser): + """ + A ``ChartParser`` using a bottom-up parsing strategy. + See ``ChartParser`` for more information. + """ + + def __init__(self, grammar, **parser_args): + if isinstance(grammar, PCFG): + warnings.warn( + "BottomUpChartParser only works for CFG, " + "use BottomUpProbabilisticChartParser instead", + category=DeprecationWarning, + ) + ChartParser.__init__(self, grammar, BU_STRATEGY, **parser_args) + + +class BottomUpLeftCornerChartParser(ChartParser): + """ + A ``ChartParser`` using a bottom-up left-corner parsing strategy. + This strategy is often more efficient than standard bottom-up. + See ``ChartParser`` for more information. + """ + + def __init__(self, grammar, **parser_args): + ChartParser.__init__(self, grammar, BU_LC_STRATEGY, **parser_args) + + +class LeftCornerChartParser(ChartParser): + def __init__(self, grammar, **parser_args): + if not grammar.is_nonempty(): + raise ValueError( + "LeftCornerParser only works for grammars " "without empty productions." + ) + ChartParser.__init__(self, grammar, LC_STRATEGY, **parser_args) + + +######################################################################## +## Stepping Chart Parser +######################################################################## + + +class SteppingChartParser(ChartParser): + """ + A ``ChartParser`` that allows you to step through the parsing + process, adding a single edge at a time. It also allows you to + change the parser's strategy or grammar midway through parsing a + text. + + The ``initialize`` method is used to start parsing a text. ``step`` + adds a single edge to the chart. ``set_strategy`` changes the + strategy used by the chart parser. ``parses`` returns the set of + parses that has been found by the chart parser. + + :ivar _restart: Records whether the parser's strategy, grammar, + or chart has been changed. If so, then ``step`` must restart + the parsing algorithm. + """ + + def __init__(self, grammar, strategy=[], trace=0): + self._chart = None + self._current_chartrule = None + self._restart = False + ChartParser.__init__(self, grammar, strategy, trace) + + # //////////////////////////////////////////////////////////// + # Initialization + # //////////////////////////////////////////////////////////// + + def initialize(self, tokens): + "Begin parsing the given tokens." + self._chart = Chart(list(tokens)) + self._restart = True + + # //////////////////////////////////////////////////////////// + # Stepping + # //////////////////////////////////////////////////////////// + + def step(self): + """ + Return a generator that adds edges to the chart, one at a + time. Each time the generator is resumed, it adds a single + edge and yields that edge. If no more edges can be added, + then it yields None. + + If the parser's strategy, grammar, or chart is changed, then + the generator will continue adding edges using the new + strategy, grammar, or chart. + + Note that this generator never terminates, since the grammar + or strategy might be changed to values that would add new + edges. Instead, it yields None when no more edges can be + added with the current strategy and grammar. 
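
# Illustrative sketch (not from the patched file): single-stepping the parser
# and switching strategies midway, assuming nltk is installed. The grammar and
# sentence are toy examples chosen only for illustration.
from nltk import CFG
from nltk.parse.chart import BU_STRATEGY, TD_STRATEGY, SteppingChartParser

grammar = CFG.fromstring("S -> NP VP\nNP -> 'John'\nVP -> 'runs'")
sp = SteppingChartParser(grammar, strategy=TD_STRATEGY)
sp.initialize('John runs'.split())

stepper = sp.step()
for _ in range(5):                  # add (up to) five edges, one at a time
    edge = next(stepper)
    print(sp.current_chartrule(), edge)

sp.set_strategy(BU_STRATEGY)        # switch strategies mid-parse
print(next(stepper))                # first edge added under the new strategy
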
+ """ + if self._chart is None: + raise ValueError('Parser must be initialized first') + while True: + self._restart = False + w = 50 // (self._chart.num_leaves() + 1) + + for e in self._parse(): + if self._trace > 1: + print(self._current_chartrule) + if self._trace > 0: + print(self._chart.pretty_format_edge(e, w)) + yield e + if self._restart: + break + else: + yield None # No more edges. + + def _parse(self): + """ + A generator that implements the actual parsing algorithm. + ``step`` iterates through this generator, and restarts it + whenever the parser's strategy, grammar, or chart is modified. + """ + chart = self._chart + grammar = self._grammar + edges_added = 1 + while edges_added > 0: + edges_added = 0 + for rule in self._strategy: + self._current_chartrule = rule + for e in rule.apply_everywhere(chart, grammar): + edges_added += 1 + yield e + + # //////////////////////////////////////////////////////////// + # Accessors + # //////////////////////////////////////////////////////////// + + def strategy(self): + "Return the strategy used by this parser." + return self._strategy + + def grammar(self): + "Return the grammar used by this parser." + return self._grammar + + def chart(self): + "Return the chart that is used by this parser." + return self._chart + + def current_chartrule(self): + "Return the chart rule used to generate the most recent edge." + return self._current_chartrule + + def parses(self, tree_class=Tree): + "Return the parse trees currently contained in the chart." + return self._chart.parses(self._grammar.start(), tree_class) + + # //////////////////////////////////////////////////////////// + # Parser modification + # //////////////////////////////////////////////////////////// + + def set_strategy(self, strategy): + """ + Change the strategy that the parser uses to decide which edges + to add to the chart. + + :type strategy: list(ChartRuleI) + :param strategy: A list of rules that should be used to decide + what edges to add to the chart. + """ + if strategy == self._strategy: + return + self._strategy = strategy[:] # Make a copy. + self._restart = True + + def set_grammar(self, grammar): + "Change the grammar used by the parser." + if grammar is self._grammar: + return + self._grammar = grammar + self._restart = True + + def set_chart(self, chart): + "Load a given chart into the chart parser." + if chart is self._chart: + return + self._chart = chart + self._restart = True + + # //////////////////////////////////////////////////////////// + # Standard parser methods + # //////////////////////////////////////////////////////////// + + def parse(self, tokens, tree_class=Tree): + tokens = list(tokens) + self._grammar.check_coverage(tokens) + + # Initialize ourselves. + self.initialize(tokens) + + # Step until no more edges are generated. + for e in self.step(): + if e is None: + break + + # Return an iterator of complete parses. 
+ return self.parses(tree_class=tree_class) + + +######################################################################## +## Demo Code +######################################################################## + + +def demo_grammar(): + from nltk.grammar import CFG + + return CFG.fromstring( + """ +S -> NP VP +PP -> "with" NP +NP -> NP PP +VP -> VP PP +VP -> Verb NP +VP -> Verb +NP -> Det Noun +NP -> "John" +NP -> "I" +Det -> "the" +Det -> "my" +Det -> "a" +Noun -> "dog" +Noun -> "cookie" +Verb -> "ate" +Verb -> "saw" +Prep -> "with" +Prep -> "under" +""" + ) + + +def demo( + choice=None, + print_times=True, + print_grammar=False, + print_trees=True, + trace=2, + sent='I saw John with a dog with my cookie', + numparses=5, +): + """ + A demonstration of the chart parsers. + """ + import sys, time + from nltk import nonterminals, Production, CFG + + # The grammar for ChartParser and SteppingChartParser: + grammar = demo_grammar() + if print_grammar: + print("* Grammar") + print(grammar) + + # Tokenize the sample sentence. + print("* Sentence:") + print(sent) + tokens = sent.split() + print(tokens) + print() + + # Ask the user which parser to test, + # if the parser wasn't provided as an argument + if choice is None: + print(' 1: Top-down chart parser') + print(' 2: Bottom-up chart parser') + print(' 3: Bottom-up left-corner chart parser') + print(' 4: Left-corner chart parser with bottom-up filter') + print(' 5: Stepping chart parser (alternating top-down & bottom-up)') + print(' 6: All parsers') + print('\nWhich parser (1-6)? ', end=' ') + choice = sys.stdin.readline().strip() + print() + + choice = str(choice) + if choice not in "123456": + print('Bad parser number') + return + + # Keep track of how long each parser takes. + times = {} + + strategies = { + '1': ('Top-down', TD_STRATEGY), + '2': ('Bottom-up', BU_STRATEGY), + '3': ('Bottom-up left-corner', BU_LC_STRATEGY), + '4': ('Filtered left-corner', LC_STRATEGY), + } + choices = [] + if choice in strategies: + choices = [choice] + if choice == '6': + choices = "1234" + + # Run the requested chart parser(s), except the stepping parser. + for strategy in choices: + print("* Strategy: " + strategies[strategy][0]) + print() + cp = ChartParser(grammar, strategies[strategy][1], trace=trace) + t = time.time() + chart = cp.chart_parse(tokens) + parses = list(chart.parses(grammar.start())) + + times[strategies[strategy][0]] = time.time() - t + print("Nr edges in chart:", len(chart.edges())) + if numparses: + assert len(parses) == numparses, 'Not all parses found' + if print_trees: + for tree in parses: + print(tree) + else: + print("Nr trees:", len(parses)) + print() + + # Run the stepping parser, if requested. 
+ if choice in "56": + print("* Strategy: Stepping (top-down vs bottom-up)") + print() + t = time.time() + cp = SteppingChartParser(grammar, trace=trace) + cp.initialize(tokens) + for i in range(5): + print('*** SWITCH TO TOP DOWN') + cp.set_strategy(TD_STRATEGY) + for j, e in enumerate(cp.step()): + if j > 20 or e is None: + break + print('*** SWITCH TO BOTTOM UP') + cp.set_strategy(BU_STRATEGY) + for j, e in enumerate(cp.step()): + if j > 20 or e is None: + break + times['Stepping'] = time.time() - t + print("Nr edges in chart:", len(cp.chart().edges())) + if numparses: + assert len(list(cp.parses())) == numparses, 'Not all parses found' + if print_trees: + for tree in cp.parses(): + print(tree) + else: + print("Nr trees:", len(list(cp.parses()))) + print() + + # Print the times of all parsers: + if not (print_times and times): + return + print("* Parsing times") + print() + maxlen = max(len(key) for key in times) + format = '%' + repr(maxlen) + 's parser: %6.3fsec' + times_items = times.items() + for (parser, t) in sorted(times_items, key=lambda a: a[1]): + print(format % (parser, t)) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/corenlp.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/corenlp.py new file mode 100644 index 0000000..b9838ba --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/corenlp.py @@ -0,0 +1,775 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Interface to the CoreNLP REST API. +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Dmitrijs Milajevs +# +# URL: +# For license information, see LICENSE.TXT + +from __future__ import unicode_literals + +import re +import json +import time +import socket + +from nltk.internals import find_jar_iter, config_java, java, _java_options + +from nltk.tag.api import TaggerI +from nltk.parse.api import ParserI +from nltk.tokenize.api import TokenizerI +from nltk.parse.dependencygraph import DependencyGraph +from nltk.tree import Tree + +from unittest import skip + +_stanford_url = 'http://stanfordnlp.github.io/CoreNLP/' + + +class CoreNLPServerError(EnvironmentError): + """Exceptions associated with the Core NLP server.""" + + +def try_port(port=0): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.bind(('', port)) + + p = sock.getsockname()[1] + sock.close() + + return p + + +class CoreNLPServer(object): + + _MODEL_JAR_PATTERN = r'stanford-corenlp-(\d+)\.(\d+)\.(\d+)-models\.jar' + _JAR = r'stanford-corenlp-(\d+)\.(\d+)\.(\d+)\.jar' + + def __init__( + self, + path_to_jar=None, + path_to_models_jar=None, + verbose=False, + java_options=None, + corenlp_options=None, + port=None, + ): + + if corenlp_options is None: + corenlp_options = ['-preload', 'tokenize,ssplit,pos,lemma,parse,depparse'] + + jars = list( + find_jar_iter( + self._JAR, + path_to_jar, + env_vars=('CORENLP',), + searchpath=(), + url=_stanford_url, + verbose=verbose, + is_regex=True, + ) + ) + + # find the most recent code and model jar + stanford_jar = max(jars, key=lambda model_name: re.match(self._JAR, model_name)) + + if port is None: + try: + port = try_port(9000) + except socket.error: + port = try_port() + corenlp_options.append(str(port)) + else: + try_port(port) + + self.url = 'http://localhost:{}'.format(port) + + model_jar = max( + find_jar_iter( + self._MODEL_JAR_PATTERN, + path_to_models_jar, + env_vars=('CORENLP_MODELS',), + searchpath=(), + url=_stanford_url, + verbose=verbose, + is_regex=True, + ), + key=lambda model_name: 
re.match(self._MODEL_JAR_PATTERN, model_name), + ) + + self.verbose = verbose + + self._classpath = stanford_jar, model_jar + + self.corenlp_options = corenlp_options + self.java_options = java_options or ['-mx2g'] + + def start(self, stdout='devnull', stderr='devnull'): + """ Starts the CoreNLP server + + :param stdout, stderr: Specifies where CoreNLP output is redirected. Valid values are 'devnull', 'stdout', 'pipe' + """ + import requests + + cmd = ['edu.stanford.nlp.pipeline.StanfordCoreNLPServer'] + + if self.corenlp_options: + cmd.extend(self.corenlp_options) + + # Configure java. + default_options = ' '.join(_java_options) + config_java(options=self.java_options, verbose=self.verbose) + + try: + self.popen = java( + cmd, + classpath=self._classpath, + blocking=False, + stdout=stdout, + stderr=stderr, + ) + finally: + # Return java configurations to their default values. + config_java(options=default_options, verbose=self.verbose) + + # Check that the server is istill running. + returncode = self.popen.poll() + if returncode is not None: + _, stderrdata = self.popen.communicate() + raise CoreNLPServerError( + returncode, + 'Could not start the server. ' + 'The error was: {}'.format(stderrdata.decode('ascii')), + ) + + for i in range(30): + try: + response = requests.get(requests.compat.urljoin(self.url, 'live')) + except requests.exceptions.ConnectionError: + time.sleep(1) + else: + if response.ok: + break + else: + raise CoreNLPServerError('Could not connect to the server.') + + for i in range(60): + try: + response = requests.get(requests.compat.urljoin(self.url, 'ready')) + except requests.exceptions.ConnectionError: + time.sleep(1) + else: + if response.ok: + break + else: + raise CoreNLPServerError('The server is not ready.') + + def stop(self): + self.popen.terminate() + self.popen.wait() + + def __enter__(self): + self.start() + + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop() + return False + + +class GenericCoreNLPParser(ParserI, TokenizerI, TaggerI): + """Interface to the CoreNLP Parser.""" + + def __init__(self, url='http://localhost:9000', encoding='utf8', tagtype=None): + import requests + + self.url = url + self.encoding = encoding + + if tagtype not in ['pos', 'ner', None]: + raise ValueError("tagtype must be either 'pos', 'ner' or None") + + self.tagtype = tagtype + + self.session = requests.Session() + + def parse_sents(self, sentences, *args, **kwargs): + """Parse multiple sentences. + + Takes multiple sentences as a list where each sentence is a list of + words. Each sentence will be automatically tagged with this + CoreNLPParser instance's tagger. + + If a whitespace exists inside a token, then the token will be treated as + several tokens. + + :param sentences: Input sentences to parse + :type sentences: list(list(str)) + :rtype: iter(iter(Tree)) + """ + # Converting list(list(str)) -> list(str) + sentences = (' '.join(words) for words in sentences) + return self.raw_parse_sents(sentences, *args, **kwargs) + + def raw_parse(self, sentence, properties=None, *args, **kwargs): + """Parse a sentence. + + Takes a sentence as a string; before parsing, it will be automatically + tokenized and tagged by the CoreNLP Parser. 
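
# Illustrative sketch (not from the patched file): running the server as a
# context manager and pointing a parser at it. This assumes Java is installed
# and that the Stanford CoreNLP jars can be found via the CORENLP and
# CORENLP_MODELS environment variables.
from nltk.parse.corenlp import CoreNLPParser, CoreNLPServer

with CoreNLPServer() as server:
    parser = CoreNLPParser(url=server.url)
    tree = next(parser.raw_parse('The quick brown fox jumps over the lazy dog.'))
    tree.pretty_print()
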
+ + :param sentence: Input sentence to parse + :type sentence: str + :rtype: iter(Tree) + """ + default_properties = {'tokenize.whitespace': 'false'} + default_properties.update(properties or {}) + + return next( + self.raw_parse_sents( + [sentence], properties=default_properties, *args, **kwargs + ) + ) + + def api_call(self, data, properties=None, timeout=60): + default_properties = { + 'outputFormat': 'json', + 'annotators': 'tokenize,pos,lemma,ssplit,{parser_annotator}'.format( + parser_annotator=self.parser_annotator + ), + } + + default_properties.update(properties or {}) + + response = self.session.post( + self.url, + params={'properties': json.dumps(default_properties)}, + data=data.encode(self.encoding), + timeout=timeout, + ) + + response.raise_for_status() + + return response.json() + + def raw_parse_sents( + self, sentences, verbose=False, properties=None, *args, **kwargs + ): + """Parse multiple sentences. + + Takes multiple sentences as a list of strings. Each sentence will be + automatically tokenized and tagged. + + :param sentences: Input sentences to parse. + :type sentences: list(str) + :rtype: iter(iter(Tree)) + + """ + default_properties = { + # Only splits on '\n', never inside the sentence. + 'ssplit.eolonly': 'true' + } + + default_properties.update(properties or {}) + + """ + for sentence in sentences: + parsed_data = self.api_call(sentence, properties=default_properties) + + assert len(parsed_data['sentences']) == 1 + + for parse in parsed_data['sentences']: + tree = self.make_tree(parse) + yield iter([tree]) + """ + parsed_data = self.api_call('\n'.join(sentences), properties=default_properties) + for parsed_sent in parsed_data['sentences']: + tree = self.make_tree(parsed_sent) + yield iter([tree]) + + def parse_text(self, text, *args, **kwargs): + """Parse a piece of text. + + The text might contain several sentences which will be split by CoreNLP. + + :param str text: text to be split. + :returns: an iterable of syntactic structures. # TODO: should it be an iterable of iterables? + + """ + parsed_data = self.api_call(text, *args, **kwargs) + + for parse in parsed_data['sentences']: + yield self.make_tree(parse) + + def tokenize(self, text, properties=None): + """Tokenize a string of text. + + >>> parser = CoreNLPParser(url='http://localhost:9000') + + >>> text = 'Good muffins cost $3.88\\nin New York. Please buy me\\ntwo of them.\\nThanks.' + >>> list(parser.tokenize(text)) + ['Good', 'muffins', 'cost', '$', '3.88', 'in', 'New', 'York', '.', 'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.'] + + >>> s = "The colour of the wall is blue." + >>> list( + ... parser.tokenize( + ... 'The colour of the wall is blue.', + ... properties={'tokenize.options': 'americanize=true'}, + ... ) + ... ) + ['The', 'color', 'of', 'the', 'wall', 'is', 'blue', '.'] + + """ + default_properties = {'annotators': 'tokenize,ssplit'} + + default_properties.update(properties or {}) + + result = self.api_call(text, properties=default_properties) + + for sentence in result['sentences']: + for token in sentence['tokens']: + yield token['originalText'] or token['word'] + + def tag_sents(self, sentences): + """ + Tag multiple sentences. + + Takes multiple sentences as a list where each sentence is a list of + tokens. 
+ + :param sentences: Input sentences to tag + :type sentences: list(list(str)) + :rtype: list(list(tuple(str, str)) + """ + # Converting list(list(str)) -> list(str) + sentences = (' '.join(words) for words in sentences) + return [sentences[0] for sentences in self.raw_tag_sents(sentences)] + + def tag(self, sentence): + """ + Tag a list of tokens. + + :rtype: list(tuple(str, str)) + + >>> parser = CoreNLPParser(url='http://localhost:9000', tagtype='ner') + >>> tokens = 'Rami Eid is studying at Stony Brook University in NY'.split() + >>> parser.tag(tokens) + [('Rami', 'PERSON'), ('Eid', 'PERSON'), ('is', 'O'), ('studying', 'O'), ('at', 'O'), ('Stony', 'ORGANIZATION'), + ('Brook', 'ORGANIZATION'), ('University', 'ORGANIZATION'), ('in', 'O'), ('NY', 'O')] + + >>> parser = CoreNLPParser(url='http://localhost:9000', tagtype='pos') + >>> tokens = "What is the airspeed of an unladen swallow ?".split() + >>> parser.tag(tokens) + [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), + ('airspeed', 'NN'), ('of', 'IN'), ('an', 'DT'), + ('unladen', 'JJ'), ('swallow', 'VB'), ('?', '.')] + """ + return self.tag_sents([sentence])[0] + + def raw_tag_sents(self, sentences): + """ + Tag multiple sentences. + + Takes multiple sentences as a list where each sentence is a string. + + :param sentences: Input sentences to tag + :type sentences: list(str) + :rtype: list(list(list(tuple(str, str))) + """ + default_properties = { + 'ssplit.isOneSentence': 'true', + 'annotators': 'tokenize,ssplit,', + } + + # Supports only 'pos' or 'ner' tags. + assert self.tagtype in ['pos', 'ner'] + default_properties['annotators'] += self.tagtype + for sentence in sentences: + tagged_data = self.api_call(sentence, properties=default_properties) + yield [ + [ + (token['word'], token[self.tagtype]) + for token in tagged_sentence['tokens'] + ] + for tagged_sentence in tagged_data['sentences'] + ] + + +class CoreNLPParser(GenericCoreNLPParser): + """ + >>> parser = CoreNLPParser(url='http://localhost:9000') + + >>> next( + ... parser.raw_parse('The quick brown fox jumps over the lazy dog.') + ... ).pretty_print() # doctest: +NORMALIZE_WHITESPACE + ROOT + | + S + _______________|__________________________ + | VP | + | _________|___ | + | | PP | + | | ________|___ | + NP | | NP | + ____|__________ | | _______|____ | + DT JJ JJ NN VBZ IN DT JJ NN . + | | | | | | | | | | + The quick brown fox jumps over the lazy dog . + + >>> (parse_fox, ), (parse_wolf, ) = parser.raw_parse_sents( + ... [ + ... 'The quick brown fox jumps over the lazy dog.', + ... 'The quick grey wolf jumps over the lazy fox.', + ... ] + ... ) + + >>> parse_fox.pretty_print() # doctest: +NORMALIZE_WHITESPACE + ROOT + | + S + _______________|__________________________ + | VP | + | _________|___ | + | | PP | + | | ________|___ | + NP | | NP | + ____|__________ | | _______|____ | + DT JJ JJ NN VBZ IN DT JJ NN . + | | | | | | | | | | + The quick brown fox jumps over the lazy dog . + + >>> parse_wolf.pretty_print() # doctest: +NORMALIZE_WHITESPACE + ROOT + | + S + _______________|__________________________ + | VP | + | _________|___ | + | | PP | + | | ________|___ | + NP | | NP | + ____|_________ | | _______|____ | + DT JJ JJ NN VBZ IN DT JJ NN . + | | | | | | | | | | + The quick grey wolf jumps over the lazy fox . + + >>> (parse_dog, ), (parse_friends, ) = parser.parse_sents( + ... [ + ... "I 'm a dog".split(), + ... "This is my friends ' cat ( the tabby )".split(), + ... ] + ... 
) + + >>> parse_dog.pretty_print() # doctest: +NORMALIZE_WHITESPACE + ROOT + | + S + _______|____ + | VP + | ________|___ + NP | NP + | | ___|___ + PRP VBP DT NN + | | | | + I 'm a dog + + >>> parse_friends.pretty_print() # doctest: +NORMALIZE_WHITESPACE + ROOT + | + S + ____|___________ + | VP + | ___________|_____________ + | | NP + | | _______|_________ + | | NP PRN + | | _____|_______ ____|______________ + NP | NP | | NP | + | | ______|_________ | | ___|____ | + DT VBZ PRP$ NNS POS NN -LRB- DT NN -RRB- + | | | | | | | | | | + This is my friends ' cat -LRB- the tabby -RRB- + + >>> parse_john, parse_mary, = parser.parse_text( + ... 'John loves Mary. Mary walks.' + ... ) + + >>> parse_john.pretty_print() # doctest: +NORMALIZE_WHITESPACE + ROOT + | + S + _____|_____________ + | VP | + | ____|___ | + NP | NP | + | | | | + NNP VBZ NNP . + | | | | + John loves Mary . + + >>> parse_mary.pretty_print() # doctest: +NORMALIZE_WHITESPACE + ROOT + | + S + _____|____ + NP VP | + | | | + NNP VBZ . + | | | + Mary walks . + + Special cases + ------------- + + >>> next( + ... parser.raw_parse( + ... 'NASIRIYA, Iraq—Iraqi doctors who treated former prisoner of war ' + ... 'Jessica Lynch have angrily dismissed claims made in her biography ' + ... 'that she was raped by her Iraqi captors.' + ... ) + ... ).height() + 20 + + >>> next( + ... parser.raw_parse( + ... "The broader Standard & Poor's 500 Index <.SPX> was 0.46 points lower, or " + ... '0.05 percent, at 997.02.' + ... ) + ... ).height() + 9 + + """ + + _OUTPUT_FORMAT = 'penn' + parser_annotator = 'parse' + + def make_tree(self, result): + return Tree.fromstring(result['parse']) + + +class CoreNLPDependencyParser(GenericCoreNLPParser): + """Dependency parser. + + >>> dep_parser = CoreNLPDependencyParser(url='http://localhost:9000') + + >>> parse, = dep_parser.raw_parse( + ... 'The quick brown fox jumps over the lazy dog.' + ... ) + >>> print(parse.to_conll(4)) # doctest: +NORMALIZE_WHITESPACE + The DT 4 det + quick JJ 4 amod + brown JJ 4 amod + fox NN 5 nsubj + jumps VBZ 0 ROOT + over IN 9 case + the DT 9 det + lazy JJ 9 amod + dog NN 5 nmod + . . 5 punct + + >>> print(parse.tree()) # doctest: +NORMALIZE_WHITESPACE + (jumps (fox The quick brown) (dog over the lazy) .) + + >>> for governor, dep, dependent in parse.triples(): + ... print(governor, dep, dependent) # doctest: +NORMALIZE_WHITESPACE + ('jumps', 'VBZ') nsubj ('fox', 'NN') + ('fox', 'NN') det ('The', 'DT') + ('fox', 'NN') amod ('quick', 'JJ') + ('fox', 'NN') amod ('brown', 'JJ') + ('jumps', 'VBZ') nmod ('dog', 'NN') + ('dog', 'NN') case ('over', 'IN') + ('dog', 'NN') det ('the', 'DT') + ('dog', 'NN') amod ('lazy', 'JJ') + ('jumps', 'VBZ') punct ('.', '.') + + >>> (parse_fox, ), (parse_dog, ) = dep_parser.raw_parse_sents( + ... [ + ... 'The quick brown fox jumps over the lazy dog.', + ... 'The quick grey wolf jumps over the lazy fox.', + ... ] + ... ) + >>> print(parse_fox.to_conll(4)) # doctest: +NORMALIZE_WHITESPACE + The DT 4 det + quick JJ 4 amod + brown JJ 4 amod + fox NN 5 nsubj + jumps VBZ 0 ROOT + over IN 9 case + the DT 9 det + lazy JJ 9 amod + dog NN 5 nmod + . . 5 punct + + >>> print(parse_dog.to_conll(4)) # doctest: +NORMALIZE_WHITESPACE + The DT 4 det + quick JJ 4 amod + grey JJ 4 amod + wolf NN 5 nsubj + jumps VBZ 0 ROOT + over IN 9 case + the DT 9 det + lazy JJ 9 amod + fox NN 5 nmod + . . 5 punct + + >>> (parse_dog, ), (parse_friends, ) = dep_parser.parse_sents( + ... [ + ... "I 'm a dog".split(), + ... "This is my friends ' cat ( the tabby )".split(), + ... ] + ... 
) + >>> print(parse_dog.to_conll(4)) # doctest: +NORMALIZE_WHITESPACE + I PRP 4 nsubj + 'm VBP 4 cop + a DT 4 det + dog NN 0 ROOT + + >>> print(parse_friends.to_conll(4)) # doctest: +NORMALIZE_WHITESPACE + This DT 6 nsubj + is VBZ 6 cop + my PRP$ 4 nmod:poss + friends NNS 6 nmod:poss + ' POS 4 case + cat NN 0 ROOT + -LRB- -LRB- 9 punct + the DT 9 det + tabby NN 6 appos + -RRB- -RRB- 9 punct + + >>> parse_john, parse_mary, = dep_parser.parse_text( + ... 'John loves Mary. Mary walks.' + ... ) + + >>> print(parse_john.to_conll(4)) # doctest: +NORMALIZE_WHITESPACE + John NNP 2 nsubj + loves VBZ 0 ROOT + Mary NNP 2 dobj + . . 2 punct + + >>> print(parse_mary.to_conll(4)) # doctest: +NORMALIZE_WHITESPACE + Mary NNP 2 nsubj + walks VBZ 0 ROOT + . . 2 punct + + Special cases + ------------- + + Non-breaking space inside of a token. + + >>> len( + ... next( + ... dep_parser.raw_parse( + ... 'Anhalt said children typically treat a 20-ounce soda bottle as one ' + ... 'serving, while it actually contains 2 1/2 servings.' + ... ) + ... ).nodes + ... ) + 21 + + Phone numbers. + + >>> len( + ... next( + ... dep_parser.raw_parse('This is not going to crash: 01 111 555.') + ... ).nodes + ... ) + 10 + + >>> print( + ... next( + ... dep_parser.raw_parse('The underscore _ should not simply disappear.') + ... ).to_conll(4) + ... ) # doctest: +NORMALIZE_WHITESPACE + The DT 3 det + underscore VBP 3 amod + _ NN 7 nsubj + should MD 7 aux + not RB 7 neg + simply RB 7 advmod + disappear VB 0 ROOT + . . 7 punct + + >>> print( + ... '\\n'.join( + ... next( + ... dep_parser.raw_parse( + ... 'for all of its insights into the dream world of teen life , and its electronic expression through ' + ... 'cyber culture , the film gives no quarter to anyone seeking to pull a cohesive story out of its 2 ' + ... '1/2-hour running time .' + ... ) + ... ).to_conll(4).split('\\n')[-8:] + ... ) + ... ) + its PRP$ 40 nmod:poss + 2 1/2 CD 40 nummod + - : 40 punct + hour NN 31 nmod + running VBG 42 amod + time NN 40 dep + . . 24 punct + + + """ + + _OUTPUT_FORMAT = 'conll2007' + parser_annotator = 'depparse' + + def make_tree(self, result): + + return DependencyGraph( + ( + ' '.join(n_items[1:]) # NLTK expects an iterable of strings... + for n_items in sorted(transform(result)) + ), + cell_separator=' ', # To make sure that a non-breaking space is kept inside of a token. + ) + + +def transform(sentence): + for dependency in sentence['basicDependencies']: + + dependent_index = dependency['dependent'] + token = sentence['tokens'][dependent_index - 1] + + # Return values that we don't know as '_'. Also, consider tag and ctag + # to be equal. + yield ( + dependent_index, + '_', + token['word'], + token['lemma'], + token['pos'], + token['pos'], + '_', + str(dependency['governor']), + dependency['dep'], + '_', + '_', + ) + + +@skip('Skipping all CoreNLP tests.') +def setup_module(module): + from nose import SkipTest + + global server + + try: + server = CoreNLPServer(port=9000) + except LookupError as e: + raise SkipTest('Could not instantiate CoreNLPServer.') + + try: + server.start() + except CoreNLPServerError as e: + raise SkipTest( + 'Skipping CoreNLP tests because the server could not be started. ' + 'Make sure that the 9000 port is free. 
' + '{}'.format(e.strerror) + ) + + +@skip('Skipping all CoreNLP tests.') +def teardown_module(module): + server.stop() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/dependencygraph.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/dependencygraph.py new file mode 100644 index 0000000..8c6156b --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/dependencygraph.py @@ -0,0 +1,785 @@ +# Natural Language Toolkit: Dependency Grammars +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Jason Narad +# Steven Bird (modifications) +# +# URL: +# For license information, see LICENSE.TXT +# + +""" +Tools for reading and writing dependency trees. +The input is assumed to be in Malt-TAB format +(http://stp.lingfil.uu.se/~nivre/research/MaltXML.html). +""" +from __future__ import print_function, unicode_literals + +from collections import defaultdict +from itertools import chain +from pprint import pformat +import subprocess +import warnings + +from six import string_types + +from nltk.tree import Tree +from nltk.compat import python_2_unicode_compatible + + +################################################################# +# DependencyGraph Class +################################################################# + + +@python_2_unicode_compatible +class DependencyGraph(object): + """ + A container for the nodes and labelled edges of a dependency structure. + """ + + def __init__( + self, + tree_str=None, + cell_extractor=None, + zero_based=False, + cell_separator=None, + top_relation_label='ROOT', + ): + """Dependency graph. + + We place a dummy `TOP` node with the index 0, since the root node is + often assigned 0 as its head. This also means that the indexing of the + nodes corresponds directly to the Malt-TAB format, which starts at 1. + + If zero-based is True, then Malt-TAB-like input with node numbers + starting at 0 and the root node assigned -1 (as produced by, e.g., + zpar). + + :param str cell_separator: the cell separator. If not provided, cells + are split by whitespace. + + :param str top_relation_label: the label by which the top relation is + identified, for examlple, `ROOT`, `null` or `TOP`. + + """ + self.nodes = defaultdict( + lambda: { + 'address': None, + 'word': None, + 'lemma': None, + 'ctag': None, + 'tag': None, + 'feats': None, + 'head': None, + 'deps': defaultdict(list), + 'rel': None, + } + ) + + self.nodes[0].update({'ctag': 'TOP', 'tag': 'TOP', 'address': 0}) + + self.root = None + + if tree_str: + self._parse( + tree_str, + cell_extractor=cell_extractor, + zero_based=zero_based, + cell_separator=cell_separator, + top_relation_label=top_relation_label, + ) + + def remove_by_address(self, address): + """ + Removes the node with the given address. References + to this node in others will still exist. + """ + del self.nodes[address] + + def redirect_arcs(self, originals, redirect): + """ + Redirects arcs to any of the nodes in the originals list + to the redirect node address. + """ + for node in self.nodes.values(): + new_deps = [] + for dep in node['deps']: + if dep in originals: + new_deps.append(redirect) + else: + new_deps.append(dep) + node['deps'] = new_deps + + def add_arc(self, head_address, mod_address): + """ + Adds an arc from the node specified by head_address to the + node specified by the mod address. 
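+
+        For example (hypothetical addresses), ``dg.add_arc(2, 1)`` records node 1
+        as a dependent of node 2, filed under node 1's current relation label.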
+ """ + relation = self.nodes[mod_address]['rel'] + self.nodes[head_address]['deps'].setdefault(relation, []) + self.nodes[head_address]['deps'][relation].append(mod_address) + # self.nodes[head_address]['deps'].append(mod_address) + + def connect_graph(self): + """ + Fully connects all non-root nodes. All nodes are set to be dependents + of the root node. + """ + for node1 in self.nodes.values(): + for node2 in self.nodes.values(): + if node1['address'] != node2['address'] and node2['rel'] != 'TOP': + relation = node2['rel'] + node1['deps'].setdefault(relation, []) + node1['deps'][relation].append(node2['address']) + # node1['deps'].append(node2['address']) + + def get_by_address(self, node_address): + """Return the node with the given address.""" + return self.nodes[node_address] + + def contains_address(self, node_address): + """ + Returns true if the graph contains a node with the given node + address, false otherwise. + """ + return node_address in self.nodes + + def to_dot(self): + """Return a dot representation suitable for using with Graphviz. + + >>> dg = DependencyGraph( + ... 'John N 2\\n' + ... 'loves V 0\\n' + ... 'Mary N 2' + ... ) + >>> print(dg.to_dot()) + digraph G{ + edge [dir=forward] + node [shape=plaintext] + + 0 [label="0 (None)"] + 0 -> 2 [label="ROOT"] + 1 [label="1 (John)"] + 2 [label="2 (loves)"] + 2 -> 1 [label=""] + 2 -> 3 [label=""] + 3 [label="3 (Mary)"] + } + + """ + # Start the digraph specification + s = 'digraph G{\n' + s += 'edge [dir=forward]\n' + s += 'node [shape=plaintext]\n' + + # Draw the remaining nodes + for node in sorted(self.nodes.values(), key=lambda v: v['address']): + s += '\n%s [label="%s (%s)"]' % ( + node['address'], + node['address'], + node['word'], + ) + for rel, deps in node['deps'].items(): + for dep in deps: + if rel is not None: + s += '\n%s -> %s [label="%s"]' % (node['address'], dep, rel) + else: + s += '\n%s -> %s ' % (node['address'], dep) + s += "\n}" + + return s + + def _repr_svg_(self): + """Show SVG representation of the transducer (IPython magic). + + >>> dg = DependencyGraph( + ... 'John N 2\\n' + ... 'loves V 0\\n' + ... 'Mary N 2' + ... ) + >>> dg._repr_svg_().split('\\n')[0] + '' + + """ + dot_string = self.to_dot() + + try: + process = subprocess.Popen( + ['dot', '-Tsvg'], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + ) + except OSError: + raise Exception('Cannot find the dot binary from Graphviz package') + out, err = process.communicate(dot_string) + if err: + raise Exception( + 'Cannot create svg representation by running dot from string: {}' + ''.format(dot_string) + ) + return out + + def __str__(self): + return pformat(self.nodes) + + def __repr__(self): + return "".format(len(self.nodes)) + + @staticmethod + def load( + filename, zero_based=False, cell_separator=None, top_relation_label='ROOT' + ): + """ + :param filename: a name of a file in Malt-TAB format + :param zero_based: nodes in the input file are numbered starting from 0 + rather than 1 (as produced by, e.g., zpar) + :param str cell_separator: the cell separator. If not provided, cells + are split by whitespace. + :param str top_relation_label: the label by which the top relation is + identified, for examlple, `ROOT`, `null` or `TOP`. 
+ + :return: a list of DependencyGraphs + + """ + with open(filename) as infile: + return [ + DependencyGraph( + tree_str, + zero_based=zero_based, + cell_separator=cell_separator, + top_relation_label=top_relation_label, + ) + for tree_str in infile.read().split('\n\n') + ] + + def left_children(self, node_index): + """ + Returns the number of left children under the node specified + by the given address. + """ + children = chain.from_iterable(self.nodes[node_index]['deps'].values()) + index = self.nodes[node_index]['address'] + return sum(1 for c in children if c < index) + + def right_children(self, node_index): + """ + Returns the number of right children under the node specified + by the given address. + """ + children = chain.from_iterable(self.nodes[node_index]['deps'].values()) + index = self.nodes[node_index]['address'] + return sum(1 for c in children if c > index) + + def add_node(self, node): + if not self.contains_address(node['address']): + self.nodes[node['address']].update(node) + + def _parse( + self, + input_, + cell_extractor=None, + zero_based=False, + cell_separator=None, + top_relation_label='ROOT', + ): + """Parse a sentence. + + :param extractor: a function that given a tuple of cells returns a + 7-tuple, where the values are ``word, lemma, ctag, tag, feats, head, + rel``. + + :param str cell_separator: the cell separator. If not provided, cells + are split by whitespace. + + :param str top_relation_label: the label by which the top relation is + identified, for examlple, `ROOT`, `null` or `TOP`. + + """ + + def extract_3_cells(cells, index): + word, tag, head = cells + return index, word, word, tag, tag, '', head, '' + + def extract_4_cells(cells, index): + word, tag, head, rel = cells + return index, word, word, tag, tag, '', head, rel + + def extract_7_cells(cells, index): + line_index, word, lemma, tag, _, head, rel = cells + try: + index = int(line_index) + except ValueError: + # index can't be parsed as an integer, use default + pass + return index, word, lemma, tag, tag, '', head, rel + + def extract_10_cells(cells, index): + line_index, word, lemma, ctag, tag, feats, head, rel, _, _ = cells + try: + index = int(line_index) + except ValueError: + # index can't be parsed as an integer, use default + pass + return index, word, lemma, ctag, tag, feats, head, rel + + extractors = { + 3: extract_3_cells, + 4: extract_4_cells, + 7: extract_7_cells, + 10: extract_10_cells, + } + + if isinstance(input_, string_types): + input_ = (line for line in input_.split('\n')) + + lines = (l.rstrip() for l in input_) + lines = (l for l in lines if l) + + cell_number = None + for index, line in enumerate(lines, start=1): + cells = line.split(cell_separator) + if cell_number is None: + cell_number = len(cells) + else: + assert cell_number == len(cells) + + if cell_extractor is None: + try: + cell_extractor = extractors[cell_number] + except KeyError: + raise ValueError( + 'Number of tab-delimited fields ({0}) not supported by ' + 'CoNLL(10) or Malt-Tab(4) format'.format(cell_number) + ) + + try: + index, word, lemma, ctag, tag, feats, head, rel = cell_extractor( + cells, index + ) + except (TypeError, ValueError): + # cell_extractor doesn't take 2 arguments or doesn't return 8 + # values; assume the cell_extractor is an older external + # extractor and doesn't accept or return an index. 
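+                # Fall back to the legacy single-argument signature; keep the
+                # running line index from enumerate() as the node address.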
+ word, lemma, ctag, tag, feats, head, rel = cell_extractor(cells) + + if head == '_': + continue + + head = int(head) + if zero_based: + head += 1 + + self.nodes[index].update( + { + 'address': index, + 'word': word, + 'lemma': lemma, + 'ctag': ctag, + 'tag': tag, + 'feats': feats, + 'head': head, + 'rel': rel, + } + ) + + # Make sure that the fake root node has labeled dependencies. + if (cell_number == 3) and (head == 0): + rel = top_relation_label + self.nodes[head]['deps'][rel].append(index) + + if self.nodes[0]['deps'][top_relation_label]: + root_address = self.nodes[0]['deps'][top_relation_label][0] + self.root = self.nodes[root_address] + self.top_relation_label = top_relation_label + else: + warnings.warn( + "The graph doesn't contain a node " "that depends on the root element." + ) + + def _word(self, node, filter=True): + w = node['word'] + if filter: + if w != ',': + return w + return w + + def _tree(self, i): + """ Turn dependency graphs into NLTK trees. + + :param int i: index of a node + :return: either a word (if the indexed node is a leaf) or a ``Tree``. + """ + node = self.get_by_address(i) + word = node['word'] + deps = sorted(chain.from_iterable(node['deps'].values())) + + if deps: + return Tree(word, [self._tree(dep) for dep in deps]) + else: + return word + + def tree(self): + """ + Starting with the ``root`` node, build a dependency tree using the NLTK + ``Tree`` constructor. Dependency labels are omitted. + """ + node = self.root + + word = node['word'] + deps = sorted(chain.from_iterable(node['deps'].values())) + return Tree(word, [self._tree(dep) for dep in deps]) + + def triples(self, node=None): + """ + Extract dependency triples of the form: + ((head word, head tag), rel, (dep word, dep tag)) + """ + + if not node: + node = self.root + + head = (node['word'], node['ctag']) + for i in sorted(chain.from_iterable(node['deps'].values())): + dep = self.get_by_address(i) + yield (head, dep['rel'], (dep['word'], dep['ctag'])) + for triple in self.triples(node=dep): + yield triple + + def _hd(self, i): + try: + return self.nodes[i]['head'] + except IndexError: + return None + + def _rel(self, i): + try: + return self.nodes[i]['rel'] + except IndexError: + return None + + # what's the return type? Boolean or list? + def contains_cycle(self): + """Check whether there are cycles. + + >>> dg = DependencyGraph(treebank_data) + >>> dg.contains_cycle() + False + + >>> cyclic_dg = DependencyGraph() + >>> top = {'word': None, 'deps': [1], 'rel': 'TOP', 'address': 0} + >>> child1 = {'word': None, 'deps': [2], 'rel': 'NTOP', 'address': 1} + >>> child2 = {'word': None, 'deps': [4], 'rel': 'NTOP', 'address': 2} + >>> child3 = {'word': None, 'deps': [1], 'rel': 'NTOP', 'address': 3} + >>> child4 = {'word': None, 'deps': [3], 'rel': 'NTOP', 'address': 4} + >>> cyclic_dg.nodes = { + ... 0: top, + ... 1: child1, + ... 2: child2, + ... 3: child3, + ... 4: child4, + ... 
} + >>> cyclic_dg.root = top + + >>> cyclic_dg.contains_cycle() + [3, 1, 2, 4] + + """ + distances = {} + + for node in self.nodes.values(): + for dep in node['deps']: + key = tuple([node['address'], dep]) + distances[key] = 1 + + for _ in self.nodes: + new_entries = {} + + for pair1 in distances: + for pair2 in distances: + if pair1[1] == pair2[0]: + key = tuple([pair1[0], pair2[1]]) + new_entries[key] = distances[pair1] + distances[pair2] + + for pair in new_entries: + distances[pair] = new_entries[pair] + if pair[0] == pair[1]: + path = self.get_cycle_path(self.get_by_address(pair[0]), pair[0]) + return path + + return False # return []? + + def get_cycle_path(self, curr_node, goal_node_index): + for dep in curr_node['deps']: + if dep == goal_node_index: + return [curr_node['address']] + for dep in curr_node['deps']: + path = self.get_cycle_path(self.get_by_address(dep), goal_node_index) + if len(path) > 0: + path.insert(0, curr_node['address']) + return path + return [] + + def to_conll(self, style): + """ + The dependency graph in CoNLL format. + + :param style: the style to use for the format (3, 4, 10 columns) + :type style: int + :rtype: str + """ + + if style == 3: + template = '{word}\t{tag}\t{head}\n' + elif style == 4: + template = '{word}\t{tag}\t{head}\t{rel}\n' + elif style == 10: + template = ( + '{i}\t{word}\t{lemma}\t{ctag}\t{tag}\t{feats}\t{head}\t{rel}\t_\t_\n' + ) + else: + raise ValueError( + 'Number of tab-delimited fields ({0}) not supported by ' + 'CoNLL(10) or Malt-Tab(4) format'.format(style) + ) + + return ''.join( + template.format(i=i, **node) + for i, node in sorted(self.nodes.items()) + if node['tag'] != 'TOP' + ) + + def nx_graph(self): + """Convert the data in a ``nodelist`` into a networkx labeled directed graph.""" + import networkx + + nx_nodelist = list(range(1, len(self.nodes))) + nx_edgelist = [ + (n, self._hd(n), self._rel(n)) for n in nx_nodelist if self._hd(n) + ] + self.nx_labels = {} + for n in nx_nodelist: + self.nx_labels[n] = self.nodes[n]['word'] + + g = networkx.MultiDiGraph() + g.add_nodes_from(nx_nodelist) + g.add_edges_from(nx_edgelist) + + return g + + +class DependencyGraphError(Exception): + """Dependency graph exception.""" + + +def demo(): + malt_demo() + conll_demo() + conll_file_demo() + cycle_finding_demo() + + +def malt_demo(nx=False): + """ + A demonstration of the result of reading a dependency + version of the first sentence of the Penn Treebank. + """ + dg = DependencyGraph( + """Pierre NNP 2 NMOD +Vinken NNP 8 SUB +, , 2 P +61 CD 5 NMOD +years NNS 6 AMOD +old JJ 2 NMOD +, , 2 P +will MD 0 ROOT +join VB 8 VC +the DT 11 NMOD +board NN 9 OBJ +as IN 9 VMOD +a DT 15 NMOD +nonexecutive JJ 15 NMOD +director NN 12 PMOD +Nov. NNP 9 VMOD +29 CD 16 NMOD +. . 9 VMOD +""" + ) + tree = dg.tree() + tree.pprint() + if nx: + # currently doesn't work + import networkx + from matplotlib import pylab + + g = dg.nx_graph() + g.info() + pos = networkx.spring_layout(g, dim=1) + networkx.draw_networkx_nodes(g, pos, node_size=50) + # networkx.draw_networkx_edges(g, pos, edge_color='k', width=8) + networkx.draw_networkx_labels(g, pos, dg.nx_labels) + pylab.xticks([]) + pylab.yticks([]) + pylab.savefig('tree.png') + pylab.show() + + +def conll_demo(): + """ + A demonstration of how to read a string representation of + a CoNLL format dependency tree. 
+ """ + dg = DependencyGraph(conll_data1) + tree = dg.tree() + tree.pprint() + print(dg) + print(dg.to_conll(4)) + + +def conll_file_demo(): + print('Mass conll_read demo...') + graphs = [DependencyGraph(entry) for entry in conll_data2.split('\n\n') if entry] + for graph in graphs: + tree = graph.tree() + print('\n') + tree.pprint() + + +def cycle_finding_demo(): + dg = DependencyGraph(treebank_data) + print(dg.contains_cycle()) + cyclic_dg = DependencyGraph() + cyclic_dg.add_node({'word': None, 'deps': [1], 'rel': 'TOP', 'address': 0}) + cyclic_dg.add_node({'word': None, 'deps': [2], 'rel': 'NTOP', 'address': 1}) + cyclic_dg.add_node({'word': None, 'deps': [4], 'rel': 'NTOP', 'address': 2}) + cyclic_dg.add_node({'word': None, 'deps': [1], 'rel': 'NTOP', 'address': 3}) + cyclic_dg.add_node({'word': None, 'deps': [3], 'rel': 'NTOP', 'address': 4}) + print(cyclic_dg.contains_cycle()) + + +treebank_data = """Pierre NNP 2 NMOD +Vinken NNP 8 SUB +, , 2 P +61 CD 5 NMOD +years NNS 6 AMOD +old JJ 2 NMOD +, , 2 P +will MD 0 ROOT +join VB 8 VC +the DT 11 NMOD +board NN 9 OBJ +as IN 9 VMOD +a DT 15 NMOD +nonexecutive JJ 15 NMOD +director NN 12 PMOD +Nov. NNP 9 VMOD +29 CD 16 NMOD +. . 9 VMOD +""" + +conll_data1 = """ +1 Ze ze Pron Pron per|3|evofmv|nom 2 su _ _ +2 had heb V V trans|ovt|1of2of3|ev 0 ROOT _ _ +3 met met Prep Prep voor 8 mod _ _ +4 haar haar Pron Pron bez|3|ev|neut|attr 5 det _ _ +5 moeder moeder N N soort|ev|neut 3 obj1 _ _ +6 kunnen kan V V hulp|ott|1of2of3|mv 2 vc _ _ +7 gaan ga V V hulp|inf 6 vc _ _ +8 winkelen winkel V V intrans|inf 11 cnj _ _ +9 , , Punc Punc komma 8 punct _ _ +10 zwemmen zwem V V intrans|inf 11 cnj _ _ +11 of of Conj Conj neven 7 vc _ _ +12 terrassen terras N N soort|mv|neut 11 cnj _ _ +13 . . Punc Punc punt 12 punct _ _ +""" + +conll_data2 = """1 Cathy Cathy N N eigen|ev|neut 2 su _ _ +2 zag zie V V trans|ovt|1of2of3|ev 0 ROOT _ _ +3 hen hen Pron Pron per|3|mv|datofacc 2 obj1 _ _ +4 wild wild Adj Adj attr|stell|onverv 5 mod _ _ +5 zwaaien zwaai N N soort|mv|neut 2 vc _ _ +6 . . Punc Punc punt 5 punct _ _ + +1 Ze ze Pron Pron per|3|evofmv|nom 2 su _ _ +2 had heb V V trans|ovt|1of2of3|ev 0 ROOT _ _ +3 met met Prep Prep voor 8 mod _ _ +4 haar haar Pron Pron bez|3|ev|neut|attr 5 det _ _ +5 moeder moeder N N soort|ev|neut 3 obj1 _ _ +6 kunnen kan V V hulp|ott|1of2of3|mv 2 vc _ _ +7 gaan ga V V hulp|inf 6 vc _ _ +8 winkelen winkel V V intrans|inf 11 cnj _ _ +9 , , Punc Punc komma 8 punct _ _ +10 zwemmen zwem V V intrans|inf 11 cnj _ _ +11 of of Conj Conj neven 7 vc _ _ +12 terrassen terras N N soort|mv|neut 11 cnj _ _ +13 . . Punc Punc punt 12 punct _ _ + +1 Dat dat Pron Pron aanw|neut|attr 2 det _ _ +2 werkwoord werkwoord N N soort|ev|neut 6 obj1 _ _ +3 had heb V V hulp|ovt|1of2of3|ev 0 ROOT _ _ +4 ze ze Pron Pron per|3|evofmv|nom 6 su _ _ +5 zelf zelf Pron Pron aanw|neut|attr|wzelf 3 predm _ _ +6 uitgevonden vind V V trans|verldw|onverv 3 vc _ _ +7 . . Punc Punc punt 6 punct _ _ + +1 Het het Pron Pron onbep|neut|zelfst 2 su _ _ +2 hoorde hoor V V trans|ovt|1of2of3|ev 0 ROOT _ _ +3 bij bij Prep Prep voor 2 ld _ _ +4 de de Art Art bep|zijdofmv|neut 6 det _ _ +5 warme warm Adj Adj attr|stell|vervneut 6 mod _ _ +6 zomerdag zomerdag N N soort|ev|neut 3 obj1 _ _ +7 die die Pron Pron betr|neut|zelfst 6 mod _ _ +8 ze ze Pron Pron per|3|evofmv|nom 12 su _ _ +9 ginds ginds Adv Adv gew|aanw 12 mod _ _ +10 achter achter Adv Adv gew|geenfunc|stell|onverv 12 svp _ _ +11 had heb V V hulp|ovt|1of2of3|ev 7 body _ _ +12 gelaten laat V V trans|verldw|onverv 11 vc _ _ +13 . . 
Punc Punc punt 12 punct _ _ + +1 Ze ze Pron Pron per|3|evofmv|nom 2 su _ _ +2 hadden heb V V trans|ovt|1of2of3|mv 0 ROOT _ _ +3 languit languit Adv Adv gew|geenfunc|stell|onverv 11 mod _ _ +4 naast naast Prep Prep voor 11 mod _ _ +5 elkaar elkaar Pron Pron rec|neut 4 obj1 _ _ +6 op op Prep Prep voor 11 ld _ _ +7 de de Art Art bep|zijdofmv|neut 8 det _ _ +8 strandstoelen strandstoel N N soort|mv|neut 6 obj1 _ _ +9 kunnen kan V V hulp|inf 2 vc _ _ +10 gaan ga V V hulp|inf 9 vc _ _ +11 liggen lig V V intrans|inf 10 vc _ _ +12 . . Punc Punc punt 11 punct _ _ + +1 Zij zij Pron Pron per|3|evofmv|nom 2 su _ _ +2 zou zal V V hulp|ovt|1of2of3|ev 7 cnj _ _ +3 mams mams N N soort|ev|neut 4 det _ _ +4 rug rug N N soort|ev|neut 5 obj1 _ _ +5 ingewreven wrijf V V trans|verldw|onverv 6 vc _ _ +6 hebben heb V V hulp|inf 2 vc _ _ +7 en en Conj Conj neven 0 ROOT _ _ +8 mam mam V V trans|ovt|1of2of3|ev 7 cnj _ _ +9 de de Art Art bep|zijdofmv|neut 10 det _ _ +10 hare hare Pron Pron bez|3|ev|neut|attr 8 obj1 _ _ +11 . . Punc Punc punt 10 punct _ _ + +1 Of of Conj Conj onder|metfin 0 ROOT _ _ +2 ze ze Pron Pron per|3|evofmv|nom 3 su _ _ +3 had heb V V hulp|ovt|1of2of3|ev 0 ROOT _ _ +4 gewoon gewoon Adj Adj adv|stell|onverv 10 mod _ _ +5 met met Prep Prep voor 10 mod _ _ +6 haar haar Pron Pron bez|3|ev|neut|attr 7 det _ _ +7 vriendinnen vriendin N N soort|mv|neut 5 obj1 _ _ +8 rond rond Adv Adv deelv 10 svp _ _ +9 kunnen kan V V hulp|inf 3 vc _ _ +10 slenteren slenter V V intrans|inf 9 vc _ _ +11 in in Prep Prep voor 10 mod _ _ +12 de de Art Art bep|zijdofmv|neut 13 det _ _ +13 buurt buurt N N soort|ev|neut 11 obj1 _ _ +14 van van Prep Prep voor 13 mod _ _ +15 Trafalgar_Square Trafalgar_Square MWU N_N eigen|ev|neut_eigen|ev|neut 14 obj1 _ _ +16 . . Punc Punc punt 15 punct _ _ +""" + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/earleychart.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/earleychart.py new file mode 100644 index 0000000..fdb8136 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/earleychart.py @@ -0,0 +1,556 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: An Incremental Earley Chart Parser +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Peter Ljunglöf +# Rob Speer +# Edward Loper +# Steven Bird +# Jean Mark Gawron +# URL: +# For license information, see LICENSE.TXT + +""" +Data classes and parser implementations for *incremental* chart +parsers, which use dynamic programming to efficiently parse a text. +A "chart parser" derives parse trees for a text by iteratively adding +\"edges\" to a \"chart\". Each "edge" represents a hypothesis about the tree +structure for a subsequence of the text. The "chart" is a +\"blackboard\" for composing and combining these hypotheses. + +A parser is "incremental", if it guarantees that for all i, j where i < j, +all edges ending at i are built before any edges ending at j. +This is appealing for, say, speech recognizer hypothesis filtering. + +The main parser class is ``EarleyChartParser``, which is a top-down +algorithm, originally formulated by Jay Earley (1970). 
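+
+A minimal usage sketch (assuming ``grammar`` is an ``nltk.CFG`` and ``tokens`` is
+a list of words covered by that grammar)::
+
+    parser = EarleyChartParser(grammar)
+    chart = parser.chart_parse(tokens)
+    trees = list(chart.parses(grammar.start()))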
+""" +from __future__ import print_function, division + +from six.moves import range + +from nltk.parse.chart import ( + Chart, + ChartParser, + EdgeI, + LeafEdge, + LeafInitRule, + BottomUpPredictRule, + BottomUpPredictCombineRule, + TopDownInitRule, + SingleEdgeFundamentalRule, + EmptyPredictRule, + CachedTopDownPredictRule, + FilteredSingleEdgeFundamentalRule, + FilteredBottomUpPredictCombineRule, +) +from nltk.parse.featurechart import ( + FeatureChart, + FeatureChartParser, + FeatureTopDownInitRule, + FeatureTopDownPredictRule, + FeatureEmptyPredictRule, + FeatureBottomUpPredictRule, + FeatureBottomUpPredictCombineRule, + FeatureSingleEdgeFundamentalRule, +) + +# //////////////////////////////////////////////////////////// +# Incremental Chart +# //////////////////////////////////////////////////////////// + + +class IncrementalChart(Chart): + def initialize(self): + # A sequence of edge lists contained in this chart. + self._edgelists = tuple([] for x in self._positions()) + + # The set of child pointer lists associated with each edge. + self._edge_to_cpls = {} + + # Indexes mapping attribute values to lists of edges + # (used by select()). + self._indexes = {} + + def edges(self): + return list(self.iteredges()) + + def iteredges(self): + return (edge for edgelist in self._edgelists for edge in edgelist) + + def select(self, end, **restrictions): + edgelist = self._edgelists[end] + + # If there are no restrictions, then return all edges. + if restrictions == {}: + return iter(edgelist) + + # Find the index corresponding to the given restrictions. + restr_keys = sorted(restrictions.keys()) + restr_keys = tuple(restr_keys) + + # If it doesn't exist, then create it. + if restr_keys not in self._indexes: + self._add_index(restr_keys) + + vals = tuple(restrictions[key] for key in restr_keys) + return iter(self._indexes[restr_keys][end].get(vals, [])) + + def _add_index(self, restr_keys): + # Make sure it's a valid index. + for key in restr_keys: + if not hasattr(EdgeI, key): + raise ValueError('Bad restriction: %s' % key) + + # Create the index. + index = self._indexes[restr_keys] = tuple({} for x in self._positions()) + + # Add all existing edges to the index. + for end, edgelist in enumerate(self._edgelists): + this_index = index[end] + for edge in edgelist: + vals = tuple(getattr(edge, key)() for key in restr_keys) + this_index.setdefault(vals, []).append(edge) + + def _register_with_indexes(self, edge): + end = edge.end() + for (restr_keys, index) in self._indexes.items(): + vals = tuple(getattr(edge, key)() for key in restr_keys) + index[end].setdefault(vals, []).append(edge) + + def _append_edge(self, edge): + self._edgelists[edge.end()].append(edge) + + def _positions(self): + return range(self.num_leaves() + 1) + + +class FeatureIncrementalChart(IncrementalChart, FeatureChart): + def select(self, end, **restrictions): + edgelist = self._edgelists[end] + + # If there are no restrictions, then return all edges. + if restrictions == {}: + return iter(edgelist) + + # Find the index corresponding to the given restrictions. + restr_keys = sorted(restrictions.keys()) + restr_keys = tuple(restr_keys) + + # If it doesn't exist, then create it. + if restr_keys not in self._indexes: + self._add_index(restr_keys) + + vals = tuple( + self._get_type_if_possible(restrictions[key]) for key in restr_keys + ) + return iter(self._indexes[restr_keys][end].get(vals, [])) + + def _add_index(self, restr_keys): + # Make sure it's a valid index. 
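+        # Each restriction key must name an EdgeI accessor such as 'lhs',
+        # 'nextsym' or 'is_complete'; anything else is rejected below.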
+ for key in restr_keys: + if not hasattr(EdgeI, key): + raise ValueError('Bad restriction: %s' % key) + + # Create the index. + index = self._indexes[restr_keys] = tuple({} for x in self._positions()) + + # Add all existing edges to the index. + for end, edgelist in enumerate(self._edgelists): + this_index = index[end] + for edge in edgelist: + vals = tuple( + self._get_type_if_possible(getattr(edge, key)()) + for key in restr_keys + ) + this_index.setdefault(vals, []).append(edge) + + def _register_with_indexes(self, edge): + end = edge.end() + for (restr_keys, index) in self._indexes.items(): + vals = tuple( + self._get_type_if_possible(getattr(edge, key)()) for key in restr_keys + ) + index[end].setdefault(vals, []).append(edge) + + +# //////////////////////////////////////////////////////////// +# Incremental CFG Rules +# //////////////////////////////////////////////////////////// + + +class CompleteFundamentalRule(SingleEdgeFundamentalRule): + def _apply_incomplete(self, chart, grammar, left_edge): + end = left_edge.end() + # When the chart is incremental, we only have to look for + # empty complete edges here. + for right_edge in chart.select( + start=end, end=end, is_complete=True, lhs=left_edge.nextsym() + ): + new_edge = left_edge.move_dot_forward(right_edge.end()) + if chart.insert_with_backpointer(new_edge, left_edge, right_edge): + yield new_edge + + +class CompleterRule(CompleteFundamentalRule): + _fundamental_rule = CompleteFundamentalRule() + + def apply(self, chart, grammar, edge): + if not isinstance(edge, LeafEdge): + for new_edge in self._fundamental_rule.apply(chart, grammar, edge): + yield new_edge + + +class ScannerRule(CompleteFundamentalRule): + _fundamental_rule = CompleteFundamentalRule() + + def apply(self, chart, grammar, edge): + if isinstance(edge, LeafEdge): + for new_edge in self._fundamental_rule.apply(chart, grammar, edge): + yield new_edge + + +class PredictorRule(CachedTopDownPredictRule): + pass + + +class FilteredCompleteFundamentalRule(FilteredSingleEdgeFundamentalRule): + def apply(self, chart, grammar, edge): + # Since the Filtered rule only works for grammars without empty productions, + # we only have to bother with complete edges here. + if edge.is_complete(): + for new_edge in self._apply_complete(chart, grammar, edge): + yield new_edge + + +# //////////////////////////////////////////////////////////// +# Incremental FCFG Rules +# //////////////////////////////////////////////////////////// + + +class FeatureCompleteFundamentalRule(FeatureSingleEdgeFundamentalRule): + def _apply_incomplete(self, chart, grammar, left_edge): + fr = self._fundamental_rule + end = left_edge.end() + # When the chart is incremental, we only have to look for + # empty complete edges here. 
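+        # A complete edge spanning [end:end] is necessarily a zero-width
+        # (empty) edge, so this select() only returns empty completions.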
+ for right_edge in chart.select( + start=end, end=end, is_complete=True, lhs=left_edge.nextsym() + ): + for new_edge in fr.apply(chart, grammar, left_edge, right_edge): + yield new_edge + + +class FeatureCompleterRule(CompleterRule): + _fundamental_rule = FeatureCompleteFundamentalRule() + + +class FeatureScannerRule(ScannerRule): + _fundamental_rule = FeatureCompleteFundamentalRule() + + +class FeaturePredictorRule(FeatureTopDownPredictRule): + pass + + +# //////////////////////////////////////////////////////////// +# Incremental CFG Chart Parsers +# //////////////////////////////////////////////////////////// + +EARLEY_STRATEGY = [ + LeafInitRule(), + TopDownInitRule(), + CompleterRule(), + ScannerRule(), + PredictorRule(), +] +TD_INCREMENTAL_STRATEGY = [ + LeafInitRule(), + TopDownInitRule(), + CachedTopDownPredictRule(), + CompleteFundamentalRule(), +] +BU_INCREMENTAL_STRATEGY = [ + LeafInitRule(), + EmptyPredictRule(), + BottomUpPredictRule(), + CompleteFundamentalRule(), +] +BU_LC_INCREMENTAL_STRATEGY = [ + LeafInitRule(), + EmptyPredictRule(), + BottomUpPredictCombineRule(), + CompleteFundamentalRule(), +] + +LC_INCREMENTAL_STRATEGY = [ + LeafInitRule(), + FilteredBottomUpPredictCombineRule(), + FilteredCompleteFundamentalRule(), +] + + +class IncrementalChartParser(ChartParser): + """ + An *incremental* chart parser implementing Jay Earley's + parsing algorithm: + + | For each index end in [0, 1, ..., N]: + | For each edge such that edge.end = end: + | If edge is incomplete and edge.next is not a part of speech: + | Apply PredictorRule to edge + | If edge is incomplete and edge.next is a part of speech: + | Apply ScannerRule to edge + | If edge is complete: + | Apply CompleterRule to edge + | Return any complete parses in the chart + """ + + def __init__( + self, + grammar, + strategy=BU_LC_INCREMENTAL_STRATEGY, + trace=0, + trace_chart_width=50, + chart_class=IncrementalChart, + ): + """ + Create a new Earley chart parser, that uses ``grammar`` to + parse texts. + + :type grammar: CFG + :param grammar: The grammar used to parse texts. + :type trace: int + :param trace: The level of tracing that should be used when + parsing a text. ``0`` will generate no tracing output; + and higher numbers will produce more verbose tracing + output. + :type trace_chart_width: int + :param trace_chart_width: The default total width reserved for + the chart in trace output. The remainder of each line will + be used to display edges. + :param chart_class: The class that should be used to create + the charts used by this parser. + """ + self._grammar = grammar + self._trace = trace + self._trace_chart_width = trace_chart_width + self._chart_class = chart_class + + self._axioms = [] + self._inference_rules = [] + for rule in strategy: + if rule.NUM_EDGES == 0: + self._axioms.append(rule) + elif rule.NUM_EDGES == 1: + self._inference_rules.append(rule) + else: + raise ValueError( + "Incremental inference rules must have " "NUM_EDGES == 0 or 1" + ) + + def chart_parse(self, tokens, trace=None): + if trace is None: + trace = self._trace + trace_new_edges = self._trace_new_edges + + tokens = list(tokens) + self._grammar.check_coverage(tokens) + chart = self._chart_class(tokens) + grammar = self._grammar + + # Width, for printing trace edges. 
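+        # Give every chart position an equal share of the configured width.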
+ trace_edge_width = self._trace_chart_width // (chart.num_leaves() + 1) + if trace: + print(chart.pretty_format_leaves(trace_edge_width)) + + for axiom in self._axioms: + new_edges = list(axiom.apply(chart, grammar)) + trace_new_edges(chart, axiom, new_edges, trace, trace_edge_width) + + inference_rules = self._inference_rules + for end in range(chart.num_leaves() + 1): + if trace > 1: + print("\n* Processing queue:", end, "\n") + agenda = list(chart.select(end=end)) + while agenda: + edge = agenda.pop() + for rule in inference_rules: + new_edges = list(rule.apply(chart, grammar, edge)) + trace_new_edges(chart, rule, new_edges, trace, trace_edge_width) + for new_edge in new_edges: + if new_edge.end() == end: + agenda.append(new_edge) + + return chart + + +class EarleyChartParser(IncrementalChartParser): + def __init__(self, grammar, **parser_args): + IncrementalChartParser.__init__(self, grammar, EARLEY_STRATEGY, **parser_args) + + +class IncrementalTopDownChartParser(IncrementalChartParser): + def __init__(self, grammar, **parser_args): + IncrementalChartParser.__init__( + self, grammar, TD_INCREMENTAL_STRATEGY, **parser_args + ) + + +class IncrementalBottomUpChartParser(IncrementalChartParser): + def __init__(self, grammar, **parser_args): + IncrementalChartParser.__init__( + self, grammar, BU_INCREMENTAL_STRATEGY, **parser_args + ) + + +class IncrementalBottomUpLeftCornerChartParser(IncrementalChartParser): + def __init__(self, grammar, **parser_args): + IncrementalChartParser.__init__( + self, grammar, BU_LC_INCREMENTAL_STRATEGY, **parser_args + ) + + +class IncrementalLeftCornerChartParser(IncrementalChartParser): + def __init__(self, grammar, **parser_args): + if not grammar.is_nonempty(): + raise ValueError( + "IncrementalLeftCornerParser only works for grammars " + "without empty productions." 
+ ) + IncrementalChartParser.__init__( + self, grammar, LC_INCREMENTAL_STRATEGY, **parser_args + ) + + +# //////////////////////////////////////////////////////////// +# Incremental FCFG Chart Parsers +# //////////////////////////////////////////////////////////// + +EARLEY_FEATURE_STRATEGY = [ + LeafInitRule(), + FeatureTopDownInitRule(), + FeatureCompleterRule(), + FeatureScannerRule(), + FeaturePredictorRule(), +] +TD_INCREMENTAL_FEATURE_STRATEGY = [ + LeafInitRule(), + FeatureTopDownInitRule(), + FeatureTopDownPredictRule(), + FeatureCompleteFundamentalRule(), +] +BU_INCREMENTAL_FEATURE_STRATEGY = [ + LeafInitRule(), + FeatureEmptyPredictRule(), + FeatureBottomUpPredictRule(), + FeatureCompleteFundamentalRule(), +] +BU_LC_INCREMENTAL_FEATURE_STRATEGY = [ + LeafInitRule(), + FeatureEmptyPredictRule(), + FeatureBottomUpPredictCombineRule(), + FeatureCompleteFundamentalRule(), +] + + +class FeatureIncrementalChartParser(IncrementalChartParser, FeatureChartParser): + def __init__( + self, + grammar, + strategy=BU_LC_INCREMENTAL_FEATURE_STRATEGY, + trace_chart_width=20, + chart_class=FeatureIncrementalChart, + **parser_args + ): + IncrementalChartParser.__init__( + self, + grammar, + strategy=strategy, + trace_chart_width=trace_chart_width, + chart_class=chart_class, + **parser_args + ) + + +class FeatureEarleyChartParser(FeatureIncrementalChartParser): + def __init__(self, grammar, **parser_args): + FeatureIncrementalChartParser.__init__( + self, grammar, EARLEY_FEATURE_STRATEGY, **parser_args + ) + + +class FeatureIncrementalTopDownChartParser(FeatureIncrementalChartParser): + def __init__(self, grammar, **parser_args): + FeatureIncrementalChartParser.__init__( + self, grammar, TD_INCREMENTAL_FEATURE_STRATEGY, **parser_args + ) + + +class FeatureIncrementalBottomUpChartParser(FeatureIncrementalChartParser): + def __init__(self, grammar, **parser_args): + FeatureIncrementalChartParser.__init__( + self, grammar, BU_INCREMENTAL_FEATURE_STRATEGY, **parser_args + ) + + +class FeatureIncrementalBottomUpLeftCornerChartParser(FeatureIncrementalChartParser): + def __init__(self, grammar, **parser_args): + FeatureIncrementalChartParser.__init__( + self, grammar, BU_LC_INCREMENTAL_FEATURE_STRATEGY, **parser_args + ) + + +# //////////////////////////////////////////////////////////// +# Demonstration +# //////////////////////////////////////////////////////////// + + +def demo( + print_times=True, + print_grammar=False, + print_trees=True, + trace=2, + sent='I saw John with a dog with my cookie', + numparses=5, +): + """ + A demonstration of the Earley parsers. + """ + import sys, time + from nltk.parse.chart import demo_grammar + + # The grammar for ChartParser and SteppingChartParser: + grammar = demo_grammar() + if print_grammar: + print("* Grammar") + print(grammar) + + # Tokenize the sample sentence. + print("* Sentence:") + print(sent) + tokens = sent.split() + print(tokens) + print() + + # Do the parsing. + earley = EarleyChartParser(grammar, trace=trace) + t = time.clock() + chart = earley.chart_parse(tokens) + parses = list(chart.parses(grammar.start())) + t = time.clock() - t + + # Print results. 
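+    # When numparses is given, fail loudly if the chart yielded a different count.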
+ if numparses: + assert len(parses) == numparses, 'Not all parses found' + if print_trees: + for tree in parses: + print(tree) + else: + print("Nr trees:", len(parses)) + if print_times: + print("Time:", t) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/evaluate.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/evaluate.py new file mode 100644 index 0000000..d79ad46 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/evaluate.py @@ -0,0 +1,131 @@ +# Natural Language Toolkit: evaluation of dependency parser +# +# Author: Long Duong +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT + +from __future__ import division + +import unicodedata + + +class DependencyEvaluator(object): + """ + Class for measuring labelled and unlabelled attachment score for + dependency parsing. Note that the evaluation ignores punctuation. + + >>> from nltk.parse import DependencyGraph, DependencyEvaluator + + >>> gold_sent = DependencyGraph(\""" + ... Pierre NNP 2 NMOD + ... Vinken NNP 8 SUB + ... , , 2 P + ... 61 CD 5 NMOD + ... years NNS 6 AMOD + ... old JJ 2 NMOD + ... , , 2 P + ... will MD 0 ROOT + ... join VB 8 VC + ... the DT 11 NMOD + ... board NN 9 OBJ + ... as IN 9 VMOD + ... a DT 15 NMOD + ... nonexecutive JJ 15 NMOD + ... director NN 12 PMOD + ... Nov. NNP 9 VMOD + ... 29 CD 16 NMOD + ... . . 9 VMOD + ... \""") + + >>> parsed_sent = DependencyGraph(\""" + ... Pierre NNP 8 NMOD + ... Vinken NNP 1 SUB + ... , , 3 P + ... 61 CD 6 NMOD + ... years NNS 6 AMOD + ... old JJ 2 NMOD + ... , , 3 AMOD + ... will MD 0 ROOT + ... join VB 8 VC + ... the DT 11 AMOD + ... board NN 9 OBJECT + ... as IN 9 NMOD + ... a DT 15 NMOD + ... nonexecutive JJ 15 NMOD + ... director NN 12 PMOD + ... Nov. NNP 9 VMOD + ... 29 CD 16 NMOD + ... . . 9 VMOD + ... \""") + + >>> de = DependencyEvaluator([parsed_sent],[gold_sent]) + >>> las, uas = de.eval() + >>> las + 0.6... + >>> uas + 0.8... + >>> abs(uas - 0.8) < 0.00001 + True + """ + + def __init__(self, parsed_sents, gold_sents): + """ + :param parsed_sents: the list of parsed_sents as the output of parser + :type parsed_sents: list(DependencyGraph) + """ + self._parsed_sents = parsed_sents + self._gold_sents = gold_sents + + def _remove_punct(self, inStr): + """ + Function to remove punctuation from Unicode string. + :param input: the input string + :return: Unicode string after remove all punctuation + """ + punc_cat = set(["Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po"]) + return "".join(x for x in inStr if unicodedata.category(x) not in punc_cat) + + def eval(self): + """ + Return the Labeled Attachment Score (LAS) and Unlabeled Attachment Score (UAS) + + :return : tuple(float,float) + """ + if len(self._parsed_sents) != len(self._gold_sents): + raise ValueError( + " Number of parsed sentence is different with number of gold sentence." 
+ ) + + corr = 0 + corrL = 0 + total = 0 + + for i in range(len(self._parsed_sents)): + parsed_sent_nodes = self._parsed_sents[i].nodes + gold_sent_nodes = self._gold_sents[i].nodes + + if len(parsed_sent_nodes) != len(gold_sent_nodes): + raise ValueError("Sentences must have equal length.") + + for parsed_node_address, parsed_node in parsed_sent_nodes.items(): + gold_node = gold_sent_nodes[parsed_node_address] + + if parsed_node["word"] is None: + continue + if parsed_node["word"] != gold_node["word"]: + raise ValueError("Sentence sequence is not matched.") + + # Ignore if word is punctuation by default + # if (parsed_sent[j]["word"] in string.punctuation): + if self._remove_punct(parsed_node["word"]) == "": + continue + + total += 1 + if parsed_node["head"] == gold_node["head"]: + corr += 1 + if parsed_node["rel"] == gold_node["rel"]: + corrL += 1 + + return corrL / total, corr / total diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/featurechart.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/featurechart.py new file mode 100644 index 0000000..ee9e274 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/featurechart.py @@ -0,0 +1,681 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Chart Parser for Feature-Based Grammars +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Rob Speer +# Peter Ljunglöf +# URL: +# For license information, see LICENSE.TXT + +""" +Extension of chart parsing implementation to handle grammars with +feature structures as nodes. +""" +from __future__ import print_function, unicode_literals + +from six.moves import range + +from nltk.compat import python_2_unicode_compatible +from nltk.featstruct import FeatStruct, unify, TYPE, find_variables +from nltk.sem import logic +from nltk.tree import Tree +from nltk.grammar import ( + Nonterminal, + Production, + CFG, + FeatStructNonterminal, + is_nonterminal, + is_terminal, +) +from nltk.parse.chart import ( + TreeEdge, + Chart, + ChartParser, + EdgeI, + FundamentalRule, + LeafInitRule, + EmptyPredictRule, + BottomUpPredictRule, + SingleEdgeFundamentalRule, + BottomUpPredictCombineRule, + CachedTopDownPredictRule, + TopDownInitRule, +) + +# //////////////////////////////////////////////////////////// +# Tree Edge +# //////////////////////////////////////////////////////////// + + +@python_2_unicode_compatible +class FeatureTreeEdge(TreeEdge): + """ + A specialized tree edge that allows shared variable bindings + between nonterminals on the left-hand side and right-hand side. + + Each ``FeatureTreeEdge`` contains a set of ``bindings``, i.e., a + dictionary mapping from variables to values. If the edge is not + complete, then these bindings are simply stored. However, if the + edge is complete, then the constructor applies these bindings to + every nonterminal in the edge whose symbol implements the + interface ``SubstituteBindingsI``. + """ + + def __init__(self, span, lhs, rhs, dot=0, bindings=None): + """ + Construct a new edge. If the edge is incomplete (i.e., if + ``dot alpha \* B1 beta][i:j]`` + - ``[B2 -> gamma \*][j:k]`` + + licenses the edge: + + - ``[A -> alpha B3 \* beta][i:j]`` + + assuming that B1 and B2 can be unified to generate B3. + """ + + def apply(self, chart, grammar, left_edge, right_edge): + # Make sure the rule is applicable. 
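+        # The left edge must be an incomplete FeatureTreeEdge that ends exactly
+        # where the complete right edge starts.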
+ if not ( + left_edge.end() == right_edge.start() + and left_edge.is_incomplete() + and right_edge.is_complete() + and isinstance(left_edge, FeatureTreeEdge) + ): + return + found = right_edge.lhs() + nextsym = left_edge.nextsym() + if isinstance(right_edge, FeatureTreeEdge): + if not is_nonterminal(nextsym): + return + if left_edge.nextsym()[TYPE] != right_edge.lhs()[TYPE]: + return + # Create a copy of the bindings. + bindings = left_edge.bindings() + # We rename vars here, because we don't want variables + # from the two different productions to match. + found = found.rename_variables(used_vars=left_edge.variables()) + # Unify B1 (left_edge.nextsym) with B2 (right_edge.lhs) to + # generate B3 (result). + result = unify(nextsym, found, bindings, rename_vars=False) + if result is None: + return + else: + if nextsym != found: + return + # Create a copy of the bindings. + bindings = left_edge.bindings() + + # Construct the new edge. + new_edge = left_edge.move_dot_forward(right_edge.end(), bindings) + + # Add it to the chart, with appropriate child pointers. + if chart.insert_with_backpointer(new_edge, left_edge, right_edge): + yield new_edge + + +class FeatureSingleEdgeFundamentalRule(SingleEdgeFundamentalRule): + """ + A specialized version of the completer / single edge fundamental rule + that operates on nonterminals whose symbols are ``FeatStructNonterminal``s. + Rather than simply comparing the nonterminals for equality, they are + unified. + """ + + _fundamental_rule = FeatureFundamentalRule() + + def _apply_complete(self, chart, grammar, right_edge): + fr = self._fundamental_rule + for left_edge in chart.select( + end=right_edge.start(), is_complete=False, nextsym=right_edge.lhs() + ): + for new_edge in fr.apply(chart, grammar, left_edge, right_edge): + yield new_edge + + def _apply_incomplete(self, chart, grammar, left_edge): + fr = self._fundamental_rule + for right_edge in chart.select( + start=left_edge.end(), is_complete=True, lhs=left_edge.nextsym() + ): + for new_edge in fr.apply(chart, grammar, left_edge, right_edge): + yield new_edge + + +# //////////////////////////////////////////////////////////// +# Top-Down Prediction +# //////////////////////////////////////////////////////////// + + +class FeatureTopDownInitRule(TopDownInitRule): + def apply(self, chart, grammar): + for prod in grammar.productions(lhs=grammar.start()): + new_edge = FeatureTreeEdge.from_production(prod, 0) + if chart.insert(new_edge, ()): + yield new_edge + + +class FeatureTopDownPredictRule(CachedTopDownPredictRule): + """ + A specialized version of the (cached) top down predict rule that operates + on nonterminals whose symbols are ``FeatStructNonterminal``s. Rather + than simply comparing the nonterminals for equality, they are + unified. + + The top down expand rule states that: + + - ``[A -> alpha \* B1 beta][i:j]`` + + licenses the edge: + + - ``[B2 -> \* gamma][j:j]`` + + for each grammar production ``B2 -> gamma``, assuming that B1 + and B2 can be unified. + """ + + def apply(self, chart, grammar, edge): + if edge.is_complete(): + return + nextsym, index = edge.nextsym(), edge.end() + if not is_nonterminal(nextsym): + return + + # If we've already applied this rule to an edge with the same + # next & end, and the chart & grammar have not changed, then + # just return (no new edges to add). 
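+        # The cache is keyed on the next symbol *with its bindings applied*, so
+        # a change in bindings triggers a fresh round of predictions.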
+ nextsym_with_bindings = edge.next_with_bindings() + done = self._done.get((nextsym_with_bindings, index), (None, None)) + if done[0] is chart and done[1] is grammar: + return + + for prod in grammar.productions(lhs=nextsym): + # If the left corner in the predicted production is + # leaf, it must match with the input. + if prod.rhs(): + first = prod.rhs()[0] + if is_terminal(first): + if index >= chart.num_leaves(): + continue + if first != chart.leaf(index): + continue + + # We rename vars here, because we don't want variables + # from the two different productions to match. + if unify(prod.lhs(), nextsym_with_bindings, rename_vars=True): + new_edge = FeatureTreeEdge.from_production(prod, edge.end()) + if chart.insert(new_edge, ()): + yield new_edge + + # Record the fact that we've applied this rule. + self._done[nextsym_with_bindings, index] = (chart, grammar) + + +# //////////////////////////////////////////////////////////// +# Bottom-Up Prediction +# //////////////////////////////////////////////////////////// + + +class FeatureBottomUpPredictRule(BottomUpPredictRule): + def apply(self, chart, grammar, edge): + if edge.is_incomplete(): + return + for prod in grammar.productions(rhs=edge.lhs()): + if isinstance(edge, FeatureTreeEdge): + _next = prod.rhs()[0] + if not is_nonterminal(_next): + continue + + new_edge = FeatureTreeEdge.from_production(prod, edge.start()) + if chart.insert(new_edge, ()): + yield new_edge + + +class FeatureBottomUpPredictCombineRule(BottomUpPredictCombineRule): + def apply(self, chart, grammar, edge): + if edge.is_incomplete(): + return + found = edge.lhs() + for prod in grammar.productions(rhs=found): + bindings = {} + if isinstance(edge, FeatureTreeEdge): + _next = prod.rhs()[0] + if not is_nonterminal(_next): + continue + + # We rename vars here, because we don't want variables + # from the two different productions to match. 
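+                # Rename the completed edge's variables away from everything the
+                # predicted production uses before attempting unification.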
+ used_vars = find_variables( + (prod.lhs(),) + prod.rhs(), fs_class=FeatStruct + ) + found = found.rename_variables(used_vars=used_vars) + + result = unify(_next, found, bindings, rename_vars=False) + if result is None: + continue + + new_edge = FeatureTreeEdge.from_production( + prod, edge.start() + ).move_dot_forward(edge.end(), bindings) + if chart.insert(new_edge, (edge,)): + yield new_edge + + +class FeatureEmptyPredictRule(EmptyPredictRule): + def apply(self, chart, grammar): + for prod in grammar.productions(empty=True): + for index in range(chart.num_leaves() + 1): + new_edge = FeatureTreeEdge.from_production(prod, index) + if chart.insert(new_edge, ()): + yield new_edge + + +# //////////////////////////////////////////////////////////// +# Feature Chart Parser +# //////////////////////////////////////////////////////////// + +TD_FEATURE_STRATEGY = [ + LeafInitRule(), + FeatureTopDownInitRule(), + FeatureTopDownPredictRule(), + FeatureSingleEdgeFundamentalRule(), +] +BU_FEATURE_STRATEGY = [ + LeafInitRule(), + FeatureEmptyPredictRule(), + FeatureBottomUpPredictRule(), + FeatureSingleEdgeFundamentalRule(), +] +BU_LC_FEATURE_STRATEGY = [ + LeafInitRule(), + FeatureEmptyPredictRule(), + FeatureBottomUpPredictCombineRule(), + FeatureSingleEdgeFundamentalRule(), +] + + +class FeatureChartParser(ChartParser): + def __init__( + self, + grammar, + strategy=BU_LC_FEATURE_STRATEGY, + trace_chart_width=20, + chart_class=FeatureChart, + **parser_args + ): + ChartParser.__init__( + self, + grammar, + strategy=strategy, + trace_chart_width=trace_chart_width, + chart_class=chart_class, + **parser_args + ) + + +class FeatureTopDownChartParser(FeatureChartParser): + def __init__(self, grammar, **parser_args): + FeatureChartParser.__init__(self, grammar, TD_FEATURE_STRATEGY, **parser_args) + + +class FeatureBottomUpChartParser(FeatureChartParser): + def __init__(self, grammar, **parser_args): + FeatureChartParser.__init__(self, grammar, BU_FEATURE_STRATEGY, **parser_args) + + +class FeatureBottomUpLeftCornerChartParser(FeatureChartParser): + def __init__(self, grammar, **parser_args): + FeatureChartParser.__init__( + self, grammar, BU_LC_FEATURE_STRATEGY, **parser_args + ) + + +# //////////////////////////////////////////////////////////// +# Instantiate Variable Chart +# //////////////////////////////////////////////////////////// + + +class InstantiateVarsChart(FeatureChart): + """ + A specialized chart that 'instantiates' variables whose names + start with '@', by replacing them with unique new variables. + In particular, whenever a complete edge is added to the chart, any + variables in the edge's ``lhs`` whose names start with '@' will be + replaced by unique new ``Variable``s. + """ + + def __init__(self, tokens): + FeatureChart.__init__(self, tokens) + + def initialize(self): + self._instantiated = set() + FeatureChart.initialize(self) + + def insert(self, edge, child_pointer_list): + if edge in self._instantiated: + return False + self.instantiate_edge(edge) + return FeatureChart.insert(self, edge, child_pointer_list) + + def instantiate_edge(self, edge): + """ + If the edge is a ``FeatureTreeEdge``, and it is complete, + then instantiate all variables whose names start with '@', + by replacing them with unique new variables. + + Note that instantiation is done in-place, since the + parsing algorithms might already hold a reference to + the edge for future use. + """ + # If the edge is a leaf, or is not complete, or is + # already in the chart, then just return it as-is. 
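+        # Leaf edges, incomplete edges and edges already recorded in the chart
+        # are left untouched.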
+ if not isinstance(edge, FeatureTreeEdge): + return + if not edge.is_complete(): + return + if edge in self._edge_to_cpls: + return + + # Get a list of variables that need to be instantiated. + # If there are none, then return as-is. + inst_vars = self.inst_vars(edge) + if not inst_vars: + return + + # Instantiate the edge! + self._instantiated.add(edge) + edge._lhs = edge.lhs().substitute_bindings(inst_vars) + + def inst_vars(self, edge): + return dict( + (var, logic.unique_variable()) + for var in edge.lhs().variables() + if var.name.startswith('@') + ) + + +# //////////////////////////////////////////////////////////// +# Demo +# //////////////////////////////////////////////////////////// + + +def demo_grammar(): + from nltk.grammar import FeatureGrammar + + return FeatureGrammar.fromstring( + """ +S -> NP VP +PP -> Prep NP +NP -> NP PP +VP -> VP PP +VP -> Verb NP +VP -> Verb +NP -> Det[pl=?x] Noun[pl=?x] +NP -> "John" +NP -> "I" +Det -> "the" +Det -> "my" +Det[-pl] -> "a" +Noun[-pl] -> "dog" +Noun[-pl] -> "cookie" +Verb -> "ate" +Verb -> "saw" +Prep -> "with" +Prep -> "under" +""" + ) + + +def demo( + print_times=True, + print_grammar=True, + print_trees=True, + print_sentence=True, + trace=1, + parser=FeatureChartParser, + sent='I saw John with a dog with my cookie', +): + import sys, time + + print() + grammar = demo_grammar() + if print_grammar: + print(grammar) + print() + print("*", parser.__name__) + if print_sentence: + print("Sentence:", sent) + tokens = sent.split() + t = time.clock() + cp = parser(grammar, trace=trace) + chart = cp.chart_parse(tokens) + trees = list(chart.parses(grammar.start())) + if print_times: + print("Time: %s" % (time.clock() - t)) + if print_trees: + for tree in trees: + print(tree) + else: + print("Nr trees:", len(trees)) + + +def run_profile(): + import profile + + profile.run('for i in range(1): demo()', '/tmp/profile.out') + import pstats + + p = pstats.Stats('/tmp/profile.out') + p.strip_dirs().sort_stats('time', 'cum').print_stats(60) + p.strip_dirs().sort_stats('cum', 'time').print_stats(60) + + +if __name__ == '__main__': + from nltk.data import load + + demo() + print() + grammar = load('grammars/book_grammars/feat0.fcfg') + cp = FeatureChartParser(grammar, trace=2) + sent = 'Kim likes children' + tokens = sent.split() + trees = cp.parse(tokens) + for tree in trees: + print(tree) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/generate.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/generate.py new file mode 100644 index 0000000..e0a7cb2 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/generate.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Generating from a CFG +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Peter Ljunglöf +# URL: +# For license information, see LICENSE.TXT +# +from __future__ import print_function + +import itertools +import sys +from nltk.grammar import Nonterminal + + +def generate(grammar, start=None, depth=None, n=None): + """ + Generates an iterator of all sentences from a CFG. + + :param grammar: The Grammar used to generate sentences. + :param start: The Nonterminal from which to start generate sentences. + :param depth: The maximal depth of the generated tree. + :param n: The maximum number of sentences to return. + :return: An iterator of lists of terminal tokens. 
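+
+    A minimal sketch (assuming ``grammar`` is an ``nltk.CFG``)::
+
+        for sentence in generate(grammar, depth=5, n=10):
+            print(' '.join(sentence))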
+ """ + if not start: + start = grammar.start() + if depth is None: + depth = sys.maxsize + + iter = _generate_all(grammar, [start], depth) + + if n: + iter = itertools.islice(iter, n) + + return iter + + +def _generate_all(grammar, items, depth): + if items: + try: + for frag1 in _generate_one(grammar, items[0], depth): + for frag2 in _generate_all(grammar, items[1:], depth): + yield frag1 + frag2 + except RuntimeError as _error: + if _error.message == "maximum recursion depth exceeded": + # Helpful error message while still showing the recursion stack. + raise RuntimeError( + "The grammar has rule(s) that yield infinite recursion!!" + ) + else: + raise + else: + yield [] + + +def _generate_one(grammar, item, depth): + if depth > 0: + if isinstance(item, Nonterminal): + for prod in grammar.productions(lhs=item): + for frag in _generate_all(grammar, prod.rhs(), depth - 1): + yield frag + else: + yield [item] + + +demo_grammar = """ + S -> NP VP + NP -> Det N + PP -> P NP + VP -> 'slept' | 'saw' NP | 'walked' PP + Det -> 'the' | 'a' + N -> 'man' | 'park' | 'dog' + P -> 'in' | 'with' +""" + + +def demo(N=23): + from nltk.grammar import CFG + + print('Generating the first %d sentences for demo grammar:' % (N,)) + print(demo_grammar) + grammar = CFG.fromstring(demo_grammar) + for n, sent in enumerate(generate(grammar, n=N), 1): + print('%3d. %s' % (n, ' '.join(sent))) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/malt.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/malt.py new file mode 100644 index 0000000..c588b5c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/malt.py @@ -0,0 +1,399 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Interface to MaltParser +# +# Author: Dan Garrette +# Contributor: Liling Tan, Mustufain, osamamukhtar11 +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT + +from __future__ import print_function, unicode_literals + +import os +import sys +import tempfile +import subprocess +import inspect + +from six import text_type + +from nltk.data import ZipFilePathPointer +from nltk.internals import find_dir, find_file, find_jars_within_path + +from nltk.parse.api import ParserI +from nltk.parse.dependencygraph import DependencyGraph +from nltk.parse.util import taggedsents_to_conll + + +def malt_regex_tagger(): + from nltk.tag import RegexpTagger + + _tagger = RegexpTagger( + [ + (r'\.$', '.'), + (r'\,$', ','), + (r'\?$', '?'), # fullstop, comma, Qmark + (r'\($', '('), + (r'\)$', ')'), # round brackets + (r'\[$', '['), + (r'\]$', ']'), # square brackets + (r'^-?[0-9]+(.[0-9]+)?$', 'CD'), # cardinal numbers + (r'(The|the|A|a|An|an)$', 'DT'), # articles + (r'(He|he|She|she|It|it|I|me|Me|You|you)$', 'PRP'), # pronouns + (r'(His|his|Her|her|Its|its)$', 'PRP$'), # possesive + (r'(my|Your|your|Yours|yours)$', 'PRP$'), # possesive + (r'(on|On|in|In|at|At|since|Since)$', 'IN'), # time prepopsitions + (r'(for|For|ago|Ago|before|Before)$', 'IN'), # time prepopsitions + (r'(till|Till|until|Until)$', 'IN'), # time prepopsitions + (r'(by|By|beside|Beside)$', 'IN'), # space prepopsitions + (r'(under|Under|below|Below)$', 'IN'), # space prepopsitions + (r'(over|Over|above|Above)$', 'IN'), # space prepopsitions + (r'(across|Across|through|Through)$', 'IN'), # space prepopsitions + (r'(into|Into|towards|Towards)$', 'IN'), # space prepopsitions + (r'(onto|Onto|from|From)$', 'IN'), # space prepopsitions + (r'.*able$', 'JJ'), # adjectives + (r'.*ness$', 'NN'), # 
nouns formed from adjectives + (r'.*ly$', 'RB'), # adverbs + (r'.*s$', 'NNS'), # plural nouns + (r'.*ing$', 'VBG'), # gerunds + (r'.*ed$', 'VBD'), # past tense verbs + (r'.*', 'NN'), # nouns (default) + ] + ) + return _tagger.tag + + +def find_maltparser(parser_dirname): + """ + A module to find MaltParser .jar file and its dependencies. + """ + if os.path.exists(parser_dirname): # If a full path is given. + _malt_dir = parser_dirname + else: # Try to find path to maltparser directory in environment variables. + _malt_dir = find_dir(parser_dirname, env_vars=('MALT_PARSER',)) + # Checks that that the found directory contains all the necessary .jar + malt_dependencies = ['', '', ''] + _malt_jars = set(find_jars_within_path(_malt_dir)) + _jars = set(os.path.split(jar)[1] for jar in _malt_jars) + malt_dependencies = set(['log4j.jar', 'libsvm.jar', 'liblinear-1.8.jar']) + + assert malt_dependencies.issubset(_jars) + assert any( + filter(lambda i: i.startswith('maltparser-') and i.endswith('.jar'), _jars) + ) + return list(_malt_jars) + + +def find_malt_model(model_filename): + """ + A module to find pre-trained MaltParser model. + """ + if model_filename is None: + return 'malt_temp.mco' + elif os.path.exists(model_filename): # If a full path is given. + return model_filename + else: # Try to find path to malt model in environment variables. + return find_file(model_filename, env_vars=('MALT_MODEL',), verbose=False) + + +class MaltParser(ParserI): + """ + A class for dependency parsing with MaltParser. The input is the paths to: + - a maltparser directory + - (optionally) the path to a pre-trained MaltParser .mco model file + - (optionally) the tagger to use for POS tagging before parsing + - (optionally) additional Java arguments + + Example: + >>> from nltk.parse import malt + >>> # With MALT_PARSER and MALT_MODEL environment set. + >>> mp = malt.MaltParser('maltparser-1.7.2', 'engmalt.linear-1.7.mco') # doctest: +SKIP + >>> mp.parse_one('I shot an elephant in my pajamas .'.split()).tree() # doctest: +SKIP + (shot I (elephant an) (in (pajamas my)) .) + >>> # Without MALT_PARSER and MALT_MODEL environment. + >>> mp = malt.MaltParser('/home/user/maltparser-1.7.2/', '/home/user/engmalt.linear-1.7.mco') # doctest: +SKIP + >>> mp.parse_one('I shot an elephant in my pajamas .'.split()).tree() # doctest: +SKIP + (shot I (elephant an) (in (pajamas my)) .) + """ + + def __init__( + self, + parser_dirname, + model_filename=None, + tagger=None, + additional_java_args=None, + ): + """ + An interface for parsing with the Malt Parser. + + :param parser_dirname: The path to the maltparser directory that + contains the maltparser-1.x.jar + :type parser_dirname: str + :param model_filename: The name of the pre-trained model with .mco file + extension. If provided, training will not be required. + (see http://www.maltparser.org/mco/mco.html and + see http://www.patful.com/chalk/node/185) + :type model_filename: str + :param tagger: The tagger used to POS tag the raw string before + formatting to CONLL format. It should behave like `nltk.pos_tag` + :type tagger: function + :param additional_java_args: This is the additional Java arguments that + one can use when calling Maltparser, usually this is the heapsize + limits, e.g. `additional_java_args=['-Xmx1024m']` + (see http://goo.gl/mpDBvQ) + :type additional_java_args: list + """ + + # Find all the necessary jar files for MaltParser. + self.malt_jars = find_maltparser(parser_dirname) + # Initialize additional java arguments. 
+ self.additional_java_args = ( + additional_java_args if additional_java_args is not None else [] + ) + # Initialize model. + self.model = find_malt_model(model_filename) + self._trained = self.model != 'malt_temp.mco' + # Set the working_dir parameters i.e. `-w` from MaltParser's option. + self.working_dir = tempfile.gettempdir() + # Initialize POS tagger. + self.tagger = tagger if tagger is not None else malt_regex_tagger() + + def parse_tagged_sents(self, sentences, verbose=False, top_relation_label='null'): + """ + Use MaltParser to parse multiple POS tagged sentences. Takes multiple + sentences where each sentence is a list of (word, tag) tuples. + The sentences must have already been tokenized and tagged. + + :param sentences: Input sentences to parse + :type sentence: list(list(tuple(str, str))) + :return: iter(iter(``DependencyGraph``)) the dependency graph + representation of each sentence + """ + if not self._trained: + raise Exception("Parser has not been trained. Call train() first.") + + with tempfile.NamedTemporaryFile( + prefix='malt_input.conll.', dir=self.working_dir, mode='w', delete=False + ) as input_file: + with tempfile.NamedTemporaryFile( + prefix='malt_output.conll.', + dir=self.working_dir, + mode='w', + delete=False, + ) as output_file: + # Convert list of sentences to CONLL format. + for line in taggedsents_to_conll(sentences): + input_file.write(text_type(line)) + input_file.close() + + # Generate command to run maltparser. + cmd = self.generate_malt_command( + input_file.name, output_file.name, mode="parse" + ) + + # This is a maltparser quirk, it needs to be run + # where the model file is. otherwise it goes into an awkward + # missing .jars or strange -w working_dir problem. + _current_path = os.getcwd() # Remembers the current path. + try: # Change to modelfile path + os.chdir(os.path.split(self.model)[0]) + except: + pass + ret = self._execute(cmd, verbose) # Run command. + os.chdir(_current_path) # Change back to current path. + + if ret is not 0: + raise Exception( + "MaltParser parsing (%s) failed with exit " + "code %d" % (' '.join(cmd), ret) + ) + + # Must return iter(iter(Tree)) + with open(output_file.name) as infile: + for tree_str in infile.read().split('\n\n'): + yield ( + iter( + [ + DependencyGraph( + tree_str, top_relation_label=top_relation_label + ) + ] + ) + ) + + os.remove(input_file.name) + os.remove(output_file.name) + + def parse_sents(self, sentences, verbose=False, top_relation_label='null'): + """ + Use MaltParser to parse multiple sentences. + Takes a list of sentences, where each sentence is a list of words. + Each sentence will be automatically tagged with this + MaltParser instance's tagger. + + :param sentences: Input sentences to parse + :type sentence: list(list(str)) + :return: iter(DependencyGraph) + """ + tagged_sentences = (self.tagger(sentence) for sentence in sentences) + return self.parse_tagged_sents( + tagged_sentences, verbose, top_relation_label=top_relation_label + ) + + def generate_malt_command(self, inputfilename, outputfilename=None, mode=None): + """ + This function generates the maltparser command use at the terminal. 
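+
+        For parsing, the generated command has roughly the following shape
+        (the jar and file names here are illustrative only; the classpath
+        separator is ``;`` on Windows and ``:`` elsewhere, as below)::
+
+            java [additional_java_args] -cp maltparser.jar:liblinear-1.8.jar \
+                org.maltparser.Malt -c model.mco -i input.conll -o output.conll -m parse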
+ + :param inputfilename: path to the input file + :type inputfilename: str + :param outputfilename: path to the output file + :type outputfilename: str + """ + + cmd = ['java'] + cmd += self.additional_java_args # Adds additional java arguments + # Joins classpaths with ";" if on Windows and on Linux/Mac use ":" + classpaths_separator = ';' if sys.platform.startswith('win') else ':' + cmd += [ + '-cp', + classpaths_separator.join(self.malt_jars), + ] # Adds classpaths for jars + cmd += ['org.maltparser.Malt'] # Adds the main function. + + # Adds the model file. + if os.path.exists(self.model): # when parsing + cmd += ['-c', os.path.split(self.model)[-1]] + else: # when learning + cmd += ['-c', self.model] + + cmd += ['-i', inputfilename] + if mode == 'parse': + cmd += ['-o', outputfilename] + cmd += ['-m', mode] # mode use to generate parses. + return cmd + + @staticmethod + def _execute(cmd, verbose=False): + output = None if verbose else subprocess.PIPE + p = subprocess.Popen(cmd, stdout=output, stderr=output) + return p.wait() + + def train(self, depgraphs, verbose=False): + """ + Train MaltParser from a list of ``DependencyGraph`` objects + + :param depgraphs: list of ``DependencyGraph`` objects for training input data + :type depgraphs: DependencyGraph + """ + + # Write the conll_str to malt_train.conll file in /tmp/ + with tempfile.NamedTemporaryFile( + prefix='malt_train.conll.', dir=self.working_dir, mode='w', delete=False + ) as input_file: + input_str = '\n'.join(dg.to_conll(10) for dg in depgraphs) + input_file.write(text_type(input_str)) + # Trains the model with the malt_train.conll + self.train_from_file(input_file.name, verbose=verbose) + # Removes the malt_train.conll once training finishes. + os.remove(input_file.name) + + def train_from_file(self, conll_file, verbose=False): + """ + Train MaltParser from a file + :param conll_file: str for the filename of the training input data + :type conll_file: str + """ + + # If conll_file is a ZipFilePathPointer, + # then we need to do some extra massaging + if isinstance(conll_file, ZipFilePathPointer): + with tempfile.NamedTemporaryFile( + prefix='malt_train.conll.', dir=self.working_dir, mode='w', delete=False + ) as input_file: + with conll_file.open() as conll_input_file: + conll_str = conll_input_file.read() + input_file.write(text_type(conll_str)) + return self.train_from_file(input_file.name, verbose=verbose) + + # Generate command to run maltparser. + cmd = self.generate_malt_command(conll_file, mode="learn") + ret = self._execute(cmd, verbose) + if ret != 0: + raise Exception( + "MaltParser training (%s) failed with exit " + "code %d" % (' '.join(cmd), ret) + ) + self._trained = True + + +if __name__ == '__main__': + ''' + A demonstration function to show how NLTK users can use the malt parser API. + + >>> from nltk import pos_tag + >>> assert 'MALT_PARSER' in os.environ, str( + ... "Please set MALT_PARSER in your global environment, e.g.:\n" + ... "$ export MALT_PARSER='/home/user/maltparser-1.7.2/'") + >>> + >>> assert 'MALT_MODEL' in os.environ, str( + ... "Please set MALT_MODEL in your global environment, e.g.:\n" + ... "$ export MALT_MODEL='/home/user/engmalt.linear-1.7.mco'") + >>> + >>> _dg1_str = str("1 John _ NNP _ _ 2 SUBJ _ _\n" + ... "2 sees _ VB _ _ 0 ROOT _ _\n" + ... "3 a _ DT _ _ 4 SPEC _ _\n" + ... "4 dog _ NN _ _ 2 OBJ _ _\n" + ... "5 . _ . _ _ 2 PUNCT _ _\n") + >>> + >>> + >>> _dg2_str = str("1 John _ NNP _ _ 2 SUBJ _ _\n" + ... "2 walks _ VB _ _ 0 ROOT _ _\n" + ... "3 . _ . 
_ _ 2 PUNCT _ _\n") + >>> dg1 = DependencyGraph(_dg1_str) + >>> dg2 = DependencyGraph(_dg2_str) + >>> # Initialize a MaltParser object + >>> parser_dirname = 'maltparser-1.7.2' + >>> mp = MaltParser(parser_dirname=parser_dirname) + >>> + >>> # Trains a model. + >>> mp.train([dg1,dg2], verbose=False) + >>> sent1 = ['John','sees','Mary', '.'] + >>> sent2 = ['John', 'walks', 'a', 'dog', '.'] + >>> + >>> # Parse a single sentence. + >>> parsed_sent1 = mp.parse_one(sent1) + >>> parsed_sent2 = mp.parse_one(sent2) + >>> print (parsed_sent1.tree()) + (sees John Mary .) + >>> print (parsed_sent2.tree()) + (walks John (dog a) .) + >>> + >>> # Parsing multiple sentences. + >>> sentences = [sent1,sent2] + >>> parsed_sents = mp.parse_sents(sentences) + >>> print(next(next(parsed_sents)).tree()) + (sees John Mary .) + >>> print(next(next(parsed_sents)).tree()) + (walks John (dog a) .) + >>> + >>> # Initialize a MaltParser object with an English pre-trained model. + >>> parser_dirname = 'maltparser-1.7.2' + >>> model_name = 'engmalt.linear-1.7.mco' + >>> mp = MaltParser(parser_dirname=parser_dirname, model_filename=model_name, tagger=pos_tag) + >>> sent1 = 'I shot an elephant in my pajamas .'.split() + >>> sent2 = 'Time flies like banana .'.split() + >>> # Parse a single sentence. + >>> print(mp.parse_one(sent1).tree()) + (shot I (elephant an) (in (pajamas my)) .) + # Parsing multiple sentences + >>> sentences = [sent1,sent2] + >>> parsed_sents = mp.parse_sents(sentences) + >>> print(next(next(parsed_sents)).tree()) + (shot I (elephant an) (in (pajamas my)) .) + >>> print(next(next(parsed_sents)).tree()) + (flies Time (like banana) .) + ''' + import doctest + + doctest.testmod() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/nonprojectivedependencyparser.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/nonprojectivedependencyparser.py new file mode 100644 index 0000000..9b8bddc --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/nonprojectivedependencyparser.py @@ -0,0 +1,778 @@ +# Natural Language Toolkit: Dependency Grammars +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Jason Narad +# +# URL: +# For license information, see LICENSE.TXT +# +from __future__ import print_function + +import math +import logging + +from six.moves import range + +from nltk.parse.dependencygraph import DependencyGraph + +logger = logging.getLogger(__name__) + +################################################################# +# DependencyScorerI - Interface for Graph-Edge Weight Calculation +################################################################# + + +class DependencyScorerI(object): + """ + A scorer for calculated the weights on the edges of a weighted + dependency graph. This is used by a + ``ProbabilisticNonprojectiveParser`` to initialize the edge + weights of a ``DependencyGraph``. While typically this would be done + by training a binary classifier, any class that can return a + multidimensional list representation of the edge weights can + implement this interface. As such, it has no necessary + fields. + """ + + def __init__(self): + if self.__class__ == DependencyScorerI: + raise TypeError('DependencyScorerI is an abstract interface') + + def train(self, graphs): + """ + :type graphs: list(DependencyGraph) + :param graphs: A list of dependency graphs to train the scorer. + Typically the edges present in the graphs can be used as + positive training examples, and the edges not present as negative + examples. 
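+
+        A minimal, purely illustrative implementation of this interface (it
+        mirrors the score-matrix shape used by ``DemoScorer`` below and is
+        not part of the original module) could look like::
+
+            class UniformScorer(DependencyScorerI):
+                def train(self, graphs):
+                    pass  # nothing to estimate
+
+                def score(self, graph):
+                    n = len(graph.nodes)
+                    # One candidate score per head -> dependent arc; no arcs
+                    # into the artificial root (column 0) or from a node to
+                    # itself, mirroring DemoScorer below.
+                    return [[[] if j == 0 or i == j else [1.0]
+                             for j in range(n)]
+                            for i in range(n)]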
+ """ + raise NotImplementedError() + + def score(self, graph): + """ + :type graph: DependencyGraph + :param graph: A dependency graph whose set of edges need to be + scored. + :rtype: A three-dimensional list of numbers. + :return: The score is returned in a multidimensional(3) list, such + that the outer-dimension refers to the head, and the + inner-dimension refers to the dependencies. For instance, + scores[0][1] would reference the list of scores corresponding to + arcs from node 0 to node 1. The node's 'address' field can be used + to determine its number identification. + + For further illustration, a score list corresponding to Fig.2 of + Keith Hall's 'K-best Spanning Tree Parsing' paper: + scores = [[[], [5], [1], [1]], + [[], [], [11], [4]], + [[], [10], [], [5]], + [[], [8], [8], []]] + When used in conjunction with a MaxEntClassifier, each score would + correspond to the confidence of a particular edge being classified + with the positive training examples. + """ + raise NotImplementedError() + + +################################################################# +# NaiveBayesDependencyScorer +################################################################# + + +class NaiveBayesDependencyScorer(DependencyScorerI): + """ + A dependency scorer built around a MaxEnt classifier. In this + particular class that classifier is a ``NaiveBayesClassifier``. + It uses head-word, head-tag, child-word, and child-tag features + for classification. + + >>> from nltk.parse.dependencygraph import DependencyGraph, conll_data2 + + >>> graphs = [DependencyGraph(entry) for entry in conll_data2.split('\\n\\n') if entry] + >>> npp = ProbabilisticNonprojectiveParser() + >>> npp.train(graphs, NaiveBayesDependencyScorer()) + >>> parses = npp.parse(['Cathy', 'zag', 'hen', 'zwaaien', '.'], ['N', 'V', 'Pron', 'Adj', 'N', 'Punc']) + >>> len(list(parses)) + 1 + + """ + + def __init__(self): + pass # Do nothing without throwing error + + def train(self, graphs): + """ + Trains a ``NaiveBayesClassifier`` using the edges present in + graphs list as positive examples, the edges not present as + negative examples. Uses a feature vector of head-word, + head-tag, child-word, and child-tag. + + :type graphs: list(DependencyGraph) + :param graphs: A list of dependency graphs to train the scorer. + """ + + from nltk.classify import NaiveBayesClassifier + + # Create training labeled training examples + labeled_examples = [] + for graph in graphs: + for head_node in graph.nodes.values(): + for child_index, child_node in graph.nodes.items(): + if child_index in head_node['deps']: + label = "T" + else: + label = "F" + labeled_examples.append( + ( + dict( + a=head_node['word'], + b=head_node['tag'], + c=child_node['word'], + d=child_node['tag'], + ), + label, + ) + ) + + self.classifier = NaiveBayesClassifier.train(labeled_examples) + + def score(self, graph): + """ + Converts the graph into a feature-based representation of + each edge, and then assigns a score to each based on the + confidence of the classifier in assigning it to the + positive label. Scores are returned in a multidimensional list. + + :type graph: DependencyGraph + :param graph: A dependency graph to score. + :rtype: 3 dimensional list + :return: Edge scores for the graph parameter. 
+ """ + # Convert graph to feature representation + edges = [] + for head_node in graph.nodes.values(): + for child_node in graph.nodes.values(): + edges.append( + ( + dict( + a=head_node['word'], + b=head_node['tag'], + c=child_node['word'], + d=child_node['tag'], + ) + ) + ) + + # Score edges + edge_scores = [] + row = [] + count = 0 + for pdist in self.classifier.prob_classify_many(edges): + logger.debug('%.4f %.4f', pdist.prob('T'), pdist.prob('F')) + # smoothing in case the probability = 0 + row.append([math.log(pdist.prob("T") + 0.00000000001)]) + count += 1 + if count == len(graph.nodes): + edge_scores.append(row) + row = [] + count = 0 + return edge_scores + + +################################################################# +# A Scorer for Demo Purposes +################################################################# +# A short class necessary to show parsing example from paper +class DemoScorer(DependencyScorerI): + def train(self, graphs): + print('Training...') + + def score(self, graph): + # scores for Keith Hall 'K-best Spanning Tree Parsing' paper + return [ + [[], [5], [1], [1]], + [[], [], [11], [4]], + [[], [10], [], [5]], + [[], [8], [8], []], + ] + + +################################################################# +# Non-Projective Probabilistic Parsing +################################################################# + + +class ProbabilisticNonprojectiveParser(object): + """A probabilistic non-projective dependency parser. + + Nonprojective dependencies allows for "crossing branches" in the parse tree + which is necessary for representing particular linguistic phenomena, or even + typical parses in some languages. This parser follows the MST parsing + algorithm, outlined in McDonald(2005), which likens the search for the best + non-projective parse to finding the maximum spanning tree in a weighted + directed graph. + + >>> class Scorer(DependencyScorerI): + ... def train(self, graphs): + ... pass + ... + ... def score(self, graph): + ... return [ + ... [[], [5], [1], [1]], + ... [[], [], [11], [4]], + ... [[], [10], [], [5]], + ... [[], [8], [8], []], + ... ] + + + >>> npp = ProbabilisticNonprojectiveParser() + >>> npp.train([], Scorer()) + + >>> parses = npp.parse(['v1', 'v2', 'v3'], [None, None, None]) + >>> len(list(parses)) + 1 + + Rule based example + ------------------ + + >>> from nltk.grammar import DependencyGrammar + + >>> grammar = DependencyGrammar.fromstring(''' + ... 'taught' -> 'play' | 'man' + ... 'man' -> 'the' | 'in' + ... 'in' -> 'corner' + ... 'corner' -> 'the' + ... 'play' -> 'golf' | 'dachshund' | 'to' + ... 'dachshund' -> 'his' + ... ''') + + >>> ndp = NonprojectiveDependencyParser(grammar) + >>> parses = ndp.parse(['the', 'man', 'in', 'the', 'corner', 'taught', 'his', 'dachshund', 'to', 'play', 'golf']) + >>> len(list(parses)) + 4 + + """ + + def __init__(self): + """ + Creates a new non-projective parser. + """ + logging.debug('initializing prob. nonprojective...') + + def train(self, graphs, dependency_scorer): + """ + Trains a ``DependencyScorerI`` from a set of ``DependencyGraph`` objects, + and establishes this as the parser's scorer. This is used to + initialize the scores on a ``DependencyGraph`` during the parsing + procedure. + + :type graphs: list(DependencyGraph) + :param graphs: A list of dependency graphs to train the scorer. + :type dependency_scorer: DependencyScorerI + :param dependency_scorer: A scorer which implements the + ``DependencyScorerI`` interface. 
+ """ + self._scorer = dependency_scorer + self._scorer.train(graphs) + + def initialize_edge_scores(self, graph): + """ + Assigns a score to every edge in the ``DependencyGraph`` graph. + These scores are generated via the parser's scorer which + was assigned during the training process. + + :type graph: DependencyGraph + :param graph: A dependency graph to assign scores to. + """ + self.scores = self._scorer.score(graph) + + def collapse_nodes(self, new_node, cycle_path, g_graph, b_graph, c_graph): + """ + Takes a list of nodes that have been identified to belong to a cycle, + and collapses them into on larger node. The arcs of all nodes in + the graph must be updated to account for this. + + :type new_node: Node. + :param new_node: A Node (Dictionary) to collapse the cycle nodes into. + :type cycle_path: A list of integers. + :param cycle_path: A list of node addresses, each of which is in the cycle. + :type g_graph, b_graph, c_graph: DependencyGraph + :param g_graph, b_graph, c_graph: Graphs which need to be updated. + """ + logger.debug('Collapsing nodes...') + # Collapse all cycle nodes into v_n+1 in G_Graph + for cycle_node_index in cycle_path: + g_graph.remove_by_address(cycle_node_index) + g_graph.add_node(new_node) + g_graph.redirect_arcs(cycle_path, new_node['address']) + + def update_edge_scores(self, new_node, cycle_path): + """ + Updates the edge scores to reflect a collapse operation into + new_node. + + :type new_node: A Node. + :param new_node: The node which cycle nodes are collapsed into. + :type cycle_path: A list of integers. + :param cycle_path: A list of node addresses that belong to the cycle. + """ + logger.debug('cycle %s', cycle_path) + + cycle_path = self.compute_original_indexes(cycle_path) + + logger.debug('old cycle %s', cycle_path) + logger.debug('Prior to update: %s', self.scores) + + for i, row in enumerate(self.scores): + for j, column in enumerate(self.scores[i]): + logger.debug(self.scores[i][j]) + if j in cycle_path and i not in cycle_path and self.scores[i][j]: + subtract_val = self.compute_max_subtract_score(j, cycle_path) + + logger.debug('%s - %s', self.scores[i][j], subtract_val) + + new_vals = [] + for cur_val in self.scores[i][j]: + new_vals.append(cur_val - subtract_val) + + self.scores[i][j] = new_vals + + for i, row in enumerate(self.scores): + for j, cell in enumerate(self.scores[i]): + if i in cycle_path and j in cycle_path: + self.scores[i][j] = [] + + logger.debug('After update: %s', self.scores) + + def compute_original_indexes(self, new_indexes): + """ + As nodes are collapsed into others, they are replaced + by the new node in the graph, but it's still necessary + to keep track of what these original nodes were. This + takes a list of node addresses and replaces any collapsed + node addresses with their original addresses. + + :type new_indexes: A list of integers. + :param new_indexes: A list of node addresses to check for + subsumed nodes. + """ + swapped = True + while swapped: + originals = [] + swapped = False + for new_index in new_indexes: + if new_index in self.inner_nodes: + for old_val in self.inner_nodes[new_index]: + if old_val not in originals: + originals.append(old_val) + swapped = True + else: + originals.append(new_index) + new_indexes = originals + return new_indexes + + def compute_max_subtract_score(self, column_index, cycle_indexes): + """ + When updating scores the score of the highest-weighted incoming + arc is subtracted upon collapse. This returns the correct + amount to subtract from that edge. 
+ + :type column_index: integer. + :param column_index: A index representing the column of incoming arcs + to a particular node being updated + :type cycle_indexes: A list of integers. + :param cycle_indexes: Only arcs from cycle nodes are considered. This + is a list of such nodes addresses. + """ + max_score = -100000 + for row_index in cycle_indexes: + for subtract_val in self.scores[row_index][column_index]: + if subtract_val > max_score: + max_score = subtract_val + return max_score + + def best_incoming_arc(self, node_index): + """ + Returns the source of the best incoming arc to the + node with address: node_index + + :type node_index: integer. + :param node_index: The address of the 'destination' node, + the node that is arced to. + """ + originals = self.compute_original_indexes([node_index]) + logger.debug('originals: %s', originals) + + max_arc = None + max_score = None + for row_index in range(len(self.scores)): + for col_index in range(len(self.scores[row_index])): + # print self.scores[row_index][col_index] + if col_index in originals and ( + max_score is None or self.scores[row_index][col_index] > max_score + ): + max_score = self.scores[row_index][col_index] + max_arc = row_index + logger.debug('%s, %s', row_index, col_index) + + logger.debug(max_score) + + for key in self.inner_nodes: + replaced_nodes = self.inner_nodes[key] + if max_arc in replaced_nodes: + return key + + return max_arc + + def original_best_arc(self, node_index): + originals = self.compute_original_indexes([node_index]) + max_arc = None + max_score = None + max_orig = None + for row_index in range(len(self.scores)): + for col_index in range(len(self.scores[row_index])): + if col_index in originals and ( + max_score is None or self.scores[row_index][col_index] > max_score + ): + max_score = self.scores[row_index][col_index] + max_arc = row_index + max_orig = col_index + return [max_arc, max_orig] + + def parse(self, tokens, tags): + """ + Parses a list of tokens in accordance to the MST parsing algorithm + for non-projective dependency parses. Assumes that the tokens to + be parsed have already been tagged and those tags are provided. Various + scoring methods can be used by implementing the ``DependencyScorerI`` + interface and passing it to the training algorithm. + + :type tokens: list(str) + :param tokens: A list of words or punctuation to be parsed. + :type tags: list(str) + :param tags: A list of tags corresponding by index to the words in the tokens list. + :return: An iterator of non-projective parses. 
+ :rtype: iter(DependencyGraph) + """ + self.inner_nodes = {} + + # Initialize g_graph + g_graph = DependencyGraph() + for index, token in enumerate(tokens): + g_graph.nodes[index + 1].update( + {'word': token, 'tag': tags[index], 'rel': 'NTOP', 'address': index + 1} + ) + # print (g_graph.nodes) + + # Fully connect non-root nodes in g_graph + g_graph.connect_graph() + original_graph = DependencyGraph() + for index, token in enumerate(tokens): + original_graph.nodes[index + 1].update( + {'word': token, 'tag': tags[index], 'rel': 'NTOP', 'address': index + 1} + ) + + b_graph = DependencyGraph() + c_graph = DependencyGraph() + + for index, token in enumerate(tokens): + c_graph.nodes[index + 1].update( + {'word': token, 'tag': tags[index], 'rel': 'NTOP', 'address': index + 1} + ) + + # Assign initial scores to g_graph edges + self.initialize_edge_scores(g_graph) + logger.debug(self.scores) + # Initialize a list of unvisited vertices (by node address) + unvisited_vertices = [vertex['address'] for vertex in c_graph.nodes.values()] + # Iterate over unvisited vertices + nr_vertices = len(tokens) + betas = {} + while unvisited_vertices: + # Mark current node as visited + current_vertex = unvisited_vertices.pop(0) + logger.debug('current_vertex: %s', current_vertex) + # Get corresponding node n_i to vertex v_i + current_node = g_graph.get_by_address(current_vertex) + logger.debug('current_node: %s', current_node) + # Get best in-edge node b for current node + best_in_edge = self.best_incoming_arc(current_vertex) + betas[current_vertex] = self.original_best_arc(current_vertex) + logger.debug('best in arc: %s --> %s', best_in_edge, current_vertex) + # b_graph = Union(b_graph, b) + for new_vertex in [current_vertex, best_in_edge]: + b_graph.nodes[new_vertex].update( + {'word': 'TEMP', 'rel': 'NTOP', 'address': new_vertex} + ) + b_graph.add_arc(best_in_edge, current_vertex) + # Beta(current node) = b - stored for parse recovery + # If b_graph contains a cycle, collapse it + cycle_path = b_graph.contains_cycle() + if cycle_path: + # Create a new node v_n+1 with address = len(nodes) + 1 + new_node = {'word': 'NONE', 'rel': 'NTOP', 'address': nr_vertices + 1} + # c_graph = Union(c_graph, v_n+1) + c_graph.add_node(new_node) + # Collapse all nodes in cycle C into v_n+1 + self.update_edge_scores(new_node, cycle_path) + self.collapse_nodes(new_node, cycle_path, g_graph, b_graph, c_graph) + for cycle_index in cycle_path: + c_graph.add_arc(new_node['address'], cycle_index) + # self.replaced_by[cycle_index] = new_node['address'] + + self.inner_nodes[new_node['address']] = cycle_path + + # Add v_n+1 to list of unvisited vertices + unvisited_vertices.insert(0, nr_vertices + 1) + + # increment # of nodes counter + nr_vertices += 1 + + # Remove cycle nodes from b_graph; B = B - cycle c + for cycle_node_address in cycle_path: + b_graph.remove_by_address(cycle_node_address) + + logger.debug('g_graph: %s', g_graph) + logger.debug('b_graph: %s', b_graph) + logger.debug('c_graph: %s', c_graph) + logger.debug('Betas: %s', betas) + logger.debug('replaced nodes %s', self.inner_nodes) + + # Recover parse tree + logger.debug('Final scores: %s', self.scores) + + logger.debug('Recovering parse...') + for i in range(len(tokens) + 1, nr_vertices + 1): + betas[betas[i][1]] = betas[i] + + logger.debug('Betas: %s', betas) + for node in original_graph.nodes.values(): + # TODO: It's dangerous to assume that deps it a dictionary + # because it's a default dictionary. 
Ideally, here we should not + # be concerned how dependencies are stored inside of a dependency + # graph. + node['deps'] = {} + for i in range(1, len(tokens) + 1): + original_graph.add_arc(betas[i][0], betas[i][1]) + + logger.debug('Done.') + yield original_graph + + +################################################################# +# Rule-based Non-Projective Parser +################################################################# + + +class NonprojectiveDependencyParser(object): + """ + A non-projective, rule-based, dependency parser. This parser + will return the set of all possible non-projective parses based on + the word-to-word relations defined in the parser's dependency + grammar, and will allow the branches of the parse tree to cross + in order to capture a variety of linguistic phenomena that a + projective parser will not. + """ + + def __init__(self, dependency_grammar): + """ + Creates a new ``NonprojectiveDependencyParser``. + + :param dependency_grammar: a grammar of word-to-word relations. + :type dependency_grammar: DependencyGrammar + """ + self._grammar = dependency_grammar + + def parse(self, tokens): + """ + Parses the input tokens with respect to the parser's grammar. Parsing + is accomplished by representing the search-space of possible parses as + a fully-connected directed graph. Arcs that would lead to ungrammatical + parses are removed and a lattice is constructed of length n, where n is + the number of input tokens, to represent all possible grammatical + traversals. All possible paths through the lattice are then enumerated + to produce the set of non-projective parses. + + param tokens: A list of tokens to parse. + type tokens: list(str) + return: An iterator of non-projective parses. + rtype: iter(DependencyGraph) + """ + # Create graph representation of tokens + self._graph = DependencyGraph() + + for index, token in enumerate(tokens): + self._graph.nodes[index] = { + 'word': token, + 'deps': [], + 'rel': 'NTOP', + 'address': index, + } + + for head_node in self._graph.nodes.values(): + deps = [] + for dep_node in self._graph.nodes.values(): + if ( + self._grammar.contains(head_node['word'], dep_node['word']) + and head_node['word'] != dep_node['word'] + ): + deps.append(dep_node['address']) + head_node['deps'] = deps + + # Create lattice of possible heads + roots = [] + possible_heads = [] + for i, word in enumerate(tokens): + heads = [] + for j, head in enumerate(tokens): + if (i != j) and self._grammar.contains(head, word): + heads.append(j) + if len(heads) == 0: + roots.append(i) + possible_heads.append(heads) + + # Set roots to attempt + if len(roots) < 2: + if len(roots) == 0: + for i in range(len(tokens)): + roots.append(i) + + # Traverse lattice + analyses = [] + for root in roots: + stack = [] + analysis = [[] for i in range(len(possible_heads))] + i = 0 + forward = True + while i >= 0: + if forward: + if len(possible_heads[i]) == 1: + analysis[i] = possible_heads[i][0] + elif len(possible_heads[i]) == 0: + analysis[i] = -1 + else: + head = possible_heads[i].pop() + analysis[i] = head + stack.append([i, head]) + if not forward: + index_on_stack = False + for stack_item in stack: + if stack_item[0] == i: + index_on_stack = True + orig_length = len(possible_heads[i]) + + if index_on_stack and orig_length == 0: + for j in range(len(stack) - 1, -1, -1): + stack_item = stack[j] + if stack_item[0] == i: + possible_heads[i].append(stack.pop(j)[1]) + + elif index_on_stack and orig_length > 0: + head = possible_heads[i].pop() + analysis[i] = head + 
stack.append([i, head]) + forward = True + + if i + 1 == len(possible_heads): + analyses.append(analysis[:]) + forward = False + if forward: + i += 1 + else: + i -= 1 + + # Filter parses + # ensure 1 root, every thing has 1 head + for analysis in analyses: + if analysis.count(-1) > 1: + # there are several root elements! + continue + + graph = DependencyGraph() + graph.root = graph.nodes[analysis.index(-1) + 1] + + for address, (token, head_index) in enumerate( + zip(tokens, analysis), start=1 + ): + head_address = head_index + 1 + + node = graph.nodes[address] + node.update({'word': token, 'address': address}) + + if head_address == 0: + rel = 'ROOT' + else: + rel = '' + graph.nodes[head_index + 1]['deps'][rel].append(address) + + # TODO: check for cycles + yield graph + + +################################################################# +# Demos +################################################################# + + +def demo(): + # hall_demo() + nonprojective_conll_parse_demo() + rule_based_demo() + + +def hall_demo(): + npp = ProbabilisticNonprojectiveParser() + npp.train([], DemoScorer()) + for parse_graph in npp.parse(['v1', 'v2', 'v3'], [None, None, None]): + print(parse_graph) + + +def nonprojective_conll_parse_demo(): + from nltk.parse.dependencygraph import conll_data2 + + graphs = [DependencyGraph(entry) for entry in conll_data2.split('\n\n') if entry] + npp = ProbabilisticNonprojectiveParser() + npp.train(graphs, NaiveBayesDependencyScorer()) + for parse_graph in npp.parse( + ['Cathy', 'zag', 'hen', 'zwaaien', '.'], ['N', 'V', 'Pron', 'Adj', 'N', 'Punc'] + ): + print(parse_graph) + + +def rule_based_demo(): + from nltk.grammar import DependencyGrammar + + grammar = DependencyGrammar.fromstring( + """ + 'taught' -> 'play' | 'man' + 'man' -> 'the' | 'in' + 'in' -> 'corner' + 'corner' -> 'the' + 'play' -> 'golf' | 'dachshund' | 'to' + 'dachshund' -> 'his' + """ + ) + print(grammar) + ndp = NonprojectiveDependencyParser(grammar) + graphs = ndp.parse( + [ + 'the', + 'man', + 'in', + 'the', + 'corner', + 'taught', + 'his', + 'dachshund', + 'to', + 'play', + 'golf', + ] + ) + print('Graphs:') + for graph in graphs: + print(graph) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/pchart.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/pchart.py new file mode 100644 index 0000000..924d9a6 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/pchart.py @@ -0,0 +1,581 @@ +# Natural Language Toolkit: Probabilistic Chart Parsers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird +# URL: +# For license information, see LICENSE.TXT + +""" +Classes and interfaces for associating probabilities with tree +structures that represent the internal organization of a text. The +probabilistic parser module defines ``BottomUpProbabilisticChartParser``. + +``BottomUpProbabilisticChartParser`` is an abstract class that implements +a bottom-up chart parser for ``PCFG`` grammars. It maintains a queue of edges, +and adds them to the chart one at a time. The ordering of this queue +is based on the probabilities associated with the edges, allowing the +parser to expand more likely edges before less likely ones. Each +subclass implements a different queue ordering, producing different +search strategies. Currently the following subclasses are defined: + + - ``InsideChartParser`` searches edges in decreasing order of + their trees' inside probabilities. + - ``RandomChartParser`` searches edges in random order. 
+ - ``LongestChartParser`` searches edges in decreasing order of their + location's length. + +The ``BottomUpProbabilisticChartParser`` constructor has an optional +argument beam_size. If non-zero, this controls the size of the beam +(aka the edge queue). This option is most useful with InsideChartParser. +""" +from __future__ import print_function, unicode_literals + +##////////////////////////////////////////////////////// +## Bottom-Up PCFG Chart Parser +##////////////////////////////////////////////////////// + +# [XX] This might not be implemented quite right -- it would be better +# to associate probabilities with child pointer lists. + +import random +from functools import reduce +from nltk.tree import Tree, ProbabilisticTree +from nltk.grammar import Nonterminal, PCFG + +from nltk.parse.api import ParserI +from nltk.parse.chart import Chart, LeafEdge, TreeEdge, AbstractChartRule +from nltk.compat import python_2_unicode_compatible + +# Probabilistic edges +class ProbabilisticLeafEdge(LeafEdge): + def prob(self): + return 1.0 + + +class ProbabilisticTreeEdge(TreeEdge): + def __init__(self, prob, *args, **kwargs): + TreeEdge.__init__(self, *args, **kwargs) + self._prob = prob + # two edges with different probabilities are not equal. + self._comparison_key = (self._comparison_key, prob) + + def prob(self): + return self._prob + + @staticmethod + def from_production(production, index, p): + return ProbabilisticTreeEdge( + p, (index, index), production.lhs(), production.rhs(), 0 + ) + + +# Rules using probabilistic edges +class ProbabilisticBottomUpInitRule(AbstractChartRule): + NUM_EDGES = 0 + + def apply(self, chart, grammar): + for index in range(chart.num_leaves()): + new_edge = ProbabilisticLeafEdge(chart.leaf(index), index) + if chart.insert(new_edge, ()): + yield new_edge + + +class ProbabilisticBottomUpPredictRule(AbstractChartRule): + NUM_EDGES = 1 + + def apply(self, chart, grammar, edge): + if edge.is_incomplete(): + return + for prod in grammar.productions(): + if edge.lhs() == prod.rhs()[0]: + new_edge = ProbabilisticTreeEdge.from_production( + prod, edge.start(), prod.prob() + ) + if chart.insert(new_edge, ()): + yield new_edge + + +class ProbabilisticFundamentalRule(AbstractChartRule): + NUM_EDGES = 2 + + def apply(self, chart, grammar, left_edge, right_edge): + # Make sure the rule is applicable. + if not ( + left_edge.end() == right_edge.start() + and left_edge.nextsym() == right_edge.lhs() + and left_edge.is_incomplete() + and right_edge.is_complete() + ): + return + + # Construct the new edge. + p = left_edge.prob() * right_edge.prob() + new_edge = ProbabilisticTreeEdge( + p, + span=(left_edge.start(), right_edge.end()), + lhs=left_edge.lhs(), + rhs=left_edge.rhs(), + dot=left_edge.dot() + 1, + ) + + # Add it to the chart, with appropriate child pointers. + changed_chart = False + for cpl1 in chart.child_pointer_lists(left_edge): + if chart.insert(new_edge, cpl1 + (right_edge,)): + changed_chart = True + + # If we changed the chart, then generate the edge. 
+ if changed_chart: + yield new_edge + + +@python_2_unicode_compatible +class SingleEdgeProbabilisticFundamentalRule(AbstractChartRule): + NUM_EDGES = 1 + + _fundamental_rule = ProbabilisticFundamentalRule() + + def apply(self, chart, grammar, edge1): + fr = self._fundamental_rule + if edge1.is_incomplete(): + # edge1 = left_edge; edge2 = right_edge + for edge2 in chart.select( + start=edge1.end(), is_complete=True, lhs=edge1.nextsym() + ): + for new_edge in fr.apply(chart, grammar, edge1, edge2): + yield new_edge + else: + # edge2 = left_edge; edge1 = right_edge + for edge2 in chart.select( + end=edge1.start(), is_complete=False, nextsym=edge1.lhs() + ): + for new_edge in fr.apply(chart, grammar, edge2, edge1): + yield new_edge + + def __str__(self): + return 'Fundamental Rule' + + +class BottomUpProbabilisticChartParser(ParserI): + """ + An abstract bottom-up parser for ``PCFG`` grammars that uses a ``Chart`` to + record partial results. ``BottomUpProbabilisticChartParser`` maintains + a queue of edges that can be added to the chart. This queue is + initialized with edges for each token in the text that is being + parsed. ``BottomUpProbabilisticChartParser`` inserts these edges into + the chart one at a time, starting with the most likely edges, and + proceeding to less likely edges. For each edge that is added to + the chart, it may become possible to insert additional edges into + the chart; these are added to the queue. This process continues + until enough complete parses have been generated, or until the + queue is empty. + + The sorting order for the queue is not specified by + ``BottomUpProbabilisticChartParser``. Different sorting orders will + result in different search strategies. The sorting order for the + queue is defined by the method ``sort_queue``; subclasses are required + to provide a definition for this method. + + :type _grammar: PCFG + :ivar _grammar: The grammar used to parse sentences. + :type _trace: int + :ivar _trace: The level of tracing output that should be generated + when parsing a text. + """ + + def __init__(self, grammar, beam_size=0, trace=0): + """ + Create a new ``BottomUpProbabilisticChartParser``, that uses + ``grammar`` to parse texts. + + :type grammar: PCFG + :param grammar: The grammar used to parse texts. + :type beam_size: int + :param beam_size: The maximum length for the parser's edge queue. + :type trace: int + :param trace: The level of tracing that should be used when + parsing a text. ``0`` will generate no tracing output; + and higher numbers will produce more verbose tracing + output. + """ + if not isinstance(grammar, PCFG): + raise ValueError("The grammar must be probabilistic PCFG") + self._grammar = grammar + self.beam_size = beam_size + self._trace = trace + + def grammar(self): + return self._grammar + + def trace(self, trace=2): + """ + Set the level of tracing output that should be generated when + parsing a text. + + :type trace: int + :param trace: The trace level. A trace level of ``0`` will + generate no tracing output; and higher trace levels will + produce more verbose tracing output. + :rtype: None + """ + self._trace = trace + + # TODO: change this to conform more with the standard ChartParser + def parse(self, tokens): + self._grammar.check_coverage(tokens) + chart = Chart(list(tokens)) + grammar = self._grammar + + # Chart parser rules. 
+ bu_init = ProbabilisticBottomUpInitRule() + bu = ProbabilisticBottomUpPredictRule() + fr = SingleEdgeProbabilisticFundamentalRule() + + # Our queue + queue = [] + + # Initialize the chart. + for edge in bu_init.apply(chart, grammar): + if self._trace > 1: + print( + ' %-50s [%s]' + % (chart.pretty_format_edge(edge, width=2), edge.prob()) + ) + queue.append(edge) + + while len(queue) > 0: + # Re-sort the queue. + self.sort_queue(queue, chart) + + # Prune the queue to the correct size if a beam was defined + if self.beam_size: + self._prune(queue, chart) + + # Get the best edge. + edge = queue.pop() + if self._trace > 0: + print( + ' %-50s [%s]' + % (chart.pretty_format_edge(edge, width=2), edge.prob()) + ) + + # Apply BU & FR to it. + queue.extend(bu.apply(chart, grammar, edge)) + queue.extend(fr.apply(chart, grammar, edge)) + + # Get a list of complete parses. + parses = list(chart.parses(grammar.start(), ProbabilisticTree)) + + # Assign probabilities to the trees. + prod_probs = {} + for prod in grammar.productions(): + prod_probs[prod.lhs(), prod.rhs()] = prod.prob() + for parse in parses: + self._setprob(parse, prod_probs) + + # Sort by probability + parses.sort(reverse=True, key=lambda tree: tree.prob()) + + return iter(parses) + + def _setprob(self, tree, prod_probs): + if tree.prob() is not None: + return + + # Get the prob of the CFG production. + lhs = Nonterminal(tree.label()) + rhs = [] + for child in tree: + if isinstance(child, Tree): + rhs.append(Nonterminal(child.label())) + else: + rhs.append(child) + prob = prod_probs[lhs, tuple(rhs)] + + # Get the probs of children. + for child in tree: + if isinstance(child, Tree): + self._setprob(child, prod_probs) + prob *= child.prob() + + tree.set_prob(prob) + + def sort_queue(self, queue, chart): + """ + Sort the given queue of ``Edge`` objects, placing the edge that should + be tried first at the beginning of the queue. This method + will be called after each ``Edge`` is added to the queue. + + :param queue: The queue of ``Edge`` objects to sort. Each edge in + this queue is an edge that could be added to the chart by + the fundamental rule; but that has not yet been added. + :type queue: list(Edge) + :param chart: The chart being used to parse the text. This + chart can be used to provide extra information for sorting + the queue. + :type chart: Chart + :rtype: None + """ + raise NotImplementedError() + + def _prune(self, queue, chart): + """ Discard items in the queue if the queue is longer than the beam.""" + if len(queue) > self.beam_size: + split = len(queue) - self.beam_size + if self._trace > 2: + for edge in queue[:split]: + print(' %-50s [DISCARDED]' % chart.pretty_format_edge(edge, 2)) + del queue[:split] + + +class InsideChartParser(BottomUpProbabilisticChartParser): + """ + A bottom-up parser for ``PCFG`` grammars that tries edges in descending + order of the inside probabilities of their trees. The "inside + probability" of a tree is simply the + probability of the entire tree, ignoring its context. In + particular, the inside probability of a tree generated by + production *p* with children *c[1], c[2], ..., c[n]* is + *P(p)P(c[1])P(c[2])...P(c[n])*; and the inside + probability of a token is 1 if it is present in the text, and 0 if + it is absent. + + This sorting order results in a type of lowest-cost-first search + strategy. + """ + + # Inherit constructor. + def sort_queue(self, queue, chart): + """ + Sort the given queue of edges, in descending order of the + inside probabilities of the edges' trees. 
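+
+        Note: ``parse`` retrieves the next edge with ``queue.pop()``, i.e.
+        from the end of the list, so the ascending sort by ``edge.prob()``
+        below means the most probable edge is tried first (and ``_prune``
+        discards from the front of the queue).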
+ + :param queue: The queue of ``Edge`` objects to sort. Each edge in + this queue is an edge that could be added to the chart by + the fundamental rule; but that has not yet been added. + :type queue: list(Edge) + :param chart: The chart being used to parse the text. This + chart can be used to provide extra information for sorting + the queue. + :type chart: Chart + :rtype: None + """ + queue.sort(key=lambda edge: edge.prob()) + + +# Eventually, this will become some sort of inside-outside parser: +# class InsideOutsideParser(BottomUpProbabilisticChartParser): +# def __init__(self, grammar, trace=0): +# # Inherit docs. +# BottomUpProbabilisticChartParser.__init__(self, grammar, trace) +# +# # Find the best path from S to each nonterminal +# bestp = {} +# for production in grammar.productions(): bestp[production.lhs()]=0 +# bestp[grammar.start()] = 1.0 +# +# for i in range(len(grammar.productions())): +# for production in grammar.productions(): +# lhs = production.lhs() +# for elt in production.rhs(): +# bestp[elt] = max(bestp[lhs]*production.prob(), +# bestp.get(elt,0)) +# +# self._bestp = bestp +# for (k,v) in self._bestp.items(): print k,v +# +# def _sortkey(self, edge): +# return edge.structure()[PROB] * self._bestp[edge.lhs()] +# +# def sort_queue(self, queue, chart): +# queue.sort(key=self._sortkey) + + +class RandomChartParser(BottomUpProbabilisticChartParser): + """ + A bottom-up parser for ``PCFG`` grammars that tries edges in random order. + This sorting order results in a random search strategy. + """ + + # Inherit constructor + def sort_queue(self, queue, chart): + i = random.randint(0, len(queue) - 1) + (queue[-1], queue[i]) = (queue[i], queue[-1]) + + +class UnsortedChartParser(BottomUpProbabilisticChartParser): + """ + A bottom-up parser for ``PCFG`` grammars that tries edges in whatever order. + """ + + # Inherit constructor + def sort_queue(self, queue, chart): + return + + +class LongestChartParser(BottomUpProbabilisticChartParser): + """ + A bottom-up parser for ``PCFG`` grammars that tries longer edges before + shorter ones. This sorting order results in a type of best-first + search strategy. + """ + + # Inherit constructor + def sort_queue(self, queue, chart): + queue.sort(key=lambda edge: edge.length()) + + +##////////////////////////////////////////////////////// +## Test Code +##////////////////////////////////////////////////////// + + +def demo(choice=None, draw_parses=None, print_parses=None): + """ + A demonstration of the probabilistic parsers. The user is + prompted to select which demo to run, and how many parses should + be found; and then each parser is run on the same demo, and a + summary of the results are displayed. + """ + import sys, time + from nltk import tokenize + from nltk.parse import pchart + + # Define two demos. Each demo has a sentence and a grammar. 
+ toy_pcfg1 = PCFG.fromstring( + """ + S -> NP VP [1.0] + NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15] + Det -> 'the' [0.8] | 'my' [0.2] + N -> 'man' [0.5] | 'telescope' [0.5] + VP -> VP PP [0.1] | V NP [0.7] | V [0.2] + V -> 'ate' [0.35] | 'saw' [0.65] + PP -> P NP [1.0] + P -> 'with' [0.61] | 'under' [0.39] + """ + ) + + toy_pcfg2 = PCFG.fromstring( + """ + S -> NP VP [1.0] + VP -> V NP [.59] + VP -> V [.40] + VP -> VP PP [.01] + NP -> Det N [.41] + NP -> Name [.28] + NP -> NP PP [.31] + PP -> P NP [1.0] + V -> 'saw' [.21] + V -> 'ate' [.51] + V -> 'ran' [.28] + N -> 'boy' [.11] + N -> 'cookie' [.12] + N -> 'table' [.13] + N -> 'telescope' [.14] + N -> 'hill' [.5] + Name -> 'Jack' [.52] + Name -> 'Bob' [.48] + P -> 'with' [.61] + P -> 'under' [.39] + Det -> 'the' [.41] + Det -> 'a' [.31] + Det -> 'my' [.28] + """ + ) + + demos = [ + ('I saw John with my telescope', toy_pcfg1), + ('the boy saw Jack with Bob under the table with a telescope', toy_pcfg2), + ] + + if choice is None: + # Ask the user which demo they want to use. + print() + for i in range(len(demos)): + print('%3s: %s' % (i + 1, demos[i][0])) + print(' %r' % demos[i][1]) + print() + print('Which demo (%d-%d)? ' % (1, len(demos)), end=' ') + choice = int(sys.stdin.readline().strip()) - 1 + try: + sent, grammar = demos[choice] + except: + print('Bad sentence number') + return + + # Tokenize the sentence. + tokens = sent.split() + + # Define a list of parsers. We'll use all parsers. + parsers = [ + pchart.InsideChartParser(grammar), + pchart.RandomChartParser(grammar), + pchart.UnsortedChartParser(grammar), + pchart.LongestChartParser(grammar), + pchart.InsideChartParser(grammar, beam_size=len(tokens) + 1), # was BeamParser + ] + + # Run the parsers on the tokenized sentence. + times = [] + average_p = [] + num_parses = [] + all_parses = {} + for parser in parsers: + print('\ns: %s\nparser: %s\ngrammar: %s' % (sent, parser, grammar)) + parser.trace(3) + t = time.time() + parses = list(parser.parse(tokens)) + times.append(time.time() - t) + p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses) if parses else 0 + average_p.append(p) + num_parses.append(len(parses)) + for p in parses: + all_parses[p.freeze()] = 1 + + # Print some summary statistics + print() + print(' Parser Beam | Time (secs) # Parses Average P(parse)') + print('------------------------+------------------------------------------') + for i in range(len(parsers)): + print( + '%18s %4d |%11.4f%11d%19.14f' + % ( + parsers[i].__class__.__name__, + parsers[i].beam_size, + times[i], + num_parses[i], + average_p[i], + ) + ) + parses = all_parses.keys() + if parses: + p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses) + else: + p = 0 + print('------------------------+------------------------------------------') + print('%18s |%11s%11d%19.14f' % ('(All Parses)', 'n/a', len(parses), p)) + + if draw_parses is None: + # Ask the user if we should draw the parses. + print() + print('Draw parses (y/n)? ', end=' ') + draw_parses = sys.stdin.readline().strip().lower().startswith('y') + if draw_parses: + from nltk.draw.tree import draw_trees + + print(' please wait...') + draw_trees(*parses) + + if print_parses is None: + # Ask the user if we should print the parses. + print() + print('Print parses (y/n)? 
', end=' ') + print_parses = sys.stdin.readline().strip().lower().startswith('y') + if print_parses: + for parse in parses: + print(parse) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/projectivedependencyparser.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/projectivedependencyparser.py new file mode 100644 index 0000000..d29ee8c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/projectivedependencyparser.py @@ -0,0 +1,718 @@ +# Natural Language Toolkit: Dependency Grammars +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Jason Narad +# +# URL: +# For license information, see LICENSE.TXT +# +from __future__ import print_function, unicode_literals + +from collections import defaultdict +from itertools import chain +from functools import total_ordering + +from nltk.grammar import ( + DependencyProduction, + DependencyGrammar, + ProbabilisticDependencyGrammar, +) +from nltk.parse.dependencygraph import DependencyGraph +from nltk.internals import raise_unorderable_types +from nltk.compat import python_2_unicode_compatible + +################################################################# +# Dependency Span +################################################################# + + +@total_ordering +@python_2_unicode_compatible +class DependencySpan(object): + """ + A contiguous span over some part of the input string representing + dependency (head -> modifier) relationships amongst words. An atomic + span corresponds to only one word so it isn't a 'span' in the conventional + sense, as its _start_index = _end_index = _head_index for concatenation + purposes. All other spans are assumed to have arcs between all nodes + within the start and end indexes of the span, and one head index corresponding + to the head word for the entire span. This is the same as the root node if + the dependency structure were depicted as a graph. + """ + + def __init__(self, start_index, end_index, head_index, arcs, tags): + self._start_index = start_index + self._end_index = end_index + self._head_index = head_index + self._arcs = arcs + self._tags = tags + self._comparison_key = (start_index, end_index, head_index, tuple(arcs)) + self._hash = hash(self._comparison_key) + + def head_index(self): + """ + :return: An value indexing the head of the entire ``DependencySpan``. + :rtype: int + """ + return self._head_index + + def __repr__(self): + """ + :return: A concise string representatino of the ``DependencySpan``. + :rtype: str. + """ + return 'Span %d-%d; Head Index: %d' % ( + self._start_index, + self._end_index, + self._head_index, + ) + + def __str__(self): + """ + :return: A verbose string representation of the ``DependencySpan``. + :rtype: str + """ + str = 'Span %d-%d; Head Index: %d' % ( + self._start_index, + self._end_index, + self._head_index, + ) + for i in range(len(self._arcs)): + str += '\n%d <- %d, %s' % (i, self._arcs[i], self._tags[i]) + return str + + def __eq__(self, other): + return ( + type(self) == type(other) and self._comparison_key == other._comparison_key + ) + + def __ne__(self, other): + return not self == other + + def __lt__(self, other): + if not isinstance(other, DependencySpan): + raise_unorderable_types("<", self, other) + return self._comparison_key < other._comparison_key + + def __hash__(self): + """ + :return: The hash value of this ``DependencySpan``. 
+ """ + return self._hash + + +################################################################# +# Chart Cell +################################################################# + + +@python_2_unicode_compatible +class ChartCell(object): + """ + A cell from the parse chart formed when performing the CYK algorithm. + Each cell keeps track of its x and y coordinates (though this will probably + be discarded), and a list of spans serving as the cell's entries. + """ + + def __init__(self, x, y): + """ + :param x: This cell's x coordinate. + :type x: int. + :param y: This cell's y coordinate. + :type y: int. + """ + self._x = x + self._y = y + self._entries = set([]) + + def add(self, span): + """ + Appends the given span to the list of spans + representing the chart cell's entries. + + :param span: The span to add. + :type span: DependencySpan + """ + self._entries.add(span) + + def __str__(self): + """ + :return: A verbose string representation of this ``ChartCell``. + :rtype: str. + """ + return 'CC[%d,%d]: %s' % (self._x, self._y, self._entries) + + def __repr__(self): + """ + :return: A concise string representation of this ``ChartCell``. + :rtype: str. + """ + return '%s' % self + + +################################################################# +# Parsing with Dependency Grammars +################################################################# + + +class ProjectiveDependencyParser(object): + """ + A projective, rule-based, dependency parser. A ProjectiveDependencyParser + is created with a DependencyGrammar, a set of productions specifying + word-to-word dependency relations. The parse() method will then + return the set of all parses, in tree representation, for a given input + sequence of tokens. Each parse must meet the requirements of the both + the grammar and the projectivity constraint which specifies that the + branches of the dependency tree are not allowed to cross. Alternatively, + this can be understood as stating that each parent node and its children + in the parse tree form a continuous substring of the input sequence. + """ + + def __init__(self, dependency_grammar): + """ + Create a new ProjectiveDependencyParser, from a word-to-word + dependency grammar ``DependencyGrammar``. + + :param dependency_grammar: A word-to-word relation dependencygrammar. + :type dependency_grammar: DependencyGrammar + """ + self._grammar = dependency_grammar + + def parse(self, tokens): + """ + Performs a projective dependency parse on the list of tokens using + a chart-based, span-concatenation algorithm similar to Eisner (1996). + + :param tokens: The list of input tokens. + :type tokens: list(str) + :return: An iterator over parse trees. 
+ :rtype: iter(Tree) + """ + self._tokens = list(tokens) + chart = [] + for i in range(0, len(self._tokens) + 1): + chart.append([]) + for j in range(0, len(self._tokens) + 1): + chart[i].append(ChartCell(i, j)) + if i == j + 1: + chart[i][j].add(DependencySpan(i - 1, i, i - 1, [-1], ['null'])) + + for i in range(1, len(self._tokens) + 1): + for j in range(i - 2, -1, -1): + for k in range(i - 1, j, -1): + for span1 in chart[k][j]._entries: + for span2 in chart[i][k]._entries: + for newspan in self.concatenate(span1, span2): + chart[i][j].add(newspan) + + for parse in chart[len(self._tokens)][0]._entries: + conll_format = "" + # malt_format = "" + for i in range(len(tokens)): + # malt_format += '%s\t%s\t%d\t%s\n' % (tokens[i], 'null', parse._arcs[i] + 1, 'null') + # conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (i+1, tokens[i], tokens[i], 'null', 'null', 'null', parse._arcs[i] + 1, 'null', '-', '-') + # Modify to comply with the new Dependency Graph requirement (at least must have an root elements) + conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % ( + i + 1, + tokens[i], + tokens[i], + 'null', + 'null', + 'null', + parse._arcs[i] + 1, + 'ROOT', + '-', + '-', + ) + dg = DependencyGraph(conll_format) + # if self.meets_arity(dg): + yield dg.tree() + + def concatenate(self, span1, span2): + """ + Concatenates the two spans in whichever way possible. This + includes rightward concatenation (from the leftmost word of the + leftmost span to the rightmost word of the rightmost span) and + leftward concatenation (vice-versa) between adjacent spans. Unlike + Eisner's presentation of span concatenation, these spans do not + share or pivot on a particular word/word-index. + + :return: A list of new spans formed through concatenation. + :rtype: list(DependencySpan) + """ + spans = [] + if span1._start_index == span2._start_index: + print('Error: Mismatched spans - replace this with thrown error') + if span1._start_index > span2._start_index: + temp_span = span1 + span1 = span2 + span2 = temp_span + # adjacent rightward covered concatenation + new_arcs = span1._arcs + span2._arcs + new_tags = span1._tags + span2._tags + if self._grammar.contains( + self._tokens[span1._head_index], self._tokens[span2._head_index] + ): + # print 'Performing rightward cover %d to %d' % (span1._head_index, span2._head_index) + new_arcs[span2._head_index - span1._start_index] = span1._head_index + spans.append( + DependencySpan( + span1._start_index, + span2._end_index, + span1._head_index, + new_arcs, + new_tags, + ) + ) + # adjacent leftward covered concatenation + new_arcs = span1._arcs + span2._arcs + if self._grammar.contains( + self._tokens[span2._head_index], self._tokens[span1._head_index] + ): + # print 'performing leftward cover %d to %d' % (span2._head_index, span1._head_index) + new_arcs[span1._head_index - span1._start_index] = span2._head_index + spans.append( + DependencySpan( + span1._start_index, + span2._end_index, + span2._head_index, + new_arcs, + new_tags, + ) + ) + return spans + + +################################################################# +# Parsing with Probabilistic Dependency Grammars +################################################################# + + +class ProbabilisticProjectiveDependencyParser(object): + """A probabilistic, projective dependency parser. + + This parser returns the most probable projective parse derived from the + probabilistic dependency grammar derived from the train() method. 
The + probabilistic model is an implementation of Eisner's (1996) Model C, which + conditions on head-word, head-tag, child-word, and child-tag. The decoding + uses a bottom-up chart-based span concatenation algorithm that's identical + to the one utilized by the rule-based projective parser. + + Usage example + ------------- + >>> from nltk.parse.dependencygraph import conll_data2 + + >>> graphs = [ + ... DependencyGraph(entry) for entry in conll_data2.split('\\n\\n') if entry + ... ] + + >>> ppdp = ProbabilisticProjectiveDependencyParser() + >>> ppdp.train(graphs) + + >>> sent = ['Cathy', 'zag', 'hen', 'wild', 'zwaaien', '.'] + >>> list(ppdp.parse(sent)) + [Tree('zag', ['Cathy', 'hen', Tree('zwaaien', ['wild', '.'])])] + + """ + + def __init__(self): + """ + Create a new probabilistic dependency parser. No additional + operations are necessary. + """ + + def parse(self, tokens): + """ + Parses the list of tokens subject to the projectivity constraint + and the productions in the parser's grammar. This uses a method + similar to the span-concatenation algorithm defined in Eisner (1996). + It returns the most probable parse derived from the parser's + probabilistic dependency grammar. + """ + self._tokens = list(tokens) + chart = [] + for i in range(0, len(self._tokens) + 1): + chart.append([]) + for j in range(0, len(self._tokens) + 1): + chart[i].append(ChartCell(i, j)) + if i == j + 1: + if tokens[i - 1] in self._grammar._tags: + for tag in self._grammar._tags[tokens[i - 1]]: + chart[i][j].add( + DependencySpan(i - 1, i, i - 1, [-1], [tag]) + ) + else: + print( + 'No tag found for input token \'%s\', parse is impossible.' + % tokens[i - 1] + ) + return [] + for i in range(1, len(self._tokens) + 1): + for j in range(i - 2, -1, -1): + for k in range(i - 1, j, -1): + for span1 in chart[k][j]._entries: + for span2 in chart[i][k]._entries: + for newspan in self.concatenate(span1, span2): + chart[i][j].add(newspan) + trees = [] + max_parse = None + max_score = 0 + for parse in chart[len(self._tokens)][0]._entries: + conll_format = "" + malt_format = "" + for i in range(len(tokens)): + malt_format += '%s\t%s\t%d\t%s\n' % ( + tokens[i], + 'null', + parse._arcs[i] + 1, + 'null', + ) + # conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (i+1, tokens[i], tokens[i], parse._tags[i], parse._tags[i], 'null', parse._arcs[i] + 1, 'null', '-', '-') + # Modify to comply with recent change in dependency graph such that there must be a ROOT element. + conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % ( + i + 1, + tokens[i], + tokens[i], + parse._tags[i], + parse._tags[i], + 'null', + parse._arcs[i] + 1, + 'ROOT', + '-', + '-', + ) + dg = DependencyGraph(conll_format) + score = self.compute_prob(dg) + trees.append((score, dg.tree())) + trees.sort() + return (tree for (score, tree) in trees) + + def concatenate(self, span1, span2): + """ + Concatenates the two spans in whichever way possible. This + includes rightward concatenation (from the leftmost word of the + leftmost span to the rightmost word of the rightmost span) and + leftward concatenation (vice-versa) between adjacent spans. Unlike + Eisner's presentation of span concatenation, these spans do not + share or pivot on a particular word/word-index. + + :return: A list of new spans formed through concatenation. 
+ :rtype: list(DependencySpan) + """ + spans = [] + if span1._start_index == span2._start_index: + print('Error: Mismatched spans - replace this with thrown error') + if span1._start_index > span2._start_index: + temp_span = span1 + span1 = span2 + span2 = temp_span + # adjacent rightward covered concatenation + new_arcs = span1._arcs + span2._arcs + new_tags = span1._tags + span2._tags + if self._grammar.contains( + self._tokens[span1._head_index], self._tokens[span2._head_index] + ): + new_arcs[span2._head_index - span1._start_index] = span1._head_index + spans.append( + DependencySpan( + span1._start_index, + span2._end_index, + span1._head_index, + new_arcs, + new_tags, + ) + ) + # adjacent leftward covered concatenation + new_arcs = span1._arcs + span2._arcs + new_tags = span1._tags + span2._tags + if self._grammar.contains( + self._tokens[span2._head_index], self._tokens[span1._head_index] + ): + new_arcs[span1._head_index - span1._start_index] = span2._head_index + spans.append( + DependencySpan( + span1._start_index, + span2._end_index, + span2._head_index, + new_arcs, + new_tags, + ) + ) + return spans + + def train(self, graphs): + """ + Trains a ProbabilisticDependencyGrammar based on the list of input + DependencyGraphs. This model is an implementation of Eisner's (1996) + Model C, which derives its statistics from head-word, head-tag, + child-word, and child-tag relationships. + + :param graphs: A list of dependency graphs to train from. + :type: list(DependencyGraph) + """ + productions = [] + events = defaultdict(int) + tags = {} + for dg in graphs: + for node_index in range(1, len(dg.nodes)): + # children = dg.nodes[node_index]['deps'] + children = list(chain(*dg.nodes[node_index]['deps'].values())) + + nr_left_children = dg.left_children(node_index) + nr_right_children = dg.right_children(node_index) + nr_children = nr_left_children + nr_right_children + for child_index in range( + 0 - (nr_left_children + 1), nr_right_children + 2 + ): + head_word = dg.nodes[node_index]['word'] + head_tag = dg.nodes[node_index]['tag'] + if head_word in tags: + tags[head_word].add(head_tag) + else: + tags[head_word] = set([head_tag]) + child = 'STOP' + child_tag = 'STOP' + prev_word = 'START' + prev_tag = 'START' + if child_index < 0: + array_index = child_index + nr_left_children + if array_index >= 0: + child = dg.nodes[children[array_index]]['word'] + child_tag = dg.nodes[children[array_index]]['tag'] + if child_index != -1: + prev_word = dg.nodes[children[array_index + 1]]['word'] + prev_tag = dg.nodes[children[array_index + 1]]['tag'] + if child != 'STOP': + productions.append(DependencyProduction(head_word, [child])) + head_event = '(head (%s %s) (mods (%s, %s, %s) left))' % ( + child, + child_tag, + prev_tag, + head_word, + head_tag, + ) + mod_event = '(mods (%s, %s, %s) left))' % ( + prev_tag, + head_word, + head_tag, + ) + events[head_event] += 1 + events[mod_event] += 1 + elif child_index > 0: + array_index = child_index + nr_left_children - 1 + if array_index < nr_children: + child = dg.nodes[children[array_index]]['word'] + child_tag = dg.nodes[children[array_index]]['tag'] + if child_index != 1: + prev_word = dg.nodes[children[array_index - 1]]['word'] + prev_tag = dg.nodes[children[array_index - 1]]['tag'] + if child != 'STOP': + productions.append(DependencyProduction(head_word, [child])) + head_event = '(head (%s %s) (mods (%s, %s, %s) right))' % ( + child, + child_tag, + prev_tag, + head_word, + head_tag, + ) + mod_event = '(mods (%s, %s, %s) right))' % ( + prev_tag, + 
head_word, + head_tag, + ) + events[head_event] += 1 + events[mod_event] += 1 + self._grammar = ProbabilisticDependencyGrammar(productions, events, tags) + + def compute_prob(self, dg): + """ + Computes the probability of a dependency graph based + on the parser's probability model (defined by the parser's + statistical dependency grammar). + + :param dg: A dependency graph to score. + :type dg: DependencyGraph + :return: The probability of the dependency graph. + :rtype: int + """ + prob = 1.0 + for node_index in range(1, len(dg.nodes)): + # children = dg.nodes[node_index]['deps'] + children = list(chain(*dg.nodes[node_index]['deps'].values())) + + nr_left_children = dg.left_children(node_index) + nr_right_children = dg.right_children(node_index) + nr_children = nr_left_children + nr_right_children + for child_index in range(0 - (nr_left_children + 1), nr_right_children + 2): + head_word = dg.nodes[node_index]['word'] + head_tag = dg.nodes[node_index]['tag'] + child = 'STOP' + child_tag = 'STOP' + prev_word = 'START' + prev_tag = 'START' + if child_index < 0: + array_index = child_index + nr_left_children + if array_index >= 0: + child = dg.nodes[children[array_index]]['word'] + child_tag = dg.nodes[children[array_index]]['tag'] + if child_index != -1: + prev_word = dg.nodes[children[array_index + 1]]['word'] + prev_tag = dg.nodes[children[array_index + 1]]['tag'] + head_event = '(head (%s %s) (mods (%s, %s, %s) left))' % ( + child, + child_tag, + prev_tag, + head_word, + head_tag, + ) + mod_event = '(mods (%s, %s, %s) left))' % ( + prev_tag, + head_word, + head_tag, + ) + h_count = self._grammar._events[head_event] + m_count = self._grammar._events[mod_event] + + # If the grammar is not covered + if m_count != 0: + prob *= h_count / m_count + else: + prob = 0.00000001 # Very small number + + elif child_index > 0: + array_index = child_index + nr_left_children - 1 + if array_index < nr_children: + child = dg.nodes[children[array_index]]['word'] + child_tag = dg.nodes[children[array_index]]['tag'] + if child_index != 1: + prev_word = dg.nodes[children[array_index - 1]]['word'] + prev_tag = dg.nodes[children[array_index - 1]]['tag'] + head_event = '(head (%s %s) (mods (%s, %s, %s) right))' % ( + child, + child_tag, + prev_tag, + head_word, + head_tag, + ) + mod_event = '(mods (%s, %s, %s) right))' % ( + prev_tag, + head_word, + head_tag, + ) + h_count = self._grammar._events[head_event] + m_count = self._grammar._events[mod_event] + + if m_count != 0: + prob *= h_count / m_count + else: + prob = 0.00000001 # Very small number + + return prob + + +################################################################# +# Demos +################################################################# + + +def demo(): + projective_rule_parse_demo() + # arity_parse_demo() + projective_prob_parse_demo() + + +def projective_rule_parse_demo(): + """ + A demonstration showing the creation and use of a + ``DependencyGrammar`` to perform a projective dependency + parse. + """ + grammar = DependencyGrammar.fromstring( + """ + 'scratch' -> 'cats' | 'walls' + 'walls' -> 'the' + 'cats' -> 'the' + """ + ) + print(grammar) + pdp = ProjectiveDependencyParser(grammar) + trees = pdp.parse(['the', 'cats', 'scratch', 'the', 'walls']) + for tree in trees: + print(tree) + + +def arity_parse_demo(): + """ + A demonstration showing the creation of a ``DependencyGrammar`` + in which a specific number of modifiers is listed for a given + head. 
This can further constrain the number of possible parses + created by a ``ProjectiveDependencyParser``. + """ + print() + print('A grammar with no arity constraints. Each DependencyProduction') + print('specifies a relationship between one head word and only one') + print('modifier word.') + grammar = DependencyGrammar.fromstring( + """ + 'fell' -> 'price' | 'stock' + 'price' -> 'of' | 'the' + 'of' -> 'stock' + 'stock' -> 'the' + """ + ) + print(grammar) + + print() + print('For the sentence \'The price of the stock fell\', this grammar') + print('will produce the following three parses:') + pdp = ProjectiveDependencyParser(grammar) + trees = pdp.parse(['the', 'price', 'of', 'the', 'stock', 'fell']) + for tree in trees: + print(tree) + + print() + print('By contrast, the following grammar contains a ') + print('DependencyProduction that specifies a relationship') + print('between a single head word, \'price\', and two modifier') + print('words, \'of\' and \'the\'.') + grammar = DependencyGrammar.fromstring( + """ + 'fell' -> 'price' | 'stock' + 'price' -> 'of' 'the' + 'of' -> 'stock' + 'stock' -> 'the' + """ + ) + print(grammar) + + print() + print( + 'This constrains the number of possible parses to just one:' + ) # unimplemented, soon to replace + pdp = ProjectiveDependencyParser(grammar) + trees = pdp.parse(['the', 'price', 'of', 'the', 'stock', 'fell']) + for tree in trees: + print(tree) + + +def projective_prob_parse_demo(): + """ + A demo showing the training and use of a projective + dependency parser. + """ + from nltk.parse.dependencygraph import conll_data2 + + graphs = [DependencyGraph(entry) for entry in conll_data2.split('\n\n') if entry] + ppdp = ProbabilisticProjectiveDependencyParser() + print('Training Probabilistic Projective Dependency Parser...') + ppdp.train(graphs) + + sent = ['Cathy', 'zag', 'hen', 'wild', 'zwaaien', '.'] + print('Parsing \'', " ".join(sent), '\'...') + print('Parse:') + for tree in ppdp.parse(sent): + print(tree) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/recursivedescent.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/recursivedescent.py new file mode 100644 index 0000000..a9ab322 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/recursivedescent.py @@ -0,0 +1,690 @@ +# Natural Language Toolkit: Recursive Descent Parser +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals + +from nltk.grammar import Nonterminal +from nltk.tree import Tree, ImmutableTree +from nltk.compat import unicode_repr + +from nltk.parse.api import ParserI + +##////////////////////////////////////////////////////// +## Recursive Descent Parser +##////////////////////////////////////////////////////// +class RecursiveDescentParser(ParserI): + """ + A simple top-down CFG parser that parses texts by recursively + expanding the fringe of a Tree, and matching it against a + text. + + ``RecursiveDescentParser`` uses a list of tree locations called a + "frontier" to remember which subtrees have not yet been expanded + and which leaves have not yet been matched against the text. Each + tree location consists of a list of child indices specifying the + path from the root of the tree to a subtree or a leaf; see the + reference documentation for Tree for more information + about tree locations. 
+ + When the parser begins parsing a text, it constructs a tree + containing only the start symbol, and a frontier containing the + location of the tree's root node. It then extends the tree to + cover the text, using the following recursive procedure: + + - If the frontier is empty, and the text is covered by the tree, + then return the tree as a possible parse. + - If the frontier is empty, and the text is not covered by the + tree, then return no parses. + - If the first element of the frontier is a subtree, then + use CFG productions to "expand" it. For each applicable + production, add the expanded subtree's children to the + frontier, and recursively find all parses that can be + generated by the new tree and frontier. + - If the first element of the frontier is a token, then "match" + it against the next token from the text. Remove the token + from the frontier, and recursively find all parses that can be + generated by the new tree and frontier. + + :see: ``nltk.grammar`` + """ + + def __init__(self, grammar, trace=0): + """ + Create a new ``RecursiveDescentParser``, that uses ``grammar`` + to parse texts. + + :type grammar: CFG + :param grammar: The grammar used to parse texts. + :type trace: int + :param trace: The level of tracing that should be used when + parsing a text. ``0`` will generate no tracing output; + and higher numbers will produce more verbose tracing + output. + """ + self._grammar = grammar + self._trace = trace + + def grammar(self): + return self._grammar + + def parse(self, tokens): + # Inherit docs from ParserI + + tokens = list(tokens) + self._grammar.check_coverage(tokens) + + # Start a recursive descent parse, with an initial tree + # containing just the start symbol. + start = self._grammar.start().symbol() + initial_tree = Tree(start, []) + frontier = [()] + if self._trace: + self._trace_start(initial_tree, frontier, tokens) + return self._parse(tokens, initial_tree, frontier) + + def _parse(self, remaining_text, tree, frontier): + """ + Recursively expand and match each elements of ``tree`` + specified by ``frontier``, to cover ``remaining_text``. Return + a list of all parses found. + + :return: An iterator of all parses that can be generated by + matching and expanding the elements of ``tree`` + specified by ``frontier``. + :rtype: iter(Tree) + :type tree: Tree + :param tree: A partial structure for the text that is + currently being parsed. The elements of ``tree`` + that are specified by ``frontier`` have not yet been + expanded or matched. + :type remaining_text: list(str) + :param remaining_text: The portion of the text that is not yet + covered by ``tree``. + :type frontier: list(tuple(int)) + :param frontier: A list of the locations within ``tree`` of + all subtrees that have not yet been expanded, and all + leaves that have not yet been matched. This list sorted + in left-to-right order of location within the tree. + """ + + # If the tree covers the text, and there's nothing left to + # expand, then we've found a complete parse; return it. + if len(remaining_text) == 0 and len(frontier) == 0: + if self._trace: + self._trace_succeed(tree, frontier) + yield tree + + # If there's still text, but nothing left to expand, we failed. + elif len(frontier) == 0: + if self._trace: + self._trace_backtrack(tree, frontier) + + # If the next element on the frontier is a tree, expand it. 
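+        # (Expansion is handled by self._expand below: with no production
+        # given, it tries every grammar production whose left-hand side
+        # matches this node's label and recursively parses each result.)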
+ elif isinstance(tree[frontier[0]], Tree): + for result in self._expand(remaining_text, tree, frontier): + yield result + + # If the next element on the frontier is a token, match it. + else: + for result in self._match(remaining_text, tree, frontier): + yield result + + def _match(self, rtext, tree, frontier): + """ + :rtype: iter(Tree) + :return: an iterator of all parses that can be generated by + matching the first element of ``frontier`` against the + first token in ``rtext``. In particular, if the first + element of ``frontier`` has the same type as the first + token in ``rtext``, then substitute the token into + ``tree``; and return all parses that can be generated by + matching and expanding the remaining elements of + ``frontier``. If the first element of ``frontier`` does not + have the same type as the first token in ``rtext``, then + return empty list. + + :type tree: Tree + :param tree: A partial structure for the text that is + currently being parsed. The elements of ``tree`` + that are specified by ``frontier`` have not yet been + expanded or matched. + :type rtext: list(str) + :param rtext: The portion of the text that is not yet + covered by ``tree``. + :type frontier: list of tuple of int + :param frontier: A list of the locations within ``tree`` of + all subtrees that have not yet been expanded, and all + leaves that have not yet been matched. + """ + + tree_leaf = tree[frontier[0]] + if len(rtext) > 0 and tree_leaf == rtext[0]: + # If it's a terminal that matches rtext[0], then substitute + # in the token, and continue parsing. + newtree = tree.copy(deep=True) + newtree[frontier[0]] = rtext[0] + if self._trace: + self._trace_match(newtree, frontier[1:], rtext[0]) + for result in self._parse(rtext[1:], newtree, frontier[1:]): + yield result + else: + # If it's a non-matching terminal, fail. + if self._trace: + self._trace_backtrack(tree, frontier, rtext[:1]) + + def _expand(self, remaining_text, tree, frontier, production=None): + """ + :rtype: iter(Tree) + :return: An iterator of all parses that can be generated by + expanding the first element of ``frontier`` with + ``production``. In particular, if the first element of + ``frontier`` is a subtree whose node type is equal to + ``production``'s left hand side, then add a child to that + subtree for each element of ``production``'s right hand + side; and return all parses that can be generated by + matching and expanding the remaining elements of + ``frontier``. If the first element of ``frontier`` is not a + subtree whose node type is equal to ``production``'s left + hand side, then return an empty list. If ``production`` is + not specified, then return a list of all parses that can + be generated by expanding the first element of ``frontier`` + with *any* CFG production. + + :type tree: Tree + :param tree: A partial structure for the text that is + currently being parsed. The elements of ``tree`` + that are specified by ``frontier`` have not yet been + expanded or matched. + :type remaining_text: list(str) + :param remaining_text: The portion of the text that is not yet + covered by ``tree``. + :type frontier: list(tuple(int)) + :param frontier: A list of the locations within ``tree`` of + all subtrees that have not yet been expanded, and all + leaves that have not yet been matched. 
+ """ + + if production is None: + productions = self._grammar.productions() + else: + productions = [production] + + for production in productions: + lhs = production.lhs().symbol() + if lhs == tree[frontier[0]].label(): + subtree = self._production_to_tree(production) + if frontier[0] == (): + newtree = subtree + else: + newtree = tree.copy(deep=True) + newtree[frontier[0]] = subtree + new_frontier = [ + frontier[0] + (i,) for i in range(len(production.rhs())) + ] + if self._trace: + self._trace_expand(newtree, new_frontier, production) + for result in self._parse( + remaining_text, newtree, new_frontier + frontier[1:] + ): + yield result + + def _production_to_tree(self, production): + """ + :rtype: Tree + :return: The Tree that is licensed by ``production``. + In particular, given the production ``[lhs -> elt[1] ... elt[n]]`` + return a tree that has a node ``lhs.symbol``, and + ``n`` children. For each nonterminal element + ``elt[i]`` in the production, the tree token has a + childless subtree with node value ``elt[i].symbol``; and + for each terminal element ``elt[j]``, the tree token has + a leaf token with type ``elt[j]``. + + :param production: The CFG production that licenses the tree + token that should be returned. + :type production: Production + """ + children = [] + for elt in production.rhs(): + if isinstance(elt, Nonterminal): + children.append(Tree(elt.symbol(), [])) + else: + # This will be matched. + children.append(elt) + return Tree(production.lhs().symbol(), children) + + def trace(self, trace=2): + """ + Set the level of tracing output that should be generated when + parsing a text. + + :type trace: int + :param trace: The trace level. A trace level of ``0`` will + generate no tracing output; and higher trace levels will + produce more verbose tracing output. + :rtype: None + """ + self._trace = trace + + def _trace_fringe(self, tree, treeloc=None): + """ + Print trace output displaying the fringe of ``tree``. The + fringe of ``tree`` consists of all of its leaves and all of + its childless subtrees. + + :rtype: None + """ + + if treeloc == (): + print("*", end=' ') + if isinstance(tree, Tree): + if len(tree) == 0: + print(unicode_repr(Nonterminal(tree.label())), end=' ') + for i in range(len(tree)): + if treeloc is not None and i == treeloc[0]: + self._trace_fringe(tree[i], treeloc[1:]) + else: + self._trace_fringe(tree[i]) + else: + print(unicode_repr(tree), end=' ') + + def _trace_tree(self, tree, frontier, operation): + """ + Print trace output displaying the parser's current state. + + :param operation: A character identifying the operation that + generated the current state. 
+ :rtype: None + """ + if self._trace == 2: + print(' %c [' % operation, end=' ') + else: + print(' [', end=' ') + if len(frontier) > 0: + self._trace_fringe(tree, frontier[0]) + else: + self._trace_fringe(tree) + print(']') + + def _trace_start(self, tree, frontier, text): + print('Parsing %r' % " ".join(text)) + if self._trace > 2: + print('Start:') + if self._trace > 1: + self._trace_tree(tree, frontier, ' ') + + def _trace_expand(self, tree, frontier, production): + if self._trace > 2: + print('Expand: %s' % production) + if self._trace > 1: + self._trace_tree(tree, frontier, 'E') + + def _trace_match(self, tree, frontier, tok): + if self._trace > 2: + print('Match: %r' % tok) + if self._trace > 1: + self._trace_tree(tree, frontier, 'M') + + def _trace_succeed(self, tree, frontier): + if self._trace > 2: + print('GOOD PARSE:') + if self._trace == 1: + print('Found a parse:\n%s' % tree) + if self._trace > 1: + self._trace_tree(tree, frontier, '+') + + def _trace_backtrack(self, tree, frontier, toks=None): + if self._trace > 2: + if toks: + print('Backtrack: %r match failed' % toks[0]) + else: + print('Backtrack') + + +##////////////////////////////////////////////////////// +## Stepping Recursive Descent Parser +##////////////////////////////////////////////////////// +class SteppingRecursiveDescentParser(RecursiveDescentParser): + """ + A ``RecursiveDescentParser`` that allows you to step through the + parsing process, performing a single operation at a time. + + The ``initialize`` method is used to start parsing a text. + ``expand`` expands the first element on the frontier using a single + CFG production, and ``match`` matches the first element on the + frontier against the next text token. ``backtrack`` undoes the most + recent expand or match operation. ``step`` performs a single + expand, match, or backtrack operation. ``parses`` returns the set + of parses that have been found by the parser. + + :ivar _history: A list of ``(rtext, tree, frontier)`` tripples, + containing the previous states of the parser. This history is + used to implement the ``backtrack`` operation. + :ivar _tried_e: A record of all productions that have been tried + for a given tree. This record is used by ``expand`` to perform + the next untried production. + :ivar _tried_m: A record of what tokens have been matched for a + given tree. This record is used by ``step`` to decide whether + or not to match a token. + :see: ``nltk.grammar`` + """ + + def __init__(self, grammar, trace=0): + super(SteppingRecursiveDescentParser, self).__init__(grammar, trace) + self._rtext = None + self._tree = None + self._frontier = [()] + self._tried_e = {} + self._tried_m = {} + self._history = [] + self._parses = [] + + # [XX] TEMPORARY HACK WARNING! This should be replaced with + # something nicer when we get the chance. + def _freeze(self, tree): + c = tree.copy() + # for pos in c.treepositions('leaves'): + # c[pos] = c[pos].freeze() + return ImmutableTree.convert(c) + + def parse(self, tokens): + tokens = list(tokens) + self.initialize(tokens) + while self.step() is not None: + pass + return self.parses() + + def initialize(self, tokens): + """ + Start parsing a given text. This sets the parser's tree to + the start symbol, its frontier to the root node, and its + remaining text to ``token['SUBTOKENS']``. 
+ """ + + self._rtext = tokens + start = self._grammar.start().symbol() + self._tree = Tree(start, []) + self._frontier = [()] + self._tried_e = {} + self._tried_m = {} + self._history = [] + self._parses = [] + if self._trace: + self._trace_start(self._tree, self._frontier, self._rtext) + + def remaining_text(self): + """ + :return: The portion of the text that is not yet covered by the + tree. + :rtype: list(str) + """ + return self._rtext + + def frontier(self): + """ + :return: A list of the tree locations of all subtrees that + have not yet been expanded, and all leaves that have not + yet been matched. + :rtype: list(tuple(int)) + """ + return self._frontier + + def tree(self): + """ + :return: A partial structure for the text that is + currently being parsed. The elements specified by the + frontier have not yet been expanded or matched. + :rtype: Tree + """ + return self._tree + + def step(self): + """ + Perform a single parsing operation. If an untried match is + possible, then perform the match, and return the matched + token. If an untried expansion is possible, then perform the + expansion, and return the production that it is based on. If + backtracking is possible, then backtrack, and return True. + Otherwise, return None. + + :return: None if no operation was performed; a token if a match + was performed; a production if an expansion was performed; + and True if a backtrack operation was performed. + :rtype: Production or String or bool + """ + # Try matching (if we haven't already) + if self.untried_match(): + token = self.match() + if token is not None: + return token + + # Try expanding. + production = self.expand() + if production is not None: + return production + + # Try backtracking + if self.backtrack(): + self._trace_backtrack(self._tree, self._frontier) + return True + + # Nothing left to do. + return None + + def expand(self, production=None): + """ + Expand the first element of the frontier. In particular, if + the first element of the frontier is a subtree whose node type + is equal to ``production``'s left hand side, then add a child + to that subtree for each element of ``production``'s right hand + side. If ``production`` is not specified, then use the first + untried expandable production. If all expandable productions + have been tried, do nothing. + + :return: The production used to expand the frontier, if an + expansion was performed. If no expansion was performed, + return None. + :rtype: Production or None + """ + + # Make sure we *can* expand. + if len(self._frontier) == 0: + return None + if not isinstance(self._tree[self._frontier[0]], Tree): + return None + + # If they didn't specify a production, check all untried ones. + if production is None: + productions = self.untried_expandable_productions() + else: + productions = [production] + + parses = [] + for prod in productions: + # Record that we've tried this production now. + self._tried_e.setdefault(self._freeze(self._tree), []).append(prod) + + # Try expanding. + for _result in self._expand(self._rtext, self._tree, self._frontier, prod): + return prod + + # We didn't expand anything. + return None + + def match(self): + """ + Match the first element of the frontier. In particular, if + the first element of the frontier has the same type as the + next text token, then substitute the text token into the tree. + + :return: The token matched, if a match operation was + performed. If no match was performed, return None + :rtype: str or None + """ + + # Record that we've tried matching this token. 
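+        # (The token is recorded against the frozen tree before the checks
+        # below, so untried_match() will not offer it again for this tree
+        # even if the match itself turns out to be impossible.)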
+ tok = self._rtext[0] + self._tried_m.setdefault(self._freeze(self._tree), []).append(tok) + + # Make sure we *can* match. + if len(self._frontier) == 0: + return None + if isinstance(self._tree[self._frontier[0]], Tree): + return None + + for _result in self._match(self._rtext, self._tree, self._frontier): + # Return the token we just matched. + return self._history[-1][0][0] + return None + + def backtrack(self): + """ + Return the parser to its state before the most recent + match or expand operation. Calling ``undo`` repeatedly return + the parser to successively earlier states. If no match or + expand operations have been performed, ``undo`` will make no + changes. + + :return: true if an operation was successfully undone. + :rtype: bool + """ + if len(self._history) == 0: + return False + (self._rtext, self._tree, self._frontier) = self._history.pop() + return True + + def expandable_productions(self): + """ + :return: A list of all the productions for which expansions + are available for the current parser state. + :rtype: list(Production) + """ + # Make sure we *can* expand. + if len(self._frontier) == 0: + return [] + frontier_child = self._tree[self._frontier[0]] + if len(self._frontier) == 0 or not isinstance(frontier_child, Tree): + return [] + + return [ + p + for p in self._grammar.productions() + if p.lhs().symbol() == frontier_child.label() + ] + + def untried_expandable_productions(self): + """ + :return: A list of all the untried productions for which + expansions are available for the current parser state. + :rtype: list(Production) + """ + + tried_expansions = self._tried_e.get(self._freeze(self._tree), []) + return [p for p in self.expandable_productions() if p not in tried_expansions] + + def untried_match(self): + """ + :return: Whether the first element of the frontier is a token + that has not yet been matched. + :rtype: bool + """ + + if len(self._rtext) == 0: + return False + tried_matches = self._tried_m.get(self._freeze(self._tree), []) + return self._rtext[0] not in tried_matches + + def currently_complete(self): + """ + :return: Whether the parser's current state represents a + complete parse. + :rtype: bool + """ + return len(self._frontier) == 0 and len(self._rtext) == 0 + + def _parse(self, remaining_text, tree, frontier): + """ + A stub version of ``_parse`` that sets the parsers current + state to the given arguments. In ``RecursiveDescentParser``, + the ``_parse`` method is used to recursively continue parsing a + text. ``SteppingRecursiveDescentParser`` overrides it to + capture these recursive calls. It records the parser's old + state in the history (to allow for backtracking), and updates + the parser's new state using the given arguments. Finally, it + returns ``[1]``, which is used by ``match`` and ``expand`` to + detect whether their operations were successful. + + :return: ``[1]`` + :rtype: list of int + """ + self._history.append((self._rtext, self._tree, self._frontier)) + self._rtext = remaining_text + self._tree = tree + self._frontier = frontier + + # Is it a good parse? If so, record it. + if len(frontier) == 0 and len(remaining_text) == 0: + self._parses.append(tree) + self._trace_succeed(self._tree, self._frontier) + + return [1] + + def parses(self): + """ + :return: An iterator of the parses that have been found by this + parser so far. + :rtype: list of Tree + """ + return iter(self._parses) + + def set_grammar(self, grammar): + """ + Change the grammar used to parse texts. + + :param grammar: The new grammar. 
+ :type grammar: CFG + """ + self._grammar = grammar + + +##////////////////////////////////////////////////////// +## Demonstration Code +##////////////////////////////////////////////////////// + + +def demo(): + """ + A demonstration of the recursive descent parser. + """ + + from nltk import parse, CFG + + grammar = CFG.fromstring( + """ + S -> NP VP + NP -> Det N | Det N PP + VP -> V NP | V NP PP + PP -> P NP + NP -> 'I' + N -> 'man' | 'park' | 'telescope' | 'dog' + Det -> 'the' | 'a' + P -> 'in' | 'with' + V -> 'saw' + """ + ) + + for prod in grammar.productions(): + print(prod) + + sent = 'I saw a man in the park'.split() + parser = parse.RecursiveDescentParser(grammar, trace=2) + for p in parser.parse(sent): + print(p) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/shiftreduce.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/shiftreduce.py new file mode 100644 index 0000000..a3514db --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/shiftreduce.py @@ -0,0 +1,481 @@ +# Natural Language Toolkit: Shift-Reduce Parser +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals + +from nltk.grammar import Nonterminal +from nltk.tree import Tree +from nltk.compat import unicode_repr + +from nltk.parse.api import ParserI + +##////////////////////////////////////////////////////// +## Shift/Reduce Parser +##////////////////////////////////////////////////////// +class ShiftReduceParser(ParserI): + """ + A simple bottom-up CFG parser that uses two operations, "shift" + and "reduce", to find a single parse for a text. + + ``ShiftReduceParser`` maintains a stack, which records the + structure of a portion of the text. This stack is a list of + strings and Trees that collectively cover a portion of + the text. For example, while parsing the sentence "the dog saw + the man" with a typical grammar, ``ShiftReduceParser`` will produce + the following stack, which covers "the dog saw":: + + [(NP: (Det: 'the') (N: 'dog')), (V: 'saw')] + + ``ShiftReduceParser`` attempts to extend the stack to cover the + entire text, and to combine the stack elements into a single tree, + producing a complete parse for the sentence. + + Initially, the stack is empty. It is extended to cover the text, + from left to right, by repeatedly applying two operations: + + - "shift" moves a token from the beginning of the text to the + end of the stack. + - "reduce" uses a CFG production to combine the rightmost stack + elements into a single Tree. + + Often, more than one operation can be performed on a given stack. + In this case, ``ShiftReduceParser`` uses the following heuristics + to decide which operation to perform: + + - Only shift if no reductions are available. + - If multiple reductions are available, then apply the reduction + whose CFG production is listed earliest in the grammar. + + Note that these heuristics are not guaranteed to choose an + operation that leads to a parse of the text. Also, if multiple + parses exists, ``ShiftReduceParser`` will return at most one of + them. + + :see: ``nltk.grammar`` + """ + + def __init__(self, grammar, trace=0): + """ + Create a new ``ShiftReduceParser``, that uses ``grammar`` to + parse texts. + + :type grammar: Grammar + :param grammar: The grammar used to parse texts. + :type trace: int + :param trace: The level of tracing that should be used when + parsing a text. 
``0`` will generate no tracing output; + and higher numbers will produce more verbose tracing + output. + """ + self._grammar = grammar + self._trace = trace + self._check_grammar() + + def grammar(self): + return self._grammar + + def parse(self, tokens): + tokens = list(tokens) + self._grammar.check_coverage(tokens) + + # initialize the stack. + stack = [] + remaining_text = tokens + + # Trace output. + if self._trace: + print('Parsing %r' % " ".join(tokens)) + self._trace_stack(stack, remaining_text) + + # iterate through the text, pushing the token onto + # the stack, then reducing the stack. + while len(remaining_text) > 0: + self._shift(stack, remaining_text) + while self._reduce(stack, remaining_text): + pass + + # Did we reduce everything? + if len(stack) == 1: + # Did we end up with the right category? + if stack[0].label() == self._grammar.start().symbol(): + yield stack[0] + + def _shift(self, stack, remaining_text): + """ + Move a token from the beginning of ``remaining_text`` to the + end of ``stack``. + + :type stack: list(str and Tree) + :param stack: A list of strings and Trees, encoding + the structure of the text that has been parsed so far. + :type remaining_text: list(str) + :param remaining_text: The portion of the text that is not yet + covered by ``stack``. + :rtype: None + """ + stack.append(remaining_text[0]) + remaining_text.remove(remaining_text[0]) + if self._trace: + self._trace_shift(stack, remaining_text) + + def _match_rhs(self, rhs, rightmost_stack): + """ + :rtype: bool + :return: true if the right hand side of a CFG production + matches the rightmost elements of the stack. ``rhs`` + matches ``rightmost_stack`` if they are the same length, + and each element of ``rhs`` matches the corresponding + element of ``rightmost_stack``. A nonterminal element of + ``rhs`` matches any Tree whose node value is equal + to the nonterminal's symbol. A terminal element of ``rhs`` + matches any string whose type is equal to the terminal. + :type rhs: list(terminal and Nonterminal) + :param rhs: The right hand side of a CFG production. + :type rightmost_stack: list(string and Tree) + :param rightmost_stack: The rightmost elements of the parser's + stack. + """ + + if len(rightmost_stack) != len(rhs): + return False + for i in range(len(rightmost_stack)): + if isinstance(rightmost_stack[i], Tree): + if not isinstance(rhs[i], Nonterminal): + return False + if rightmost_stack[i].label() != rhs[i].symbol(): + return False + else: + if isinstance(rhs[i], Nonterminal): + return False + if rightmost_stack[i] != rhs[i]: + return False + return True + + def _reduce(self, stack, remaining_text, production=None): + """ + Find a CFG production whose right hand side matches the + rightmost stack elements; and combine those stack elements + into a single Tree, with the node specified by the + production's left-hand side. If more than one CFG production + matches the stack, then use the production that is listed + earliest in the grammar. The new Tree replaces the + elements in the stack. + + :rtype: Production or None + :return: If a reduction is performed, then return the CFG + production that the reduction is based on; otherwise, + return false. + :type stack: list(string and Tree) + :param stack: A list of strings and Trees, encoding + the structure of the text that has been parsed so far. + :type remaining_text: list(str) + :param remaining_text: The portion of the text that is not yet + covered by ``stack``. 
+ """ + if production is None: + productions = self._grammar.productions() + else: + productions = [production] + + # Try each production, in order. + for production in productions: + rhslen = len(production.rhs()) + + # check if the RHS of a production matches the top of the stack + if self._match_rhs(production.rhs(), stack[-rhslen:]): + + # combine the tree to reflect the reduction + tree = Tree(production.lhs().symbol(), stack[-rhslen:]) + stack[-rhslen:] = [tree] + + # We reduced something + if self._trace: + self._trace_reduce(stack, production, remaining_text) + return production + + # We didn't reduce anything + return None + + def trace(self, trace=2): + """ + Set the level of tracing output that should be generated when + parsing a text. + + :type trace: int + :param trace: The trace level. A trace level of ``0`` will + generate no tracing output; and higher trace levels will + produce more verbose tracing output. + :rtype: None + """ + # 1: just show shifts. + # 2: show shifts & reduces + # 3: display which tokens & productions are shifed/reduced + self._trace = trace + + def _trace_stack(self, stack, remaining_text, marker=' '): + """ + Print trace output displaying the given stack and text. + + :rtype: None + :param marker: A character that is printed to the left of the + stack. This is used with trace level 2 to print 'S' + before shifted stacks and 'R' before reduced stacks. + """ + s = ' ' + marker + ' [ ' + for elt in stack: + if isinstance(elt, Tree): + s += unicode_repr(Nonterminal(elt.label())) + ' ' + else: + s += unicode_repr(elt) + ' ' + s += '* ' + ' '.join(remaining_text) + ']' + print(s) + + def _trace_shift(self, stack, remaining_text): + """ + Print trace output displaying that a token has been shifted. + + :rtype: None + """ + if self._trace > 2: + print('Shift %r:' % stack[-1]) + if self._trace == 2: + self._trace_stack(stack, remaining_text, 'S') + elif self._trace > 0: + self._trace_stack(stack, remaining_text) + + def _trace_reduce(self, stack, production, remaining_text): + """ + Print trace output displaying that ``production`` was used to + reduce ``stack``. + + :rtype: None + """ + if self._trace > 2: + rhs = " ".join(production.rhs()) + print('Reduce %r <- %s' % (production.lhs(), rhs)) + if self._trace == 2: + self._trace_stack(stack, remaining_text, 'R') + elif self._trace > 1: + self._trace_stack(stack, remaining_text) + + def _check_grammar(self): + """ + Check to make sure that all of the CFG productions are + potentially useful. If any productions can never be used, + then print a warning. + + :rtype: None + """ + productions = self._grammar.productions() + + # Any production whose RHS is an extension of another production's RHS + # will never be used. + for i in range(len(productions)): + for j in range(i + 1, len(productions)): + rhs1 = productions[i].rhs() + rhs2 = productions[j].rhs() + if rhs1[: len(rhs2)] == rhs2: + print('Warning: %r will never be used' % productions[i]) + + +##////////////////////////////////////////////////////// +## Stepping Shift/Reduce Parser +##////////////////////////////////////////////////////// +class SteppingShiftReduceParser(ShiftReduceParser): + """ + A ``ShiftReduceParser`` that allows you to setp through the parsing + process, performing a single operation at a time. It also allows + you to change the parser's grammar midway through parsing a text. + + The ``initialize`` method is used to start parsing a text. + ``shift`` performs a single shift operation, and ``reduce`` performs + a single reduce operation. 
``step`` will perform a single reduce + operation if possible; otherwise, it will perform a single shift + operation. ``parses`` returns the set of parses that have been + found by the parser. + + :ivar _history: A list of ``(stack, remaining_text)`` pairs, + containing all of the previous states of the parser. This + history is used to implement the ``undo`` operation. + :see: ``nltk.grammar`` + """ + + def __init__(self, grammar, trace=0): + super(SteppingShiftReduceParser, self).__init__(grammar, trace) + self._stack = None + self._remaining_text = None + self._history = [] + + def parse(self, tokens): + tokens = list(tokens) + self.initialize(tokens) + while self.step(): + pass + return self.parses() + + def stack(self): + """ + :return: The parser's stack. + :rtype: list(str and Tree) + """ + return self._stack + + def remaining_text(self): + """ + :return: The portion of the text that is not yet covered by the + stack. + :rtype: list(str) + """ + return self._remaining_text + + def initialize(self, tokens): + """ + Start parsing a given text. This sets the parser's stack to + ``[]`` and sets its remaining text to ``tokens``. + """ + self._stack = [] + self._remaining_text = tokens + self._history = [] + + def step(self): + """ + Perform a single parsing operation. If a reduction is + possible, then perform that reduction, and return the + production that it is based on. Otherwise, if a shift is + possible, then perform it, and return True. Otherwise, + return False. + + :return: False if no operation was performed; True if a shift was + performed; and the CFG production used to reduce if a + reduction was performed. + :rtype: Production or bool + """ + return self.reduce() or self.shift() + + def shift(self): + """ + Move a token from the beginning of the remaining text to the + end of the stack. If there are no more tokens in the + remaining text, then do nothing. + + :return: True if the shift operation was successful. + :rtype: bool + """ + if len(self._remaining_text) == 0: + return False + self._history.append((self._stack[:], self._remaining_text[:])) + self._shift(self._stack, self._remaining_text) + return True + + def reduce(self, production=None): + """ + Use ``production`` to combine the rightmost stack elements into + a single Tree. If ``production`` does not match the + rightmost stack elements, then do nothing. + + :return: The production used to reduce the stack, if a + reduction was performed. If no reduction was performed, + return None. + + :rtype: Production or None + """ + self._history.append((self._stack[:], self._remaining_text[:])) + return_val = self._reduce(self._stack, self._remaining_text, production) + + if not return_val: + self._history.pop() + return return_val + + def undo(self): + """ + Return the parser to its state before the most recent + shift or reduce operation. Calling ``undo`` repeatedly return + the parser to successively earlier states. If no shift or + reduce operations have been performed, ``undo`` will make no + changes. + + :return: true if an operation was successfully undone. + :rtype: bool + """ + if len(self._history) == 0: + return False + (self._stack, self._remaining_text) = self._history.pop() + return True + + def reducible_productions(self): + """ + :return: A list of the productions for which reductions are + available for the current parser state. 
+ :rtype: list(Production) + """ + productions = [] + for production in self._grammar.productions(): + rhslen = len(production.rhs()) + if self._match_rhs(production.rhs(), self._stack[-rhslen:]): + productions.append(production) + return productions + + def parses(self): + """ + :return: An iterator of the parses that have been found by this + parser so far. + :rtype: iter(Tree) + """ + if ( + len(self._remaining_text) == 0 + and len(self._stack) == 1 + and self._stack[0].label() == self._grammar.start().symbol() + ): + yield self._stack[0] + + # copied from nltk.parser + + def set_grammar(self, grammar): + """ + Change the grammar used to parse texts. + + :param grammar: The new grammar. + :type grammar: CFG + """ + self._grammar = grammar + + +##////////////////////////////////////////////////////// +## Demonstration Code +##////////////////////////////////////////////////////// + + +def demo(): + """ + A demonstration of the shift-reduce parser. + """ + + from nltk import parse, CFG + + grammar = CFG.fromstring( + """ + S -> NP VP + NP -> Det N | Det N PP + VP -> V NP | V NP PP + PP -> P NP + NP -> 'I' + N -> 'man' | 'park' | 'telescope' | 'dog' + Det -> 'the' | 'a' + P -> 'in' | 'with' + V -> 'saw' + """ + ) + + sent = 'I saw a man in the park'.split() + + parser = parse.ShiftReduceParser(grammar, trace=2) + for p in parser.parse(sent): + print(p) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/stanford.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/stanford.py new file mode 100644 index 0000000..8943df1 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/stanford.py @@ -0,0 +1,492 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Interface to the Stanford Parser +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Xu +# +# URL: +# For license information, see LICENSE.TXT + +from __future__ import unicode_literals + +import tempfile +import os +import warnings +from unittest import skip +from subprocess import PIPE + +from six import text_type + +from nltk.internals import ( + find_jar_iter, + config_java, + java, + _java_options, + find_jars_within_path, +) + +from nltk.parse.api import ParserI +from nltk.parse.dependencygraph import DependencyGraph +from nltk.tree import Tree + +_stanford_url = 'https://nlp.stanford.edu/software/lex-parser.shtml' + + +class GenericStanfordParser(ParserI): + """Interface to the Stanford Parser""" + + _MODEL_JAR_PATTERN = r'stanford-parser-(\d+)(\.(\d+))+-models\.jar' + _JAR = r'stanford-parser\.jar' + _MAIN_CLASS = 'edu.stanford.nlp.parser.lexparser.LexicalizedParser' + + _USE_STDIN = False + _DOUBLE_SPACED_OUTPUT = False + + def __init__( + self, + path_to_jar=None, + path_to_models_jar=None, + model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz', + encoding='utf8', + verbose=False, + java_options='-mx4g', + corenlp_options='', + ): + + # find the most recent code and model jar + stanford_jar = max( + find_jar_iter( + self._JAR, + path_to_jar, + env_vars=('STANFORD_PARSER', 'STANFORD_CORENLP'), + searchpath=(), + url=_stanford_url, + verbose=verbose, + is_regex=True, + ), + key=lambda model_path: os.path.dirname(model_path), + ) + + model_jar = max( + find_jar_iter( + self._MODEL_JAR_PATTERN, + path_to_models_jar, + env_vars=('STANFORD_MODELS', 'STANFORD_CORENLP'), + searchpath=(), + url=_stanford_url, + verbose=verbose, + is_regex=True, + ), + key=lambda model_path: os.path.dirname(model_path), + ) + + # self._classpath = (stanford_jar, 
model_jar) + + # Adding logging jar files to classpath + stanford_dir = os.path.split(stanford_jar)[0] + self._classpath = tuple([model_jar] + find_jars_within_path(stanford_dir)) + + self.model_path = model_path + self._encoding = encoding + self.corenlp_options = corenlp_options + self.java_options = java_options + + def _parse_trees_output(self, output_): + res = [] + cur_lines = [] + cur_trees = [] + blank = False + for line in output_.splitlines(False): + if line == '': + if blank: + res.append(iter(cur_trees)) + cur_trees = [] + blank = False + elif self._DOUBLE_SPACED_OUTPUT: + cur_trees.append(self._make_tree('\n'.join(cur_lines))) + cur_lines = [] + blank = True + else: + res.append(iter([self._make_tree('\n'.join(cur_lines))])) + cur_lines = [] + else: + cur_lines.append(line) + blank = False + return iter(res) + + def parse_sents(self, sentences, verbose=False): + """ + Use StanfordParser to parse multiple sentences. Takes multiple sentences as a + list where each sentence is a list of words. + Each sentence will be automatically tagged with this StanfordParser instance's + tagger. + If whitespaces exists inside a token, then the token will be treated as + separate tokens. + + :param sentences: Input sentences to parse + :type sentences: list(list(str)) + :rtype: iter(iter(Tree)) + """ + cmd = [ + self._MAIN_CLASS, + '-model', + self.model_path, + '-sentences', + 'newline', + '-outputFormat', + self._OUTPUT_FORMAT, + '-tokenized', + '-escaper', + 'edu.stanford.nlp.process.PTBEscapingProcessor', + ] + return self._parse_trees_output( + self._execute( + cmd, '\n'.join(' '.join(sentence) for sentence in sentences), verbose + ) + ) + + def raw_parse(self, sentence, verbose=False): + """ + Use StanfordParser to parse a sentence. Takes a sentence as a string; + before parsing, it will be automatically tokenized and tagged by + the Stanford Parser. + + :param sentence: Input sentence to parse + :type sentence: str + :rtype: iter(Tree) + """ + return next(self.raw_parse_sents([sentence], verbose)) + + def raw_parse_sents(self, sentences, verbose=False): + """ + Use StanfordParser to parse multiple sentences. Takes multiple sentences as a + list of strings. + Each sentence will be automatically tokenized and tagged by the Stanford Parser. + + :param sentences: Input sentences to parse + :type sentences: list(str) + :rtype: iter(iter(Tree)) + """ + cmd = [ + self._MAIN_CLASS, + '-model', + self.model_path, + '-sentences', + 'newline', + '-outputFormat', + self._OUTPUT_FORMAT, + ] + return self._parse_trees_output( + self._execute(cmd, '\n'.join(sentences), verbose) + ) + + def tagged_parse(self, sentence, verbose=False): + """ + Use StanfordParser to parse a sentence. Takes a sentence as a list of + (word, tag) tuples; the sentence must have already been tokenized and + tagged. + + :param sentence: Input sentence to parse + :type sentence: list(tuple(str, str)) + :rtype: iter(Tree) + """ + return next(self.tagged_parse_sents([sentence], verbose)) + + def tagged_parse_sents(self, sentences, verbose=False): + """ + Use StanfordParser to parse multiple sentences. Takes multiple sentences + where each sentence is a list of (word, tag) tuples. + The sentences must have already been tokenized and tagged. 
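+
+        As the deprecation warnings further down in this module indicate,
+        the client classes in ``nltk.parse.corenlp`` are the intended
+        replacement for these wrappers.  A rough sketch of the equivalent
+        call, assuming a CoreNLP server is already listening on
+        http://localhost:9000:
+
+        >>> from nltk.parse.corenlp import CoreNLPParser    # doctest: +SKIP
+        >>> parser = CoreNLPParser(url='http://localhost:9000')    # doctest: +SKIP
+        >>> tree = next(parser.raw_parse('The quick brown fox jumps.'))    # doctest: +SKIP
+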
+ + :param sentences: Input sentences to parse + :type sentences: list(list(tuple(str, str))) + :rtype: iter(iter(Tree)) + """ + tag_separator = '/' + cmd = [ + self._MAIN_CLASS, + '-model', + self.model_path, + '-sentences', + 'newline', + '-outputFormat', + self._OUTPUT_FORMAT, + '-tokenized', + '-tagSeparator', + tag_separator, + '-tokenizerFactory', + 'edu.stanford.nlp.process.WhitespaceTokenizer', + '-tokenizerMethod', + 'newCoreLabelTokenizerFactory', + ] + # We don't need to escape slashes as "splitting is done on the last instance of the character in the token" + return self._parse_trees_output( + self._execute( + cmd, + '\n'.join( + ' '.join(tag_separator.join(tagged) for tagged in sentence) + for sentence in sentences + ), + verbose, + ) + ) + + def _execute(self, cmd, input_, verbose=False): + encoding = self._encoding + cmd.extend(['-encoding', encoding]) + if self.corenlp_options: + cmd.append(self.corenlp_options) + + default_options = ' '.join(_java_options) + + # Configure java. + config_java(options=self.java_options, verbose=verbose) + + # Windows is incompatible with NamedTemporaryFile() without passing in delete=False. + with tempfile.NamedTemporaryFile(mode='wb', delete=False) as input_file: + # Write the actual sentences to the temporary input file + if isinstance(input_, text_type) and encoding: + input_ = input_.encode(encoding) + input_file.write(input_) + input_file.flush() + + # Run the tagger and get the output. + if self._USE_STDIN: + input_file.seek(0) + stdout, stderr = java( + cmd, + classpath=self._classpath, + stdin=input_file, + stdout=PIPE, + stderr=PIPE, + ) + else: + cmd.append(input_file.name) + stdout, stderr = java( + cmd, classpath=self._classpath, stdout=PIPE, stderr=PIPE + ) + + stdout = stdout.replace(b'\xc2\xa0', b' ') + stdout = stdout.replace(b'\x00\xa0', b' ') + stdout = stdout.decode(encoding) + + os.unlink(input_file.name) + + # Return java configurations to their default values. + config_java(options=default_options, verbose=False) + + return stdout + + +class StanfordParser(GenericStanfordParser): + """ + >>> parser=StanfordParser( + ... model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz" + ... ) + + >>> list(parser.raw_parse("the quick brown fox jumps over the lazy dog")) # doctest: +NORMALIZE_WHITESPACE + [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), + Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), + Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])])] + + >>> sum([list(dep_graphs) for dep_graphs in parser.raw_parse_sents(( + ... "the quick brown fox jumps over the lazy dog", + ... "the quick grey wolf jumps over the lazy fox" + ... ))], []) # doctest: +NORMALIZE_WHITESPACE + [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), + Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), + Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])]), Tree('ROOT', [Tree('NP', + [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['grey']), Tree('NN', ['wolf'])]), Tree('NP', + [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']), + Tree('JJ', ['lazy']), Tree('NN', ['fox'])])])])])])] + + >>> sum([list(dep_graphs) for dep_graphs in parser.parse_sents(( + ... "I 'm a dog".split(), + ... 
"This is my friends ' cat ( the tabby )".split(), + ... ))], []) # doctest: +NORMALIZE_WHITESPACE + [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]), + Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP', + [Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']), + Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', [Tree('', []), + Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', [])])])])])])])] + + >>> sum([list(dep_graphs) for dep_graphs in parser.tagged_parse_sents(( + ... ( + ... ("The", "DT"), + ... ("quick", "JJ"), + ... ("brown", "JJ"), + ... ("fox", "NN"), + ... ("jumped", "VBD"), + ... ("over", "IN"), + ... ("the", "DT"), + ... ("lazy", "JJ"), + ... ("dog", "NN"), + ... (".", "."), + ... ), + ... ))],[]) # doctest: +NORMALIZE_WHITESPACE + [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), + Tree('NN', ['fox'])]), Tree('VP', [Tree('VBD', ['jumped']), Tree('PP', [Tree('IN', ['over']), Tree('NP', + [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])] + """ + + _OUTPUT_FORMAT = 'penn' + + def __init__(self, *args, **kwargs): + warnings.warn( + "The StanfordParser will be deprecated\n" + "Please use \033[91mnltk.parse.corenlp.CoreNLPParser\033[0m instead.", + DeprecationWarning, + stacklevel=2, + ) + + super(StanfordParser, self).__init__(*args, **kwargs) + + def _make_tree(self, result): + return Tree.fromstring(result) + + +class StanfordDependencyParser(GenericStanfordParser): + + """ + >>> dep_parser=StanfordDependencyParser( + ... model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz" + ... ) + + >>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE + [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])])] + + >>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE + [[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')), + ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')), + ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')), + ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]] + + >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents(( + ... "The quick brown fox jumps over the lazy dog.", + ... "The quick grey wolf jumps over the lazy fox." + ... ))], []) # doctest: +NORMALIZE_WHITESPACE + [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])]), + Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']), Tree('fox', ['over', 'the', 'lazy'])])] + + >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents(( + ... "I 'm a dog".split(), + ... "This is my friends ' cat ( the tabby )".split(), + ... ))], []) # doctest: +NORMALIZE_WHITESPACE + [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends', ['my', "'"]), Tree('tabby', ['the'])])] + + >>> sum([[list(parse.triples()) for parse in dep_graphs] for dep_graphs in dep_parser.tagged_parse_sents(( + ... ( + ... ("The", "DT"), + ... 
("quick", "JJ"), + ... ("brown", "JJ"), + ... ("fox", "NN"), + ... ("jumped", "VBD"), + ... ("over", "IN"), + ... ("the", "DT"), + ... ("lazy", "JJ"), + ... ("dog", "NN"), + ... (".", "."), + ... ), + ... ))],[]) # doctest: +NORMALIZE_WHITESPACE + [[((u'jumped', u'VBD'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')), + ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')), + ((u'jumped', u'VBD'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')), + ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]] + + """ + + _OUTPUT_FORMAT = 'conll2007' + + def __init__(self, *args, **kwargs): + warnings.warn( + "The StanfordDependencyParser will be deprecated\n" + "Please use \033[91mnltk.parse.corenlp.CoreNLPDependencyParser\033[0m instead.", + DeprecationWarning, + stacklevel=2, + ) + + super(StanfordDependencyParser, self).__init__(*args, **kwargs) + + def _make_tree(self, result): + return DependencyGraph(result, top_relation_label='root') + + +class StanfordNeuralDependencyParser(GenericStanfordParser): + ''' + >>> from nltk.parse.stanford import StanfordNeuralDependencyParser + >>> dep_parser=StanfordNeuralDependencyParser(java_options='-mx4g') + + >>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE + [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy']), '.'])] + + >>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE + [[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', + (u'The', u'DT')), ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), + u'amod', (u'brown', u'JJ')), ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')), + ((u'dog', u'NN'), u'case', (u'over', u'IN')), ((u'dog', u'NN'), u'det', + (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ')), ((u'jumps', u'VBZ'), + u'punct', (u'.', u'.'))]] + + >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents(( + ... "The quick brown fox jumps over the lazy dog.", + ... "The quick grey wolf jumps over the lazy fox." + ... ))], []) # doctest: +NORMALIZE_WHITESPACE + [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', + 'the', 'lazy']), '.']), Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']), + Tree('fox', ['over', 'the', 'lazy']), '.'])] + + >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents(( + ... "I 'm a dog".split(), + ... "This is my friends ' cat ( the tabby )".split(), + ... 
))], []) # doctest: +NORMALIZE_WHITESPACE + [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends', + ['my', "'"]), Tree('tabby', ['-LRB-', 'the', '-RRB-'])])] + ''' + + _OUTPUT_FORMAT = 'conll' + _MAIN_CLASS = 'edu.stanford.nlp.pipeline.StanfordCoreNLP' + _JAR = r'stanford-corenlp-(\d+)(\.(\d+))+\.jar' + _MODEL_JAR_PATTERN = r'stanford-corenlp-(\d+)(\.(\d+))+-models\.jar' + _USE_STDIN = True + _DOUBLE_SPACED_OUTPUT = True + + def __init__(self, *args, **kwargs): + warnings.warn( + "The StanfordNeuralDependencyParser will be deprecated\n" + "Please use \033[91mnltk.parse.corenlp.CoreNLPDependencyParser\033[0m instead.", + DeprecationWarning, + stacklevel=2, + ) + + super(StanfordNeuralDependencyParser, self).__init__(*args, **kwargs) + self.corenlp_options += '-annotators tokenize,ssplit,pos,depparse' + + def tagged_parse_sents(self, sentences, verbose=False): + ''' + Currently unimplemented because the neural dependency parser (and + the StanfordCoreNLP pipeline class) doesn't support passing in pre- + tagged tokens. + ''' + raise NotImplementedError( + 'tagged_parse[_sents] is not supported by ' + 'StanfordNeuralDependencyParser; use ' + 'parse[_sents] or raw_parse[_sents] instead.' + ) + + def _make_tree(self, result): + return DependencyGraph(result, top_relation_label='ROOT') + + +@skip("doctests from nltk.parse.stanford are skipped because it's deprecated") +def setup_module(module): + from nose import SkipTest + + try: + StanfordParser( + model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz' + ) + StanfordNeuralDependencyParser() + except LookupError: + raise SkipTest( + 'doctests from nltk.parse.stanford are skipped because one of the stanford parser or CoreNLP jars doesn\'t exist' + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/transitionparser.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/transitionparser.py new file mode 100644 index 0000000..a60bc37 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/transitionparser.py @@ -0,0 +1,792 @@ +# Natural Language Toolkit: Arc-Standard and Arc-eager Transition Based Parsers +# +# Author: Long Duong +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import tempfile +import pickle + +from os import remove +from copy import deepcopy +from operator import itemgetter + +try: + from numpy import array + from scipy import sparse + from sklearn.datasets import load_svmlight_file + from sklearn import svm +except ImportError: + pass + +from nltk.parse import ParserI, DependencyGraph, DependencyEvaluator + + +class Configuration(object): + """ + Class for holding configuration which is the partial analysis of the input sentence. + The transition based parser aims at finding set of operators that transfer the initial + configuration to the terminal configuration. + + The configuration includes: + - Stack: for storing partially proceeded words + - Buffer: for storing remaining input words + - Set of arcs: for storing partially built dependency tree + + This class also provides a method to represent a configuration as list of features. + """ + + def __init__(self, dep_graph): + """ + :param dep_graph: the representation of an input in the form of dependency graph. + :type dep_graph: DependencyGraph where the dependencies are not specified. 
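+
+        A small sketch of how a configuration is seeded (the CoNLL-style
+        sentence below is abbreviated from the one in ``demo()``); node 0 is
+        the artificial root, so it starts on the stack while every real
+        token starts in the buffer:
+
+        >>> from nltk.parse import DependencyGraph
+        >>> from nltk.parse.transitionparser import Configuration
+        >>> gold_sent = DependencyGraph('''
+        ... Economic JJ 2 ATT
+        ... news NN 3 SBJ
+        ... has VBD 0 ROOT
+        ... . . 3 PU
+        ... ''')
+        >>> print(Configuration(gold_sent))
+        Stack : [0] Buffer : [1, 2, 3, 4] Arcs : []
+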
+ """ + # dep_graph.nodes contain list of token for a sentence + self.stack = [0] # The root element + self.buffer = list(range(1, len(dep_graph.nodes))) # The rest is in the buffer + self.arcs = [] # empty set of arc + self._tokens = dep_graph.nodes + self._max_address = len(self.buffer) + + def __str__(self): + return ( + 'Stack : ' + + str(self.stack) + + ' Buffer : ' + + str(self.buffer) + + ' Arcs : ' + + str(self.arcs) + ) + + def _check_informative(self, feat, flag=False): + """ + Check whether a feature is informative + The flag control whether "_" is informative or not + """ + if feat is None: + return False + if feat == '': + return False + if flag is False: + if feat == '_': + return False + return True + + def extract_features(self): + """ + Extract the set of features for the current configuration. Implement standard features as describe in + Table 3.2 (page 31) in Dependency Parsing book by Sandra Kubler, Ryan McDonal, Joakim Nivre. + Please note that these features are very basic. + :return: list(str) + """ + result = [] + # Todo : can come up with more complicated features set for better + # performance. + if len(self.stack) > 0: + # Stack 0 + stack_idx0 = self.stack[len(self.stack) - 1] + token = self._tokens[stack_idx0] + if self._check_informative(token['word'], True): + result.append('STK_0_FORM_' + token['word']) + if 'lemma' in token and self._check_informative(token['lemma']): + result.append('STK_0_LEMMA_' + token['lemma']) + if self._check_informative(token['tag']): + result.append('STK_0_POS_' + token['tag']) + if 'feats' in token and self._check_informative(token['feats']): + feats = token['feats'].split("|") + for feat in feats: + result.append('STK_0_FEATS_' + feat) + # Stack 1 + if len(self.stack) > 1: + stack_idx1 = self.stack[len(self.stack) - 2] + token = self._tokens[stack_idx1] + if self._check_informative(token['tag']): + result.append('STK_1_POS_' + token['tag']) + + # Left most, right most dependency of stack[0] + left_most = 1000000 + right_most = -1 + dep_left_most = '' + dep_right_most = '' + for (wi, r, wj) in self.arcs: + if wi == stack_idx0: + if (wj > wi) and (wj > right_most): + right_most = wj + dep_right_most = r + if (wj < wi) and (wj < left_most): + left_most = wj + dep_left_most = r + if self._check_informative(dep_left_most): + result.append('STK_0_LDEP_' + dep_left_most) + if self._check_informative(dep_right_most): + result.append('STK_0_RDEP_' + dep_right_most) + + # Check Buffered 0 + if len(self.buffer) > 0: + # Buffer 0 + buffer_idx0 = self.buffer[0] + token = self._tokens[buffer_idx0] + if self._check_informative(token['word'], True): + result.append('BUF_0_FORM_' + token['word']) + if 'lemma' in token and self._check_informative(token['lemma']): + result.append('BUF_0_LEMMA_' + token['lemma']) + if self._check_informative(token['tag']): + result.append('BUF_0_POS_' + token['tag']) + if 'feats' in token and self._check_informative(token['feats']): + feats = token['feats'].split("|") + for feat in feats: + result.append('BUF_0_FEATS_' + feat) + # Buffer 1 + if len(self.buffer) > 1: + buffer_idx1 = self.buffer[1] + token = self._tokens[buffer_idx1] + if self._check_informative(token['word'], True): + result.append('BUF_1_FORM_' + token['word']) + if self._check_informative(token['tag']): + result.append('BUF_1_POS_' + token['tag']) + if len(self.buffer) > 2: + buffer_idx2 = self.buffer[2] + token = self._tokens[buffer_idx2] + if self._check_informative(token['tag']): + result.append('BUF_2_POS_' + token['tag']) + if len(self.buffer) 
> 3: + buffer_idx3 = self.buffer[3] + token = self._tokens[buffer_idx3] + if self._check_informative(token['tag']): + result.append('BUF_3_POS_' + token['tag']) + # Left most, right most dependency of stack[0] + left_most = 1000000 + right_most = -1 + dep_left_most = '' + dep_right_most = '' + for (wi, r, wj) in self.arcs: + if wi == buffer_idx0: + if (wj > wi) and (wj > right_most): + right_most = wj + dep_right_most = r + if (wj < wi) and (wj < left_most): + left_most = wj + dep_left_most = r + if self._check_informative(dep_left_most): + result.append('BUF_0_LDEP_' + dep_left_most) + if self._check_informative(dep_right_most): + result.append('BUF_0_RDEP_' + dep_right_most) + + return result + + +class Transition(object): + """ + This class defines a set of transition which is applied to a configuration to get another configuration + Note that for different parsing algorithm, the transition is different. + """ + + # Define set of transitions + LEFT_ARC = 'LEFTARC' + RIGHT_ARC = 'RIGHTARC' + SHIFT = 'SHIFT' + REDUCE = 'REDUCE' + + def __init__(self, alg_option): + """ + :param alg_option: the algorithm option of this parser. Currently support `arc-standard` and `arc-eager` algorithm + :type alg_option: str + """ + self._algo = alg_option + if alg_option not in [ + TransitionParser.ARC_STANDARD, + TransitionParser.ARC_EAGER, + ]: + raise ValueError( + " Currently we only support %s and %s " + % (TransitionParser.ARC_STANDARD, TransitionParser.ARC_EAGER) + ) + + def left_arc(self, conf, relation): + """ + Note that the algorithm for left-arc is quite similar except for precondition for both arc-standard and arc-eager + :param configuration: is the current configuration + :return : A new configuration or -1 if the pre-condition is not satisfied + """ + if (len(conf.buffer) <= 0) or (len(conf.stack) <= 0): + return -1 + if conf.buffer[0] == 0: + # here is the Root element + return -1 + + idx_wi = conf.stack[len(conf.stack) - 1] + + flag = True + if self._algo == TransitionParser.ARC_EAGER: + for (idx_parent, r, idx_child) in conf.arcs: + if idx_child == idx_wi: + flag = False + + if flag: + conf.stack.pop() + idx_wj = conf.buffer[0] + conf.arcs.append((idx_wj, relation, idx_wi)) + else: + return -1 + + def right_arc(self, conf, relation): + """ + Note that the algorithm for right-arc is DIFFERENT for arc-standard and arc-eager + :param configuration: is the current configuration + :return : A new configuration or -1 if the pre-condition is not satisfied + """ + if (len(conf.buffer) <= 0) or (len(conf.stack) <= 0): + return -1 + if self._algo == TransitionParser.ARC_STANDARD: + idx_wi = conf.stack.pop() + idx_wj = conf.buffer[0] + conf.buffer[0] = idx_wi + conf.arcs.append((idx_wi, relation, idx_wj)) + else: # arc-eager + idx_wi = conf.stack[len(conf.stack) - 1] + idx_wj = conf.buffer.pop(0) + conf.stack.append(idx_wj) + conf.arcs.append((idx_wi, relation, idx_wj)) + + def reduce(self, conf): + """ + Note that the algorithm for reduce is only available for arc-eager + :param configuration: is the current configuration + :return : A new configuration or -1 if the pre-condition is not satisfied + """ + + if self._algo != TransitionParser.ARC_EAGER: + return -1 + if len(conf.stack) <= 0: + return -1 + + idx_wi = conf.stack[len(conf.stack) - 1] + flag = False + for (idx_parent, r, idx_child) in conf.arcs: + if idx_child == idx_wi: + flag = True + if flag: + conf.stack.pop() # reduce it + else: + return -1 + + def shift(self, conf): + """ + Note that the algorithm for shift is the SAME for 
arc-standard and arc-eager + :param configuration: is the current configuration + :return : A new configuration or -1 if the pre-condition is not satisfied + """ + if len(conf.buffer) <= 0: + return -1 + idx_wi = conf.buffer.pop(0) + conf.stack.append(idx_wi) + + +class TransitionParser(ParserI): + + """ + Class for transition based parser. Implement 2 algorithms which are "arc-standard" and "arc-eager" + """ + + ARC_STANDARD = 'arc-standard' + ARC_EAGER = 'arc-eager' + + def __init__(self, algorithm): + """ + :param algorithm: the algorithm option of this parser. Currently support `arc-standard` and `arc-eager` algorithm + :type algorithm: str + """ + if not (algorithm in [self.ARC_STANDARD, self.ARC_EAGER]): + raise ValueError( + " Currently we only support %s and %s " + % (self.ARC_STANDARD, self.ARC_EAGER) + ) + self._algorithm = algorithm + + self._dictionary = {} + self._transition = {} + self._match_transition = {} + + def _get_dep_relation(self, idx_parent, idx_child, depgraph): + p_node = depgraph.nodes[idx_parent] + c_node = depgraph.nodes[idx_child] + + if c_node['word'] is None: + return None # Root word + + if c_node['head'] == p_node['address']: + return c_node['rel'] + else: + return None + + def _convert_to_binary_features(self, features): + """ + :param features: list of feature string which is needed to convert to binary features + :type features: list(str) + :return : string of binary features in libsvm format which is 'featureID:value' pairs + """ + unsorted_result = [] + for feature in features: + self._dictionary.setdefault(feature, len(self._dictionary)) + unsorted_result.append(self._dictionary[feature]) + + # Default value of each feature is 1.0 + return ' '.join( + str(featureID) + ':1.0' for featureID in sorted(unsorted_result) + ) + + def _is_projective(self, depgraph): + arc_list = [] + for key in depgraph.nodes: + node = depgraph.nodes[key] + + if 'head' in node: + childIdx = node['address'] + parentIdx = node['head'] + if parentIdx is not None: + arc_list.append((parentIdx, childIdx)) + + for (parentIdx, childIdx) in arc_list: + # Ensure that childIdx < parentIdx + if childIdx > parentIdx: + temp = childIdx + childIdx = parentIdx + parentIdx = temp + for k in range(childIdx + 1, parentIdx): + for m in range(len(depgraph.nodes)): + if (m < childIdx) or (m > parentIdx): + if (k, m) in arc_list: + return False + if (m, k) in arc_list: + return False + return True + + def _write_to_file(self, key, binary_features, input_file): + """ + write the binary features to input file and update the transition dictionary + """ + self._transition.setdefault(key, len(self._transition) + 1) + self._match_transition[self._transition[key]] = key + + input_str = str(self._transition[key]) + ' ' + binary_features + '\n' + input_file.write(input_str.encode('utf-8')) + + def _create_training_examples_arc_std(self, depgraphs, input_file): + """ + Create the training example in the libsvm format and write it to the input_file. + Reference : Page 32, Chapter 3. 
Dependency Parsing by Sandra Kubler, Ryan McDonal and Joakim Nivre (2009) + """ + operation = Transition(self.ARC_STANDARD) + count_proj = 0 + training_seq = [] + + for depgraph in depgraphs: + if not self._is_projective(depgraph): + continue + + count_proj += 1 + conf = Configuration(depgraph) + while len(conf.buffer) > 0: + b0 = conf.buffer[0] + features = conf.extract_features() + binary_features = self._convert_to_binary_features(features) + + if len(conf.stack) > 0: + s0 = conf.stack[len(conf.stack) - 1] + # Left-arc operation + rel = self._get_dep_relation(b0, s0, depgraph) + if rel is not None: + key = Transition.LEFT_ARC + ':' + rel + self._write_to_file(key, binary_features, input_file) + operation.left_arc(conf, rel) + training_seq.append(key) + continue + + # Right-arc operation + rel = self._get_dep_relation(s0, b0, depgraph) + if rel is not None: + precondition = True + # Get the max-index of buffer + maxID = conf._max_address + + for w in range(maxID + 1): + if w != b0: + relw = self._get_dep_relation(b0, w, depgraph) + if relw is not None: + if (b0, relw, w) not in conf.arcs: + precondition = False + + if precondition: + key = Transition.RIGHT_ARC + ':' + rel + self._write_to_file(key, binary_features, input_file) + operation.right_arc(conf, rel) + training_seq.append(key) + continue + + # Shift operation as the default + key = Transition.SHIFT + self._write_to_file(key, binary_features, input_file) + operation.shift(conf) + training_seq.append(key) + + print(" Number of training examples : " + str(len(depgraphs))) + print(" Number of valid (projective) examples : " + str(count_proj)) + return training_seq + + def _create_training_examples_arc_eager(self, depgraphs, input_file): + """ + Create the training example in the libsvm format and write it to the input_file. 
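+
+        Each line written by ``_write_to_file`` is the numeric id of a
+        transition followed by the sorted ``featureID:value`` pairs returned
+        by ``_convert_to_binary_features``.  For example, on a freshly
+        created parser (so feature ids are assigned from zero), with
+        made-up feature strings:
+
+        >>> from nltk.parse.transitionparser import TransitionParser
+        >>> tp = TransitionParser('arc-eager')
+        >>> tp._convert_to_binary_features(['STK_0_POS_VBD', 'BUF_0_FORM_effect', 'BUF_0_POS_NN'])
+        '0:1.0 1:1.0 2:1.0'
+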
+ Reference : 'A Dynamic Oracle for Arc-Eager Dependency Parsing' by Joav Goldberg and Joakim Nivre + """ + operation = Transition(self.ARC_EAGER) + countProj = 0 + training_seq = [] + + for depgraph in depgraphs: + if not self._is_projective(depgraph): + continue + + countProj += 1 + conf = Configuration(depgraph) + while len(conf.buffer) > 0: + b0 = conf.buffer[0] + features = conf.extract_features() + binary_features = self._convert_to_binary_features(features) + + if len(conf.stack) > 0: + s0 = conf.stack[len(conf.stack) - 1] + # Left-arc operation + rel = self._get_dep_relation(b0, s0, depgraph) + if rel is not None: + key = Transition.LEFT_ARC + ':' + rel + self._write_to_file(key, binary_features, input_file) + operation.left_arc(conf, rel) + training_seq.append(key) + continue + + # Right-arc operation + rel = self._get_dep_relation(s0, b0, depgraph) + if rel is not None: + key = Transition.RIGHT_ARC + ':' + rel + self._write_to_file(key, binary_features, input_file) + operation.right_arc(conf, rel) + training_seq.append(key) + continue + + # reduce operation + flag = False + for k in range(s0): + if self._get_dep_relation(k, b0, depgraph) is not None: + flag = True + if self._get_dep_relation(b0, k, depgraph) is not None: + flag = True + if flag: + key = Transition.REDUCE + self._write_to_file(key, binary_features, input_file) + operation.reduce(conf) + training_seq.append(key) + continue + + # Shift operation as the default + key = Transition.SHIFT + self._write_to_file(key, binary_features, input_file) + operation.shift(conf) + training_seq.append(key) + + print(" Number of training examples : " + str(len(depgraphs))) + print(" Number of valid (projective) examples : " + str(countProj)) + return training_seq + + def train(self, depgraphs, modelfile, verbose=True): + """ + :param depgraphs : list of DependencyGraph as the training data + :type depgraphs : DependencyGraph + :param modelfile : file name to save the trained model + :type modelfile : str + """ + + try: + input_file = tempfile.NamedTemporaryFile( + prefix='transition_parse.train', dir=tempfile.gettempdir(), delete=False + ) + + if self._algorithm == self.ARC_STANDARD: + self._create_training_examples_arc_std(depgraphs, input_file) + else: + self._create_training_examples_arc_eager(depgraphs, input_file) + + input_file.close() + # Using the temporary file to train the libsvm classifier + x_train, y_train = load_svmlight_file(input_file.name) + # The parameter is set according to the paper: + # Algorithms for Deterministic Incremental Dependency Parsing by Joakim Nivre + # Todo : because of probability = True => very slow due to + # cross-validation. 
Need to improve the speed here + model = svm.SVC( + kernel='poly', + degree=2, + coef0=0, + gamma=0.2, + C=0.5, + verbose=verbose, + probability=True, + ) + + model.fit(x_train, y_train) + # Save the model to file name (as pickle) + pickle.dump(model, open(modelfile, 'wb')) + finally: + remove(input_file.name) + + def parse(self, depgraphs, modelFile): + """ + :param depgraphs: the list of test sentence, each sentence is represented as a dependency graph where the 'head' information is dummy + :type depgraphs: list(DependencyGraph) + :param modelfile: the model file + :type modelfile: str + :return: list (DependencyGraph) with the 'head' and 'rel' information + """ + result = [] + # First load the model + model = pickle.load(open(modelFile, 'rb')) + operation = Transition(self._algorithm) + + for depgraph in depgraphs: + conf = Configuration(depgraph) + while len(conf.buffer) > 0: + features = conf.extract_features() + col = [] + row = [] + data = [] + for feature in features: + if feature in self._dictionary: + col.append(self._dictionary[feature]) + row.append(0) + data.append(1.0) + np_col = array(sorted(col)) # NB : index must be sorted + np_row = array(row) + np_data = array(data) + + x_test = sparse.csr_matrix( + (np_data, (np_row, np_col)), shape=(1, len(self._dictionary)) + ) + + # It's best to use decision function as follow BUT it's not supported yet for sparse SVM + # Using decision funcion to build the votes array + # dec_func = model.decision_function(x_test)[0] + # votes = {} + # k = 0 + # for i in range(len(model.classes_)): + # for j in range(i+1, len(model.classes_)): + # #if dec_func[k] > 0: + # votes.setdefault(i,0) + # votes[i] +=1 + # else: + # votes.setdefault(j,0) + # votes[j] +=1 + # k +=1 + # Sort votes according to the values + # sorted_votes = sorted(votes.items(), key=itemgetter(1), reverse=True) + + # We will use predict_proba instead of decision_function + prob_dict = {} + pred_prob = model.predict_proba(x_test)[0] + for i in range(len(pred_prob)): + prob_dict[i] = pred_prob[i] + sorted_Prob = sorted(prob_dict.items(), key=itemgetter(1), reverse=True) + + # Note that SHIFT is always a valid operation + for (y_pred_idx, confidence) in sorted_Prob: + # y_pred = model.predict(x_test)[0] + # From the prediction match to the operation + y_pred = model.classes_[y_pred_idx] + + if y_pred in self._match_transition: + strTransition = self._match_transition[y_pred] + baseTransition = strTransition.split(":")[0] + + if baseTransition == Transition.LEFT_ARC: + if ( + operation.left_arc(conf, strTransition.split(":")[1]) + != -1 + ): + break + elif baseTransition == Transition.RIGHT_ARC: + if ( + operation.right_arc(conf, strTransition.split(":")[1]) + != -1 + ): + break + elif baseTransition == Transition.REDUCE: + if operation.reduce(conf) != -1: + break + elif baseTransition == Transition.SHIFT: + if operation.shift(conf) != -1: + break + else: + raise ValueError( + "The predicted transition is not recognized, expected errors" + ) + + # Finish with operations build the dependency graph from Conf.arcs + + new_depgraph = deepcopy(depgraph) + for key in new_depgraph.nodes: + node = new_depgraph.nodes[key] + node['rel'] = '' + # With the default, all the token depend on the Root + node['head'] = 0 + for (head, rel, child) in conf.arcs: + c_node = new_depgraph.nodes[child] + c_node['head'] = head + c_node['rel'] = rel + result.append(new_depgraph) + + return result + + +def demo(): + """ + >>> from nltk.parse import DependencyGraph, DependencyEvaluator + >>> from 
nltk.parse.transitionparser import TransitionParser, Configuration, Transition + >>> gold_sent = DependencyGraph(\""" + ... Economic JJ 2 ATT + ... news NN 3 SBJ + ... has VBD 0 ROOT + ... little JJ 5 ATT + ... effect NN 3 OBJ + ... on IN 5 ATT + ... financial JJ 8 ATT + ... markets NNS 6 PC + ... . . 3 PU + ... \""") + + >>> conf = Configuration(gold_sent) + + ###################### Check the Initial Feature ######################## + + >>> print(', '.join(conf.extract_features())) + STK_0_POS_TOP, BUF_0_FORM_Economic, BUF_0_LEMMA_Economic, BUF_0_POS_JJ, BUF_1_FORM_news, BUF_1_POS_NN, BUF_2_POS_VBD, BUF_3_POS_JJ + + ###################### Check The Transition ####################### + Check the Initialized Configuration + >>> print(conf) + Stack : [0] Buffer : [1, 2, 3, 4, 5, 6, 7, 8, 9] Arcs : [] + + A. Do some transition checks for ARC-STANDARD + + >>> operation = Transition('arc-standard') + >>> operation.shift(conf) + >>> operation.left_arc(conf, "ATT") + >>> operation.shift(conf) + >>> operation.left_arc(conf,"SBJ") + >>> operation.shift(conf) + >>> operation.shift(conf) + >>> operation.left_arc(conf, "ATT") + >>> operation.shift(conf) + >>> operation.shift(conf) + >>> operation.shift(conf) + >>> operation.left_arc(conf, "ATT") + + Middle Configuration and Features Check + >>> print(conf) + Stack : [0, 3, 5, 6] Buffer : [8, 9] Arcs : [(2, 'ATT', 1), (3, 'SBJ', 2), (5, 'ATT', 4), (8, 'ATT', 7)] + + >>> print(', '.join(conf.extract_features())) + STK_0_FORM_on, STK_0_LEMMA_on, STK_0_POS_IN, STK_1_POS_NN, BUF_0_FORM_markets, BUF_0_LEMMA_markets, BUF_0_POS_NNS, BUF_1_FORM_., BUF_1_POS_., BUF_0_LDEP_ATT + + >>> operation.right_arc(conf, "PC") + >>> operation.right_arc(conf, "ATT") + >>> operation.right_arc(conf, "OBJ") + >>> operation.shift(conf) + >>> operation.right_arc(conf, "PU") + >>> operation.right_arc(conf, "ROOT") + >>> operation.shift(conf) + + Terminated Configuration Check + >>> print(conf) + Stack : [0] Buffer : [] Arcs : [(2, 'ATT', 1), (3, 'SBJ', 2), (5, 'ATT', 4), (8, 'ATT', 7), (6, 'PC', 8), (5, 'ATT', 6), (3, 'OBJ', 5), (3, 'PU', 9), (0, 'ROOT', 3)] + + + B. Do some transition checks for ARC-EAGER + + >>> conf = Configuration(gold_sent) + >>> operation = Transition('arc-eager') + >>> operation.shift(conf) + >>> operation.left_arc(conf,'ATT') + >>> operation.shift(conf) + >>> operation.left_arc(conf,'SBJ') + >>> operation.right_arc(conf,'ROOT') + >>> operation.shift(conf) + >>> operation.left_arc(conf,'ATT') + >>> operation.right_arc(conf,'OBJ') + >>> operation.right_arc(conf,'ATT') + >>> operation.shift(conf) + >>> operation.left_arc(conf,'ATT') + >>> operation.right_arc(conf,'PC') + >>> operation.reduce(conf) + >>> operation.reduce(conf) + >>> operation.reduce(conf) + >>> operation.right_arc(conf,'PU') + >>> print(conf) + Stack : [0, 3, 9] Buffer : [] Arcs : [(2, 'ATT', 1), (3, 'SBJ', 2), (0, 'ROOT', 3), (5, 'ATT', 4), (3, 'OBJ', 5), (5, 'ATT', 6), (8, 'ATT', 7), (6, 'PC', 8), (3, 'PU', 9)] + + ###################### Check The Training Function ####################### + + A. 
Check the ARC-STANDARD training + >>> import tempfile + >>> import os + >>> input_file = tempfile.NamedTemporaryFile(prefix='transition_parse.train', dir=tempfile.gettempdir(), delete=False) + + >>> parser_std = TransitionParser('arc-standard') + >>> print(', '.join(parser_std._create_training_examples_arc_std([gold_sent], input_file))) + Number of training examples : 1 + Number of valid (projective) examples : 1 + SHIFT, LEFTARC:ATT, SHIFT, LEFTARC:SBJ, SHIFT, SHIFT, LEFTARC:ATT, SHIFT, SHIFT, SHIFT, LEFTARC:ATT, RIGHTARC:PC, RIGHTARC:ATT, RIGHTARC:OBJ, SHIFT, RIGHTARC:PU, RIGHTARC:ROOT, SHIFT + + >>> parser_std.train([gold_sent],'temp.arcstd.model', verbose=False) + Number of training examples : 1 + Number of valid (projective) examples : 1 + >>> remove(input_file.name) + + B. Check the ARC-EAGER training + + >>> input_file = tempfile.NamedTemporaryFile(prefix='transition_parse.train', dir=tempfile.gettempdir(),delete=False) + >>> parser_eager = TransitionParser('arc-eager') + >>> print(', '.join(parser_eager._create_training_examples_arc_eager([gold_sent], input_file))) + Number of training examples : 1 + Number of valid (projective) examples : 1 + SHIFT, LEFTARC:ATT, SHIFT, LEFTARC:SBJ, RIGHTARC:ROOT, SHIFT, LEFTARC:ATT, RIGHTARC:OBJ, RIGHTARC:ATT, SHIFT, LEFTARC:ATT, RIGHTARC:PC, REDUCE, REDUCE, REDUCE, RIGHTARC:PU + + >>> parser_eager.train([gold_sent],'temp.arceager.model', verbose=False) + Number of training examples : 1 + Number of valid (projective) examples : 1 + + >>> remove(input_file.name) + + ###################### Check The Parsing Function ######################## + + A. Check the ARC-STANDARD parser + + >>> result = parser_std.parse([gold_sent], 'temp.arcstd.model') + >>> de = DependencyEvaluator(result, [gold_sent]) + >>> de.eval() >= (0, 0) + True + + B. Check the ARC-EAGER parser + >>> result = parser_eager.parse([gold_sent], 'temp.arceager.model') + >>> de = DependencyEvaluator(result, [gold_sent]) + >>> de.eval() >= (0, 0) + True + + Remove test temporary files + >>> remove('temp.arceager.model') + >>> remove('temp.arcstd.model') + + Note that result is very poor because of only one training example. + """ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/util.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/util.py new file mode 100644 index 0000000..6ebe146 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/util.py @@ -0,0 +1,236 @@ +# Natural Language Toolkit: Parser Utility Functions +# +# Author: Ewan Klein +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT + + +""" +Utility functions for parsers. +""" +from __future__ import print_function + +from nltk.grammar import CFG, FeatureGrammar, PCFG +from nltk.data import load + +from nltk.parse.chart import Chart, ChartParser +from nltk.parse.pchart import InsideChartParser +from nltk.parse.featurechart import FeatureChart, FeatureChartParser + + +def load_parser( + grammar_url, trace=0, parser=None, chart_class=None, beam_size=0, **load_args +): + """ + Load a grammar from a file, and build a parser based on that grammar. + The parser depends on the grammar format, and might also depend + on properties of the grammar itself. + + The following grammar formats are currently supported: + - ``'cfg'`` (CFGs: ``CFG``) + - ``'pcfg'`` (probabilistic CFGs: ``PCFG``) + - ``'fcfg'`` (feature-based CFGs: ``FeatureGrammar``) + + :type grammar_url: str + :param grammar_url: A URL specifying where the grammar is located. 
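+
+    For example, a feature grammar can be loaded and used as sketched
+    below; the grammar file and sentence follow the NLTK book examples and
+    assume the ``book_grammars`` portion of the NLTK data package is
+    installed:
+
+    >>> cp = load_parser('grammars/book_grammars/feat0.fcfg', trace=0)  # doctest: +SKIP
+    >>> trees = list(cp.parse('Kim likes children'.split()))  # doctest: +SKIP
+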
+ The default protocol is ``"nltk:"``, which searches for the file + in the the NLTK data package. + :type trace: int + :param trace: The level of tracing that should be used when + parsing a text. ``0`` will generate no tracing output; + and higher numbers will produce more verbose tracing output. + :param parser: The class used for parsing; should be ``ChartParser`` + or a subclass. + If None, the class depends on the grammar format. + :param chart_class: The class used for storing the chart; + should be ``Chart`` or a subclass. + Only used for CFGs and feature CFGs. + If None, the chart class depends on the grammar format. + :type beam_size: int + :param beam_size: The maximum length for the parser's edge queue. + Only used for probabilistic CFGs. + :param load_args: Keyword parameters used when loading the grammar. + See ``data.load`` for more information. + """ + grammar = load(grammar_url, **load_args) + if not isinstance(grammar, CFG): + raise ValueError("The grammar must be a CFG, " "or a subclass thereof.") + if isinstance(grammar, PCFG): + if parser is None: + parser = InsideChartParser + return parser(grammar, trace=trace, beam_size=beam_size) + + elif isinstance(grammar, FeatureGrammar): + if parser is None: + parser = FeatureChartParser + if chart_class is None: + chart_class = FeatureChart + return parser(grammar, trace=trace, chart_class=chart_class) + + else: # Plain CFG. + if parser is None: + parser = ChartParser + if chart_class is None: + chart_class = Chart + return parser(grammar, trace=trace, chart_class=chart_class) + + +def taggedsent_to_conll(sentence): + """ + A module to convert a single POS tagged sentence into CONLL format. + + >>> from nltk import word_tokenize, pos_tag + >>> text = "This is a foobar sentence." + >>> for line in taggedsent_to_conll(pos_tag(word_tokenize(text))): + ... print(line, end="") + 1 This _ DT DT _ 0 a _ _ + 2 is _ VBZ VBZ _ 0 a _ _ + 3 a _ DT DT _ 0 a _ _ + 4 foobar _ JJ JJ _ 0 a _ _ + 5 sentence _ NN NN _ 0 a _ _ + 6 . _ . . _ 0 a _ _ + + :param sentence: A single input sentence to parse + :type sentence: list(tuple(str, str)) + :rtype: iter(str) + :return: a generator yielding a single sentence in CONLL format. + """ + for (i, (word, tag)) in enumerate(sentence, start=1): + input_str = [str(i), word, '_', tag, tag, '_', '0', 'a', '_', '_'] + input_str = "\t".join(input_str) + "\n" + yield input_str + + +def taggedsents_to_conll(sentences): + """ + A module to convert the a POS tagged document stream + (i.e. list of list of tuples, a list of sentences) and yield lines + in CONLL format. This module yields one line per word and two newlines + for end of sentence. + + >>> from nltk import word_tokenize, sent_tokenize, pos_tag + >>> text = "This is a foobar sentence. Is that right?" + >>> sentences = [pos_tag(word_tokenize(sent)) for sent in sent_tokenize(text)] + >>> for line in taggedsents_to_conll(sentences): + ... if line: + ... print(line, end="") + 1 This _ DT DT _ 0 a _ _ + 2 is _ VBZ VBZ _ 0 a _ _ + 3 a _ DT DT _ 0 a _ _ + 4 foobar _ JJ JJ _ 0 a _ _ + 5 sentence _ NN NN _ 0 a _ _ + 6 . _ . . _ 0 a _ _ + + + 1 Is _ VBZ VBZ _ 0 a _ _ + 2 that _ IN IN _ 0 a _ _ + 3 right _ NN NN _ 0 a _ _ + 4 ? _ . . _ 0 a _ _ + + + + :param sentences: Input sentences to parse + :type sentence: list(list(tuple(str, str))) + :rtype: iter(str) + :return: a generator yielding sentences in CONLL format. 
+ """ + for sentence in sentences: + for input_str in taggedsent_to_conll(sentence): + yield input_str + yield '\n\n' + + +###################################################################### +# { Test Suites +###################################################################### + + +class TestGrammar(object): + """ + Unit tests for CFG. + """ + + def __init__(self, grammar, suite, accept=None, reject=None): + self.test_grammar = grammar + + self.cp = load_parser(grammar, trace=0) + self.suite = suite + self._accept = accept + self._reject = reject + + def run(self, show_trees=False): + """ + Sentences in the test suite are divided into two classes: + - grammatical (``accept``) and + - ungrammatical (``reject``). + If a sentence should parse accordng to the grammar, the value of + ``trees`` will be a non-empty list. If a sentence should be rejected + according to the grammar, then the value of ``trees`` will be None. + """ + for test in self.suite: + print(test['doc'] + ":", end=' ') + for key in ['accept', 'reject']: + for sent in test[key]: + tokens = sent.split() + trees = list(self.cp.parse(tokens)) + if show_trees and trees: + print() + print(sent) + for tree in trees: + print(tree) + if key == 'accept': + if trees == []: + raise ValueError("Sentence '%s' failed to parse'" % sent) + else: + accepted = True + else: + if trees: + raise ValueError("Sentence '%s' received a parse'" % sent) + else: + rejected = True + if accepted and rejected: + print("All tests passed!") + + +def extract_test_sentences(string, comment_chars="#%;", encoding=None): + """ + Parses a string with one test sentence per line. + Lines can optionally begin with: + - a bool, saying if the sentence is grammatical or not, or + - an int, giving the number of parse trees is should have, + The result information is followed by a colon, and then the sentence. + Empty lines and lines beginning with a comment char are ignored. + + :return: a list of tuple of sentences and expected results, + where a sentence is a list of str, + and a result is None, or bool, or int + + :param comment_chars: ``str`` of possible comment characters. 
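+
+    A short, made-up illustration of the accepted line formats:
+
+    >>> from nltk.parse.util import extract_test_sentences
+    >>> extract_test_sentences('''
+    ... # comment lines and blank lines are ignored
+    ... True: the dog barks
+    ... 2: the dog barks
+    ... ''')
+    [(['the', 'dog', 'barks'], True), (['the', 'dog', 'barks'], 2)]
+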
+ :param encoding: the encoding of the string, if it is binary + """ + if encoding is not None: + string = string.decode(encoding) + sentences = [] + for sentence in string.split('\n'): + if sentence == '' or sentence[0] in comment_chars: + continue + split_info = sentence.split(':', 1) + result = None + if len(split_info) == 2: + if split_info[0] in ['True', 'true', 'False', 'false']: + result = split_info[0] in ['True', 'true'] + sentence = split_info[1] + else: + result = int(split_info[0]) + sentence = split_info[1] + tokens = sentence.split() + if tokens == []: + continue + sentences += [(tokens, result)] + return sentences + + +# nose thinks it is a test +extract_test_sentences.__test__ = False diff --git a/venv.bak/lib/python3.7/site-packages/nltk/parse/viterbi.py b/venv.bak/lib/python3.7/site-packages/nltk/parse/viterbi.py new file mode 100644 index 0000000..7f6217e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/parse/viterbi.py @@ -0,0 +1,415 @@ +# Natural Language Toolkit: Viterbi Probabilistic Parser +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals + +from functools import reduce +from nltk.tree import Tree, ProbabilisticTree +from nltk.compat import python_2_unicode_compatible + +from nltk.parse.api import ParserI + +##////////////////////////////////////////////////////// +## Viterbi PCFG Parser +##////////////////////////////////////////////////////// + + +@python_2_unicode_compatible +class ViterbiParser(ParserI): + """ + A bottom-up ``PCFG`` parser that uses dynamic programming to find + the single most likely parse for a text. The ``ViterbiParser`` parser + parses texts by filling in a "most likely constituent table". + This table records the most probable tree representation for any + given span and node value. In particular, it has an entry for + every start index, end index, and node value, recording the most + likely subtree that spans from the start index to the end index, + and has the given node value. + + The ``ViterbiParser`` parser fills in this table incrementally. It starts + by filling in all entries for constituents that span one element + of text (i.e., entries where the end index is one greater than the + start index). After it has filled in all table entries for + constituents that span one element of text, it fills in the + entries for constitutants that span two elements of text. It + continues filling in the entries for constituents spanning larger + and larger portions of the text, until the entire table has been + filled. Finally, it returns the table entry for a constituent + spanning the entire text, whose node value is the grammar's start + symbol. + + In order to find the most likely constituent with a given span and + node value, the ``ViterbiParser`` parser considers all productions that + could produce that node value. For each production, it finds all + children that collectively cover the span and have the node values + specified by the production's right hand side. If the probability + of the tree formed by applying the production to the children is + greater than the probability of the current entry in the table, + then the table is updated with this new tree. + + A pseudo-code description of the algorithm used by + ``ViterbiParser`` is: + + | Create an empty most likely constituent table, *MLC*. 
+ | For width in 1...len(text): + | For start in 1...len(text)-width: + | For prod in grammar.productions: + | For each sequence of subtrees [t[1], t[2], ..., t[n]] in MLC, + | where t[i].label()==prod.rhs[i], + | and the sequence covers [start:start+width]: + | old_p = MLC[start, start+width, prod.lhs] + | new_p = P(t[1])P(t[1])...P(t[n])P(prod) + | if new_p > old_p: + | new_tree = Tree(prod.lhs, t[1], t[2], ..., t[n]) + | MLC[start, start+width, prod.lhs] = new_tree + | Return MLC[0, len(text), start_symbol] + + :type _grammar: PCFG + :ivar _grammar: The grammar used to parse sentences. + :type _trace: int + :ivar _trace: The level of tracing output that should be generated + when parsing a text. + """ + + def __init__(self, grammar, trace=0): + """ + Create a new ``ViterbiParser`` parser, that uses ``grammar`` to + parse texts. + + :type grammar: PCFG + :param grammar: The grammar used to parse texts. + :type trace: int + :param trace: The level of tracing that should be used when + parsing a text. ``0`` will generate no tracing output; + and higher numbers will produce more verbose tracing + output. + """ + self._grammar = grammar + self._trace = trace + + def grammar(self): + return self._grammar + + def trace(self, trace=2): + """ + Set the level of tracing output that should be generated when + parsing a text. + + :type trace: int + :param trace: The trace level. A trace level of ``0`` will + generate no tracing output; and higher trace levels will + produce more verbose tracing output. + :rtype: None + """ + self._trace = trace + + def parse(self, tokens): + # Inherit docs from ParserI + + tokens = list(tokens) + self._grammar.check_coverage(tokens) + + # The most likely constituent table. This table specifies the + # most likely constituent for a given span and type. + # Constituents can be either Trees or tokens. For Trees, + # the "type" is the Nonterminal for the tree's root node + # value. For Tokens, the "type" is the token's type. + # The table is stored as a dictionary, since it is sparse. + constituents = {} + + # Initialize the constituents dictionary with the words from + # the text. + if self._trace: + print(('Inserting tokens into the most likely' + ' constituents table...')) + for index in range(len(tokens)): + token = tokens[index] + constituents[index, index + 1, token] = token + if self._trace > 1: + self._trace_lexical_insertion(token, index, len(tokens)) + + # Consider each span of length 1, 2, ..., n; and add any trees + # that might cover that span to the constituents dictionary. + for length in range(1, len(tokens) + 1): + if self._trace: + print( + ( + 'Finding the most likely constituents' + + ' spanning %d text elements...' % length + ) + ) + for start in range(len(tokens) - length + 1): + span = (start, start + length) + self._add_constituents_spanning(span, constituents, tokens) + + # Return the tree that spans the entire text & have the right cat + tree = constituents.get((0, len(tokens), self._grammar.start())) + if tree is not None: + yield tree + + def _add_constituents_spanning(self, span, constituents, tokens): + """ + Find any constituents that might cover ``span``, and add them + to the most likely constituents table. + + :rtype: None + :type span: tuple(int, int) + :param span: The section of the text for which we are + trying to find possible constituents. 
The span is + specified as a pair of integers, where the first integer + is the index of the first token that should be included in + the constituent; and the second integer is the index of + the first token that should not be included in the + constituent. I.e., the constituent should cover + ``text[span[0]:span[1]]``, where ``text`` is the text + that we are parsing. + + :type constituents: dict(tuple(int,int,Nonterminal) -> ProbabilisticToken or ProbabilisticTree) + :param constituents: The most likely constituents table. This + table records the most probable tree representation for + any given span and node value. In particular, + ``constituents(s,e,nv)`` is the most likely + ``ProbabilisticTree`` that covers ``text[s:e]`` + and has a node value ``nv.symbol()``, where ``text`` + is the text that we are parsing. When + ``_add_constituents_spanning`` is called, ``constituents`` + should contain all possible constituents that are shorter + than ``span``. + + :type tokens: list of tokens + :param tokens: The text we are parsing. This is only used for + trace output. + """ + # Since some of the grammar productions may be unary, we need to + # repeatedly try all of the productions until none of them add any + # new constituents. + changed = True + while changed: + changed = False + + # Find all ways instantiations of the grammar productions that + # cover the span. + instantiations = self._find_instantiations(span, constituents) + + # For each production instantiation, add a new + # ProbabilisticTree whose probability is the product + # of the childrens' probabilities and the production's + # probability. + for (production, children) in instantiations: + subtrees = [c for c in children if isinstance(c, Tree)] + p = reduce(lambda pr, t: pr * t.prob(), subtrees, production.prob()) + node = production.lhs().symbol() + tree = ProbabilisticTree(node, children, prob=p) + + # If it's new a constituent, then add it to the + # constituents dictionary. + c = constituents.get((span[0], span[1], production.lhs())) + if self._trace > 1: + if c is None or c != tree: + if c is None or c.prob() < tree.prob(): + print(' Insert:', end=' ') + else: + print(' Discard:', end=' ') + self._trace_production(production, p, span, len(tokens)) + if c is None or c.prob() < tree.prob(): + constituents[span[0], span[1], production.lhs()] = tree + changed = True + + def _find_instantiations(self, span, constituents): + """ + :return: a list of the production instantiations that cover a + given span of the text. A "production instantiation" is + a tuple containing a production and a list of children, + where the production's right hand side matches the list of + children; and the children cover ``span``. :rtype: list + of ``pair`` of ``Production``, (list of + (``ProbabilisticTree`` or token. + + :type span: tuple(int, int) + :param span: The section of the text for which we are + trying to find production instantiations. The span is + specified as a pair of integers, where the first integer + is the index of the first token that should be covered by + the production instantiation; and the second integer is + the index of the first token that should not be covered by + the production instantiation. + :type constituents: dict(tuple(int,int,Nonterminal) -> ProbabilisticToken or ProbabilisticTree) + :param constituents: The most likely constituents table. This + table records the most probable tree representation for + any given span and node value. See the module + documentation for more information. 
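+
+        A minimal end-to-end sketch of the parser that this helper serves
+        (grammar and sentence are the ones used in ``demo()`` below):
+
+        >>> from nltk.grammar import toy_pcfg1
+        >>> from nltk.parse import ViterbiParser
+        >>> parser = ViterbiParser(toy_pcfg1)
+        >>> trees = list(parser.parse('I saw the man with my telescope'.split()))
+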
+ """ + rv = [] + for production in self._grammar.productions(): + childlists = self._match_rhs(production.rhs(), span, constituents) + + for childlist in childlists: + rv.append((production, childlist)) + return rv + + def _match_rhs(self, rhs, span, constituents): + """ + :return: a set of all the lists of children that cover ``span`` + and that match ``rhs``. + :rtype: list(list(ProbabilisticTree or token) + + :type rhs: list(Nonterminal or any) + :param rhs: The list specifying what kinds of children need to + cover ``span``. Each nonterminal in ``rhs`` specifies + that the corresponding child should be a tree whose node + value is that nonterminal's symbol. Each terminal in ``rhs`` + specifies that the corresponding child should be a token + whose type is that terminal. + :type span: tuple(int, int) + :param span: The section of the text for which we are + trying to find child lists. The span is specified as a + pair of integers, where the first integer is the index of + the first token that should be covered by the child list; + and the second integer is the index of the first token + that should not be covered by the child list. + :type constituents: dict(tuple(int,int,Nonterminal) -> ProbabilisticToken or ProbabilisticTree) + :param constituents: The most likely constituents table. This + table records the most probable tree representation for + any given span and node value. See the module + documentation for more information. + """ + (start, end) = span + + # Base case + if start >= end and rhs == (): + return [[]] + if start >= end or rhs == (): + return [] + + # Find everything that matches the 1st symbol of the RHS + childlists = [] + for split in range(start, end + 1): + l = constituents.get((start, split, rhs[0])) + if l is not None: + rights = self._match_rhs(rhs[1:], (split, end), constituents) + childlists += [[l] + r for r in rights] + + return childlists + + def _trace_production(self, production, p, span, width): + """ + Print trace output indicating that a given production has been + applied at a given location. + + :param production: The production that has been applied + :type production: Production + :param p: The probability of the tree produced by the production. + :type p: float + :param span: The span of the production + :type span: tuple + :rtype: None + """ + + str = '|' + '.' * span[0] + str += '=' * (span[1] - span[0]) + str += '.' * (width - span[1]) + '| ' + str += '%s' % production + if self._trace > 2: + str = '%-40s %12.10f ' % (str, p) + + print(str) + + def _trace_lexical_insertion(self, token, index, width): + str = ' Insert: |' + '.' * index + '=' + '.' * (width - index - 1) + '| ' + str += '%s' % (token,) + print(str) + + def __repr__(self): + return '' % self._grammar + + +##////////////////////////////////////////////////////// +## Test Code +##////////////////////////////////////////////////////// + + +def demo(): + """ + A demonstration of the probabilistic parsers. The user is + prompted to select which demo to run, and how many parses should + be found; and then each parser is run on the same demo, and a + summary of the results are displayed. + """ + import sys, time + from nltk import tokenize + from nltk.parse import ViterbiParser + from nltk.grammar import toy_pcfg1, toy_pcfg2 + + # Define two demos. Each demo has a sentence and a grammar. + demos = [ + ('I saw the man with my telescope', toy_pcfg1), + ('the boy saw Jack with Bob under the table with a telescope', toy_pcfg2), + ] + + # Ask the user which demo they want to use. 
+ print() + for i in range(len(demos)): + print('%3s: %s' % (i + 1, demos[i][0])) + print(' %r' % demos[i][1]) + print() + print('Which demo (%d-%d)? ' % (1, len(demos)), end=' ') + try: + snum = int(sys.stdin.readline().strip()) - 1 + sent, grammar = demos[snum] + except: + print('Bad sentence number') + return + + # Tokenize the sentence. + tokens = sent.split() + + parser = ViterbiParser(grammar) + all_parses = {} + + print('\nsent: %s\nparser: %s\ngrammar: %s' % (sent, parser, grammar)) + parser.trace(3) + t = time.time() + parses = parser.parse_all(tokens) + time = time.time() - t + average = ( + reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses) if parses else 0 + ) + num_parses = len(parses) + for p in parses: + all_parses[p.freeze()] = 1 + + # Print some summary statistics + print() + print('Time (secs) # Parses Average P(parse)') + print('-----------------------------------------') + print('%11.4f%11d%19.14f' % (time, num_parses, average)) + parses = all_parses.keys() + if parses: + p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses) + else: + p = 0 + print('------------------------------------------') + print('%11s%11d%19.14f' % ('n/a', len(parses), p)) + + # Ask the user if we should draw the parses. + print() + print('Draw parses (y/n)? ', end=' ') + if sys.stdin.readline().strip().lower().startswith('y'): + from nltk.draw.tree import draw_trees + + print(' please wait...') + draw_trees(*parses) + + # Ask the user if we should print the parses. + print() + print('Print parses (y/n)? ', end=' ') + if sys.stdin.readline().strip().lower().startswith('y'): + for parse in parses: + print(parse) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/probability.py b/venv.bak/lib/python3.7/site-packages/nltk/probability.py new file mode 100644 index 0000000..9dc110c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/probability.py @@ -0,0 +1,2539 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Probability and Statistics +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird (additions) +# Trevor Cohn (additions) +# Peter Ljunglöf (additions) +# Liang Dong (additions) +# Geoffrey Sampson (additions) +# Ilia Kurenkov (additions) +# +# URL: +# For license information, see LICENSE.TXT + +""" +Classes for representing and processing probabilistic information. + +The ``FreqDist`` class is used to encode "frequency distributions", +which count the number of times that each outcome of an experiment +occurs. + +The ``ProbDistI`` class defines a standard interface for "probability +distributions", which encode the probability of each outcome for an +experiment. There are two types of probability distribution: + + - "derived probability distributions" are created from frequency + distributions. They attempt to model the probability distribution + that generated the frequency distribution. + - "analytic probability distributions" are created directly from + parameters (such as variance). + +The ``ConditionalFreqDist`` class and ``ConditionalProbDistI`` interface +are used to encode conditional distributions. Conditional probability +distributions can be derived or analytic; but currently the only +implementation of the ``ConditionalProbDistI`` interface is +``ConditionalProbDist``, a derived distribution. 
+ +""" +from __future__ import print_function, unicode_literals, division + +import math +import random +import warnings +import array +from collections import defaultdict, Counter +from functools import reduce +from abc import ABCMeta, abstractmethod + +from six import itervalues, text_type, add_metaclass + +from nltk import compat +from nltk.internals import raise_unorderable_types + +_NINF = float('-1e300') + +##////////////////////////////////////////////////////// +## Frequency Distributions +##////////////////////////////////////////////////////// + + +@compat.python_2_unicode_compatible +class FreqDist(Counter): + """ + A frequency distribution for the outcomes of an experiment. A + frequency distribution records the number of times each outcome of + an experiment has occurred. For example, a frequency distribution + could be used to record the frequency of each word type in a + document. Formally, a frequency distribution can be defined as a + function mapping from each sample to the number of times that + sample occurred as an outcome. + + Frequency distributions are generally constructed by running a + number of experiments, and incrementing the count for a sample + every time it is an outcome of an experiment. For example, the + following code will produce a frequency distribution that encodes + how often each word occurs in a text: + + >>> from nltk.tokenize import word_tokenize + >>> from nltk.probability import FreqDist + >>> sent = 'This is an example sentence' + >>> fdist = FreqDist() + >>> for word in word_tokenize(sent): + ... fdist[word.lower()] += 1 + + An equivalent way to do this is with the initializer: + + >>> fdist = FreqDist(word.lower() for word in word_tokenize(sent)) + + """ + + def __init__(self, samples=None): + """ + Construct a new frequency distribution. If ``samples`` is + given, then the frequency distribution will be initialized + with the count of each object in ``samples``; otherwise, it + will be initialized to be empty. + + In particular, ``FreqDist()`` returns an empty frequency + distribution; and ``FreqDist(samples)`` first creates an empty + frequency distribution, and then calls ``update`` with the + list ``samples``. + + :param samples: The samples to initialize the frequency + distribution with. + :type samples: Sequence + """ + Counter.__init__(self, samples) + + # Cached number of samples in this FreqDist + self._N = None + + def N(self): + """ + Return the total number of sample outcomes that have been + recorded by this FreqDist. For the number of unique + sample values (or bins) with counts greater than zero, use + ``FreqDist.B()``. 
+ + :rtype: int + """ + if self._N is None: + # Not already cached, or cache has been invalidated + self._N = sum(self.values()) + return self._N + + def __setitem__(self, key, val): + """ + Override ``Counter.__setitem__()`` to invalidate the cached N + """ + self._N = None + super(FreqDist, self).__setitem__(key, val) + + def __delitem__(self, key): + """ + Override ``Counter.__delitem__()`` to invalidate the cached N + """ + self._N = None + super(FreqDist, self).__delitem__(key) + + def update(self, *args, **kwargs): + """ + Override ``Counter.update()`` to invalidate the cached N + """ + self._N = None + super(FreqDist, self).update(*args, **kwargs) + + def setdefault(self, key, val): + """ + Override ``Counter.setdefault()`` to invalidate the cached N + """ + self._N = None + super(FreqDist, self).setdefault(key, val) + + def B(self): + """ + Return the total number of sample values (or "bins") that + have counts greater than zero. For the total + number of sample outcomes recorded, use ``FreqDist.N()``. + (FreqDist.B() is the same as len(FreqDist).) + + :rtype: int + """ + return len(self) + + def hapaxes(self): + """ + Return a list of all samples that occur once (hapax legomena) + + :rtype: list + """ + return [item for item in self if self[item] == 1] + + def Nr(self, r, bins=None): + return self.r_Nr(bins)[r] + + def r_Nr(self, bins=None): + """ + Return the dictionary mapping r to Nr, the number of samples with frequency r, where Nr > 0. + + :type bins: int + :param bins: The number of possible sample outcomes. ``bins`` + is used to calculate Nr(0). In particular, Nr(0) is + ``bins-self.B()``. If ``bins`` is not specified, it + defaults to ``self.B()`` (so Nr(0) will be 0). + :rtype: int + """ + + _r_Nr = defaultdict(int) + for count in self.values(): + _r_Nr[count] += 1 + + # Special case for Nr[0]: + _r_Nr[0] = bins - self.B() if bins is not None else 0 + + return _r_Nr + + def _cumulative_frequencies(self, samples): + """ + Return the cumulative frequencies of the specified samples. + If no samples are specified, all counts are returned, starting + with the largest. + + :param samples: the samples whose frequencies should be returned. + :type samples: any + :rtype: list(float) + """ + cf = 0.0 + for sample in samples: + cf += self[sample] + yield cf + + # slightly odd nomenclature freq() if FreqDist does counts and ProbDist does probs, + # here, freq() does probs + def freq(self, sample): + """ + Return the frequency of a given sample. The frequency of a + sample is defined as the count of that sample divided by the + total number of sample outcomes that have been recorded by + this FreqDist. The count of a sample is defined as the + number of times that sample outcome was recorded by this + FreqDist. Frequencies are always real numbers in the range + [0, 1]. + + :param sample: the sample whose frequency + should be returned. + :type sample: any + :rtype: float + """ + n = self.N() + if n == 0: + return 0 + return self[sample] / n + + def max(self): + """ + Return the sample with the greatest number of outcomes in this + frequency distribution. If two or more samples have the same + number of outcomes, return one of them; which sample is + returned is undefined. If no outcomes have occurred in this + frequency distribution, return None. + + :return: The sample with the maximum number of outcomes in this + frequency distribution. + :rtype: any or None + """ + if len(self) == 0: + raise ValueError( + 'A FreqDist must have at least one sample before max is defined.' 
+ ) + return self.most_common(1)[0][0] + + def plot(self, *args, **kwargs): + """ + Plot samples from the frequency distribution + displaying the most frequent sample first. If an integer + parameter is supplied, stop after this many samples have been + plotted. For a cumulative plot, specify cumulative=True. + (Requires Matplotlib to be installed.) + + :param title: The title for the graph + :type title: str + :param cumulative: A flag to specify whether the plot is cumulative (default = False) + :type title: bool + """ + try: + import matplotlib.pyplot as plt + except ImportError: + raise ValueError( + 'The plot function requires matplotlib to be installed.' + 'See http://matplotlib.org/' + ) + + if len(args) == 0: + args = [len(self)] + samples = [item for item, _ in self.most_common(*args)] + + cumulative = _get_kwarg(kwargs, 'cumulative', False) + percents = _get_kwarg(kwargs, 'percents', False) + if cumulative: + freqs = list(self._cumulative_frequencies(samples)) + ylabel = "Cumulative Counts" + if percents: + freqs = [f / freqs[len(freqs) - 1] * 100 for f in freqs] + ylabel = "Cumulative Percents" + else: + freqs = [self[sample] for sample in samples] + ylabel = "Counts" + # percents = [f * 100 for f in freqs] only in ProbDist? + + ax = plt.gca() + ax.grid(True, color = "silver") + + if "linewidth" not in kwargs: + kwargs["linewidth"] = 2 + if "title" in kwargs: + ax.set_title(kwargs["title"]) + del kwargs["title"] + + ax.plot(freqs, **kwargs) + ax.set_xticks(range(len(samples))) + ax.set_xticklabels([text_type(s) for s in samples], rotation=90) + ax.set_xlabel("Samples") + ax.set_ylabel(ylabel) + + plt.show() + + return ax + + def tabulate(self, *args, **kwargs): + """ + Tabulate the given samples from the frequency distribution (cumulative), + displaying the most frequent sample first. If an integer + parameter is supplied, stop after this many samples have been + plotted. + + :param samples: The samples to plot (default is all samples) + :type samples: list + :param cumulative: A flag to specify whether the freqs are cumulative (default = False) + :type title: bool + """ + if len(args) == 0: + args = [len(self)] + samples = [item for item, _ in self.most_common(*args)] + + cumulative = _get_kwarg(kwargs, 'cumulative', False) + if cumulative: + freqs = list(self._cumulative_frequencies(samples)) + else: + freqs = [self[sample] for sample in samples] + # percents = [f * 100 for f in freqs] only in ProbDist? + + width = max(len("%s" % s) for s in samples) + width = max(width, max(len("%d" % f) for f in freqs)) + + for i in range(len(samples)): + print("%*s" % (width, samples[i]), end=' ') + print() + for i in range(len(samples)): + print("%*d" % (width, freqs[i]), end=' ') + print() + + def copy(self): + """ + Create a copy of this frequency distribution. + + :rtype: FreqDist + """ + return self.__class__(self) + + # Mathematical operatiors + + def __add__(self, other): + """ + Add counts from two counters. + + >>> FreqDist('abbb') + FreqDist('bcc') + FreqDist({'b': 4, 'c': 2, 'a': 1}) + + """ + return self.__class__(super(FreqDist, self).__add__(other)) + + def __sub__(self, other): + """ + Subtract count, but keep only results with positive counts. + + >>> FreqDist('abbbc') - FreqDist('bccd') + FreqDist({'b': 2, 'a': 1}) + + """ + return self.__class__(super(FreqDist, self).__sub__(other)) + + def __or__(self, other): + """ + Union is the maximum of value in either of the input counters. 
+ + >>> FreqDist('abbb') | FreqDist('bcc') + FreqDist({'b': 3, 'c': 2, 'a': 1}) + + """ + return self.__class__(super(FreqDist, self).__or__(other)) + + def __and__(self, other): + """ + Intersection is the minimum of corresponding counts. + + >>> FreqDist('abbb') & FreqDist('bcc') + FreqDist({'b': 1}) + + """ + return self.__class__(super(FreqDist, self).__and__(other)) + + def __le__(self, other): + if not isinstance(other, FreqDist): + raise_unorderable_types("<=", self, other) + return set(self).issubset(other) and all( + self[key] <= other[key] for key in self + ) + + # @total_ordering doesn't work here, since the class inherits from a builtin class + __ge__ = lambda self, other: not self <= other or self == other + __lt__ = lambda self, other: self <= other and not self == other + __gt__ = lambda self, other: not self <= other + + def __repr__(self): + """ + Return a string representation of this FreqDist. + + :rtype: string + """ + return self.pformat() + + def pprint(self, maxlen=10, stream=None): + """ + Print a string representation of this FreqDist to 'stream' + + :param maxlen: The maximum number of items to print + :type maxlen: int + :param stream: The stream to print to. stdout by default + """ + print(self.pformat(maxlen=maxlen), file=stream) + + def pformat(self, maxlen=10): + """ + Return a string representation of this FreqDist. + + :param maxlen: The maximum number of items to display + :type maxlen: int + :rtype: string + """ + items = ['{0!r}: {1!r}'.format(*item) for item in self.most_common(maxlen)] + if len(self) > maxlen: + items.append('...') + return 'FreqDist({{{0}}})'.format(', '.join(items)) + + def __str__(self): + """ + Return a string representation of this FreqDist. + + :rtype: string + """ + return '' % (len(self), self.N()) + + +##////////////////////////////////////////////////////// +## Probability Distributions +##////////////////////////////////////////////////////// + + +@add_metaclass(ABCMeta) +class ProbDistI(object): + """ + A probability distribution for the outcomes of an experiment. A + probability distribution specifies how likely it is that an + experiment will have any given outcome. For example, a + probability distribution could be used to predict the probability + that a token in a document will have a given type. Formally, a + probability distribution can be defined as a function mapping from + samples to nonnegative real numbers, such that the sum of every + number in the function's range is 1.0. A ``ProbDist`` is often + used to model the probability distribution of the experiment used + to generate a frequency distribution. + """ + + SUM_TO_ONE = True + """True if the probabilities of the samples in this probability + distribution will always sum to one.""" + + @abstractmethod + def __init__(self): + """ + Classes inheriting from ProbDistI should implement __init__. + """ + + @abstractmethod + def prob(self, sample): + """ + Return the probability for a given sample. Probabilities + are always real numbers in the range [0, 1]. + + :param sample: The sample whose probability + should be returned. + :type sample: any + :rtype: float + """ + + def logprob(self, sample): + """ + Return the base 2 logarithm of the probability for a given sample. + + :param sample: The sample whose probability + should be returned. 
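A short sketch of the counting API defined by ``FreqDist`` above (the sample sentence is illustrative):

from nltk.probability import FreqDist

fdist = FreqDist(
    w.lower() for w in "the cat sat on the mat with the other cat".split()
)
print(fdist.N())             # 10 sample outcomes in total
print(fdist.B())             # 7 distinct samples (bins)
print(fdist['the'])          # 3
print(fdist.freq('cat'))     # 0.2 = count / N
print(fdist.hapaxes())       # the five samples seen exactly once
print(fdist.most_common(2))  # [('the', 3), ('cat', 2)]
print(dict(fdist.r_Nr()))    # how many samples occur r times: {3: 1, 2: 1, 1: 5, 0: 0}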
+ :type sample: any + :rtype: float + """ + # Default definition, in terms of prob() + p = self.prob(sample) + return math.log(p, 2) if p != 0 else _NINF + + @abstractmethod + def max(self): + """ + Return the sample with the greatest probability. If two or + more samples have the same probability, return one of them; + which sample is returned is undefined. + + :rtype: any + """ + + @abstractmethod + def samples(self): + """ + Return a list of all samples that have nonzero probabilities. + Use ``prob`` to find the probability of each sample. + + :rtype: list + """ + + # cf self.SUM_TO_ONE + def discount(self): + """ + Return the ratio by which counts are discounted on average: c*/c + + :rtype: float + """ + return 0.0 + + # Subclasses should define more efficient implementations of this, + # where possible. + def generate(self): + """ + Return a randomly selected sample from this probability distribution. + The probability of returning each sample ``samp`` is equal to + ``self.prob(samp)``. + """ + p = random.random() + p_init = p + for sample in self.samples(): + p -= self.prob(sample) + if p <= 0: + return sample + # allow for some rounding error: + if p < 0.0001: + return sample + # we *should* never get here + if self.SUM_TO_ONE: + warnings.warn( + "Probability distribution %r sums to %r; generate()" + " is returning an arbitrary sample." % (self, p_init - p) + ) + return random.choice(list(self.samples())) + + +@compat.python_2_unicode_compatible +class UniformProbDist(ProbDistI): + """ + A probability distribution that assigns equal probability to each + sample in a given set; and a zero probability to all other + samples. + """ + + def __init__(self, samples): + """ + Construct a new uniform probability distribution, that assigns + equal probability to each sample in ``samples``. + + :param samples: The samples that should be given uniform + probability. + :type samples: list + :raise ValueError: If ``samples`` is empty. + """ + if len(samples) == 0: + raise ValueError( + 'A Uniform probability distribution must ' + 'have at least one sample.' + ) + self._sampleset = set(samples) + self._prob = 1.0 / len(self._sampleset) + self._samples = list(self._sampleset) + + def prob(self, sample): + return self._prob if sample in self._sampleset else 0 + + def max(self): + return self._samples[0] + + def samples(self): + return self._samples + + def __repr__(self): + return '' % len(self._sampleset) + + +@compat.python_2_unicode_compatible +class RandomProbDist(ProbDistI): + """ + Generates a random probability distribution whereby each sample + will be between 0 and 1 with equal probability (uniform random distribution. + Also called a continuous uniform distribution). + """ + + def __init__(self, samples): + if len(samples) == 0: + raise ValueError( + 'A probability distribution must ' + 'have at least one sample.' + ) + self._probs = self.unirand(samples) + self._samples = list(self._probs.keys()) + + @classmethod + def unirand(cls, samples): + """ + The key function that creates a randomized initial distribution + that still sums to 1. 
Set as a dictionary of prob values so that + it can still be passed to MutableProbDist and called with identical + syntax to UniformProbDist + """ + samples = set(samples) + randrow = [random.random() for i in range(len(samples))] + total = sum(randrow) + for i, x in enumerate(randrow): + randrow[i] = x / total + + total = sum(randrow) + if total != 1: + # this difference, if present, is so small (near NINF) that it + # can be subtracted from any element without risking probs not (0 1) + randrow[-1] -= total - 1 + + return dict((s, randrow[i]) for i, s in enumerate(samples)) + + def max(self): + if not hasattr(self, '_max'): + self._max = max((p, v) for (v, p) in self._probs.items())[1] + return self._max + + def prob(self, sample): + return self._probs.get(sample, 0) + + def samples(self): + return self._samples + + def __repr__(self): + return '' % len(self._probs) + + +@compat.python_2_unicode_compatible +class DictionaryProbDist(ProbDistI): + """ + A probability distribution whose probabilities are directly + specified by a given dictionary. The given dictionary maps + samples to probabilities. + """ + + def __init__(self, prob_dict=None, log=False, normalize=False): + """ + Construct a new probability distribution from the given + dictionary, which maps values to probabilities (or to log + probabilities, if ``log`` is true). If ``normalize`` is + true, then the probability values are scaled by a constant + factor such that they sum to 1. + + If called without arguments, the resulting probability + distribution assigns zero probability to all values. + """ + + self._prob_dict = prob_dict.copy() if prob_dict is not None else {} + self._log = log + + # Normalize the distribution, if requested. + if normalize: + if len(prob_dict) == 0: + raise ValueError( + 'A DictionaryProbDist must have at least one sample ' + + 'before it can be normalized.' + ) + if log: + value_sum = sum_logs(list(self._prob_dict.values())) + if value_sum <= _NINF: + logp = math.log(1.0 / len(prob_dict), 2) + for x in prob_dict: + self._prob_dict[x] = logp + else: + for (x, p) in self._prob_dict.items(): + self._prob_dict[x] -= value_sum + else: + value_sum = sum(self._prob_dict.values()) + if value_sum == 0: + p = 1.0 / len(prob_dict) + for x in prob_dict: + self._prob_dict[x] = p + else: + norm_factor = 1.0 / value_sum + for (x, p) in self._prob_dict.items(): + self._prob_dict[x] *= norm_factor + + def prob(self, sample): + if self._log: + return 2 ** (self._prob_dict[sample]) if sample in self._prob_dict else 0 + else: + return self._prob_dict.get(sample, 0) + + def logprob(self, sample): + if self._log: + return self._prob_dict.get(sample, _NINF) + else: + if sample not in self._prob_dict: + return _NINF + elif self._prob_dict[sample] == 0: + return _NINF + else: + return math.log(self._prob_dict[sample], 2) + + def max(self): + if not hasattr(self, '_max'): + self._max = max((p, v) for (v, p) in self._prob_dict.items())[1] + return self._max + + def samples(self): + return self._prob_dict.keys() + + def __repr__(self): + return '' % len(self._prob_dict) + + +@compat.python_2_unicode_compatible +class MLEProbDist(ProbDistI): + """ + The maximum likelihood estimate for the probability distribution + of the experiment used to generate a frequency distribution. The + "maximum likelihood estimate" approximates the probability of + each sample as the frequency of that sample in the frequency + distribution. 
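The analytic distributions above can be exercised directly; a brief sketch (the sample values are illustrative):

from nltk.probability import UniformProbDist, DictionaryProbDist

updist = UniformProbDist(['a', 'b', 'c', 'd'])
print(updist.prob('a'))         # 0.25
print(updist.prob('z'))         # 0 -- outside the sample set

dpdist = DictionaryProbDist({'heads': 2, 'tails': 6}, normalize=True)
print(dpdist.prob('heads'))     # 0.25 after scaling the values to sum to 1
print(dpdist.logprob('heads'))  # -2.0 (base-2 logarithm)
print(dpdist.generate())        # a random sample drawn according to prob()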
+ """ + + def __init__(self, freqdist, bins=None): + """ + Use the maximum likelihood estimate to create a probability + distribution for the experiment used to generate ``freqdist``. + + :type freqdist: FreqDist + :param freqdist: The frequency distribution that the + probability estimates should be based on. + """ + self._freqdist = freqdist + + def freqdist(self): + """ + Return the frequency distribution that this probability + distribution is based on. + + :rtype: FreqDist + """ + return self._freqdist + + def prob(self, sample): + return self._freqdist.freq(sample) + + def max(self): + return self._freqdist.max() + + def samples(self): + return self._freqdist.keys() + + def __repr__(self): + """ + :rtype: str + :return: A string representation of this ``ProbDist``. + """ + return '' % self._freqdist.N() + + +@compat.python_2_unicode_compatible +class LidstoneProbDist(ProbDistI): + """ + The Lidstone estimate for the probability distribution of the + experiment used to generate a frequency distribution. The + "Lidstone estimate" is parameterized by a real number *gamma*, + which typically ranges from 0 to 1. The Lidstone estimate + approximates the probability of a sample with count *c* from an + experiment with *N* outcomes and *B* bins as + ``c+gamma)/(N+B*gamma)``. This is equivalent to adding + *gamma* to the count for each bin, and taking the maximum + likelihood estimate of the resulting frequency distribution. + """ + + SUM_TO_ONE = False + + def __init__(self, freqdist, gamma, bins=None): + """ + Use the Lidstone estimate to create a probability distribution + for the experiment used to generate ``freqdist``. + + :type freqdist: FreqDist + :param freqdist: The frequency distribution that the + probability estimates should be based on. + :type gamma: float + :param gamma: A real number used to parameterize the + estimate. The Lidstone estimate is equivalent to adding + *gamma* to the count for each bin, and taking the + maximum likelihood estimate of the resulting frequency + distribution. + :type bins: int + :param bins: The number of sample values that can be generated + by the experiment that is described by the probability + distribution. This value must be correctly set for the + probabilities of the sample values to sum to one. If + ``bins`` is not specified, it defaults to ``freqdist.B()``. + """ + if (bins == 0) or (bins is None and freqdist.N() == 0): + name = self.__class__.__name__[:-8] + raise ValueError( + 'A %s probability distribution ' % name + 'must have at least one bin.' + ) + if (bins is not None) and (bins < freqdist.B()): + name = self.__class__.__name__[:-8] + raise ValueError( + '\nThe number of bins in a %s distribution ' % name + + '(%d) must be greater than or equal to\n' % bins + + 'the number of bins in the FreqDist used ' + + 'to create it (%d).' % freqdist.B() + ) + + self._freqdist = freqdist + self._gamma = float(gamma) + self._N = self._freqdist.N() + + if bins is None: + bins = freqdist.B() + self._bins = bins + + self._divisor = self._N + bins * gamma + if self._divisor == 0.0: + # In extreme cases we force the probability to be 0, + # which it will be, since the count will be 0: + self._gamma = 0 + self._divisor = 1 + + def freqdist(self): + """ + Return the frequency distribution that this probability + distribution is based on. 
+ + :rtype: FreqDist + """ + return self._freqdist + + def prob(self, sample): + c = self._freqdist[sample] + return (c + self._gamma) / self._divisor + + def max(self): + # For Lidstone distributions, probability is monotonic with + # frequency, so the most probable sample is the one that + # occurs most frequently. + return self._freqdist.max() + + def samples(self): + return self._freqdist.keys() + + def discount(self): + gb = self._gamma * self._bins + return gb / (self._N + gb) + + def __repr__(self): + """ + Return a string representation of this ``ProbDist``. + + :rtype: str + """ + return '' % self._freqdist.N() + + +@compat.python_2_unicode_compatible +class LaplaceProbDist(LidstoneProbDist): + """ + The Laplace estimate for the probability distribution of the + experiment used to generate a frequency distribution. The + "Laplace estimate" approximates the probability of a sample with + count *c* from an experiment with *N* outcomes and *B* bins as + *(c+1)/(N+B)*. This is equivalent to adding one to the count for + each bin, and taking the maximum likelihood estimate of the + resulting frequency distribution. + """ + + def __init__(self, freqdist, bins=None): + """ + Use the Laplace estimate to create a probability distribution + for the experiment used to generate ``freqdist``. + + :type freqdist: FreqDist + :param freqdist: The frequency distribution that the + probability estimates should be based on. + :type bins: int + :param bins: The number of sample values that can be generated + by the experiment that is described by the probability + distribution. This value must be correctly set for the + probabilities of the sample values to sum to one. If + ``bins`` is not specified, it defaults to ``freqdist.B()``. + """ + LidstoneProbDist.__init__(self, freqdist, 1, bins) + + def __repr__(self): + """ + :rtype: str + :return: A string representation of this ``ProbDist``. + """ + return '' % self._freqdist.N() + + +@compat.python_2_unicode_compatible +class ELEProbDist(LidstoneProbDist): + """ + The expected likelihood estimate for the probability distribution + of the experiment used to generate a frequency distribution. The + "expected likelihood estimate" approximates the probability of a + sample with count *c* from an experiment with *N* outcomes and + *B* bins as *(c+0.5)/(N+B/2)*. This is equivalent to adding 0.5 + to the count for each bin, and taking the maximum likelihood + estimate of the resulting frequency distribution. + """ + + def __init__(self, freqdist, bins=None): + """ + Use the expected likelihood estimate to create a probability + distribution for the experiment used to generate ``freqdist``. + + :type freqdist: FreqDist + :param freqdist: The frequency distribution that the + probability estimates should be based on. + :type bins: int + :param bins: The number of sample values that can be generated + by the experiment that is described by the probability + distribution. This value must be correctly set for the + probabilities of the sample values to sum to one. If + ``bins`` is not specified, it defaults to ``freqdist.B()``. + """ + LidstoneProbDist.__init__(self, freqdist, 0.5, bins) + + def __repr__(self): + """ + Return a string representation of this ``ProbDist``. + + :rtype: str + """ + return '' % self._freqdist.N() + + +@compat.python_2_unicode_compatible +class HeldoutProbDist(ProbDistI): + """ + The heldout estimate for the probability distribution of the + experiment used to generate two frequency distributions. 
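The Lidstone family of estimators above differs only in the value of gamma (Laplace uses 1, ELE uses 0.5), with the smoothed probability (c + gamma) / (N + B*gamma). A small numeric sketch, using an illustrative frequency distribution and ``bins=4``:

from nltk.probability import (FreqDist, MLEProbDist, LidstoneProbDist,
                              LaplaceProbDist, ELEProbDist)

fd = FreqDist('aaabbc')                   # counts: a=3, b=2, c=1; N=6, B=3
mle = MLEProbDist(fd)
lid = LidstoneProbDist(fd, 0.2, bins=4)   # gamma = 0.2
lap = LaplaceProbDist(fd, bins=4)         # gamma = 1
ele = ELEProbDist(fd, bins=4)             # gamma = 0.5

print(mle.prob('a'))   # 3/6 = 0.5; under MLE, unseen samples get probability 0
print(lid.prob('a'))   # (3 + 0.2) / (6 + 4*0.2) ~= 0.4706
print(lid.prob('d'))   # (0 + 0.2) / 6.8         ~= 0.0294, mass reserved for unseen 'd'
print(lap.prob('d'))   # (0 + 1)   / (6 + 4)      = 0.1
print(ele.prob('d'))   # (0 + 0.5) / (6 + 2)      = 0.0625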
These + two frequency distributions are called the "heldout frequency + distribution" and the "base frequency distribution." The + "heldout estimate" uses uses the "heldout frequency + distribution" to predict the probability of each sample, given its + frequency in the "base frequency distribution". + + In particular, the heldout estimate approximates the probability + for a sample that occurs *r* times in the base distribution as + the average frequency in the heldout distribution of all samples + that occur *r* times in the base distribution. + + This average frequency is *Tr[r]/(Nr[r].N)*, where: + + - *Tr[r]* is the total count in the heldout distribution for + all samples that occur *r* times in the base distribution. + - *Nr[r]* is the number of samples that occur *r* times in + the base distribution. + - *N* is the number of outcomes recorded by the heldout + frequency distribution. + + In order to increase the efficiency of the ``prob`` member + function, *Tr[r]/(Nr[r].N)* is precomputed for each value of *r* + when the ``HeldoutProbDist`` is created. + + :type _estimate: list(float) + :ivar _estimate: A list mapping from *r*, the number of + times that a sample occurs in the base distribution, to the + probability estimate for that sample. ``_estimate[r]`` is + calculated by finding the average frequency in the heldout + distribution of all samples that occur *r* times in the base + distribution. In particular, ``_estimate[r]`` = + *Tr[r]/(Nr[r].N)*. + :type _max_r: int + :ivar _max_r: The maximum number of times that any sample occurs + in the base distribution. ``_max_r`` is used to decide how + large ``_estimate`` must be. + """ + + SUM_TO_ONE = False + + def __init__(self, base_fdist, heldout_fdist, bins=None): + """ + Use the heldout estimate to create a probability distribution + for the experiment used to generate ``base_fdist`` and + ``heldout_fdist``. + + :type base_fdist: FreqDist + :param base_fdist: The base frequency distribution. + :type heldout_fdist: FreqDist + :param heldout_fdist: The heldout frequency distribution. + :type bins: int + :param bins: The number of sample values that can be generated + by the experiment that is described by the probability + distribution. This value must be correctly set for the + probabilities of the sample values to sum to one. If + ``bins`` is not specified, it defaults to ``freqdist.B()``. + """ + + self._base_fdist = base_fdist + self._heldout_fdist = heldout_fdist + + # The max number of times any sample occurs in base_fdist. + self._max_r = base_fdist[base_fdist.max()] + + # Calculate Tr, Nr, and N. + Tr = self._calculate_Tr() + r_Nr = base_fdist.r_Nr(bins) + Nr = [r_Nr[r] for r in range(self._max_r + 1)] + N = heldout_fdist.N() + + # Use Tr, Nr, and N to compute the probability estimate for + # each value of r. + self._estimate = self._calculate_estimate(Tr, Nr, N) + + def _calculate_Tr(self): + """ + Return the list *Tr*, where *Tr[r]* is the total count in + ``heldout_fdist`` for all samples that occur *r* + times in ``base_fdist``. + + :rtype: list(float) + """ + Tr = [0.0] * (self._max_r + 1) + for sample in self._heldout_fdist: + r = self._base_fdist[sample] + Tr[r] += self._heldout_fdist[sample] + return Tr + + def _calculate_estimate(self, Tr, Nr, N): + """ + Return the list *estimate*, where *estimate[r]* is the probability + estimate for any sample that occurs *r* times in the base frequency + distribution. In particular, *estimate[r]* is *Tr[r]/(N[r].N)*. 
+ In the special case that *N[r]=0*, *estimate[r]* will never be used; + so we define *estimate[r]=None* for those cases. + + :rtype: list(float) + :type Tr: list(float) + :param Tr: the list *Tr*, where *Tr[r]* is the total count in + the heldout distribution for all samples that occur *r* + times in base distribution. + :type Nr: list(float) + :param Nr: The list *Nr*, where *Nr[r]* is the number of + samples that occur *r* times in the base distribution. + :type N: int + :param N: The total number of outcomes recorded by the heldout + frequency distribution. + """ + estimate = [] + for r in range(self._max_r + 1): + if Nr[r] == 0: + estimate.append(None) + else: + estimate.append(Tr[r] / (Nr[r] * N)) + return estimate + + def base_fdist(self): + """ + Return the base frequency distribution that this probability + distribution is based on. + + :rtype: FreqDist + """ + return self._base_fdist + + def heldout_fdist(self): + """ + Return the heldout frequency distribution that this + probability distribution is based on. + + :rtype: FreqDist + """ + return self._heldout_fdist + + def samples(self): + return self._base_fdist.keys() + + def prob(self, sample): + # Use our precomputed probability estimate. + r = self._base_fdist[sample] + return self._estimate[r] + + def max(self): + # Note: the Heldout estimation is *not* necessarily monotonic; + # so this implementation is currently broken. However, it + # should give the right answer *most* of the time. :) + return self._base_fdist.max() + + def discount(self): + raise NotImplementedError() + + def __repr__(self): + """ + :rtype: str + :return: A string representation of this ``ProbDist``. + """ + s = '' + return s % (self._base_fdist.N(), self._heldout_fdist.N()) + + +@compat.python_2_unicode_compatible +class CrossValidationProbDist(ProbDistI): + """ + The cross-validation estimate for the probability distribution of + the experiment used to generate a set of frequency distribution. + The "cross-validation estimate" for the probability of a sample + is found by averaging the held-out estimates for the sample in + each pair of frequency distributions. + """ + + SUM_TO_ONE = False + + def __init__(self, freqdists, bins): + """ + Use the cross-validation estimate to create a probability + distribution for the experiment used to generate + ``freqdists``. + + :type freqdists: list(FreqDist) + :param freqdists: A list of the frequency distributions + generated by the experiment. + :type bins: int + :param bins: The number of sample values that can be generated + by the experiment that is described by the probability + distribution. This value must be correctly set for the + probabilities of the sample values to sum to one. If + ``bins`` is not specified, it defaults to ``freqdist.B()``. + """ + self._freqdists = freqdists + + # Create a heldout probability distribution for each pair of + # frequency distributions in freqdists. + self._heldout_probdists = [] + for fdist1 in freqdists: + for fdist2 in freqdists: + if fdist1 is not fdist2: + probdist = HeldoutProbDist(fdist1, fdist2, bins) + self._heldout_probdists.append(probdist) + + def freqdists(self): + """ + Return the list of frequency distributions that this ``ProbDist`` is based on. + + :rtype: list(FreqDist) + """ + return self._freqdists + + def samples(self): + # [xx] nb: this is not too efficient + return set(sum([list(fd) for fd in self._freqdists], [])) + + def prob(self, sample): + # Find the average probability estimate returned by each + # heldout distribution. 
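A small sketch of the heldout estimate above, using two illustrative frequency distributions:

from nltk.probability import FreqDist, HeldoutProbDist

base = FreqDist("the cat sat on the mat".split())      # 'the' occurs twice, the rest once
heldout = FreqDist("the dog sat on the log".split())
hd = HeldoutProbDist(base, heldout, bins=10)

# Every sample with base count r gets the same estimate Tr[r] / (Nr[r] * N).
print(hd.prob('the'))   # r=2: Tr[2]=2, Nr[2]=1, N=6  -> 1/3
print(hd.prob('cat'))   # r=1: Tr[1]=2, Nr[1]=4, N=6  -> 1/12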
+ prob = 0.0 + for heldout_probdist in self._heldout_probdists: + prob += heldout_probdist.prob(sample) + return prob / len(self._heldout_probdists) + + def discount(self): + raise NotImplementedError() + + def __repr__(self): + """ + Return a string representation of this ``ProbDist``. + + :rtype: str + """ + return '' % len(self._freqdists) + + +@compat.python_2_unicode_compatible +class WittenBellProbDist(ProbDistI): + """ + The Witten-Bell estimate of a probability distribution. This distribution + allocates uniform probability mass to as yet unseen events by using the + number of events that have only been seen once. The probability mass + reserved for unseen events is equal to *T / (N + T)* + where *T* is the number of observed event types and *N* is the total + number of observed events. This equates to the maximum likelihood estimate + of a new type event occurring. The remaining probability mass is discounted + such that all probability estimates sum to one, yielding: + + - *p = T / Z (N + T)*, if count = 0 + - *p = c / (N + T)*, otherwise + """ + + def __init__(self, freqdist, bins=None): + """ + Creates a distribution of Witten-Bell probability estimates. This + distribution allocates uniform probability mass to as yet unseen + events by using the number of events that have only been seen once. The + probability mass reserved for unseen events is equal to *T / (N + T)* + where *T* is the number of observed event types and *N* is the total + number of observed events. This equates to the maximum likelihood + estimate of a new type event occurring. The remaining probability mass + is discounted such that all probability estimates sum to one, + yielding: + + - *p = T / Z (N + T)*, if count = 0 + - *p = c / (N + T)*, otherwise + + The parameters *T* and *N* are taken from the ``freqdist`` parameter + (the ``B()`` and ``N()`` values). The normalizing factor *Z* is + calculated using these values along with the ``bins`` parameter. + + :param freqdist: The frequency counts upon which to base the + estimation. + :type freqdist: FreqDist + :param bins: The number of possible event types. This must be at least + as large as the number of bins in the ``freqdist``. If None, then + it's assumed to be equal to that of the ``freqdist`` + :type bins: int + """ + assert bins is None or bins >= freqdist.B(), ( + 'bins parameter must not be less than %d=freqdist.B()' % freqdist.B() + ) + if bins is None: + bins = freqdist.B() + self._freqdist = freqdist + self._T = self._freqdist.B() + self._Z = bins - self._freqdist.B() + self._N = self._freqdist.N() + # self._P0 is P(0), precalculated for efficiency: + if self._N == 0: + # if freqdist is empty, we approximate P(0) by a UniformProbDist: + self._P0 = 1.0 / self._Z + else: + self._P0 = self._T / (self._Z * (self._N + self._T)) + + def prob(self, sample): + # inherit docs from ProbDistI + c = self._freqdist[sample] + return c / (self._N + self._T) if c != 0 else self._P0 + + def max(self): + return self._freqdist.max() + + def samples(self): + return self._freqdist.keys() + + def freqdist(self): + return self._freqdist + + def discount(self): + raise NotImplementedError() + + def __repr__(self): + """ + Return a string representation of this ``ProbDist``. 
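A numeric sketch of the Witten-Bell estimate above (counts are illustrative):

from nltk.probability import FreqDist, WittenBellProbDist

fd = FreqDist('aaabbc')              # N = 6 observed events, T = 3 observed types
wb = WittenBellProbDist(fd, bins=5)  # Z = bins - T = 2 as-yet-unseen types

print(wb.prob('a'))   # seen:   c / (N + T)       = 3/9  ~= 0.333
print(wb.prob('d'))   # unseen: T / (Z * (N + T)) = 3/18 ~= 0.167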
+ + :rtype: str + """ + return '' % self._freqdist.N() + + +##////////////////////////////////////////////////////// +## Good-Turing Probability Distributions +##////////////////////////////////////////////////////// + +# Good-Turing frequency estimation was contributed by Alan Turing and +# his statistical assistant I.J. Good, during their collaboration in +# the WWII. It is a statistical technique for predicting the +# probability of occurrence of objects belonging to an unknown number +# of species, given past observations of such objects and their +# species. (In drawing balls from an urn, the 'objects' would be balls +# and the 'species' would be the distinct colors of the balls (finite +# but unknown in number). +# +# Good-Turing method calculates the probability mass to assign to +# events with zero or low counts based on the number of events with +# higher counts. It does so by using the adjusted count *c\**: +# +# - *c\* = (c + 1) N(c + 1) / N(c)* for c >= 1 +# - *things with frequency zero in training* = N(1) for c == 0 +# +# where *c* is the original count, *N(i)* is the number of event types +# observed with count *i*. We can think the count of unseen as the count +# of frequency one (see Jurafsky & Martin 2nd Edition, p101). +# +# This method is problematic because the situation ``N(c+1) == 0`` +# is quite common in the original Good-Turing estimation; smoothing or +# interpolation of *N(i)* values is essential in practice. +# +# Bill Gale and Geoffrey Sampson present a simple and effective approach, +# Simple Good-Turing. As a smoothing curve they simply use a power curve: +# +# Nr = a*r^b (with b < -1 to give the appropriate hyperbolic +# relationship) +# +# They estimate a and b by simple linear regression technique on the +# logarithmic form of the equation: +# +# log Nr = a + b*log(r) +# +# However, they suggest that such a simple curve is probably only +# appropriate for high values of r. For low values of r, they use the +# measured Nr directly. (see M&S, p.213) +# +# Gale and Sampson propose to use r while the difference between r and +# r* is 1.96 greater than the standard deviation, and switch to r* if +# it is less or equal: +# +# |r - r*| > 1.96 * sqrt((r + 1)^2 (Nr+1 / Nr^2) (1 + Nr+1 / Nr)) +# +# The 1.96 coefficient correspond to a 0.05 significance criterion, +# some implementations can use a coefficient of 1.65 for a 0.1 +# significance criterion. +# + +##////////////////////////////////////////////////////// +## Simple Good-Turing Probablity Distributions +##////////////////////////////////////////////////////// + + +@compat.python_2_unicode_compatible +class SimpleGoodTuringProbDist(ProbDistI): + """ + SimpleGoodTuring ProbDist approximates from frequency to frequency of + frequency into a linear line under log space by linear regression. + Details of Simple Good-Turing algorithm can be found in: + + - Good Turing smoothing without tears" (Gale & Sampson 1995), + Journal of Quantitative Linguistics, vol. 2 pp. 217-237. + - "Speech and Language Processing (Jurafsky & Martin), + 2nd Edition, Chapter 4.5 p103 (log(Nc) = a + b*log(c)) + - http://www.grsampson.net/RGoodTur.html + + Given a set of pair (xi, yi), where the xi denotes the frequency and + yi denotes the frequency of frequency, we want to minimize their + square variation. E(x) and E(y) represent the mean of xi and yi. 
+ + - slope: b = sigma ((xi-E(x)(yi-E(y))) / sigma ((xi-E(x))(xi-E(x))) + - intercept: a = E(y) - b.E(x) + """ + + SUM_TO_ONE = False + + def __init__(self, freqdist, bins=None): + """ + :param freqdist: The frequency counts upon which to base the + estimation. + :type freqdist: FreqDist + :param bins: The number of possible event types. This must be + larger than the number of bins in the ``freqdist``. If None, + then it's assumed to be equal to ``freqdist``.B() + 1 + :type bins: int + """ + assert ( + bins is None or bins > freqdist.B() + ), 'bins parameter must not be less than %d=freqdist.B()+1' % (freqdist.B() + 1) + if bins is None: + bins = freqdist.B() + 1 + self._freqdist = freqdist + self._bins = bins + r, nr = self._r_Nr() + self.find_best_fit(r, nr) + self._switch(r, nr) + self._renormalize(r, nr) + + def _r_Nr_non_zero(self): + r_Nr = self._freqdist.r_Nr() + del r_Nr[0] + return r_Nr + + def _r_Nr(self): + """ + Split the frequency distribution in two list (r, Nr), where Nr(r) > 0 + """ + nonzero = self._r_Nr_non_zero() + + if not nonzero: + return [], [] + return zip(*sorted(nonzero.items())) + + def find_best_fit(self, r, nr): + """ + Use simple linear regression to tune parameters self._slope and + self._intercept in the log-log space based on count and Nr(count) + (Work in log space to avoid floating point underflow.) + """ + # For higher sample frequencies the data points becomes horizontal + # along line Nr=1. To create a more evident linear model in log-log + # space, we average positive Nr values with the surrounding zero + # values. (Church and Gale, 1991) + + if not r or not nr: + # Empty r or nr? + return + + zr = [] + for j in range(len(r)): + i = r[j - 1] if j > 0 else 0 + k = 2 * r[j] - i if j == len(r) - 1 else r[j + 1] + zr_ = 2.0 * nr[j] / (k - i) + zr.append(zr_) + + log_r = [math.log(i) for i in r] + log_zr = [math.log(i) for i in zr] + + xy_cov = x_var = 0.0 + x_mean = sum(log_r) / len(log_r) + y_mean = sum(log_zr) / len(log_zr) + for (x, y) in zip(log_r, log_zr): + xy_cov += (x - x_mean) * (y - y_mean) + x_var += (x - x_mean) ** 2 + self._slope = xy_cov / x_var if x_var != 0 else 0.0 + if self._slope >= -1: + warnings.warn( + 'SimpleGoodTuring did not find a proper best fit ' + 'line for smoothing probabilities of occurrences. ' + 'The probability estimates are likely to be ' + 'unreliable.' + ) + self._intercept = y_mean - self._slope * x_mean + + def _switch(self, r, nr): + """ + Calculate the r frontier where we must switch from Nr to Sr + when estimating E[Nr]. + """ + for i, r_ in enumerate(r): + if len(r) == i + 1 or r[i + 1] != r_ + 1: + # We are at the end of r, or there is a gap in r + self._switch_at = r_ + break + + Sr = self.smoothedNr + smooth_r_star = (r_ + 1) * Sr(r_ + 1) / Sr(r_) + unsmooth_r_star = (r_ + 1) * nr[i + 1] / nr[i] + + std = math.sqrt(self._variance(r_, nr[i], nr[i + 1])) + if abs(unsmooth_r_star - smooth_r_star) <= 1.96 * std: + self._switch_at = r_ + break + + def _variance(self, r, nr, nr_1): + r = float(r) + nr = float(nr) + nr_1 = float(nr_1) + return (r + 1.0) ** 2 * (nr_1 / nr ** 2) * (1.0 + nr_1 / nr) + + def _renormalize(self, r, nr): + """ + It is necessary to renormalize all the probability estimates to + ensure a proper probability distribution results. This can be done + by keeping the estimate of the probability mass for unseen items as + N(1)/N and renormalizing all the estimates for previously seen items + (as Gale and Sampson (1995) propose). 
(See M&S P.213, 1999) + """ + prob_cov = 0.0 + for r_, nr_ in zip(r, nr): + prob_cov += nr_ * self._prob_measure(r_) + if prob_cov: + self._renormal = (1 - self._prob_measure(0)) / prob_cov + + def smoothedNr(self, r): + """ + Return the number of samples with count r. + + :param r: The amount of frequency. + :type r: int + :rtype: float + """ + + # Nr = a*r^b (with b < -1 to give the appropriate hyperbolic + # relationship) + # Estimate a and b by simple linear regression technique on + # the logarithmic form of the equation: log Nr = a + b*log(r) + + return math.exp(self._intercept + self._slope * math.log(r)) + + def prob(self, sample): + """ + Return the sample's probability. + + :param sample: sample of the event + :type sample: str + :rtype: float + """ + count = self._freqdist[sample] + p = self._prob_measure(count) + if count == 0: + if self._bins == self._freqdist.B(): + p = 0.0 + else: + p = p / (self._bins - self._freqdist.B()) + else: + p = p * self._renormal + return p + + def _prob_measure(self, count): + if count == 0 and self._freqdist.N() == 0: + return 1.0 + elif count == 0 and self._freqdist.N() != 0: + return self._freqdist.Nr(1) / self._freqdist.N() + + if self._switch_at > count: + Er_1 = self._freqdist.Nr(count + 1) + Er = self._freqdist.Nr(count) + else: + Er_1 = self.smoothedNr(count + 1) + Er = self.smoothedNr(count) + + r_star = (count + 1) * Er_1 / Er + return r_star / self._freqdist.N() + + def check(self): + prob_sum = 0.0 + for i in range(0, len(self._Nr)): + prob_sum += self._Nr[i] * self._prob_measure(i) / self._renormal + print("Probability Sum:", prob_sum) + # assert prob_sum != 1.0, "probability sum should be one!" + + def discount(self): + """ + This function returns the total mass of probability transfers from the + seen samples to the unseen samples. + """ + return self.smoothedNr(1) / self._freqdist.N() + + def max(self): + return self._freqdist.max() + + def samples(self): + return self._freqdist.keys() + + def freqdist(self): + return self._freqdist + + def __repr__(self): + """ + Return a string representation of this ``ProbDist``. + + :rtype: str + """ + return '' % self._freqdist.N() + + +class MutableProbDist(ProbDistI): + """ + An mutable probdist where the probabilities may be easily modified. This + simply copies an existing probdist, storing the probability values in a + mutable dictionary and providing an update method. + """ + + def __init__(self, prob_dist, samples, store_logs=True): + """ + Creates the mutable probdist based on the given prob_dist and using + the list of samples given. These values are stored as log + probabilities if the store_logs flag is set. 
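A usage sketch of the Simple Good-Turing estimator above. The counts are synthetic, chosen so that the frequency-of-frequencies curve is decreasing and the log-log regression finds a slope below -1 (on very small or flat samples the class warns that its fit is unreliable):

from nltk.probability import FreqDist, SimpleGoodTuringProbDist

counts = {'w%d' % i: 1 for i in range(6)}         # six words seen once
counts.update({'x%d' % i: 2 for i in range(3)})   # three words seen twice
counts.update({'y%d' % i: 3 for i in range(2)})   # two words seen three times
counts['z'] = 5                                   # one frequent word; N = 23, B = 12

fd = FreqDist(counts)
sgt = SimpleGoodTuringProbDist(fd, bins=20)

print(sgt.prob('z'))        # smoothed probability of a seen word
print(sgt.prob('unseen'))   # the reserved mass N(1)/N, split across the 20 - 12 unseen bins
print(sgt.discount())       # total probability mass shifted to unseen events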
+ + :param prob_dist: the distribution from which to garner the + probabilities + :type prob_dist: ProbDist + :param samples: the complete set of samples + :type samples: sequence of any + :param store_logs: whether to store the probabilities as logarithms + :type store_logs: bool + """ + self._samples = samples + self._sample_dict = dict((samples[i], i) for i in range(len(samples))) + self._data = array.array(str("d"), [0.0]) * len(samples) + for i in range(len(samples)): + if store_logs: + self._data[i] = prob_dist.logprob(samples[i]) + else: + self._data[i] = prob_dist.prob(samples[i]) + self._logs = store_logs + + def max(self): + # inherit documentation + return max((p, v) for (v, p) in self._sample_dict.items())[1] + + def samples(self): + # inherit documentation + return self._samples + + def prob(self, sample): + # inherit documentation + i = self._sample_dict.get(sample) + if i is None: + return 0.0 + return 2 ** (self._data[i]) if self._logs else self._data[i] + + def logprob(self, sample): + # inherit documentation + i = self._sample_dict.get(sample) + if i is None: + return float('-inf') + return self._data[i] if self._logs else math.log(self._data[i], 2) + + def update(self, sample, prob, log=True): + """ + Update the probability for the given sample. This may cause the object + to stop being the valid probability distribution - the user must + ensure that they update the sample probabilities such that all samples + have probabilities between 0 and 1 and that all probabilities sum to + one. + + :param sample: the sample for which to update the probability + :type sample: any + :param prob: the new probability + :type prob: float + :param log: is the probability already logged + :type log: bool + """ + i = self._sample_dict.get(sample) + assert i is not None + if self._logs: + self._data[i] = prob if log else math.log(prob, 2) + else: + self._data[i] = 2 ** (prob) if log else prob + + +##///////////////////////////////////////////////////// +## Kneser-Ney Probability Distribution +##////////////////////////////////////////////////////// + +# This method for calculating probabilities was introduced in 1995 by Reinhard +# Kneser and Hermann Ney. It was meant to improve the accuracy of language +# models that use backing-off to deal with sparse data. The authors propose two +# ways of doing so: a marginal distribution constraint on the back-off +# distribution and a leave-one-out distribution. For a start, the first one is +# implemented as a class below. +# +# The idea behind a back-off n-gram model is that we have a series of +# frequency distributions for our n-grams so that in case we have not seen a +# given n-gram during training (and as a result have a 0 probability for it) we +# can 'back off' (hence the name!) and try testing whether we've seen the +# n-1-gram part of the n-gram in training. +# +# The novelty of Kneser and Ney's approach was that they decided to fiddle +# around with the way this latter, backed off probability was being calculated +# whereas their peers seemed to focus on the primary probability. +# +# The implementation below uses one of the techniques described in their paper +# titled "Improved backing-off for n-gram language modeling." In the same paper +# another technique is introduced to attempt to smooth the back-off +# distribution as well as the primary one. There is also a much-cited +# modification of this method proposed by Chen and Goodman. 
+# +# In order for the implementation of Kneser-Ney to be more efficient, some +# changes have been made to the original algorithm. Namely, the calculation of +# the normalizing function gamma has been significantly simplified and +# combined slightly differently with beta. None of these changes affect the +# nature of the algorithm, but instead aim to cut out unnecessary calculations +# and take advantage of storing and retrieving information in dictionaries +# where possible. + + +@compat.python_2_unicode_compatible +class KneserNeyProbDist(ProbDistI): + """ + Kneser-Ney estimate of a probability distribution. This is a version of + back-off that counts how likely an n-gram is provided the n-1-gram had + been seen in training. Extends the ProbDistI interface, requires a trigram + FreqDist instance to train on. Optionally, a different from default discount + value can be specified. The default discount is set to 0.75. + + """ + + def __init__(self, freqdist, bins=None, discount=0.75): + """ + :param freqdist: The trigram frequency distribution upon which to base + the estimation + :type freqdist: FreqDist + :param bins: Included for compatibility with nltk.tag.hmm + :type bins: int or float + :param discount: The discount applied when retrieving counts of + trigrams + :type discount: float (preferred, but can be set to int) + """ + + if not bins: + self._bins = freqdist.B() + else: + self._bins = bins + self._D = discount + + # cache for probability calculation + self._cache = {} + + # internal bigram and trigram frequency distributions + self._bigrams = defaultdict(int) + self._trigrams = freqdist + + # helper dictionaries used to calculate probabilities + self._wordtypes_after = defaultdict(float) + self._trigrams_contain = defaultdict(float) + self._wordtypes_before = defaultdict(float) + for w0, w1, w2 in freqdist: + self._bigrams[(w0, w1)] += freqdist[(w0, w1, w2)] + self._wordtypes_after[(w0, w1)] += 1 + self._trigrams_contain[w1] += 1 + self._wordtypes_before[(w1, w2)] += 1 + + def prob(self, trigram): + # sample must be a triple + if len(trigram) != 3: + raise ValueError('Expected an iterable with 3 members.') + trigram = tuple(trigram) + w0, w1, w2 = trigram + + if trigram in self._cache: + return self._cache[trigram] + else: + # if the sample trigram was seen during training + if trigram in self._trigrams: + prob = (self._trigrams[trigram] - self.discount()) / self._bigrams[ + (w0, w1) + ] + + # else if the 'rougher' environment was seen during training + elif (w0, w1) in self._bigrams and (w1, w2) in self._wordtypes_before: + aftr = self._wordtypes_after[(w0, w1)] + bfr = self._wordtypes_before[(w1, w2)] + + # the probability left over from alphas + leftover_prob = (aftr * self.discount()) / self._bigrams[(w0, w1)] + + # the beta (including normalization) + beta = bfr / (self._trigrams_contain[w1] - aftr) + + prob = leftover_prob * beta + + # else the sample was completely unseen during training + else: + prob = 0.0 + + self._cache[trigram] = prob + return prob + + def discount(self): + """ + Return the value by which counts are discounted. By default set to 0.75. + + :rtype: float + """ + return self._D + + def set_discount(self, discount): + """ + Set the value by which counts are discounted to the value of discount. 
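A usage sketch of the Kneser-Ney estimator above, trained on trigrams from an illustrative sentence:

from nltk import ngrams
from nltk.probability import FreqDist, KneserNeyProbDist

text = "the cat sat on the mat and the cat ate the fish".split()
kn = KneserNeyProbDist(FreqDist(ngrams(text, 3)), discount=0.75)

print(kn.prob(('the', 'cat', 'sat')))   # seen trigram: (count - D) / count('the cat') = 0.125
print(kn.prob(('and', 'the', 'mat')))   # unseen trigram, seen context: backed-off estimate (0.375 here)
print(kn.prob(('cat', 'mat', 'dog')))   # context never seen in training: 0.0
print(kn.max())                         # one of the training trigrams (all counts tie here)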
+
+        :param discount: the new value to discount counts by
+        :type discount: float (preferred, but int possible)
+        :rtype: None
+        """
+        self._D = discount
+
+    def samples(self):
+        return self._trigrams.keys()
+
+    def max(self):
+        return self._trigrams.max()
+
+    def __repr__(self):
+        '''
+        Return a string representation of this ProbDist
+
+        :rtype: str
+        '''
+        return '<KneserNeyProbDist based on {0} trigrams>'.format(self._trigrams.N())
+
+
+##//////////////////////////////////////////////////////
+##  Probability Distribution Operations
+##//////////////////////////////////////////////////////
+
+
+def log_likelihood(test_pdist, actual_pdist):
+    if not isinstance(test_pdist, ProbDistI) or not isinstance(
+        actual_pdist, ProbDistI
+    ):
+        raise ValueError('expected a ProbDist.')
+    return sum(
+        actual_pdist.prob(s) * math.log(test_pdist.prob(s), 2) for s in actual_pdist
+    )
+
+
+def entropy(pdist):
+    probs = (pdist.prob(s) for s in pdist.samples())
+    return -sum(p * math.log(p, 2) for p in probs)
+
+
+##//////////////////////////////////////////////////////
+##  Conditional Distributions
+##//////////////////////////////////////////////////////
+
+
+@compat.python_2_unicode_compatible
+class ConditionalFreqDist(defaultdict):
+    """
+    A collection of frequency distributions for a single experiment
+    run under different conditions. Conditional frequency
+    distributions are used to record the number of times each sample
+    occurred, given the condition under which the experiment was run.
+    For example, a conditional frequency distribution could be used to
+    record the frequency of each word (type) in a document, given its
+    length. Formally, a conditional frequency distribution can be
+    defined as a function that maps from each condition to the
+    FreqDist for the experiment under that condition.
+
+    Conditional frequency distributions are typically constructed by
+    repeatedly running an experiment under a variety of conditions,
+    and incrementing the sample outcome counts for the appropriate
+    conditions. For example, the following code will produce a
+    conditional frequency distribution that encodes how often each
+    word type occurs, given the length of that word type:
+
+    >>> from nltk.probability import ConditionalFreqDist
+    >>> from nltk.tokenize import word_tokenize
+    >>> sent = "the the the dog dog some other words that we do not care about"
+    >>> cfdist = ConditionalFreqDist()
+    >>> for word in word_tokenize(sent):
+    ...     condition = len(word)
+    ...     cfdist[condition][word] += 1
+
+    An equivalent way to do this is with the initializer:
+
+    >>> cfdist = ConditionalFreqDist((len(word), word) for word in word_tokenize(sent))
+
+    The frequency distribution for each condition is accessed using
+    the indexing operator:
+
+    >>> cfdist[3]
+    FreqDist({'the': 3, 'dog': 2, 'not': 1})
+    >>> cfdist[3].freq('the')
+    0.5
+    >>> cfdist[3]['dog']
+    2
+
+    When the indexing operator is used to access the frequency
+    distribution for a condition that has not been accessed before,
+    ``ConditionalFreqDist`` creates a new empty FreqDist for that
+    condition.
+
+    """
+
+    def __init__(self, cond_samples=None):
+        """
+        Construct a new empty conditional frequency distribution. In
+        particular, the count for every sample, under every condition,
+        is zero.
+
+        :param cond_samples: The samples to initialize the conditional
+            frequency distribution with
+        :type cond_samples: Sequence of (condition, sample) tuples
+        """
+        defaultdict.__init__(self, FreqDist)
+
+        if cond_samples:
+            for (cond, sample) in cond_samples:
+                self[cond][sample] += 1
+
+    def __reduce__(self):
+        kv_pairs = ((cond, self[cond]) for cond in self.conditions())
+        return (self.__class__, (), None, None, kv_pairs)
+
+    def conditions(self):
+        """
+        Return a list of the conditions that have been accessed for
+        this ``ConditionalFreqDist``. Use the indexing operator to
+        access the frequency distribution for a given condition.
+        Note that the frequency distributions for some conditions
+        may contain zero sample outcomes.
+
+        :rtype: list
+        """
+        return list(self.keys())
+
+    def N(self):
+        """
+        Return the total number of sample outcomes that have been
+        recorded by this ``ConditionalFreqDist``.
+
+        :rtype: int
+        """
+        return sum(fdist.N() for fdist in itervalues(self))
+
+    def plot(self, *args, **kwargs):
+        """
+        Plot the given samples from the conditional frequency distribution.
+        For a cumulative plot, specify cumulative=True.
+        (Requires Matplotlib to be installed.)
+
+        :param samples: The samples to plot
+        :type samples: list
+        :param title: The title for the graph
+        :type title: str
+        :param conditions: The conditions to plot (default is all)
+        :type conditions: list
+        """
+        try:
+            import matplotlib.pyplot as plt  # import statement fix
+        except ImportError:
+            raise ValueError(
+                'The plot function requires matplotlib to be installed.'
+                'See http://matplotlib.org/'
+            )
+
+        cumulative = _get_kwarg(kwargs, 'cumulative', False)
+        percents = _get_kwarg(kwargs, 'percents', False)
+        conditions = [c for c in _get_kwarg(kwargs, 'conditions', self.conditions()) if c in self]  # conditions should be in self
+        title = _get_kwarg(kwargs, 'title', '')
+        samples = _get_kwarg(
+            kwargs, 'samples', sorted(set(v
+                                          for c in conditions
+                                          for v in self[c]))
+        )  # this computation could be wasted
+        if "linewidth" not in kwargs:
+            kwargs["linewidth"] = 2
+        ax = plt.gca()
+        if (len(conditions) != 0):
+            freqs = []
+            for condition in conditions:
+                if cumulative:
+                    # freqs should be a list of list where each sub list will be a frequency of a condition
+                    freqs.append(list(self[condition]._cumulative_frequencies(samples)))
+                    ylabel = "Cumulative Counts"
+                    legend_loc = 'lower right'
+                    if percents:
+                        # convert the cumulative counts for this condition into
+                        # percentages of its total (the last cumulative value)
+                        freqs[-1] = [f / freqs[-1][-1] * 100 for f in freqs[-1]]
+                        ylabel = "Cumulative Percents"
+                else:
+                    freqs.append([self[condition][sample] for sample in samples])
+                    ylabel = "Counts"
+                    legend_loc = 'upper right'
+                # percents = [f * 100 for f in freqs] only in ConditionalProbDist?
+
+            i = 0
+            for freq in freqs:
+                kwargs['label'] = conditions[i]  # label for each condition
+                i += 1
+                ax.plot(freq, *args, **kwargs)
+            ax.legend(loc=legend_loc)
+            ax.grid(True, color="silver")
+            ax.set_xticks(range(len(samples)))
+            ax.set_xticklabels([text_type(s) for s in samples], rotation=90)
+            if title:
+                ax.set_title(title)
+            ax.set_xlabel("Samples")
+            ax.set_ylabel(ylabel)
+        plt.show()
+
+        return ax
+
+    def tabulate(self, *args, **kwargs):
+        """
+        Tabulate the given samples from the conditional frequency distribution.
+
+        :param samples: The samples to plot
+        :type samples: list
+        :param conditions: The conditions to plot (default is all)
+        :type conditions: list
+        :param cumulative: A flag to specify whether the freqs are cumulative (default = False)
+        :type cumulative: bool
+        """
+
+        cumulative = _get_kwarg(kwargs, 'cumulative', False)
+        conditions = _get_kwarg(kwargs, 'conditions', sorted(self.conditions()))
+        samples = _get_kwarg(
+            kwargs, 'samples', sorted(set(v for c in conditions
+                                          if c in self
+                                          for v in self[c])))  # this computation could be wasted
+
+        width = max(len("%s" % s) for s in samples)
+        freqs = dict()
+        for c in conditions:
+            if cumulative:
+                freqs[c] = list(self[c]._cumulative_frequencies(samples))
+            else:
+                freqs[c] = [self[c][sample] for sample in samples]
+            width = max(width, max(len("%d" % f) for f in freqs[c]))
+
+        condition_size = max(len("%s" % c) for c in conditions)
+        print(' ' * condition_size, end=' ')
+        for s in samples:
+            print("%*s" % (width, s), end=' ')
+        print()
+        for c in conditions:
+            print("%*s" % (condition_size, c), end=' ')
+            for f in freqs[c]:
+                print("%*d" % (width, f), end=' ')
+            print()
+
+    # Mathematical operators
+
+    def __add__(self, other):
+        """
+        Add counts from two ConditionalFreqDists.
+        """
+        if not isinstance(other, ConditionalFreqDist):
+            return NotImplemented
+        result = ConditionalFreqDist()
+        for cond in self.conditions():
+            newfreqdist = self[cond] + other[cond]
+            if newfreqdist:
+                result[cond] = newfreqdist
+        for cond in other.conditions():
+            if cond not in self.conditions():
+                for elem, count in other[cond].items():
+                    if count > 0:
+                        result[cond][elem] = count
+        return result
+
+    def __sub__(self, other):
+        """
+        Subtract counts, but keep only results with positive counts.
+        """
+        if not isinstance(other, ConditionalFreqDist):
+            return NotImplemented
+        result = ConditionalFreqDist()
+        for cond in self.conditions():
+            newfreqdist = self[cond] - other[cond]
+            if newfreqdist:
+                result[cond] = newfreqdist
+        for cond in other.conditions():
+            if cond not in self.conditions():
+                for elem, count in other[cond].items():
+                    if count < 0:
+                        result[cond][elem] = 0 - count
+        return result
+
+    def __or__(self, other):
+        """
+        Union is the maximum of value in either of the input counters.
+        """
+        if not isinstance(other, ConditionalFreqDist):
+            return NotImplemented
+        result = ConditionalFreqDist()
+        for cond in self.conditions():
+            newfreqdist = self[cond] | other[cond]
+            if newfreqdist:
+                result[cond] = newfreqdist
+        for cond in other.conditions():
+            if cond not in self.conditions():
+                for elem, count in other[cond].items():
+                    if count > 0:
+                        result[cond][elem] = count
+        return result
+
+    def __and__(self, other):
+        """
+        Intersection is the minimum of corresponding counts.
+        """
+        if not isinstance(other, ConditionalFreqDist):
+            return NotImplemented
+        result = ConditionalFreqDist()
+        for cond in self.conditions():
+            newfreqdist = self[cond] & other[cond]
+            if newfreqdist:
+                result[cond] = newfreqdist
+        return result
+
+    # @total_ordering doesn't work here, since the class inherits from a builtin class
+    def __le__(self, other):
+        if not isinstance(other, ConditionalFreqDist):
+            raise_unorderable_types("<=", self, other)
+        return set(self.conditions()).issubset(other.conditions()) and all(
+            self[c] <= other[c] for c in self.conditions()
+        )
+
+    def __lt__(self, other):
+        if not isinstance(other, ConditionalFreqDist):
+            raise_unorderable_types("<", self, other)
+        return self <= other and self != other
+
+    def __ge__(self, other):
+        if not isinstance(other, ConditionalFreqDist):
+            raise_unorderable_types(">=", self, other)
+        return other <= self
+
+    def __gt__(self, other):
+        if not isinstance(other, ConditionalFreqDist):
+            raise_unorderable_types(">", self, other)
+        return other < self
+
+    def __repr__(self):
+        """
+        Return a string representation of this ``ConditionalFreqDist``.
+
+        :rtype: str
+        """
+        return '<ConditionalFreqDist with %d conditions>' % len(self)
+
+
+@compat.python_2_unicode_compatible
+@add_metaclass(ABCMeta)
+class ConditionalProbDistI(dict):
+    """
+    A collection of probability distributions for a single experiment
+    run under different conditions. Conditional probability
+    distributions are used to estimate the likelihood of each sample,
+    given the condition under which the experiment was run. For
+    example, a conditional probability distribution could be used to
+    estimate the probability of each word type in a document, given
+    the length of the word type. Formally, a conditional probability
+    distribution can be defined as a function that maps from each
+    condition to the ``ProbDist`` for the experiment under that
+    condition.
+    """
+
+    @abstractmethod
+    def __init__(self):
+        """
+        Classes inheriting from ConditionalProbDistI should implement __init__.
+        """
+
+    def conditions(self):
+        """
+        Return a list of the conditions that are represented by
+        this ``ConditionalProbDist``. Use the indexing operator to
+        access the probability distribution for a given condition.
+
+        :rtype: list
+        """
+        return list(self.keys())
+
+    def __repr__(self):
+        """
+        Return a string representation of this ``ConditionalProbDist``.
+ + :rtype: str + """ + return '<%s with %d conditions>' % (type(self).__name__, len(self)) + + +class ConditionalProbDist(ConditionalProbDistI): + """ + A conditional probability distribution modeling the experiments + that were used to generate a conditional frequency distribution. + A ConditionalProbDist is constructed from a + ``ConditionalFreqDist`` and a ``ProbDist`` factory: + + - The ``ConditionalFreqDist`` specifies the frequency + distribution for each condition. + - The ``ProbDist`` factory is a function that takes a + condition's frequency distribution, and returns its + probability distribution. A ``ProbDist`` class's name (such as + ``MLEProbDist`` or ``HeldoutProbDist``) can be used to specify + that class's constructor. + + The first argument to the ``ProbDist`` factory is the frequency + distribution that it should model; and the remaining arguments are + specified by the ``factory_args`` parameter to the + ``ConditionalProbDist`` constructor. For example, the following + code constructs a ``ConditionalProbDist``, where the probability + distribution for each condition is an ``ELEProbDist`` with 10 bins: + + >>> from nltk.corpus import brown + >>> from nltk.probability import ConditionalFreqDist + >>> from nltk.probability import ConditionalProbDist, ELEProbDist + >>> cfdist = ConditionalFreqDist(brown.tagged_words()[:5000]) + >>> cpdist = ConditionalProbDist(cfdist, ELEProbDist, 10) + >>> cpdist['passed'].max() + 'VBD' + >>> cpdist['passed'].prob('VBD') + 0.423... + + """ + + def __init__(self, cfdist, probdist_factory, *factory_args, **factory_kw_args): + """ + Construct a new conditional probability distribution, based on + the given conditional frequency distribution and ``ProbDist`` + factory. + + :type cfdist: ConditionalFreqDist + :param cfdist: The ``ConditionalFreqDist`` specifying the + frequency distribution for each condition. + :type probdist_factory: class or function + :param probdist_factory: The function or class that maps + a condition's frequency distribution to its probability + distribution. The function is called with the frequency + distribution as its first argument, + ``factory_args`` as its remaining arguments, and + ``factory_kw_args`` as keyword arguments. + :type factory_args: (any) + :param factory_args: Extra arguments for ``probdist_factory``. + These arguments are usually used to specify extra + properties for the probability distributions of individual + conditions, such as the number of bins they contain. + :type factory_kw_args: (any) + :param factory_kw_args: Extra keyword arguments for ``probdist_factory``. + """ + self._probdist_factory = probdist_factory + self._factory_args = factory_args + self._factory_kw_args = factory_kw_args + + for condition in cfdist: + self[condition] = probdist_factory( + cfdist[condition], *factory_args, **factory_kw_args + ) + + def __missing__(self, key): + self[key] = self._probdist_factory( + FreqDist(), *self._factory_args, **self._factory_kw_args + ) + return self[key] + + +class DictionaryConditionalProbDist(ConditionalProbDistI): + """ + An alternative ConditionalProbDist that simply wraps a dictionary of + ProbDists rather than creating these from FreqDists. 
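+
+    For example (an illustrative sketch, not drawn from real data), a
+    dictionary of ready-made ``DictionaryProbDist`` objects can be wrapped
+    directly:
+
+    >>> from nltk.probability import DictionaryProbDist
+    >>> from nltk.probability import DictionaryConditionalProbDist
+    >>> cpdist = DictionaryConditionalProbDist(
+    ...     {'short': DictionaryProbDist({'a': 0.6, 'is': 0.4})})
+    >>> cpdist['short'].prob('a')
+    0.6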
+ """ + + def __init__(self, probdist_dict): + """ + :param probdist_dict: a dictionary containing the probdists indexed + by the conditions + :type probdist_dict: dict any -> probdist + """ + self.update(probdist_dict) + + def __missing__(self, key): + self[key] = DictionaryProbDist() + return self[key] + + +##////////////////////////////////////////////////////// +## Adding in log-space. +##////////////////////////////////////////////////////// + +# If the difference is bigger than this, then just take the bigger one: +_ADD_LOGS_MAX_DIFF = math.log(1e-30, 2) + + +def add_logs(logx, logy): + """ + Given two numbers ``logx`` = *log(x)* and ``logy`` = *log(y)*, return + *log(x+y)*. Conceptually, this is the same as returning + ``log(2**(logx)+2**(logy))``, but the actual implementation + avoids overflow errors that could result from direct computation. + """ + if logx < logy + _ADD_LOGS_MAX_DIFF: + return logy + if logy < logx + _ADD_LOGS_MAX_DIFF: + return logx + base = min(logx, logy) + return base + math.log(2 ** (logx - base) + 2 ** (logy - base), 2) + + +def sum_logs(logs): + return reduce(add_logs, logs[1:], logs[0]) if len(logs) != 0 else _NINF + + +##////////////////////////////////////////////////////// +## Probabilistic Mix-in +##////////////////////////////////////////////////////// + + +class ProbabilisticMixIn(object): + """ + A mix-in class to associate probabilities with other classes + (trees, rules, etc.). To use the ``ProbabilisticMixIn`` class, + define a new class that derives from an existing class and from + ProbabilisticMixIn. You will need to define a new constructor for + the new class, which explicitly calls the constructors of both its + parent classes. For example: + + >>> from nltk.probability import ProbabilisticMixIn + >>> class A: + ... def __init__(self, x, y): self.data = (x,y) + ... + >>> class ProbabilisticA(A, ProbabilisticMixIn): + ... def __init__(self, x, y, **prob_kwarg): + ... A.__init__(self, x, y) + ... ProbabilisticMixIn.__init__(self, **prob_kwarg) + + See the documentation for the ProbabilisticMixIn + ``constructor<__init__>`` for information about the arguments it + expects. + + You should generally also redefine the string representation + methods, the comparison methods, and the hashing method. + """ + + def __init__(self, **kwargs): + """ + Initialize this object's probability. This initializer should + be called by subclass constructors. ``prob`` should generally be + the first argument for those constructors. + + :param prob: The probability associated with the object. + :type prob: float + :param logprob: The log of the probability associated with + the object. + :type logprob: float + """ + if 'prob' in kwargs: + if 'logprob' in kwargs: + raise TypeError('Must specify either prob or logprob ' '(not both)') + else: + ProbabilisticMixIn.set_prob(self, kwargs['prob']) + elif 'logprob' in kwargs: + ProbabilisticMixIn.set_logprob(self, kwargs['logprob']) + else: + self.__prob = self.__logprob = None + + def set_prob(self, prob): + """ + Set the probability associated with this object to ``prob``. + + :param prob: The new probability + :type prob: float + """ + self.__prob = prob + self.__logprob = None + + def set_logprob(self, logprob): + """ + Set the log probability associated with this object to + ``logprob``. I.e., set the probability associated with this + object to ``2**(logprob)``. 
+ + :param logprob: The new log probability + :type logprob: float + """ + self.__logprob = logprob + self.__prob = None + + def prob(self): + """ + Return the probability associated with this object. + + :rtype: float + """ + if self.__prob is None: + if self.__logprob is None: + return None + self.__prob = 2 ** (self.__logprob) + return self.__prob + + def logprob(self): + """ + Return ``log(p)``, where ``p`` is the probability associated + with this object. + + :rtype: float + """ + if self.__logprob is None: + if self.__prob is None: + return None + self.__logprob = math.log(self.__prob, 2) + return self.__logprob + + +class ImmutableProbabilisticMixIn(ProbabilisticMixIn): + def set_prob(self, prob): + raise ValueError('%s is immutable' % self.__class__.__name__) + + def set_logprob(self, prob): + raise ValueError('%s is immutable' % self.__class__.__name__) + + +## Helper function for processing keyword arguments + + +def _get_kwarg(kwargs, key, default): + if key in kwargs: + arg = kwargs[key] + del kwargs[key] + else: + arg = default + return arg + + +##////////////////////////////////////////////////////// +## Demonstration +##////////////////////////////////////////////////////// + + +def _create_rand_fdist(numsamples, numoutcomes): + """ + Create a new frequency distribution, with random samples. The + samples are numbers from 1 to ``numsamples``, and are generated by + summing two numbers, each of which has a uniform distribution. + """ + + fdist = FreqDist() + for x in range(numoutcomes): + y = random.randint(1, (1 + numsamples) // 2) + random.randint( + 0, numsamples // 2 + ) + fdist[y] += 1 + return fdist + + +def _create_sum_pdist(numsamples): + """ + Return the true probability distribution for the experiment + ``_create_rand_fdist(numsamples, x)``. + """ + fdist = FreqDist() + for x in range(1, (1 + numsamples) // 2 + 1): + for y in range(0, numsamples // 2 + 1): + fdist[x + y] += 1 + return MLEProbDist(fdist) + + +def demo(numsamples=6, numoutcomes=500): + """ + A demonstration of frequency distributions and probability + distributions. This demonstration creates three frequency + distributions with, and uses them to sample a random process with + ``numsamples`` samples. Each frequency distribution is sampled + ``numoutcomes`` times. These three frequency distributions are + then used to build six probability distributions. Finally, the + probability estimates of these distributions are compared to the + actual probability of each sample. + + :type numsamples: int + :param numsamples: The number of samples to use in each demo + frequency distributions. + :type numoutcomes: int + :param numoutcomes: The total number of outcomes for each + demo frequency distribution. These outcomes are divided into + ``numsamples`` bins. + :rtype: None + """ + + # Randomly sample a stochastic process three times. + fdist1 = _create_rand_fdist(numsamples, numoutcomes) + fdist2 = _create_rand_fdist(numsamples, numoutcomes) + fdist3 = _create_rand_fdist(numsamples, numoutcomes) + + # Use our samples to create probability distributions. + pdists = [ + MLEProbDist(fdist1), + LidstoneProbDist(fdist1, 0.5, numsamples), + HeldoutProbDist(fdist1, fdist2, numsamples), + HeldoutProbDist(fdist2, fdist1, numsamples), + CrossValidationProbDist([fdist1, fdist2, fdist3], numsamples), + SimpleGoodTuringProbDist(fdist1), + SimpleGoodTuringProbDist(fdist1, 7), + _create_sum_pdist(numsamples), + ] + + # Find the probability of each sample. 
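+    # (each entry is a tuple: the sample, its empirical frequency in fdist1,
+    # and the estimate that each of the probability distributions assigns to it)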
+ vals = [] + for n in range(1, numsamples + 1): + vals.append(tuple([n, fdist1.freq(n)] + [pdist.prob(n) for pdist in pdists])) + + # Print the results in a formatted table. + print( + ( + '%d samples (1-%d); %d outcomes were sampled for each FreqDist' + % (numsamples, numsamples, numoutcomes) + ) + ) + print('=' * 9 * (len(pdists) + 2)) + FORMATSTR = ' FreqDist ' + '%8s ' * (len(pdists) - 1) + '| Actual' + print(FORMATSTR % tuple(repr(pdist)[1:9] for pdist in pdists[:-1])) + print('-' * 9 * (len(pdists) + 2)) + FORMATSTR = '%3d %8.6f ' + '%8.6f ' * (len(pdists) - 1) + '| %8.6f' + for val in vals: + print(FORMATSTR % val) + + # Print the totals for each column (should all be 1.0) + zvals = list(zip(*vals)) + sums = [sum(val) for val in zvals[1:]] + print('-' * 9 * (len(pdists) + 2)) + FORMATSTR = 'Total ' + '%8.6f ' * (len(pdists)) + '| %8.6f' + print(FORMATSTR % tuple(sums)) + print('=' * 9 * (len(pdists) + 2)) + + # Display the distributions themselves, if they're short enough. + if len("%s" % fdist1) < 70: + print(' fdist1: %s' % fdist1) + print(' fdist2: %s' % fdist2) + print(' fdist3: %s' % fdist3) + print() + + print('Generating:') + for pdist in pdists: + fdist = FreqDist(pdist.generate() for i in range(5000)) + print('%20s %s' % (pdist.__class__.__name__[:20], ("%s" % fdist)[:55])) + print() + + +def gt_demo(): + from nltk import corpus + + emma_words = corpus.gutenberg.words('austen-emma.txt') + fd = FreqDist(emma_words) + sgt = SimpleGoodTuringProbDist(fd) + print('%18s %8s %14s' % ("word", "freqency", "SimpleGoodTuring")) + fd_keys_sorted = ( + key for key, value in sorted(fd.items(), key=lambda item: item[1], reverse=True) + ) + for key in fd_keys_sorted: + print('%18s %8d %14e' % (key, fd[key], sgt.prob(key))) + + +if __name__ == '__main__': + demo(6, 10) + demo(5, 5000) + gt_demo() + +__all__ = [ + 'ConditionalFreqDist', + 'ConditionalProbDist', + 'ConditionalProbDistI', + 'CrossValidationProbDist', + 'DictionaryConditionalProbDist', + 'DictionaryProbDist', + 'ELEProbDist', + 'FreqDist', + 'SimpleGoodTuringProbDist', + 'HeldoutProbDist', + 'ImmutableProbabilisticMixIn', + 'LaplaceProbDist', + 'LidstoneProbDist', + 'MLEProbDist', + 'MutableProbDist', + 'KneserNeyProbDist', + 'ProbDistI', + 'ProbabilisticMixIn', + 'UniformProbDist', + 'WittenBellProbDist', + 'add_logs', + 'log_likelihood', + 'sum_logs', + 'entropy', +] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/sem/__init__.py new file mode 100644 index 0000000..2d60761 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sem/__init__.py @@ -0,0 +1,75 @@ +# Natural Language Toolkit: Semantic Interpretation +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ewan Klein +# URL: +# For license information, see LICENSE.TXT + +""" +NLTK Semantic Interpretation Package + +This package contains classes for representing semantic structure in +formulas of first-order logic and for evaluating such formulas in +set-theoretic models. + + >>> from nltk.sem import logic + >>> logic._counter._value = 0 + +The package has two main components: + + - ``logic`` provides support for analyzing expressions of First + Order Logic (FOL). + - ``evaluate`` allows users to recursively determine truth in a + model for formulas of FOL. + +A model consists of a domain of discourse and a valuation function, +which assigns values to non-logical constants. We assume that entities +in the domain are represented as strings such as ``'b1'``, ``'g1'``, +etc. 
A ``Valuation`` is initialized with a list of (symbol, value) +pairs, where values are entities, sets of entities or sets of tuples +of entities. +The domain of discourse can be inferred from the valuation, and model +is then created with domain and valuation as parameters. + + >>> from nltk.sem import Valuation, Model + >>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'), + ... ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])), + ... ('dog', set(['d1'])), + ... ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))] + >>> val = Valuation(v) + >>> dom = val.domain + >>> m = Model(dom, val) +""" + +from nltk.sem.util import parse_sents, interpret_sents, evaluate_sents, root_semrep +from nltk.sem.evaluate import ( + Valuation, + Assignment, + Model, + Undefined, + is_rel, + set2rel, + arity, + read_valuation, +) +from nltk.sem.logic import ( + boolean_ops, + binding_ops, + equality_preds, + read_logic, + Variable, + Expression, + ApplicationExpression, + LogicalExpressionException, +) +from nltk.sem.skolemize import skolemize +from nltk.sem.lfg import FStructure +from nltk.sem.relextract import extract_rels, rtuple, clause +from nltk.sem.boxer import Boxer +from nltk.sem.drt import DrtExpression, DRS + +# from nltk.sem.glue import Glue +# from nltk.sem.hole import HoleSemantics +# from nltk.sem.cooper_storage import CooperStore + +# don't import chat80 as its names are too generic diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..c9643d8 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/boxer.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/boxer.cpython-37.pyc new file mode 100644 index 0000000..ae78cc9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/boxer.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/chat80.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/chat80.cpython-37.pyc new file mode 100644 index 0000000..345e15e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/chat80.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/cooper_storage.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/cooper_storage.cpython-37.pyc new file mode 100644 index 0000000..c75c7e9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/cooper_storage.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/drt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/drt.cpython-37.pyc new file mode 100644 index 0000000..77c3f14 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/drt.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/drt_glue_demo.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/drt_glue_demo.cpython-37.pyc new file mode 100644 index 0000000..5e31351 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/drt_glue_demo.cpython-37.pyc differ diff --git 
a/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/evaluate.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/evaluate.cpython-37.pyc new file mode 100644 index 0000000..995a8dd Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/evaluate.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/glue.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/glue.cpython-37.pyc new file mode 100644 index 0000000..fe36f89 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/glue.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/hole.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/hole.cpython-37.pyc new file mode 100644 index 0000000..935d793 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/hole.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/lfg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/lfg.cpython-37.pyc new file mode 100644 index 0000000..d55a7d1 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/lfg.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/linearlogic.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/linearlogic.cpython-37.pyc new file mode 100644 index 0000000..2a4ed93 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/linearlogic.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/logic.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/logic.cpython-37.pyc new file mode 100644 index 0000000..7d836aa Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/logic.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/relextract.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/relextract.cpython-37.pyc new file mode 100644 index 0000000..cb01949 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/relextract.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/skolemize.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/skolemize.cpython-37.pyc new file mode 100644 index 0000000..4d1bfee Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/skolemize.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000..b28eb41 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sem/__pycache__/util.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/boxer.py b/venv.bak/lib/python3.7/site-packages/nltk/sem/boxer.py new file mode 100644 index 0000000..8113165 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sem/boxer.py @@ -0,0 +1,1613 @@ +# Natural Language Toolkit: Interface to Boxer +# +# +# Author: Dan Garrette +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT + +""" +An interface to Boxer. 
+ +This interface relies on the latest version of the development (subversion) version of +C&C and Boxer. + +Usage: + Set the environment variable CANDC to the bin directory of your CandC installation. + The models directory should be in the CandC root directory. + For example: + /path/to/candc/ + bin/ + candc + boxer + models/ + boxer/ +""" +from __future__ import print_function, unicode_literals + +import os +import re +import operator +import subprocess +from optparse import OptionParser +import tempfile +from functools import reduce + +from nltk.internals import find_binary + +from nltk.sem.logic import ( + ExpectedMoreTokensException, + LogicalExpressionException, + UnexpectedTokenException, + Variable, +) + +from nltk.sem.drt import ( + DRS, + DrtApplicationExpression, + DrtEqualityExpression, + DrtNegatedExpression, + DrtOrExpression, + DrtParser, + DrtProposition, + DrtTokens, + DrtVariableExpression, +) + +from nltk.compat import python_2_unicode_compatible + + +class Boxer(object): + """ + This class is an interface to Johan Bos's program Boxer, a wide-coverage + semantic parser that produces Discourse Representation Structures (DRSs). + """ + + def __init__( + self, + boxer_drs_interpreter=None, + elimeq=False, + bin_dir=None, + verbose=False, + resolve=True, + ): + """ + :param boxer_drs_interpreter: A class that converts from the + ``AbstractBoxerDrs`` object hierarchy to a different object. The + default is ``NltkDrtBoxerDrsInterpreter``, which converts to the NLTK + DRT hierarchy. + :param elimeq: When set to true, Boxer removes all equalities from the + DRSs and discourse referents standing in the equality relation are + unified, but only if this can be done in a meaning-preserving manner. + :param resolve: When set to true, Boxer will resolve all anaphoric DRSs and perform merge-reduction. + Resolution follows Van der Sandt's theory of binding and accommodation. + """ + if boxer_drs_interpreter is None: + boxer_drs_interpreter = NltkDrtBoxerDrsInterpreter() + self._boxer_drs_interpreter = boxer_drs_interpreter + + self._resolve = resolve + self._elimeq = elimeq + + self.set_bin_dir(bin_dir, verbose) + + def set_bin_dir(self, bin_dir, verbose=False): + self._candc_bin = self._find_binary('candc', bin_dir, verbose) + self._candc_models_path = os.path.normpath( + os.path.join(self._candc_bin[:-5], '../models') + ) + self._boxer_bin = self._find_binary('boxer', bin_dir, verbose) + + def interpret(self, input, discourse_id=None, question=False, verbose=False): + """ + Use Boxer to give a first order representation. + + :param input: str Input sentence to parse + :param occur_index: bool Should predicates be occurrence indexed? + :param discourse_id: str An identifier to be inserted to each occurrence-indexed predicate. + :return: ``drt.DrtExpression`` + """ + discourse_ids = [discourse_id] if discourse_id is not None else None + d, = self.interpret_multi_sents([[input]], discourse_ids, question, verbose) + if not d: + raise Exception('Unable to interpret: "{0}"'.format(input)) + return d + + def interpret_multi(self, input, discourse_id=None, question=False, verbose=False): + """ + Use Boxer to give a first order representation. + + :param input: list of str Input sentences to parse as a single discourse + :param occur_index: bool Should predicates be occurrence indexed? + :param discourse_id: str An identifier to be inserted to each occurrence-indexed predicate. 
+ :return: ``drt.DrtExpression`` + """ + discourse_ids = [discourse_id] if discourse_id is not None else None + d, = self.interpret_multi_sents([input], discourse_ids, question, verbose) + if not d: + raise Exception('Unable to interpret: "{0}"'.format(input)) + return d + + def interpret_sents( + self, inputs, discourse_ids=None, question=False, verbose=False + ): + """ + Use Boxer to give a first order representation. + + :param inputs: list of str Input sentences to parse as individual discourses + :param occur_index: bool Should predicates be occurrence indexed? + :param discourse_ids: list of str Identifiers to be inserted to each occurrence-indexed predicate. + :return: list of ``drt.DrtExpression`` + """ + return self.interpret_multi_sents( + [[input] for input in inputs], discourse_ids, question, verbose + ) + + def interpret_multi_sents( + self, inputs, discourse_ids=None, question=False, verbose=False + ): + """ + Use Boxer to give a first order representation. + + :param inputs: list of list of str Input discourses to parse + :param occur_index: bool Should predicates be occurrence indexed? + :param discourse_ids: list of str Identifiers to be inserted to each occurrence-indexed predicate. + :return: ``drt.DrtExpression`` + """ + if discourse_ids is not None: + assert len(inputs) == len(discourse_ids) + assert reduce(operator.and_, (id is not None for id in discourse_ids)) + use_disc_id = True + else: + discourse_ids = list(map(str, range(len(inputs)))) + use_disc_id = False + + candc_out = self._call_candc(inputs, discourse_ids, question, verbose=verbose) + boxer_out = self._call_boxer(candc_out, verbose=verbose) + + # if 'ERROR: input file contains no ccg/2 terms.' in boxer_out: + # raise UnparseableInputException('Could not parse with candc: "%s"' % input_str) + + drs_dict = self._parse_to_drs_dict(boxer_out, use_disc_id) + return [drs_dict.get(id, None) for id in discourse_ids] + + def _call_candc(self, inputs, discourse_ids, question, verbose=False): + """ + Call the ``candc`` binary with the given input. + + :param inputs: list of list of str Input discourses to parse + :param discourse_ids: list of str Identifiers to be inserted to each occurrence-indexed predicate. + :param filename: str A filename for the output file + :return: stdout + """ + args = [ + '--models', + os.path.join(self._candc_models_path, ['boxer', 'questions'][question]), + '--candc-printer', + 'boxer', + ] + return self._call( + '\n'.join( + sum( + ( + ["'{0}'".format(id)] + d + for d, id in zip(inputs, discourse_ids) + ), + [], + ) + ), + self._candc_bin, + args, + verbose, + ) + + def _call_boxer(self, candc_out, verbose=False): + """ + Call the ``boxer`` binary with the given input. 
+ + :param candc_out: str output from C&C parser + :return: stdout + """ + f = None + try: + fd, temp_filename = tempfile.mkstemp( + prefix='boxer-', suffix='.in', text=True + ) + f = os.fdopen(fd, 'w') + f.write(candc_out) + finally: + if f: + f.close() + + args = [ + '--box', + 'false', + '--semantics', + 'drs', + #'--flat', 'false', # removed from boxer + '--resolve', + ['false', 'true'][self._resolve], + '--elimeq', + ['false', 'true'][self._elimeq], + '--format', + 'prolog', + '--instantiate', + 'true', + '--input', + temp_filename, + ] + stdout = self._call(None, self._boxer_bin, args, verbose) + os.remove(temp_filename) + return stdout + + def _find_binary(self, name, bin_dir, verbose=False): + return find_binary( + name, + path_to_bin=bin_dir, + env_vars=['CANDC'], + url='http://svn.ask.it.usyd.edu.au/trac/candc/', + binary_names=[name, name + '.exe'], + verbose=verbose, + ) + + def _call(self, input_str, binary, args=[], verbose=False): + """ + Call the binary with the given input. + + :param input_str: A string whose contents are used as stdin. + :param binary: The location of the binary to call + :param args: A list of command-line arguments. + :return: stdout + """ + if verbose: + print('Calling:', binary) + print('Args:', args) + print('Input:', input_str) + print('Command:', binary + ' ' + ' '.join(args)) + + # Call via a subprocess + if input_str is None: + cmd = [binary] + args + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + else: + cmd = 'echo "{0}" | {1} {2}'.format(input_str, binary, ' '.join(args)) + p = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True + ) + stdout, stderr = p.communicate() + + if verbose: + print('Return code:', p.returncode) + if stdout: + print('stdout:\n', stdout, '\n') + if stderr: + print('stderr:\n', stderr, '\n') + if p.returncode != 0: + raise Exception( + 'ERROR CALLING: {0} {1}\nReturncode: {2}\n{3}'.format( + binary, ' '.join(args), p.returncode, stderr + ) + ) + + return stdout + + def _parse_to_drs_dict(self, boxer_out, use_disc_id): + lines = boxer_out.split('\n') + drs_dict = {} + i = 0 + while i < len(lines): + line = lines[i] + if line.startswith('id('): + comma_idx = line.index(',') + discourse_id = line[3:comma_idx] + if discourse_id[0] == "'" and discourse_id[-1] == "'": + discourse_id = discourse_id[1:-1] + drs_id = line[comma_idx + 1 : line.index(')')] + i += 1 + line = lines[i] + assert line.startswith('sem({0},'.format(drs_id)) + if line[-4:] == "').'": + line = line[:-4] + ")." 
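+                # any stray trailing quote has been stripped above, so the
+                # sem(...) line should end with ")." before the DRS is sliced out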
+ assert line.endswith(').'), "can't parse line: {0}".format(line) + + search_start = len('sem({0},['.format(drs_id)) + brace_count = 1 + drs_start = -1 + for j, c in enumerate(line[search_start:]): + if c == '[': + brace_count += 1 + if c == ']': + brace_count -= 1 + if brace_count == 0: + drs_start = search_start + j + 1 + if line[drs_start : drs_start + 3] == "','": + drs_start = drs_start + 3 + else: + drs_start = drs_start + 1 + break + assert drs_start > -1 + + drs_input = line[drs_start:-2].strip() + parsed = self._parse_drs(drs_input, discourse_id, use_disc_id) + drs_dict[discourse_id] = self._boxer_drs_interpreter.interpret(parsed) + i += 1 + return drs_dict + + def _parse_drs(self, drs_string, discourse_id, use_disc_id): + return BoxerOutputDrsParser([None, discourse_id][use_disc_id]).parse(drs_string) + + +class BoxerOutputDrsParser(DrtParser): + def __init__(self, discourse_id=None): + """ + This class is used to parse the Prolog DRS output from Boxer into a + hierarchy of python objects. + """ + DrtParser.__init__(self) + self.discourse_id = discourse_id + self.sentence_id_offset = None + self.quote_chars = [("'", "'", "\\", False)] + + def parse(self, data, signature=None): + return DrtParser.parse(self, data, signature) + + def get_all_symbols(self): + return ['(', ')', ',', '[', ']', ':'] + + def handle(self, tok, context): + return self.handle_drs(tok) + + def attempt_adjuncts(self, expression, context): + return expression + + def parse_condition(self, indices): + """ + Parse a DRS condition + + :return: list of ``DrtExpression`` + """ + tok = self.token() + accum = self.handle_condition(tok, indices) + if accum is None: + raise UnexpectedTokenException(tok) + return accum + + def handle_drs(self, tok): + if tok == 'drs': + return self.parse_drs() + elif tok in ['merge', 'smerge']: + return self._handle_binary_expression(self._make_merge_expression)(None, []) + elif tok in ['alfa']: + return self._handle_alfa(self._make_merge_expression)(None, []) + + def handle_condition(self, tok, indices): + """ + Handle a DRS condition + + :param indices: list of int + :return: list of ``DrtExpression`` + """ + if tok == 'not': + return [self._handle_not()] + + if tok == 'or': + conds = [self._handle_binary_expression(self._make_or_expression)] + elif tok == 'imp': + conds = [self._handle_binary_expression(self._make_imp_expression)] + elif tok == 'eq': + conds = [self._handle_eq()] + elif tok == 'prop': + conds = [self._handle_prop()] + + elif tok == 'pred': + conds = [self._handle_pred()] + elif tok == 'named': + conds = [self._handle_named()] + elif tok == 'rel': + conds = [self._handle_rel()] + elif tok == 'timex': + conds = self._handle_timex() + elif tok == 'card': + conds = [self._handle_card()] + + elif tok == 'whq': + conds = [self._handle_whq()] + elif tok == 'duplex': + conds = [self._handle_duplex()] + + else: + conds = [] + + return sum( + [ + [cond(sent_index, word_indices) for cond in conds] + for sent_index, word_indices in self._sent_and_word_indices(indices) + ], + [], + ) + + def _handle_not(self): + self.assertToken(self.token(), '(') + drs = self.process_next_expression(None) + self.assertToken(self.token(), ')') + return BoxerNot(drs) + + def _handle_pred(self): + # pred(_G3943, dog, n, 0) + self.assertToken(self.token(), '(') + variable = self.parse_variable() + self.assertToken(self.token(), ',') + name = self.token() + self.assertToken(self.token(), ',') + pos = self.token() + self.assertToken(self.token(), ',') + sense = int(self.token()) + 
self.assertToken(self.token(), ')') + + def _handle_pred_f(sent_index, word_indices): + return BoxerPred( + self.discourse_id, sent_index, word_indices, variable, name, pos, sense + ) + + return _handle_pred_f + + def _handle_duplex(self): + # duplex(whq, drs(...), var, drs(...)) + self.assertToken(self.token(), '(') + # self.assertToken(self.token(), '[') + ans_types = [] + # while self.token(0) != ']': + # cat = self.token() + # self.assertToken(self.token(), ':') + # if cat == 'des': + # ans_types.append(self.token()) + # elif cat == 'num': + # ans_types.append('number') + # typ = self.token() + # if typ == 'cou': + # ans_types.append('count') + # else: + # ans_types.append(typ) + # else: + # ans_types.append(self.token()) + # self.token() #swallow the ']' + + self.assertToken(self.token(), 'whq') + self.assertToken(self.token(), ',') + d1 = self.process_next_expression(None) + self.assertToken(self.token(), ',') + ref = self.parse_variable() + self.assertToken(self.token(), ',') + d2 = self.process_next_expression(None) + self.assertToken(self.token(), ')') + return lambda sent_index, word_indices: BoxerWhq( + self.discourse_id, sent_index, word_indices, ans_types, d1, ref, d2 + ) + + def _handle_named(self): + # named(x0, john, per, 0) + self.assertToken(self.token(), '(') + variable = self.parse_variable() + self.assertToken(self.token(), ',') + name = self.token() + self.assertToken(self.token(), ',') + type = self.token() + self.assertToken(self.token(), ',') + sense = self.token() # as per boxer rev 2554 + self.assertToken(self.token(), ')') + return lambda sent_index, word_indices: BoxerNamed( + self.discourse_id, sent_index, word_indices, variable, name, type, sense + ) + + def _handle_rel(self): + # rel(_G3993, _G3943, agent, 0) + self.assertToken(self.token(), '(') + var1 = self.parse_variable() + self.assertToken(self.token(), ',') + var2 = self.parse_variable() + self.assertToken(self.token(), ',') + rel = self.token() + self.assertToken(self.token(), ',') + sense = int(self.token()) + self.assertToken(self.token(), ')') + return lambda sent_index, word_indices: BoxerRel( + self.discourse_id, sent_index, word_indices, var1, var2, rel, sense + ) + + def _handle_timex(self): + # timex(_G18322, date([]: (+), []:'XXXX', [1004]:'04', []:'XX')) + self.assertToken(self.token(), '(') + arg = self.parse_variable() + self.assertToken(self.token(), ',') + new_conds = self._handle_time_expression(arg) + self.assertToken(self.token(), ')') + return new_conds + + def _handle_time_expression(self, arg): + # date([]: (+), []:'XXXX', [1004]:'04', []:'XX') + tok = self.token() + self.assertToken(self.token(), '(') + if tok == 'date': + conds = self._handle_date(arg) + elif tok == 'time': + conds = self._handle_time(arg) + else: + return None + self.assertToken(self.token(), ')') + return [ + lambda sent_index, word_indices: BoxerPred( + self.discourse_id, sent_index, word_indices, arg, tok, 'n', 0 + ) + ] + [lambda sent_index, word_indices: cond for cond in conds] + + def _handle_date(self, arg): + # []: (+), []:'XXXX', [1004]:'04', []:'XX' + conds = [] + (sent_index, word_indices), = self._sent_and_word_indices( + self._parse_index_list() + ) + self.assertToken(self.token(), '(') + pol = self.token() + self.assertToken(self.token(), ')') + conds.append( + BoxerPred( + self.discourse_id, + sent_index, + word_indices, + arg, + 'date_pol_{0}'.format(pol), + 'a', + 0, + ) + ) + self.assertToken(self.token(), ',') + + (sent_index, word_indices), = self._sent_and_word_indices( + 
self._parse_index_list() + ) + year = self.token() + if year != 'XXXX': + year = year.replace(':', '_') + conds.append( + BoxerPred( + self.discourse_id, + sent_index, + word_indices, + arg, + 'date_year_{0}'.format(year), + 'a', + 0, + ) + ) + self.assertToken(self.token(), ',') + + (sent_index, word_indices), = self._sent_and_word_indices( + self._parse_index_list() + ) + month = self.token() + if month != 'XX': + conds.append( + BoxerPred( + self.discourse_id, + sent_index, + word_indices, + arg, + 'date_month_{0}'.format(month), + 'a', + 0, + ) + ) + self.assertToken(self.token(), ',') + + (sent_index, word_indices), = self._sent_and_word_indices( + self._parse_index_list() + ) + day = self.token() + if day != 'XX': + conds.append( + BoxerPred( + self.discourse_id, + sent_index, + word_indices, + arg, + 'date_day_{0}'.format(day), + 'a', + 0, + ) + ) + + return conds + + def _handle_time(self, arg): + # time([1018]:'18', []:'XX', []:'XX') + conds = [] + self._parse_index_list() + hour = self.token() + if hour != 'XX': + conds.append(self._make_atom('r_hour_2', arg, hour)) + self.assertToken(self.token(), ',') + + self._parse_index_list() + min = self.token() + if min != 'XX': + conds.append(self._make_atom('r_min_2', arg, min)) + self.assertToken(self.token(), ',') + + self._parse_index_list() + sec = self.token() + if sec != 'XX': + conds.append(self._make_atom('r_sec_2', arg, sec)) + + return conds + + def _handle_card(self): + # card(_G18535, 28, ge) + self.assertToken(self.token(), '(') + variable = self.parse_variable() + self.assertToken(self.token(), ',') + value = self.token() + self.assertToken(self.token(), ',') + type = self.token() + self.assertToken(self.token(), ')') + return lambda sent_index, word_indices: BoxerCard( + self.discourse_id, sent_index, word_indices, variable, value, type + ) + + def _handle_prop(self): + # prop(_G15949, drs(...)) + self.assertToken(self.token(), '(') + variable = self.parse_variable() + self.assertToken(self.token(), ',') + drs = self.process_next_expression(None) + self.assertToken(self.token(), ')') + return lambda sent_index, word_indices: BoxerProp( + self.discourse_id, sent_index, word_indices, variable, drs + ) + + def _parse_index_list(self): + # [1001,1002]: + indices = [] + self.assertToken(self.token(), '[') + while self.token(0) != ']': + indices.append(self.parse_index()) + if self.token(0) == ',': + self.token() # swallow ',' + self.token() # swallow ']' + self.assertToken(self.token(), ':') + return indices + + def parse_drs(self): + # drs([[1001]:_G3943], + # [[1002]:pred(_G3943, dog, n, 0)] + # ) + self.assertToken(self.token(), '(') + self.assertToken(self.token(), '[') + refs = set() + while self.token(0) != ']': + indices = self._parse_index_list() + refs.add(self.parse_variable()) + if self.token(0) == ',': + self.token() # swallow ',' + self.token() # swallow ']' + self.assertToken(self.token(), ',') + self.assertToken(self.token(), '[') + conds = [] + while self.token(0) != ']': + indices = self._parse_index_list() + conds.extend(self.parse_condition(indices)) + if self.token(0) == ',': + self.token() # swallow ',' + self.token() # swallow ']' + self.assertToken(self.token(), ')') + return BoxerDrs(list(refs), conds) + + def _handle_binary_expression(self, make_callback): + self.assertToken(self.token(), '(') + drs1 = self.process_next_expression(None) + self.assertToken(self.token(), ',') + drs2 = self.process_next_expression(None) + self.assertToken(self.token(), ')') + return lambda sent_index, word_indices: 
make_callback( + sent_index, word_indices, drs1, drs2 + ) + + def _handle_alfa(self, make_callback): + self.assertToken(self.token(), '(') + type = self.token() + self.assertToken(self.token(), ',') + drs1 = self.process_next_expression(None) + self.assertToken(self.token(), ',') + drs2 = self.process_next_expression(None) + self.assertToken(self.token(), ')') + return lambda sent_index, word_indices: make_callback( + sent_index, word_indices, drs1, drs2 + ) + + def _handle_eq(self): + self.assertToken(self.token(), '(') + var1 = self.parse_variable() + self.assertToken(self.token(), ',') + var2 = self.parse_variable() + self.assertToken(self.token(), ')') + return lambda sent_index, word_indices: BoxerEq( + self.discourse_id, sent_index, word_indices, var1, var2 + ) + + def _handle_whq(self): + self.assertToken(self.token(), '(') + self.assertToken(self.token(), '[') + ans_types = [] + while self.token(0) != ']': + cat = self.token() + self.assertToken(self.token(), ':') + if cat == 'des': + ans_types.append(self.token()) + elif cat == 'num': + ans_types.append('number') + typ = self.token() + if typ == 'cou': + ans_types.append('count') + else: + ans_types.append(typ) + else: + ans_types.append(self.token()) + self.token() # swallow the ']' + + self.assertToken(self.token(), ',') + d1 = self.process_next_expression(None) + self.assertToken(self.token(), ',') + ref = self.parse_variable() + self.assertToken(self.token(), ',') + d2 = self.process_next_expression(None) + self.assertToken(self.token(), ')') + return lambda sent_index, word_indices: BoxerWhq( + self.discourse_id, sent_index, word_indices, ans_types, d1, ref, d2 + ) + + def _make_merge_expression(self, sent_index, word_indices, drs1, drs2): + return BoxerDrs(drs1.refs + drs2.refs, drs1.conds + drs2.conds) + + def _make_or_expression(self, sent_index, word_indices, drs1, drs2): + return BoxerOr(self.discourse_id, sent_index, word_indices, drs1, drs2) + + def _make_imp_expression(self, sent_index, word_indices, drs1, drs2): + return BoxerDrs(drs1.refs, drs1.conds, drs2) + + def parse_variable(self): + var = self.token() + assert re.match('^[exps]\d+$', var), var + return var + + def parse_index(self): + return int(self.token()) + + def _sent_and_word_indices(self, indices): + """ + :return: list of (sent_index, word_indices) tuples + """ + sent_indices = set((i / 1000) - 1 for i in indices if i >= 0) + if sent_indices: + pairs = [] + for sent_index in sent_indices: + word_indices = [ + (i % 1000) - 1 for i in indices if sent_index == (i / 1000) - 1 + ] + pairs.append((sent_index, word_indices)) + return pairs + else: + word_indices = [(i % 1000) - 1 for i in indices] + return [(None, word_indices)] + + +class BoxerDrsParser(DrtParser): + """ + Reparse the str form of subclasses of ``AbstractBoxerDrs`` + """ + + def __init__(self, discourse_id=None): + DrtParser.__init__(self) + self.discourse_id = discourse_id + + def get_all_symbols(self): + return [ + DrtTokens.OPEN, + DrtTokens.CLOSE, + DrtTokens.COMMA, + DrtTokens.OPEN_BRACKET, + DrtTokens.CLOSE_BRACKET, + ] + + def attempt_adjuncts(self, expression, context): + return expression + + def handle(self, tok, context): + try: + # if tok == 'drs': + # self.assertNextToken(DrtTokens.OPEN) + # label = int(self.token()) + # self.assertNextToken(DrtTokens.COMMA) + # refs = list(map(int, self.handle_refs())) + # self.assertNextToken(DrtTokens.COMMA) + # conds = self.handle_conds(None) + # self.assertNextToken(DrtTokens.CLOSE) + # return BoxerDrs(label, refs, conds) + if tok == 'pred': 
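+                # reparse the serialised form of a BoxerPred, i.e.
+                # pred(disc_id, sent_id, [word_ids], variable, name, pos, sense)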
+ self.assertNextToken(DrtTokens.OPEN) + disc_id = ( + self.discourse_id if self.discourse_id is not None else self.token() + ) + self.assertNextToken(DrtTokens.COMMA) + sent_id = self.nullableIntToken() + self.assertNextToken(DrtTokens.COMMA) + word_ids = list(map(int, self.handle_refs())) + self.assertNextToken(DrtTokens.COMMA) + variable = int(self.token()) + self.assertNextToken(DrtTokens.COMMA) + name = self.token() + self.assertNextToken(DrtTokens.COMMA) + pos = self.token() + self.assertNextToken(DrtTokens.COMMA) + sense = int(self.token()) + self.assertNextToken(DrtTokens.CLOSE) + return BoxerPred(disc_id, sent_id, word_ids, variable, name, pos, sense) + elif tok == 'named': + self.assertNextToken(DrtTokens.OPEN) + disc_id = ( + self.discourse_id if self.discourse_id is not None else self.token() + ) + self.assertNextToken(DrtTokens.COMMA) + sent_id = int(self.token()) + self.assertNextToken(DrtTokens.COMMA) + word_ids = map(int, self.handle_refs()) + self.assertNextToken(DrtTokens.COMMA) + variable = int(self.token()) + self.assertNextToken(DrtTokens.COMMA) + name = self.token() + self.assertNextToken(DrtTokens.COMMA) + type = self.token() + self.assertNextToken(DrtTokens.COMMA) + sense = int(self.token()) + self.assertNextToken(DrtTokens.CLOSE) + return BoxerNamed( + disc_id, sent_id, word_ids, variable, name, type, sense + ) + elif tok == 'rel': + self.assertNextToken(DrtTokens.OPEN) + disc_id = ( + self.discourse_id if self.discourse_id is not None else self.token() + ) + self.assertNextToken(DrtTokens.COMMA) + sent_id = self.nullableIntToken() + self.assertNextToken(DrtTokens.COMMA) + word_ids = list(map(int, self.handle_refs())) + self.assertNextToken(DrtTokens.COMMA) + var1 = int(self.token()) + self.assertNextToken(DrtTokens.COMMA) + var2 = int(self.token()) + self.assertNextToken(DrtTokens.COMMA) + rel = self.token() + self.assertNextToken(DrtTokens.COMMA) + sense = int(self.token()) + self.assertNextToken(DrtTokens.CLOSE) + return BoxerRel(disc_id, sent_id, word_ids, var1, var2, rel, sense) + elif tok == 'prop': + self.assertNextToken(DrtTokens.OPEN) + disc_id = ( + self.discourse_id if self.discourse_id is not None else self.token() + ) + self.assertNextToken(DrtTokens.COMMA) + sent_id = int(self.token()) + self.assertNextToken(DrtTokens.COMMA) + word_ids = list(map(int, self.handle_refs())) + self.assertNextToken(DrtTokens.COMMA) + variable = int(self.token()) + self.assertNextToken(DrtTokens.COMMA) + drs = self.process_next_expression(None) + self.assertNextToken(DrtTokens.CLOSE) + return BoxerProp(disc_id, sent_id, word_ids, variable, drs) + elif tok == 'not': + self.assertNextToken(DrtTokens.OPEN) + drs = self.process_next_expression(None) + self.assertNextToken(DrtTokens.CLOSE) + return BoxerNot(drs) + elif tok == 'imp': + self.assertNextToken(DrtTokens.OPEN) + drs1 = self.process_next_expression(None) + self.assertNextToken(DrtTokens.COMMA) + drs2 = self.process_next_expression(None) + self.assertNextToken(DrtTokens.CLOSE) + return BoxerDrs(drs1.refs, drs1.conds, drs2) + elif tok == 'or': + self.assertNextToken(DrtTokens.OPEN) + disc_id = ( + self.discourse_id if self.discourse_id is not None else self.token() + ) + self.assertNextToken(DrtTokens.COMMA) + sent_id = self.nullableIntToken() + self.assertNextToken(DrtTokens.COMMA) + word_ids = map(int, self.handle_refs()) + self.assertNextToken(DrtTokens.COMMA) + drs1 = self.process_next_expression(None) + self.assertNextToken(DrtTokens.COMMA) + drs2 = self.process_next_expression(None) + 
self.assertNextToken(DrtTokens.CLOSE) + return BoxerOr(disc_id, sent_id, word_ids, drs1, drs2) + elif tok == 'eq': + self.assertNextToken(DrtTokens.OPEN) + disc_id = ( + self.discourse_id if self.discourse_id is not None else self.token() + ) + self.assertNextToken(DrtTokens.COMMA) + sent_id = self.nullableIntToken() + self.assertNextToken(DrtTokens.COMMA) + word_ids = list(map(int, self.handle_refs())) + self.assertNextToken(DrtTokens.COMMA) + var1 = int(self.token()) + self.assertNextToken(DrtTokens.COMMA) + var2 = int(self.token()) + self.assertNextToken(DrtTokens.CLOSE) + return BoxerEq(disc_id, sent_id, word_ids, var1, var2) + elif tok == 'card': + self.assertNextToken(DrtTokens.OPEN) + disc_id = ( + self.discourse_id if self.discourse_id is not None else self.token() + ) + self.assertNextToken(DrtTokens.COMMA) + sent_id = self.nullableIntToken() + self.assertNextToken(DrtTokens.COMMA) + word_ids = map(int, self.handle_refs()) + self.assertNextToken(DrtTokens.COMMA) + var = int(self.token()) + self.assertNextToken(DrtTokens.COMMA) + value = self.token() + self.assertNextToken(DrtTokens.COMMA) + type = self.token() + self.assertNextToken(DrtTokens.CLOSE) + return BoxerCard(disc_id, sent_id, word_ids, var, value, type) + elif tok == 'whq': + self.assertNextToken(DrtTokens.OPEN) + disc_id = ( + self.discourse_id if self.discourse_id is not None else self.token() + ) + self.assertNextToken(DrtTokens.COMMA) + sent_id = self.nullableIntToken() + self.assertNextToken(DrtTokens.COMMA) + word_ids = list(map(int, self.handle_refs())) + self.assertNextToken(DrtTokens.COMMA) + ans_types = self.handle_refs() + self.assertNextToken(DrtTokens.COMMA) + drs1 = self.process_next_expression(None) + self.assertNextToken(DrtTokens.COMMA) + var = int(self.token()) + self.assertNextToken(DrtTokens.COMMA) + drs2 = self.process_next_expression(None) + self.assertNextToken(DrtTokens.CLOSE) + return BoxerWhq(disc_id, sent_id, word_ids, ans_types, drs1, var, drs2) + except Exception as e: + raise LogicalExpressionException(self._currentIndex, str(e)) + assert False, repr(tok) + + def nullableIntToken(self): + t = self.token() + return int(t) if t != 'None' else None + + def get_next_token_variable(self, description): + try: + return self.token() + except ExpectedMoreTokensException as e: + raise ExpectedMoreTokensException(e.index, 'Variable expected.') + + +class AbstractBoxerDrs(object): + def variables(self): + """ + :return: (set, set, set) + """ + variables, events, propositions = self._variables() + return (variables - (events | propositions), events, propositions - events) + + def variable_types(self): + vartypes = {} + for t, vars in zip(('z', 'e', 'p'), self.variables()): + for v in vars: + vartypes[v] = t + return vartypes + + def _variables(self): + """ + :return: (set, set, set) + """ + return (set(), set(), set()) + + def atoms(self): + return set() + + def clean(self): + return self + + def _clean_name(self, name): + return name.replace('-', '_').replace("'", "_") + + def renumber_sentences(self, f): + return self + + def __hash__(self): + return hash("{0}".format(self)) + + +@python_2_unicode_compatible +class BoxerDrs(AbstractBoxerDrs): + def __init__(self, refs, conds, consequent=None): + AbstractBoxerDrs.__init__(self) + self.refs = refs + self.conds = conds + self.consequent = consequent + + def _variables(self): + variables = (set(), set(), set()) + for cond in self.conds: + for s, v in zip(variables, cond._variables()): + s.update(v) + if self.consequent is not None: + for s, v in 
zip(variables, self.consequent._variables()): + s.update(v) + return variables + + def atoms(self): + atoms = reduce(operator.or_, (cond.atoms() for cond in self.conds), set()) + if self.consequent is not None: + atoms.update(self.consequent.atoms()) + return atoms + + def clean(self): + consequent = self.consequent.clean() if self.consequent else None + return BoxerDrs(self.refs, [c.clean() for c in self.conds], consequent) + + def renumber_sentences(self, f): + consequent = self.consequent.renumber_sentences(f) if self.consequent else None + return BoxerDrs( + self.refs, [c.renumber_sentences(f) for c in self.conds], consequent + ) + + def __repr__(self): + s = 'drs([%s], [%s])' % ( + ', '.join("%s" % r for r in self.refs), + ', '.join("%s" % c for c in self.conds), + ) + if self.consequent is not None: + s = 'imp(%s, %s)' % (s, self.consequent) + return s + + def __eq__(self, other): + return ( + self.__class__ == other.__class__ + and self.refs == other.refs + and len(self.conds) == len(other.conds) + and reduce( + operator.and_, (c1 == c2 for c1, c2 in zip(self.conds, other.conds)) + ) + and self.consequent == other.consequent + ) + + def __ne__(self, other): + return not self == other + + __hash__ = AbstractBoxerDrs.__hash__ + + +@python_2_unicode_compatible +class BoxerNot(AbstractBoxerDrs): + def __init__(self, drs): + AbstractBoxerDrs.__init__(self) + self.drs = drs + + def _variables(self): + return self.drs._variables() + + def atoms(self): + return self.drs.atoms() + + def clean(self): + return BoxerNot(self.drs.clean()) + + def renumber_sentences(self, f): + return BoxerNot(self.drs.renumber_sentences(f)) + + def __repr__(self): + return 'not(%s)' % (self.drs) + + def __eq__(self, other): + return self.__class__ == other.__class__ and self.drs == other.drs + + def __ne__(self, other): + return not self == other + + __hash__ = AbstractBoxerDrs.__hash__ + + +@python_2_unicode_compatible +class BoxerIndexed(AbstractBoxerDrs): + def __init__(self, discourse_id, sent_index, word_indices): + AbstractBoxerDrs.__init__(self) + self.discourse_id = discourse_id + self.sent_index = sent_index + self.word_indices = word_indices + + def atoms(self): + return set([self]) + + def __eq__(self, other): + return ( + self.__class__ == other.__class__ + and self.discourse_id == other.discourse_id + and self.sent_index == other.sent_index + and self.word_indices == other.word_indices + and reduce(operator.and_, (s == o for s, o in zip(self, other))) + ) + + def __ne__(self, other): + return not self == other + + __hash__ = AbstractBoxerDrs.__hash__ + + def __repr__(self): + s = '%s(%s, %s, [%s]' % ( + self._pred(), + self.discourse_id, + self.sent_index, + ', '.join("%s" % wi for wi in self.word_indices), + ) + for v in self: + s += ', %s' % v + return s + ')' + + +class BoxerPred(BoxerIndexed): + def __init__(self, discourse_id, sent_index, word_indices, var, name, pos, sense): + BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices) + self.var = var + self.name = name + self.pos = pos + self.sense = sense + + def _variables(self): + return (set([self.var]), set(), set()) + + def change_var(self, var): + return BoxerPred( + self.discourse_id, + self.sent_index, + self.word_indices, + var, + self.name, + self.pos, + self.sense, + ) + + def clean(self): + return BoxerPred( + self.discourse_id, + self.sent_index, + self.word_indices, + self.var, + self._clean_name(self.name), + self.pos, + self.sense, + ) + + def renumber_sentences(self, f): + new_sent_index = f(self.sent_index) + 
return BoxerPred( + self.discourse_id, + new_sent_index, + self.word_indices, + self.var, + self.name, + self.pos, + self.sense, + ) + + def __iter__(self): + return iter((self.var, self.name, self.pos, self.sense)) + + def _pred(self): + return 'pred' + + +class BoxerNamed(BoxerIndexed): + def __init__(self, discourse_id, sent_index, word_indices, var, name, type, sense): + BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices) + self.var = var + self.name = name + self.type = type + self.sense = sense + + def _variables(self): + return (set([self.var]), set(), set()) + + def change_var(self, var): + return BoxerNamed( + self.discourse_id, + self.sent_index, + self.word_indices, + var, + self.name, + self.type, + self.sense, + ) + + def clean(self): + return BoxerNamed( + self.discourse_id, + self.sent_index, + self.word_indices, + self.var, + self._clean_name(self.name), + self.type, + self.sense, + ) + + def renumber_sentences(self, f): + return BoxerNamed( + self.discourse_id, + f(self.sent_index), + self.word_indices, + self.var, + self.name, + self.type, + self.sense, + ) + + def __iter__(self): + return iter((self.var, self.name, self.type, self.sense)) + + def _pred(self): + return 'named' + + +class BoxerRel(BoxerIndexed): + def __init__(self, discourse_id, sent_index, word_indices, var1, var2, rel, sense): + BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices) + self.var1 = var1 + self.var2 = var2 + self.rel = rel + self.sense = sense + + def _variables(self): + return (set([self.var1, self.var2]), set(), set()) + + def clean(self): + return BoxerRel( + self.discourse_id, + self.sent_index, + self.word_indices, + self.var1, + self.var2, + self._clean_name(self.rel), + self.sense, + ) + + def renumber_sentences(self, f): + return BoxerRel( + self.discourse_id, + f(self.sent_index), + self.word_indices, + self.var1, + self.var2, + self.rel, + self.sense, + ) + + def __iter__(self): + return iter((self.var1, self.var2, self.rel, self.sense)) + + def _pred(self): + return 'rel' + + +class BoxerProp(BoxerIndexed): + def __init__(self, discourse_id, sent_index, word_indices, var, drs): + BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices) + self.var = var + self.drs = drs + + def _variables(self): + return tuple( + map(operator.or_, (set(), set(), set([self.var])), self.drs._variables()) + ) + + def referenced_labels(self): + return set([self.drs]) + + def atoms(self): + return self.drs.atoms() + + def clean(self): + return BoxerProp( + self.discourse_id, + self.sent_index, + self.word_indices, + self.var, + self.drs.clean(), + ) + + def renumber_sentences(self, f): + return BoxerProp( + self.discourse_id, + f(self.sent_index), + self.word_indices, + self.var, + self.drs.renumber_sentences(f), + ) + + def __iter__(self): + return iter((self.var, self.drs)) + + def _pred(self): + return 'prop' + + +class BoxerEq(BoxerIndexed): + def __init__(self, discourse_id, sent_index, word_indices, var1, var2): + BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices) + self.var1 = var1 + self.var2 = var2 + + def _variables(self): + return (set([self.var1, self.var2]), set(), set()) + + def atoms(self): + return set() + + def renumber_sentences(self, f): + return BoxerEq( + self.discourse_id, + f(self.sent_index), + self.word_indices, + self.var1, + self.var2, + ) + + def __iter__(self): + return iter((self.var1, self.var2)) + + def _pred(self): + return 'eq' + + +class BoxerCard(BoxerIndexed): + def __init__(self, discourse_id, sent_index, 
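The Boxer* condition classes defined above are plain value objects, so they can be exercised without the external Boxer tool itself. A minimal sketch, assuming the vendored nltk package is importable; the discourse id 'd0' and the word/variable indices are made up for illustration::

    from nltk.sem.boxer import BoxerDrs, BoxerPred, BoxerRel

    # One discourse referent (1) with a unary and a binary condition.
    drs = BoxerDrs([1], [
        BoxerPred('d0', 0, [0], 1, 'dog', 'n', 0),
        BoxerRel('d0', 0, [1], 1, 2, 'agent', 0),
    ])
    print(drs)              # drs([1], [pred(d0, 0, [0], 1, dog, n, 0), rel(d0, 0, [1], 1, 2, agent, 0)])
    print(drs.variables())  # roughly ({1, 2}, set(), set()): (individuals, events, propositions)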
word_indices, var, value, type): + BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices) + self.var = var + self.value = value + self.type = type + + def _variables(self): + return (set([self.var]), set(), set()) + + def renumber_sentences(self, f): + return BoxerCard( + self.discourse_id, + f(self.sent_index), + self.word_indices, + self.var, + self.value, + self.type, + ) + + def __iter__(self): + return iter((self.var, self.value, self.type)) + + def _pred(self): + return 'card' + + +class BoxerOr(BoxerIndexed): + def __init__(self, discourse_id, sent_index, word_indices, drs1, drs2): + BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices) + self.drs1 = drs1 + self.drs2 = drs2 + + def _variables(self): + return tuple(map(operator.or_, self.drs1._variables(), self.drs2._variables())) + + def atoms(self): + return self.drs1.atoms() | self.drs2.atoms() + + def clean(self): + return BoxerOr( + self.discourse_id, + self.sent_index, + self.word_indices, + self.drs1.clean(), + self.drs2.clean(), + ) + + def renumber_sentences(self, f): + return BoxerOr( + self.discourse_id, + f(self.sent_index), + self.word_indices, + self.drs1, + self.drs2, + ) + + def __iter__(self): + return iter((self.drs1, self.drs2)) + + def _pred(self): + return 'or' + + +class BoxerWhq(BoxerIndexed): + def __init__( + self, discourse_id, sent_index, word_indices, ans_types, drs1, variable, drs2 + ): + BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices) + self.ans_types = ans_types + self.drs1 = drs1 + self.variable = variable + self.drs2 = drs2 + + def _variables(self): + return tuple( + map( + operator.or_, + (set([self.variable]), set(), set()), + self.drs1._variables(), + self.drs2._variables(), + ) + ) + + def atoms(self): + return self.drs1.atoms() | self.drs2.atoms() + + def clean(self): + return BoxerWhq( + self.discourse_id, + self.sent_index, + self.word_indices, + self.ans_types, + self.drs1.clean(), + self.variable, + self.drs2.clean(), + ) + + def renumber_sentences(self, f): + return BoxerWhq( + self.discourse_id, + f(self.sent_index), + self.word_indices, + self.ans_types, + self.drs1, + self.variable, + self.drs2, + ) + + def __iter__(self): + return iter( + ('[' + ','.join(self.ans_types) + ']', self.drs1, self.variable, self.drs2) + ) + + def _pred(self): + return 'whq' + + +class PassthroughBoxerDrsInterpreter(object): + def interpret(self, ex): + return ex + + +class NltkDrtBoxerDrsInterpreter(object): + def __init__(self, occur_index=False): + self._occur_index = occur_index + + def interpret(self, ex): + """ + :param ex: ``AbstractBoxerDrs`` + :return: ``DrtExpression`` + """ + if isinstance(ex, BoxerDrs): + drs = DRS( + [Variable(r) for r in ex.refs], list(map(self.interpret, ex.conds)) + ) + if ex.consequent is not None: + drs.consequent = self.interpret(ex.consequent) + return drs + elif isinstance(ex, BoxerNot): + return DrtNegatedExpression(self.interpret(ex.drs)) + elif isinstance(ex, BoxerPred): + pred = self._add_occur_indexing('%s_%s' % (ex.pos, ex.name), ex) + return self._make_atom(pred, ex.var) + elif isinstance(ex, BoxerNamed): + pred = self._add_occur_indexing('ne_%s_%s' % (ex.type, ex.name), ex) + return self._make_atom(pred, ex.var) + elif isinstance(ex, BoxerRel): + pred = self._add_occur_indexing('%s' % (ex.rel), ex) + return self._make_atom(pred, ex.var1, ex.var2) + elif isinstance(ex, BoxerProp): + return DrtProposition(Variable(ex.var), self.interpret(ex.drs)) + elif isinstance(ex, BoxerEq): + return DrtEqualityExpression( + 
DrtVariableExpression(Variable(ex.var1)), + DrtVariableExpression(Variable(ex.var2)), + ) + elif isinstance(ex, BoxerCard): + pred = self._add_occur_indexing('card_%s_%s' % (ex.type, ex.value), ex) + return self._make_atom(pred, ex.var) + elif isinstance(ex, BoxerOr): + return DrtOrExpression(self.interpret(ex.drs1), self.interpret(ex.drs2)) + elif isinstance(ex, BoxerWhq): + drs1 = self.interpret(ex.drs1) + drs2 = self.interpret(ex.drs2) + return DRS(drs1.refs + drs2.refs, drs1.conds + drs2.conds) + assert False, '%s: %s' % (ex.__class__.__name__, ex) + + def _make_atom(self, pred, *args): + accum = DrtVariableExpression(Variable(pred)) + for arg in args: + accum = DrtApplicationExpression( + accum, DrtVariableExpression(Variable(arg)) + ) + return accum + + def _add_occur_indexing(self, base, ex): + if self._occur_index and ex.sent_index is not None: + if ex.discourse_id: + base += '_%s' % ex.discourse_id + base += '_s%s' % ex.sent_index + base += '_w%s' % sorted(ex.word_indices)[0] + return base + + +class UnparseableInputException(Exception): + pass + + +if __name__ == '__main__': + opts = OptionParser("usage: %prog TEXT [options]") + opts.add_option( + "--verbose", + "-v", + help="display verbose logs", + action="store_true", + default=False, + dest="verbose", + ) + opts.add_option( + "--fol", "-f", help="output FOL", action="store_true", default=False, dest="fol" + ) + opts.add_option( + "--question", + "-q", + help="input is a question", + action="store_true", + default=False, + dest="question", + ) + opts.add_option( + "--occur", + "-o", + help="occurrence index", + action="store_true", + default=False, + dest="occur_index", + ) + (options, args) = opts.parse_args() + + if len(args) != 1: + opts.error("incorrect number of arguments") + + interpreter = NltkDrtBoxerDrsInterpreter(occur_index=options.occur_index) + drs = Boxer(interpreter).interpret_multi( + args[0].split(r'\n'), question=options.question, verbose=options.verbose + ) + if drs is None: + print(None) + else: + drs = drs.simplify().eliminate_equality() + if options.fol: + print(drs.fol().normalize()) + else: + drs.pretty_print() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/chat80.py b/venv.bak/lib/python3.7/site-packages/nltk/sem/chat80.py new file mode 100644 index 0000000..9500b35 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sem/chat80.py @@ -0,0 +1,863 @@ +# Natural Language Toolkit: Chat-80 KB Reader +# See http://www.w3.org/TR/swbp-skos-core-guide/ +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ewan Klein , +# URL: +# For license information, see LICENSE.TXT + +""" +Overview +======== + +Chat-80 was a natural language system which allowed the user to +interrogate a Prolog knowledge base in the domain of world +geography. It was developed in the early '80s by Warren and Pereira; see +``http://www.aclweb.org/anthology/J82-3002.pdf`` for a description and +``http://www.cis.upenn.edu/~pereira/oldies.html`` for the source +files. + +This module contains functions to extract data from the Chat-80 +relation files ('the world database'), and convert then into a format +that can be incorporated in the FOL models of +``nltk.sem.evaluate``. The code assumes that the Prolog +input files are available in the NLTK corpora directory. + +The Chat-80 World Database consists of the following files:: + + world0.pl + rivers.pl + cities.pl + countries.pl + contain.pl + borders.pl + +This module uses a slightly modified version of ``world0.pl``, in which +a set of Prolog rules have been omitted. 
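The ``__main__`` block of ``boxer.py`` above also works as a small command-line front end. A hedged usage sketch; it needs the external C&C parser and Boxer binaries that this module wraps, so the output is not reproduced here::

    python boxer.py "John sees Mary." --fol        # print a first-order translation
    python boxer.py "Who sees Mary?" --question    # parse as a question

Multiple sentences can be passed in one TEXT argument separated by a literal '\n', as the ``args[0].split(r'\n')`` call above implies.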
The modified file is named +``world1.pl``. Currently, the file ``rivers.pl`` is not read in, since +it uses a list rather than a string in the second field. + +Reading Chat-80 Files +===================== + +Chat-80 relations are like tables in a relational database. The +relation acts as the name of the table; the first argument acts as the +'primary key'; and subsequent arguments are further fields in the +table. In general, the name of the table provides a label for a unary +predicate whose extension is all the primary keys. For example, +relations in ``cities.pl`` are of the following form:: + + 'city(athens,greece,1368).' + +Here, ``'athens'`` is the key, and will be mapped to a member of the +unary predicate *city*. + +The fields in the table are mapped to binary predicates. The first +argument of the predicate is the primary key, while the second +argument is the data in the relevant field. Thus, in the above +example, the third field is mapped to the binary predicate +*population_of*, whose extension is a set of pairs such as +``'(athens, 1368)'``. + +An exception to this general framework is required by the relations in +the files ``borders.pl`` and ``contains.pl``. These contain facts of the +following form:: + + 'borders(albania,greece).' + + 'contains0(africa,central_africa).' + +We do not want to form a unary concept out the element in +the first field of these records, and we want the label of the binary +relation just to be ``'border'``/``'contain'`` respectively. + +In order to drive the extraction process, we use 'relation metadata bundles' +which are Python dictionaries such as the following:: + + city = {'label': 'city', + 'closures': [], + 'schema': ['city', 'country', 'population'], + 'filename': 'cities.pl'} + +According to this, the file ``city['filename']`` contains a list of +relational tuples (or more accurately, the corresponding strings in +Prolog form) whose predicate symbol is ``city['label']`` and whose +relational schema is ``city['schema']``. The notion of a ``closure`` is +discussed in the next section. + +Concepts +======== +In order to encapsulate the results of the extraction, a class of +``Concept`` objects is introduced. A ``Concept`` object has a number of +attributes, in particular a ``prefLabel`` and ``extension``, which make +it easier to inspect the output of the extraction. In addition, the +``extension`` can be further processed: in the case of the ``'border'`` +relation, we check that the relation is symmetric, and in the case +of the ``'contain'`` relation, we carry out the transitive +closure. The closure properties associated with a concept is +indicated in the relation metadata, as indicated earlier. + +The ``extension`` of a ``Concept`` object is then incorporated into a +``Valuation`` object. + +Persistence +=========== +The functions ``val_dump`` and ``val_load`` are provided to allow a +valuation to be stored in a persistent database and re-loaded, rather +than having to be re-computed each time. + +Individuals and Lexical Items +============================= +As well as deriving relations from the Chat-80 data, we also create a +set of individual constants, one for each entity in the domain. The +individual constants are string-identical to the entities. For +example, given a data item such as ``'zloty'``, we add to the valuation +a pair ``('zloty', 'zloty')``. 
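As a concrete illustration of the pipeline this docstring describes (relation files -> ``Concept`` objects -> ``Valuation``), a minimal sketch; it assumes the ``chat80`` corpus has been installed through ``nltk.download('chat80')``::

    from nltk.sem import chat80

    # Build Concept objects for the 'city' relation only, then fold them into a Valuation.
    city_concepts = chat80.concepts(['city'])
    for c in city_concepts:
        print(c.prefLabel, c.arity)    # city 1, country_of 2, population_of 2
    val = chat80.make_valuation(city_concepts, read=True)
    print('population_of' in val)      # True: concept labels become valuation symbols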
In order to parse English sentences that +refer to these entities, we also create a lexical item such as the +following for each individual constant:: + + PropN[num=sg, sem=<\P.(P zloty)>] -> 'Zloty' + +The set of rules is written to the file ``chat_pnames.cfg`` in the +current directory. + +""" +from __future__ import print_function, unicode_literals + +import re +import shelve +import os +import sys + +from six import string_types + +import nltk.data +from nltk.compat import python_2_unicode_compatible + +########################################################################### +# Chat-80 relation metadata bundles needed to build the valuation +########################################################################### + +borders = { + 'rel_name': 'borders', + 'closures': ['symmetric'], + 'schema': ['region', 'border'], + 'filename': 'borders.pl', +} + +contains = { + 'rel_name': 'contains0', + 'closures': ['transitive'], + 'schema': ['region', 'contain'], + 'filename': 'contain.pl', +} + +city = { + 'rel_name': 'city', + 'closures': [], + 'schema': ['city', 'country', 'population'], + 'filename': 'cities.pl', +} + +country = { + 'rel_name': 'country', + 'closures': [], + 'schema': [ + 'country', + 'region', + 'latitude', + 'longitude', + 'area', + 'population', + 'capital', + 'currency', + ], + 'filename': 'countries.pl', +} + +circle_of_lat = { + 'rel_name': 'circle_of_latitude', + 'closures': [], + 'schema': ['circle_of_latitude', 'degrees'], + 'filename': 'world1.pl', +} + +circle_of_long = { + 'rel_name': 'circle_of_longitude', + 'closures': [], + 'schema': ['circle_of_longitude', 'degrees'], + 'filename': 'world1.pl', +} + +continent = { + 'rel_name': 'continent', + 'closures': [], + 'schema': ['continent'], + 'filename': 'world1.pl', +} + +region = { + 'rel_name': 'in_continent', + 'closures': [], + 'schema': ['region', 'continent'], + 'filename': 'world1.pl', +} + +ocean = { + 'rel_name': 'ocean', + 'closures': [], + 'schema': ['ocean'], + 'filename': 'world1.pl', +} + +sea = {'rel_name': 'sea', 'closures': [], 'schema': ['sea'], 'filename': 'world1.pl'} + + +items = [ + 'borders', + 'contains', + 'city', + 'country', + 'circle_of_lat', + 'circle_of_long', + 'continent', + 'region', + 'ocean', + 'sea', +] +items = tuple(sorted(items)) + +item_metadata = { + 'borders': borders, + 'contains': contains, + 'city': city, + 'country': country, + 'circle_of_lat': circle_of_lat, + 'circle_of_long': circle_of_long, + 'continent': continent, + 'region': region, + 'ocean': ocean, + 'sea': sea, +} + +rels = item_metadata.values() + +not_unary = ['borders.pl', 'contain.pl'] + +########################################################################### + + +@python_2_unicode_compatible +class Concept(object): + """ + A Concept class, loosely based on SKOS + (http://www.w3.org/TR/swbp-skos-core-guide/). 
+ """ + + def __init__(self, prefLabel, arity, altLabels=[], closures=[], extension=set()): + """ + :param prefLabel: the preferred label for the concept + :type prefLabel: str + :param arity: the arity of the concept + :type arity: int + @keyword altLabels: other (related) labels + :type altLabels: list + @keyword closures: closure properties of the extension \ + (list items can be ``symmetric``, ``reflexive``, ``transitive``) + :type closures: list + @keyword extension: the extensional value of the concept + :type extension: set + """ + self.prefLabel = prefLabel + self.arity = arity + self.altLabels = altLabels + self.closures = closures + # keep _extension internally as a set + self._extension = extension + # public access is via a list (for slicing) + self.extension = sorted(list(extension)) + + def __str__(self): + # _extension = '' + # for element in sorted(self.extension): + # if isinstance(element, tuple): + # element = '(%s, %s)' % (element) + # _extension += element + ', ' + # _extension = _extension[:-1] + + return "Label = '%s'\nArity = %s\nExtension = %s" % ( + self.prefLabel, + self.arity, + self.extension, + ) + + def __repr__(self): + return "Concept('%s')" % self.prefLabel + + def augment(self, data): + """ + Add more data to the ``Concept``'s extension set. + + :param data: a new semantic value + :type data: string or pair of strings + :rtype: set + + """ + self._extension.add(data) + self.extension = sorted(list(self._extension)) + return self._extension + + def _make_graph(self, s): + """ + Convert a set of pairs into an adjacency linked list encoding of a graph. + """ + g = {} + for (x, y) in s: + if x in g: + g[x].append(y) + else: + g[x] = [y] + return g + + def _transclose(self, g): + """ + Compute the transitive closure of a graph represented as a linked list. + """ + for x in g: + for adjacent in g[x]: + # check that adjacent is a key + if adjacent in g: + for y in g[adjacent]: + if y not in g[x]: + g[x].append(y) + return g + + def _make_pairs(self, g): + """ + Convert an adjacency linked list back into a set of pairs. + """ + pairs = [] + for node in g: + for adjacent in g[node]: + pairs.append((node, adjacent)) + return set(pairs) + + def close(self): + """ + Close a binary relation in the ``Concept``'s extension set. + + :return: a new extension for the ``Concept`` in which the + relation is closed under a given property + """ + from nltk.sem import is_rel + + assert is_rel(self._extension) + if 'symmetric' in self.closures: + pairs = [] + for (x, y) in self._extension: + pairs.append((y, x)) + sym = set(pairs) + self._extension = self._extension.union(sym) + if 'transitive' in self.closures: + all = self._make_graph(self._extension) + closed = self._transclose(all) + trans = self._make_pairs(closed) + # print sorted(trans) + self._extension = self._extension.union(trans) + self.extension = sorted(list(self._extension)) + + +def clause2concepts(filename, rel_name, schema, closures=[]): + """ + Convert a file of Prolog clauses into a list of ``Concept`` objects. 
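The ``augment``/``close`` machinery of ``Concept`` above is self-contained, so the closure behaviour can be shown without the Chat-80 data files; the region names below are purely illustrative::

    from nltk.sem.chat80 import Concept

    c = Concept('contain', arity=2, closures=['transitive'], extension=set())
    c.augment(('africa', 'central_africa'))
    c.augment(('central_africa', 'chad'))
    c.close()             # computes the transitive closure in place
    print(c.extension)    # now also contains ('africa', 'chad')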
+ + :param filename: filename containing the relations + :type filename: str + :param rel_name: name of the relation + :type rel_name: str + :param schema: the schema used in a set of relational tuples + :type schema: list + :param closures: closure properties for the extension of the concept + :type closures: list + :return: a list of ``Concept`` objects + :rtype: list + """ + concepts = [] + # position of the subject of a binary relation + subj = 0 + # label of the 'primary key' + pkey = schema[0] + # fields other than the primary key + fields = schema[1:] + + # convert a file into a list of lists + records = _str2records(filename, rel_name) + + # add a unary concept corresponding to the set of entities + # in the primary key position + # relations in 'not_unary' are more like ordinary binary relations + if not filename in not_unary: + concepts.append(unary_concept(pkey, subj, records)) + + # add a binary concept for each non-key field + for field in fields: + obj = schema.index(field) + concepts.append(binary_concept(field, closures, subj, obj, records)) + + return concepts + + +def cities2table(filename, rel_name, dbname, verbose=False, setup=False): + """ + Convert a file of Prolog clauses into a database table. + + This is not generic, since it doesn't allow arbitrary + schemas to be set as a parameter. + + Intended usage:: + + cities2table('cities.pl', 'city', 'city.db', verbose=True, setup=True) + + :param filename: filename containing the relations + :type filename: str + :param rel_name: name of the relation + :type rel_name: str + :param dbname: filename of persistent store + :type schema: str + """ + import sqlite3 + + records = _str2records(filename, rel_name) + connection = sqlite3.connect(dbname) + cur = connection.cursor() + if setup: + cur.execute( + '''CREATE TABLE city_table + (City text, Country text, Population int)''' + ) + + table_name = "city_table" + for t in records: + cur.execute('insert into %s values (?,?,?)' % table_name, t) + if verbose: + print("inserting values into %s: " % table_name, t) + connection.commit() + if verbose: + print("Committing update to %s" % dbname) + cur.close() + + +def sql_query(dbname, query): + """ + Execute an SQL query over a database. + :param dbname: filename of persistent store + :type schema: str + :param query: SQL query + :type rel_name: str + """ + import sqlite3 + + try: + path = nltk.data.find(dbname) + connection = sqlite3.connect(str(path)) + cur = connection.cursor() + return cur.execute(query) + except (ValueError, sqlite3.OperationalError): + import warnings + + warnings.warn( + "Make sure the database file %s is installed and uncompressed." % dbname + ) + raise + + +def _str2records(filename, rel): + """ + Read a file into memory and convert each relation clause into a list. + """ + recs = [] + contents = nltk.data.load("corpora/chat80/%s" % filename, format="text") + for line in contents.splitlines(): + if line.startswith(rel): + line = re.sub(rel + r'\(', '', line) + line = re.sub(r'\)\.$', '', line) + record = line.split(',') + recs.append(record) + return recs + + +def unary_concept(label, subj, records): + """ + Make a unary concept out of the primary key in a record. + + A record is a list of entities in some relation, such as + ``['france', 'paris']``, where ``'france'`` is acting as the primary + key. 
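``sql_query`` above resolves the database path through ``nltk.data.find``, so it can be pointed at the pre-built ``city.db`` shipped with ``nltk_data`` (the same path ``sql_demo`` uses further down). A hedged sketch against the ``city_table`` schema created by ``cities2table``::

    from nltk.sem.chat80 import sql_query

    q = "SELECT City, Population FROM city_table WHERE Country = 'greece'"
    for city, population in sql_query('corpora/city_database/city.db', q):
        print(city, population)    # e.g. athens 1368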
+ + :param label: the preferred label for the concept + :type label: string + :param subj: position in the record of the subject of the predicate + :type subj: int + :param records: a list of records + :type records: list of lists + :return: ``Concept`` of arity 1 + :rtype: Concept + """ + c = Concept(label, arity=1, extension=set()) + for record in records: + c.augment(record[subj]) + return c + + +def binary_concept(label, closures, subj, obj, records): + """ + Make a binary concept out of the primary key and another field in a record. + + A record is a list of entities in some relation, such as + ``['france', 'paris']``, where ``'france'`` is acting as the primary + key, and ``'paris'`` stands in the ``'capital_of'`` relation to + ``'france'``. + + More generally, given a record such as ``['a', 'b', 'c']``, where + label is bound to ``'B'``, and ``obj`` bound to 1, the derived + binary concept will have label ``'B_of'``, and its extension will + be a set of pairs such as ``('a', 'b')``. + + + :param label: the base part of the preferred label for the concept + :type label: str + :param closures: closure properties for the extension of the concept + :type closures: list + :param subj: position in the record of the subject of the predicate + :type subj: int + :param obj: position in the record of the object of the predicate + :type obj: int + :param records: a list of records + :type records: list of lists + :return: ``Concept`` of arity 2 + :rtype: Concept + """ + if not label == 'border' and not label == 'contain': + label = label + '_of' + c = Concept(label, arity=2, closures=closures, extension=set()) + for record in records: + c.augment((record[subj], record[obj])) + # close the concept's extension according to the properties in closures + c.close() + return c + + +def process_bundle(rels): + """ + Given a list of relation metadata bundles, make a corresponding + dictionary of concepts, indexed by the relation name. + + :param rels: bundle of metadata needed for constructing a concept + :type rels: list(dict) + :return: a dictionary of concepts, indexed by the relation name. + :rtype: dict(str): Concept + """ + concepts = {} + for rel in rels: + rel_name = rel['rel_name'] + closures = rel['closures'] + schema = rel['schema'] + filename = rel['filename'] + + concept_list = clause2concepts(filename, rel_name, schema, closures) + for c in concept_list: + label = c.prefLabel + if label in concepts: + for data in c.extension: + concepts[label].augment(data) + concepts[label].close() + else: + concepts[label] = c + return concepts + + +def make_valuation(concepts, read=False, lexicon=False): + """ + Convert a list of ``Concept`` objects into a list of (label, extension) pairs; + optionally create a ``Valuation`` object. + + :param concepts: concepts + :type concepts: list(Concept) + :param read: if ``True``, ``(symbol, set)`` pairs are read into a ``Valuation`` + :type read: bool + :rtype: list or Valuation + """ + vals = [] + + for c in concepts: + vals.append((c.prefLabel, c.extension)) + if lexicon: + read = True + if read: + from nltk.sem import Valuation + + val = Valuation({}) + val.update(vals) + # add labels for individuals + val = label_indivs(val, lexicon=lexicon) + return val + else: + return vals + + +def val_dump(rels, db): + """ + Make a ``Valuation`` from a list of relation metadata bundles and dump to + persistent database. + + :param rels: bundle of metadata needed for constructing a concept + :type rels: list of dict + :param db: name of file to which data is written. 
+ The suffix '.db' will be automatically appended. + :type db: str + """ + concepts = process_bundle(rels).values() + valuation = make_valuation(concepts, read=True) + db_out = shelve.open(db, 'n') + + db_out.update(valuation) + + db_out.close() + + +def val_load(db): + """ + Load a ``Valuation`` from a persistent database. + + :param db: name of file from which data is read. + The suffix '.db' should be omitted from the name. + :type db: str + """ + dbname = db + ".db" + + if not os.access(dbname, os.R_OK): + sys.exit("Cannot read file: %s" % dbname) + else: + db_in = shelve.open(db) + from nltk.sem import Valuation + + val = Valuation(db_in) + # val.read(db_in.items()) + return val + + +# def alpha(str): +# """ +# Utility to filter out non-alphabetic constants. + +#:param str: candidate constant +#:type str: string +#:rtype: bool +# """ +# try: +# int(str) +# return False +# except ValueError: +## some unknown values in records are labeled '?' +# if not str == '?': +# return True + + +def label_indivs(valuation, lexicon=False): + """ + Assign individual constants to the individuals in the domain of a ``Valuation``. + + Given a valuation with an entry of the form ``{'rel': {'a': True}}``, + add a new entry ``{'a': 'a'}``. + + :type valuation: Valuation + :rtype: Valuation + """ + # collect all the individuals into a domain + domain = valuation.domain + # convert the domain into a sorted list of alphabetic terms + # use the same string as a label + pairs = [(e, e) for e in domain] + if lexicon: + lex = make_lex(domain) + with open("chat_pnames.cfg", 'w') as outfile: + outfile.writelines(lex) + # read the pairs into the valuation + valuation.update(pairs) + return valuation + + +def make_lex(symbols): + """ + Create lexical CFG rules for each individual symbol. + + Given a valuation with an entry of the form ``{'zloty': 'zloty'}``, + create a lexical rule for the proper name 'Zloty'. + + :param symbols: a list of individual constants in the semantic representation + :type symbols: sequence -- set(str) + :rtype: list(str) + """ + lex = [] + header = """ +################################################################## +# Lexical rules automatically generated by running 'chat80.py -x'. +################################################################## + +""" + lex.append(header) + template = "PropN[num=sg, sem=<\P.(P %s)>] -> '%s'\n" + + for s in symbols: + parts = s.split('_') + caps = [p.capitalize() for p in parts] + pname = '_'.join(caps) + rule = template % (s, pname) + lex.append(rule) + return lex + + +########################################################################### +# Interface function to emulate other corpus readers +########################################################################### + + +def concepts(items=items): + """ + Build a list of concepts corresponding to the relation names in ``items``. + + :param items: names of the Chat-80 relations to extract + :type items: list(str) + :return: the ``Concept`` objects which are extracted from the relations + :rtype: list(Concept) + """ + if isinstance(items, string_types): + items = (items,) + + rels = [item_metadata[r] for r in items] + + concept_map = process_bundle(rels) + return concept_map.values() + + +########################################################################### + + +def main(): + import sys + from optparse import OptionParser + + description = """ +Extract data from the Chat-80 Prolog files and convert them into a +Valuation object for use in the NLTK semantics package. 
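The persistence helpers described here support a dump-once, load-later pattern. A sketch, assuming the chat80 corpus is installed; 'world' is an arbitrary base name chosen for this example, and the exact on-disk file name depends on the dbm backend behind ``shelve``::

    from nltk.sem import chat80

    chat80.val_dump(chat80.rels, 'world')   # compute the full valuation and store it
    # ... later, in another session ...
    val = chat80.val_load('world')          # reload it without re-reading the .pl files
    print(len(val))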
+ """ + + opts = OptionParser(description=description) + opts.set_defaults(verbose=True, lex=False, vocab=False) + opts.add_option( + "-s", "--store", dest="outdb", help="store a valuation in DB", metavar="DB" + ) + opts.add_option( + "-l", + "--load", + dest="indb", + help="load a stored valuation from DB", + metavar="DB", + ) + opts.add_option( + "-c", + "--concepts", + action="store_true", + help="print concepts instead of a valuation", + ) + opts.add_option( + "-r", + "--relation", + dest="label", + help="print concept with label REL (check possible labels with '-v' option)", + metavar="REL", + ) + opts.add_option( + "-q", + "--quiet", + action="store_false", + dest="verbose", + help="don't print out progress info", + ) + opts.add_option( + "-x", + "--lex", + action="store_true", + dest="lex", + help="write a file of lexical entries for country names, then exit", + ) + opts.add_option( + "-v", + "--vocab", + action="store_true", + dest="vocab", + help="print out the vocabulary of concept labels and their arity, then exit", + ) + + (options, args) = opts.parse_args() + if options.outdb and options.indb: + opts.error("Options --store and --load are mutually exclusive") + + if options.outdb: + # write the valuation to a persistent database + if options.verbose: + outdb = options.outdb + ".db" + print("Dumping a valuation to %s" % outdb) + val_dump(rels, options.outdb) + sys.exit(0) + else: + # try to read in a valuation from a database + if options.indb is not None: + dbname = options.indb + ".db" + if not os.access(dbname, os.R_OK): + sys.exit("Cannot read file: %s" % dbname) + else: + valuation = val_load(options.indb) + # we need to create the valuation from scratch + else: + # build some concepts + concept_map = process_bundle(rels) + concepts = concept_map.values() + # just print out the vocabulary + if options.vocab: + items = sorted([(c.arity, c.prefLabel) for c in concepts]) + for (arity, label) in items: + print(label, arity) + sys.exit(0) + # show all the concepts + if options.concepts: + for c in concepts: + print(c) + print() + if options.label: + print(concept_map[options.label]) + sys.exit(0) + else: + # turn the concepts into a Valuation + if options.lex: + if options.verbose: + print("Writing out lexical rules") + make_valuation(concepts, lexicon=True) + else: + valuation = make_valuation(concepts, read=True) + print(valuation) + + +def sql_demo(): + """ + Print out every row from the 'city.db' database. + """ + print() + print("Using SQL to extract rows from 'city.db' RDB.") + for row in sql_query('corpora/city_database/city.db', "SELECT * FROM city_table"): + print(row) + + +if __name__ == '__main__': + main() + sql_demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/cooper_storage.py b/venv.bak/lib/python3.7/site-packages/nltk/sem/cooper_storage.py new file mode 100644 index 0000000..4aca110 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sem/cooper_storage.py @@ -0,0 +1,125 @@ +# Natural Language Toolkit: Cooper storage for Quantifier Ambiguity +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ewan Klein +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function + +from nltk.sem.logic import LambdaExpression, ApplicationExpression, Variable +from nltk.parse import load_parser +from nltk.parse.featurechart import InstantiateVarsChart + + +class CooperStore(object): + """ + A container for handling quantifier ambiguity via Cooper storage. 
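``main()`` above also exposes the extraction as a command-line tool; a usage sketch, run from the directory containing ``chat80.py`` with the chat80 corpus installed::

    python chat80.py --vocab     # list concept labels together with their arity
    python chat80.py -r city     # print the 'city' concept
    python chat80.py -s world    # dump the valuation to world.db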
+ """ + + def __init__(self, featstruct): + """ + :param featstruct: The value of the ``sem`` node in a tree from + ``parse_with_bindops()`` + :type featstruct: FeatStruct (with features ``core`` and ``store``) + + """ + self.featstruct = featstruct + self.readings = [] + try: + self.core = featstruct['CORE'] + self.store = featstruct['STORE'] + except KeyError: + print("%s is not a Cooper storage structure" % featstruct) + + def _permute(self, lst): + """ + :return: An iterator over the permutations of the input list + :type lst: list + :rtype: iter + """ + remove = lambda lst0, index: lst0[:index] + lst0[index + 1 :] + if lst: + for index, x in enumerate(lst): + for y in self._permute(remove(lst, index)): + yield (x,) + y + else: + yield () + + def s_retrieve(self, trace=False): + """ + Carry out S-Retrieval of binding operators in store. If hack=True, + serialize the bindop and core as strings and reparse. Ugh. + + Each permutation of the store (i.e. list of binding operators) is + taken to be a possible scoping of quantifiers. We iterate through the + binding operators in each permutation, and successively apply them to + the current term, starting with the core semantic representation, + working from the inside out. + + Binding operators are of the form:: + + bo(\P.all x.(man(x) -> P(x)),z1) + """ + for perm, store_perm in enumerate(self._permute(self.store)): + if trace: + print("Permutation %s" % (perm + 1)) + term = self.core + for bindop in store_perm: + # we just want the arguments that are wrapped by the 'bo' predicate + quant, varex = tuple(bindop.args) + # use var to make an abstraction over the current term and then + # apply the quantifier to it + term = ApplicationExpression( + quant, LambdaExpression(varex.variable, term) + ) + if trace: + print(" ", term) + term = term.simplify() + self.readings.append(term) + + +def parse_with_bindops(sentence, grammar=None, trace=0): + """ + Use a grammar with Binding Operators to parse a sentence. + """ + if not grammar: + grammar = 'grammars/book_grammars/storage.fcfg' + parser = load_parser(grammar, trace=trace, chart_class=InstantiateVarsChart) + # Parse the sentence. 
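The ``s_retrieve`` logic above enumerates quantifier scopings by permuting the store, and the ``demo`` further down in this file follows the same pattern. A condensed sketch; it needs the ``book_grammars/storage.fcfg`` grammar from ``nltk_data``::

    from nltk.sem.cooper_storage import parse_with_bindops, CooperStore

    trees = parse_with_bindops('every girl chases a dog')
    semrep = CooperStore(trees[0].label()['SEM'])
    semrep.s_retrieve(trace=False)
    for reading in semrep.readings:
        print(reading)   # two readings, one per relative scope of 'every' and 'a'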
+ tokens = sentence.split() + return list(parser.parse(tokens)) + + +def demo(): + from nltk.sem import cooper_storage as cs + + sentence = "every girl chases a dog" + # sentence = "a man gives a bone to every dog" + print() + print("Analyis of sentence '%s'" % sentence) + print("=" * 50) + trees = cs.parse_with_bindops(sentence, trace=0) + for tree in trees: + semrep = cs.CooperStore(tree.label()['SEM']) + print() + print("Binding operators:") + print("-" * 15) + for s in semrep.store: + print(s) + print() + print("Core:") + print("-" * 15) + print(semrep.core) + print() + print("S-Retrieval:") + print("-" * 15) + semrep.s_retrieve(trace=True) + print("Readings:") + print("-" * 15) + + for i, reading in enumerate(semrep.readings): + print("%s: %s" % (i + 1, reading)) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/drt.py b/venv.bak/lib/python3.7/site-packages/nltk/sem/drt.py new file mode 100644 index 0000000..8bc67f6 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sem/drt.py @@ -0,0 +1,1463 @@ +# Natural Language Toolkit: Discourse Representation Theory (DRT) +# +# Author: Dan Garrette +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals + +import operator +from functools import reduce +from itertools import chain + +from six import string_types + +from nltk.compat import python_2_unicode_compatible +from nltk.sem.logic import ( + APP, + AbstractVariableExpression, + AllExpression, + AndExpression, + ApplicationExpression, + BinaryExpression, + BooleanExpression, + ConstantExpression, + EqualityExpression, + EventVariableExpression, + ExistsExpression, + Expression, + FunctionVariableExpression, + ImpExpression, + IndividualVariableExpression, + LambdaExpression, + Tokens, + LogicParser, + NegatedExpression, + OrExpression, + Variable, + is_eventvar, + is_funcvar, + is_indvar, + unique_variable, +) + +# Import Tkinter-based modules if they are available +try: + from six.moves.tkinter import Canvas, Tk + from six.moves.tkinter_font import Font + from nltk.util import in_idle + +except ImportError: + # No need to print a warning here, nltk.draw has already printed one. 
+ pass + + +class DrtTokens(Tokens): + DRS = 'DRS' + DRS_CONC = '+' + PRONOUN = 'PRO' + OPEN_BRACKET = '[' + CLOSE_BRACKET = ']' + COLON = ':' + + PUNCT = [DRS_CONC, OPEN_BRACKET, CLOSE_BRACKET, COLON] + + SYMBOLS = Tokens.SYMBOLS + PUNCT + + TOKENS = Tokens.TOKENS + [DRS] + PUNCT + + +class DrtParser(LogicParser): + """A lambda calculus expression parser.""" + + def __init__(self): + LogicParser.__init__(self) + + self.operator_precedence = dict( + [(x, 1) for x in DrtTokens.LAMBDA_LIST] + + [(x, 2) for x in DrtTokens.NOT_LIST] + + [(APP, 3)] + + [(x, 4) for x in DrtTokens.EQ_LIST + Tokens.NEQ_LIST] + + [(DrtTokens.COLON, 5)] + + [(DrtTokens.DRS_CONC, 6)] + + [(x, 7) for x in DrtTokens.OR_LIST] + + [(x, 8) for x in DrtTokens.IMP_LIST] + + [(None, 9)] + ) + + def get_all_symbols(self): + """This method exists to be overridden""" + return DrtTokens.SYMBOLS + + def isvariable(self, tok): + return tok not in DrtTokens.TOKENS + + def handle(self, tok, context): + """This method is intended to be overridden for logics that + use different operators or expressions""" + if tok in DrtTokens.NOT_LIST: + return self.handle_negation(tok, context) + + elif tok in DrtTokens.LAMBDA_LIST: + return self.handle_lambda(tok, context) + + elif tok == DrtTokens.OPEN: + if self.inRange(0) and self.token(0) == DrtTokens.OPEN_BRACKET: + return self.handle_DRS(tok, context) + else: + return self.handle_open(tok, context) + + elif tok.upper() == DrtTokens.DRS: + self.assertNextToken(DrtTokens.OPEN) + return self.handle_DRS(tok, context) + + elif self.isvariable(tok): + if self.inRange(0) and self.token(0) == DrtTokens.COLON: + return self.handle_prop(tok, context) + else: + return self.handle_variable(tok, context) + + def make_NegatedExpression(self, expression): + return DrtNegatedExpression(expression) + + def handle_DRS(self, tok, context): + # a DRS + refs = self.handle_refs() + if ( + self.inRange(0) and self.token(0) == DrtTokens.COMMA + ): # if there is a comma (it's optional) + self.token() # swallow the comma + conds = self.handle_conds(context) + self.assertNextToken(DrtTokens.CLOSE) + return DRS(refs, conds, None) + + def handle_refs(self): + self.assertNextToken(DrtTokens.OPEN_BRACKET) + refs = [] + while self.inRange(0) and self.token(0) != DrtTokens.CLOSE_BRACKET: + # Support expressions like: DRS([x y],C) == DRS([x,y],C) + if refs and self.token(0) == DrtTokens.COMMA: + self.token() # swallow the comma + refs.append(self.get_next_token_variable('quantified')) + self.assertNextToken(DrtTokens.CLOSE_BRACKET) + return refs + + def handle_conds(self, context): + self.assertNextToken(DrtTokens.OPEN_BRACKET) + conds = [] + while self.inRange(0) and self.token(0) != DrtTokens.CLOSE_BRACKET: + # Support expressions like: DRS([x y],C) == DRS([x, y],C) + if conds and self.token(0) == DrtTokens.COMMA: + self.token() # swallow the comma + conds.append(self.process_next_expression(context)) + self.assertNextToken(DrtTokens.CLOSE_BRACKET) + return conds + + def handle_prop(self, tok, context): + variable = self.make_VariableExpression(tok) + self.assertNextToken(':') + drs = self.process_next_expression(DrtTokens.COLON) + return DrtProposition(variable, drs) + + def make_EqualityExpression(self, first, second): + """This method serves as a hook for other logic parsers that + have different equality expression classes""" + return DrtEqualityExpression(first, second) + + def get_BooleanExpression_factory(self, tok): + """This method serves as a hook for other logic parsers that + have different boolean operators""" 
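With the grammar of ``DrtParser`` laid out above, DRS strings can be parsed directly; both the bare ``([refs],[conds])`` form and the explicit ``DRS(...)`` keyword form are accepted. A small sketch::

    from nltk.sem.drt import DrtParser

    parser = DrtParser()
    d = parser.parse('([x],[dog(x), walk(x)])')
    print(d)          # ([x],[dog(x), walk(x)])
    print(d.fol())    # exists x.(dog(x) & walk(x))
    # 'DRS([x],[dog(x), walk(x)])' parses to the same structure.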
+ if tok == DrtTokens.DRS_CONC: + return lambda first, second: DrtConcatenation(first, second, None) + elif tok in DrtTokens.OR_LIST: + return DrtOrExpression + elif tok in DrtTokens.IMP_LIST: + + def make_imp_expression(first, second): + if isinstance(first, DRS): + return DRS(first.refs, first.conds, second) + if isinstance(first, DrtConcatenation): + return DrtConcatenation(first.first, first.second, second) + raise Exception('Antecedent of implication must be a DRS') + + return make_imp_expression + else: + return None + + def make_BooleanExpression(self, factory, first, second): + return factory(first, second) + + def make_ApplicationExpression(self, function, argument): + return DrtApplicationExpression(function, argument) + + def make_VariableExpression(self, name): + return DrtVariableExpression(Variable(name)) + + def make_LambdaExpression(self, variables, term): + return DrtLambdaExpression(variables, term) + + +class DrtExpression(object): + """ + This is the base abstract DRT Expression from which every DRT + Expression extends. + """ + + _drt_parser = DrtParser() + + @classmethod + def fromstring(cls, s): + return cls._drt_parser.parse(s) + + def applyto(self, other): + return DrtApplicationExpression(self, other) + + def __neg__(self): + return DrtNegatedExpression(self) + + def __and__(self, other): + raise NotImplementedError() + + def __or__(self, other): + assert isinstance(other, DrtExpression) + return DrtOrExpression(self, other) + + def __gt__(self, other): + assert isinstance(other, DrtExpression) + if isinstance(self, DRS): + return DRS(self.refs, self.conds, other) + if isinstance(self, DrtConcatenation): + return DrtConcatenation(self.first, self.second, other) + raise Exception('Antecedent of implication must be a DRS') + + def equiv(self, other, prover=None): + """ + Check for logical equivalence. + Pass the expression (self <-> other) to the theorem prover. + If the prover says it is valid, then the self and other are equal. + + :param other: an ``DrtExpression`` to check equality against + :param prover: a ``nltk.inference.api.Prover`` + """ + assert isinstance(other, DrtExpression) + + f1 = self.simplify().fol() + f2 = other.simplify().fol() + return f1.equiv(f2, prover) + + @property + def type(self): + raise AttributeError( + "'%s' object has no attribute 'type'" % self.__class__.__name__ + ) + + def typecheck(self, signature=None): + raise NotImplementedError() + + def __add__(self, other): + return DrtConcatenation(self, other, None) + + def get_refs(self, recursive=False): + """ + Return the set of discourse referents in this DRS. + :param recursive: bool Also find discourse referents in subterms? + :return: list of ``Variable`` objects + """ + raise NotImplementedError() + + def is_pronoun_function(self): + """ Is self of the form "PRO(x)"? 
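The operator overloads on ``DrtExpression`` above (``+`` for concatenation, ``>`` for implication, ``fromstring`` for parsing) give a compact way to build and convert DRSs. A sketch::

    from nltk.sem.drt import DrtExpression

    a = DrtExpression.fromstring('([x],[dog(x)])')
    b = DrtExpression.fromstring('([y],[cat(y)])')
    print((a + b).simplify())    # ([x,y],[dog(x), cat(y)])

    c = DrtExpression.fromstring('([x],[dog(x)]) -> ([],[bark(x)])')
    print(c.fol())               # all x.(dog(x) -> bark(x))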
""" + return ( + isinstance(self, DrtApplicationExpression) + and isinstance(self.function, DrtAbstractVariableExpression) + and self.function.variable.name == DrtTokens.PRONOUN + and isinstance(self.argument, DrtIndividualVariableExpression) + ) + + def make_EqualityExpression(self, first, second): + return DrtEqualityExpression(first, second) + + def make_VariableExpression(self, variable): + return DrtVariableExpression(variable) + + def resolve_anaphora(self): + return resolve_anaphora(self) + + def eliminate_equality(self): + return self.visit_structured(lambda e: e.eliminate_equality(), self.__class__) + + def pretty_format(self): + """ + Draw the DRS + :return: the pretty print string + """ + return '\n'.join(self._pretty()) + + def pretty_print(self): + print(self.pretty_format()) + + def draw(self): + DrsDrawer(self).draw() + + +@python_2_unicode_compatible +class DRS(DrtExpression, Expression): + """A Discourse Representation Structure.""" + + def __init__(self, refs, conds, consequent=None): + """ + :param refs: list of ``DrtIndividualVariableExpression`` for the + discourse referents + :param conds: list of ``Expression`` for the conditions + """ + self.refs = refs + self.conds = conds + self.consequent = consequent + + def replace(self, variable, expression, replace_bound=False, alpha_convert=True): + """Replace all instances of variable v with expression E in self, + where v is free in self.""" + if variable in self.refs: + # if a bound variable is the thing being replaced + if not replace_bound: + return self + else: + i = self.refs.index(variable) + if self.consequent: + consequent = self.consequent.replace( + variable, expression, True, alpha_convert + ) + else: + consequent = None + return DRS( + self.refs[:i] + [expression.variable] + self.refs[i + 1 :], + [ + cond.replace(variable, expression, True, alpha_convert) + for cond in self.conds + ], + consequent, + ) + else: + if alpha_convert: + # any bound variable that appears in the expression must + # be alpha converted to avoid a conflict + for ref in set(self.refs) & expression.free(): + newvar = unique_variable(ref) + newvarex = DrtVariableExpression(newvar) + i = self.refs.index(ref) + if self.consequent: + consequent = self.consequent.replace( + ref, newvarex, True, alpha_convert + ) + else: + consequent = None + self = DRS( + self.refs[:i] + [newvar] + self.refs[i + 1 :], + [ + cond.replace(ref, newvarex, True, alpha_convert) + for cond in self.conds + ], + consequent, + ) + + # replace in the conditions + if self.consequent: + consequent = self.consequent.replace( + variable, expression, replace_bound, alpha_convert + ) + else: + consequent = None + return DRS( + self.refs, + [ + cond.replace(variable, expression, replace_bound, alpha_convert) + for cond in self.conds + ], + consequent, + ) + + def free(self): + """:see: Expression.free()""" + conds_free = reduce(operator.or_, [c.free() for c in self.conds], set()) + if self.consequent: + conds_free.update(self.consequent.free()) + return conds_free - set(self.refs) + + def get_refs(self, recursive=False): + """:see: AbstractExpression.get_refs()""" + if recursive: + conds_refs = self.refs + list( + chain(*(c.get_refs(True) for c in self.conds)) + ) + if self.consequent: + conds_refs.extend(self.consequent.get_refs(True)) + return conds_refs + else: + return self.refs + + def visit(self, function, combinator): + """:see: Expression.visit()""" + parts = list(map(function, self.conds)) + if self.consequent: + parts.append(function(self.consequent)) + return 
combinator(parts) + + def visit_structured(self, function, combinator): + """:see: Expression.visit_structured()""" + consequent = function(self.consequent) if self.consequent else None + return combinator(self.refs, list(map(function, self.conds)), consequent) + + def eliminate_equality(self): + drs = self + i = 0 + while i < len(drs.conds): + cond = drs.conds[i] + if ( + isinstance(cond, EqualityExpression) + and isinstance(cond.first, AbstractVariableExpression) + and isinstance(cond.second, AbstractVariableExpression) + ): + drs = DRS( + list(set(drs.refs) - set([cond.second.variable])), + drs.conds[:i] + drs.conds[i + 1 :], + drs.consequent, + ) + if cond.second.variable != cond.first.variable: + drs = drs.replace(cond.second.variable, cond.first, False, False) + i = 0 + i -= 1 + i += 1 + + conds = [] + for cond in drs.conds: + new_cond = cond.eliminate_equality() + new_cond_simp = new_cond.simplify() + if ( + not isinstance(new_cond_simp, DRS) + or new_cond_simp.refs + or new_cond_simp.conds + or new_cond_simp.consequent + ): + conds.append(new_cond) + + consequent = drs.consequent.eliminate_equality() if drs.consequent else None + return DRS(drs.refs, conds, consequent) + + def fol(self): + if self.consequent: + accum = None + if self.conds: + accum = reduce(AndExpression, [c.fol() for c in self.conds]) + + if accum: + accum = ImpExpression(accum, self.consequent.fol()) + else: + accum = self.consequent.fol() + + for ref in self.refs[::-1]: + accum = AllExpression(ref, accum) + + return accum + + else: + if not self.conds: + raise Exception("Cannot convert DRS with no conditions to FOL.") + accum = reduce(AndExpression, [c.fol() for c in self.conds]) + for ref in map(Variable, self._order_ref_strings(self.refs)[::-1]): + accum = ExistsExpression(ref, accum) + return accum + + def _pretty(self): + refs_line = ' '.join(self._order_ref_strings(self.refs)) + + cond_lines = [ + cond + for cond_line in [ + filter(lambda s: s.strip(), cond._pretty()) for cond in self.conds + ] + for cond in cond_line + ] + length = max([len(refs_line)] + list(map(len, cond_lines))) + drs = ( + [ + ' _' + '_' * length + '_ ', + '| ' + refs_line.ljust(length) + ' |', + '|-' + '-' * length + '-|', + ] + + ['| ' + line.ljust(length) + ' |' for line in cond_lines] + + ['|_' + '_' * length + '_|'] + ) + if self.consequent: + return DrtBinaryExpression._assemble_pretty( + drs, DrtTokens.IMP, self.consequent._pretty() + ) + return drs + + def _order_ref_strings(self, refs): + strings = ["%s" % ref for ref in refs] + ind_vars = [] + func_vars = [] + event_vars = [] + other_vars = [] + for s in strings: + if is_indvar(s): + ind_vars.append(s) + elif is_funcvar(s): + func_vars.append(s) + elif is_eventvar(s): + event_vars.append(s) + else: + other_vars.append(s) + return ( + sorted(other_vars) + + sorted(event_vars, key=lambda v: int([v[2:], -1][len(v[2:]) == 0])) + + sorted(func_vars, key=lambda v: (v[0], int([v[1:], -1][len(v[1:]) == 0]))) + + sorted(ind_vars, key=lambda v: (v[0], int([v[1:], -1][len(v[1:]) == 0]))) + ) + + def __eq__(self, other): + r"""Defines equality modulo alphabetic variance. 
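``eliminate_equality`` above folds ``x = y`` conditions away by dropping one referent and substituting the other, and ``free`` reports referents not bound by the DRS. A sketch::

    from nltk.sem.drt import DrtExpression

    d = DrtExpression.fromstring('([x,y],[(x = y), dog(x), walk(y)])')
    print(d.eliminate_equality())   # ([x],[dog(x), walk(x)])
    print(d.free())                 # set(): both referents are bound by the DRS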
+ If we are comparing \x.M and \y.N, then check equality of M and N[x/y].""" + if isinstance(other, DRS): + if len(self.refs) == len(other.refs): + converted_other = other + for (r1, r2) in zip(self.refs, converted_other.refs): + varex = self.make_VariableExpression(r1) + converted_other = converted_other.replace(r2, varex, True) + if self.consequent == converted_other.consequent and len( + self.conds + ) == len(converted_other.conds): + for c1, c2 in zip(self.conds, converted_other.conds): + if not (c1 == c2): + return False + return True + return False + + def __ne__(self, other): + return not self == other + + __hash__ = Expression.__hash__ + + def __str__(self): + drs = '([%s],[%s])' % ( + ','.join(self._order_ref_strings(self.refs)), + ', '.join("%s" % cond for cond in self.conds), + ) # map(str, self.conds))) + if self.consequent: + return ( + DrtTokens.OPEN + + drs + + ' ' + + DrtTokens.IMP + + ' ' + + "%s" % self.consequent + + DrtTokens.CLOSE + ) + return drs + + +def DrtVariableExpression(variable): + """ + This is a factory method that instantiates and returns a subtype of + ``DrtAbstractVariableExpression`` appropriate for the given variable. + """ + if is_indvar(variable.name): + return DrtIndividualVariableExpression(variable) + elif is_funcvar(variable.name): + return DrtFunctionVariableExpression(variable) + elif is_eventvar(variable.name): + return DrtEventVariableExpression(variable) + else: + return DrtConstantExpression(variable) + + +class DrtAbstractVariableExpression(DrtExpression, AbstractVariableExpression): + def fol(self): + return self + + def get_refs(self, recursive=False): + """:see: AbstractExpression.get_refs()""" + return [] + + def _pretty(self): + s = "%s" % self + blank = ' ' * len(s) + return [blank, blank, s, blank] + + def eliminate_equality(self): + return self + + +class DrtIndividualVariableExpression( + DrtAbstractVariableExpression, IndividualVariableExpression +): + pass + + +class DrtFunctionVariableExpression( + DrtAbstractVariableExpression, FunctionVariableExpression +): + pass + + +class DrtEventVariableExpression( + DrtIndividualVariableExpression, EventVariableExpression +): + pass + + +class DrtConstantExpression(DrtAbstractVariableExpression, ConstantExpression): + pass + + +@python_2_unicode_compatible +class DrtProposition(DrtExpression, Expression): + def __init__(self, variable, drs): + self.variable = variable + self.drs = drs + + def replace(self, variable, expression, replace_bound=False, alpha_convert=True): + if self.variable == variable: + assert isinstance( + expression, DrtAbstractVariableExpression + ), "Can only replace a proposition label with a variable" + return DrtProposition( + expression.variable, + self.drs.replace(variable, expression, replace_bound, alpha_convert), + ) + else: + return DrtProposition( + self.variable, + self.drs.replace(variable, expression, replace_bound, alpha_convert), + ) + + def eliminate_equality(self): + return DrtProposition(self.variable, self.drs.eliminate_equality()) + + def get_refs(self, recursive=False): + return self.drs.get_refs(True) if recursive else [] + + def __eq__(self, other): + return ( + self.__class__ == other.__class__ + and self.variable == other.variable + and self.drs == other.drs + ) + + def __ne__(self, other): + return not self == other + + __hash__ = Expression.__hash__ + + def fol(self): + return self.drs.fol() + + def _pretty(self): + drs_s = self.drs._pretty() + blank = ' ' * len("%s" % self.variable) + return ( + [blank + ' ' + line for line in drs_s[:1]] + + 
["%s" % self.variable + ':' + line for line in drs_s[1:2]] + + [blank + ' ' + line for line in drs_s[2:]] + ) + + def visit(self, function, combinator): + """:see: Expression.visit()""" + return combinator([function(self.drs)]) + + def visit_structured(self, function, combinator): + """:see: Expression.visit_structured()""" + return combinator(self.variable, function(self.drs)) + + def __str__(self): + return 'prop(%s, %s)' % (self.variable, self.drs) + + +class DrtNegatedExpression(DrtExpression, NegatedExpression): + def fol(self): + return NegatedExpression(self.term.fol()) + + def get_refs(self, recursive=False): + """:see: AbstractExpression.get_refs()""" + return self.term.get_refs(recursive) + + def _pretty(self): + term_lines = self.term._pretty() + return ( + [' ' + line for line in term_lines[:2]] + + ['__ ' + line for line in term_lines[2:3]] + + [' | ' + line for line in term_lines[3:4]] + + [' ' + line for line in term_lines[4:]] + ) + + +class DrtLambdaExpression(DrtExpression, LambdaExpression): + def alpha_convert(self, newvar): + """Rename all occurrences of the variable introduced by this variable + binder in the expression to ``newvar``. + :param newvar: ``Variable``, for the new variable + """ + return self.__class__( + newvar, + self.term.replace(self.variable, DrtVariableExpression(newvar), True), + ) + + def fol(self): + return LambdaExpression(self.variable, self.term.fol()) + + def _pretty(self): + variables = [self.variable] + term = self.term + while term.__class__ == self.__class__: + variables.append(term.variable) + term = term.term + var_string = ' '.join("%s" % v for v in variables) + DrtTokens.DOT + term_lines = term._pretty() + blank = ' ' * len(var_string) + return ( + [' ' + blank + line for line in term_lines[:1]] + + [' \ ' + blank + line for line in term_lines[1:2]] + + [' /\ ' + var_string + line for line in term_lines[2:3]] + + [' ' + blank + line for line in term_lines[3:]] + ) + + +class DrtBinaryExpression(DrtExpression, BinaryExpression): + def get_refs(self, recursive=False): + """:see: AbstractExpression.get_refs()""" + return ( + self.first.get_refs(True) + self.second.get_refs(True) if recursive else [] + ) + + def _pretty(self): + return DrtBinaryExpression._assemble_pretty( + self._pretty_subex(self.first), + self.getOp(), + self._pretty_subex(self.second), + ) + + @staticmethod + def _assemble_pretty(first_lines, op, second_lines): + max_lines = max(len(first_lines), len(second_lines)) + first_lines = _pad_vertically(first_lines, max_lines) + second_lines = _pad_vertically(second_lines, max_lines) + blank = ' ' * len(op) + first_second_lines = list(zip(first_lines, second_lines)) + return ( + [ + ' ' + first_line + ' ' + blank + ' ' + second_line + ' ' + for first_line, second_line in first_second_lines[:2] + ] + + [ + '(' + first_line + ' ' + op + ' ' + second_line + ')' + for first_line, second_line in first_second_lines[2:3] + ] + + [ + ' ' + first_line + ' ' + blank + ' ' + second_line + ' ' + for first_line, second_line in first_second_lines[3:] + ] + ) + + def _pretty_subex(self, subex): + return subex._pretty() + + +class DrtBooleanExpression(DrtBinaryExpression, BooleanExpression): + pass + + +class DrtOrExpression(DrtBooleanExpression, OrExpression): + def fol(self): + return OrExpression(self.first.fol(), self.second.fol()) + + def _pretty_subex(self, subex): + if isinstance(subex, DrtOrExpression): + return [line[1:-1] for line in subex._pretty()] + return DrtBooleanExpression._pretty_subex(self, subex) + + +class 
DrtEqualityExpression(DrtBinaryExpression, EqualityExpression): + def fol(self): + return EqualityExpression(self.first.fol(), self.second.fol()) + + +@python_2_unicode_compatible +class DrtConcatenation(DrtBooleanExpression): + """DRS of the form '(DRS + DRS)'""" + + def __init__(self, first, second, consequent=None): + DrtBooleanExpression.__init__(self, first, second) + self.consequent = consequent + + def replace(self, variable, expression, replace_bound=False, alpha_convert=True): + """Replace all instances of variable v with expression E in self, + where v is free in self.""" + first = self.first + second = self.second + consequent = self.consequent + + # If variable is bound + if variable in self.get_refs(): + if replace_bound: + first = first.replace( + variable, expression, replace_bound, alpha_convert + ) + second = second.replace( + variable, expression, replace_bound, alpha_convert + ) + if consequent: + consequent = consequent.replace( + variable, expression, replace_bound, alpha_convert + ) + else: + if alpha_convert: + # alpha convert every ref that is free in 'expression' + for ref in set(self.get_refs(True)) & expression.free(): + v = DrtVariableExpression(unique_variable(ref)) + first = first.replace(ref, v, True, alpha_convert) + second = second.replace(ref, v, True, alpha_convert) + if consequent: + consequent = consequent.replace(ref, v, True, alpha_convert) + + first = first.replace(variable, expression, replace_bound, alpha_convert) + second = second.replace(variable, expression, replace_bound, alpha_convert) + if consequent: + consequent = consequent.replace( + variable, expression, replace_bound, alpha_convert + ) + + return self.__class__(first, second, consequent) + + def eliminate_equality(self): + # TODO: at some point. for now, simplify. + drs = self.simplify() + assert not isinstance(drs, DrtConcatenation) + return drs.eliminate_equality() + + def simplify(self): + first = self.first.simplify() + second = self.second.simplify() + consequent = self.consequent.simplify() if self.consequent else None + + if isinstance(first, DRS) and isinstance(second, DRS): + # For any ref that is in both 'first' and 'second' + for ref in set(first.get_refs(True)) & set(second.get_refs(True)): + # alpha convert the ref in 'second' to prevent collision + newvar = DrtVariableExpression(unique_variable(ref)) + second = second.replace(ref, newvar, True) + + return DRS(first.refs + second.refs, first.conds + second.conds, consequent) + else: + return self.__class__(first, second, consequent) + + def get_refs(self, recursive=False): + """:see: AbstractExpression.get_refs()""" + refs = self.first.get_refs(recursive) + self.second.get_refs(recursive) + if self.consequent and recursive: + refs.extend(self.consequent.get_refs(True)) + return refs + + def getOp(self): + return DrtTokens.DRS_CONC + + def __eq__(self, other): + r"""Defines equality modulo alphabetic variance. 
+ If we are comparing \x.M and \y.N, then check equality of M and N[x/y].""" + if isinstance(other, DrtConcatenation): + self_refs = self.get_refs() + other_refs = other.get_refs() + if len(self_refs) == len(other_refs): + converted_other = other + for (r1, r2) in zip(self_refs, other_refs): + varex = self.make_VariableExpression(r1) + converted_other = converted_other.replace(r2, varex, True) + return ( + self.first == converted_other.first + and self.second == converted_other.second + and self.consequent == converted_other.consequent + ) + return False + + def __ne__(self, other): + return not self == other + + __hash__ = DrtBooleanExpression.__hash__ + + def fol(self): + e = AndExpression(self.first.fol(), self.second.fol()) + if self.consequent: + e = ImpExpression(e, self.consequent.fol()) + return e + + def _pretty(self): + drs = DrtBinaryExpression._assemble_pretty( + self._pretty_subex(self.first), + self.getOp(), + self._pretty_subex(self.second), + ) + if self.consequent: + drs = DrtBinaryExpression._assemble_pretty( + drs, DrtTokens.IMP, self._pretty(self.consequent) + ) + return drs + + def _pretty_subex(self, subex): + if isinstance(subex, DrtConcatenation): + return [line[1:-1] for line in subex._pretty()] + return DrtBooleanExpression._pretty_subex(self, subex) + + def visit(self, function, combinator): + """:see: Expression.visit()""" + if self.consequent: + return combinator( + [function(self.first), function(self.second), function(self.consequent)] + ) + else: + return combinator([function(self.first), function(self.second)]) + + def __str__(self): + first = self._str_subex(self.first) + second = self._str_subex(self.second) + drs = Tokens.OPEN + first + ' ' + self.getOp() + ' ' + second + Tokens.CLOSE + if self.consequent: + return ( + DrtTokens.OPEN + + drs + + ' ' + + DrtTokens.IMP + + ' ' + + "%s" % self.consequent + + DrtTokens.CLOSE + ) + return drs + + def _str_subex(self, subex): + s = "%s" % subex + if isinstance(subex, DrtConcatenation) and subex.consequent is None: + return s[1:-1] + return s + + +class DrtApplicationExpression(DrtExpression, ApplicationExpression): + def fol(self): + return ApplicationExpression(self.function.fol(), self.argument.fol()) + + def get_refs(self, recursive=False): + """:see: AbstractExpression.get_refs()""" + return ( + self.function.get_refs(True) + self.argument.get_refs(True) + if recursive + else [] + ) + + def _pretty(self): + function, args = self.uncurry() + function_lines = function._pretty() + args_lines = [arg._pretty() for arg in args] + max_lines = max(map(len, [function_lines] + args_lines)) + function_lines = _pad_vertically(function_lines, max_lines) + args_lines = [_pad_vertically(arg_lines, max_lines) for arg_lines in args_lines] + func_args_lines = list(zip(function_lines, list(zip(*args_lines)))) + return ( + [ + func_line + ' ' + ' '.join(args_line) + ' ' + for func_line, args_line in func_args_lines[:2] + ] + + [ + func_line + '(' + ','.join(args_line) + ')' + for func_line, args_line in func_args_lines[2:3] + ] + + [ + func_line + ' ' + ' '.join(args_line) + ' ' + for func_line, args_line in func_args_lines[3:] + ] + ) + + +def _pad_vertically(lines, max_lines): + pad_line = [' ' * len(lines[0])] + return lines + pad_line * (max_lines - len(lines)) + + +@python_2_unicode_compatible +class PossibleAntecedents(list, DrtExpression, Expression): + def free(self): + """Set of free variables.""" + return set(self) + + def replace(self, variable, expression, replace_bound=False, alpha_convert=True): + """Replace all 
instances of variable v with expression E in self, + where v is free in self.""" + result = PossibleAntecedents() + for item in self: + if item == variable: + self.append(expression) + else: + self.append(item) + return result + + def _pretty(self): + s = "%s" % self + blank = ' ' * len(s) + return [blank, blank, s] + + def __str__(self): + return '[' + ','.join("%s" % it for it in self) + ']' + + +class AnaphoraResolutionException(Exception): + pass + + +def resolve_anaphora(expression, trail=[]): + if isinstance(expression, ApplicationExpression): + if expression.is_pronoun_function(): + possible_antecedents = PossibleAntecedents() + for ancestor in trail: + for ref in ancestor.get_refs(): + refex = expression.make_VariableExpression(ref) + + # ========================================================== + # Don't allow resolution to itself or other types + # ========================================================== + if refex.__class__ == expression.argument.__class__ and not ( + refex == expression.argument + ): + possible_antecedents.append(refex) + + if len(possible_antecedents) == 1: + resolution = possible_antecedents[0] + else: + resolution = possible_antecedents + return expression.make_EqualityExpression(expression.argument, resolution) + else: + r_function = resolve_anaphora(expression.function, trail + [expression]) + r_argument = resolve_anaphora(expression.argument, trail + [expression]) + return expression.__class__(r_function, r_argument) + + elif isinstance(expression, DRS): + r_conds = [] + for cond in expression.conds: + r_cond = resolve_anaphora(cond, trail + [expression]) + + # if the condition is of the form '(x = [])' then raise exception + if isinstance(r_cond, EqualityExpression): + if isinstance(r_cond.first, PossibleAntecedents): + # Reverse the order so that the variable is on the left + temp = r_cond.first + r_cond.first = r_cond.second + r_cond.second = temp + if isinstance(r_cond.second, PossibleAntecedents): + if not r_cond.second: + raise AnaphoraResolutionException( + "Variable '%s' does not " + "resolve to anything." 
% r_cond.first + ) + + r_conds.append(r_cond) + if expression.consequent: + consequent = resolve_anaphora(expression.consequent, trail + [expression]) + else: + consequent = None + return expression.__class__(expression.refs, r_conds, consequent) + + elif isinstance(expression, AbstractVariableExpression): + return expression + + elif isinstance(expression, NegatedExpression): + return expression.__class__( + resolve_anaphora(expression.term, trail + [expression]) + ) + + elif isinstance(expression, DrtConcatenation): + if expression.consequent: + consequent = resolve_anaphora(expression.consequent, trail + [expression]) + else: + consequent = None + return expression.__class__( + resolve_anaphora(expression.first, trail + [expression]), + resolve_anaphora(expression.second, trail + [expression]), + consequent, + ) + + elif isinstance(expression, BinaryExpression): + return expression.__class__( + resolve_anaphora(expression.first, trail + [expression]), + resolve_anaphora(expression.second, trail + [expression]), + ) + + elif isinstance(expression, LambdaExpression): + return expression.__class__( + expression.variable, resolve_anaphora(expression.term, trail + [expression]) + ) + + +class DrsDrawer(object): + BUFFER = 3 # Space between elements + TOPSPACE = 10 # Space above whole DRS + OUTERSPACE = 6 # Space to the left, right, and bottom of the whle DRS + + def __init__(self, drs, size_canvas=True, canvas=None): + """ + :param drs: ``DrtExpression``, The DRS to be drawn + :param size_canvas: bool, True if the canvas size should be the exact size of the DRS + :param canvas: ``Canvas`` The canvas on which to draw the DRS. If none is given, create a new canvas. + """ + master = None + if not canvas: + master = Tk() + master.title("DRT") + + font = Font(family='helvetica', size=12) + + if size_canvas: + canvas = Canvas(master, width=0, height=0) + canvas.font = font + self.canvas = canvas + (right, bottom) = self._visit(drs, self.OUTERSPACE, self.TOPSPACE) + + width = max(right + self.OUTERSPACE, 100) + height = bottom + self.OUTERSPACE + canvas = Canvas(master, width=width, height=height) # , bg='white') + else: + canvas = Canvas(master, width=300, height=300) + + canvas.pack() + canvas.font = font + + self.canvas = canvas + self.drs = drs + self.master = master + + def _get_text_height(self): + """Get the height of a line of text""" + return self.canvas.font.metrics("linespace") + + def draw(self, x=OUTERSPACE, y=TOPSPACE): + """Draw the DRS""" + self._handle(self.drs, self._draw_command, x, y) + + if self.master and not in_idle(): + self.master.mainloop() + else: + return self._visit(self.drs, x, y) + + def _visit(self, expression, x, y): + """ + Return the bottom-rightmost point without actually drawing the item + + :param expression: the item to visit + :param x: the top of the current drawing area + :param y: the left side of the current drawing area + :return: the bottom-rightmost point + """ + return self._handle(expression, self._visit_command, x, y) + + def _draw_command(self, item, x, y): + """ + Draw the given item at the given location + + :param item: the item to draw + :param x: the top of the current drawing area + :param y: the left side of the current drawing area + :return: the bottom-rightmost point + """ + if isinstance(item, string_types): + self.canvas.create_text(x, y, anchor='nw', font=self.canvas.font, text=item) + elif isinstance(item, tuple): + # item is the lower-right of a box + (right, bottom) = item + self.canvas.create_rectangle(x, y, right, bottom) + 
horiz_line_y = ( + y + self._get_text_height() + (self.BUFFER * 2) + ) # the line separating refs from conds + self.canvas.create_line(x, horiz_line_y, right, horiz_line_y) + + return self._visit_command(item, x, y) + + def _visit_command(self, item, x, y): + """ + Return the bottom-rightmost point without actually drawing the item + + :param item: the item to visit + :param x: the top of the current drawing area + :param y: the left side of the current drawing area + :return: the bottom-rightmost point + """ + if isinstance(item, string_types): + return (x + self.canvas.font.measure(item), y + self._get_text_height()) + elif isinstance(item, tuple): + return item + + def _handle(self, expression, command, x=0, y=0): + """ + :param expression: the expression to handle + :param command: the function to apply, either _draw_command or _visit_command + :param x: the top of the current drawing area + :param y: the left side of the current drawing area + :return: the bottom-rightmost point + """ + if command == self._visit_command: + # if we don't need to draw the item, then we can use the cached values + try: + # attempt to retrieve cached values + right = expression._drawing_width + x + bottom = expression._drawing_height + y + return (right, bottom) + except AttributeError: + # the values have not been cached yet, so compute them + pass + + if isinstance(expression, DrtAbstractVariableExpression): + factory = self._handle_VariableExpression + elif isinstance(expression, DRS): + factory = self._handle_DRS + elif isinstance(expression, DrtNegatedExpression): + factory = self._handle_NegatedExpression + elif isinstance(expression, DrtLambdaExpression): + factory = self._handle_LambdaExpression + elif isinstance(expression, BinaryExpression): + factory = self._handle_BinaryExpression + elif isinstance(expression, DrtApplicationExpression): + factory = self._handle_ApplicationExpression + elif isinstance(expression, PossibleAntecedents): + factory = self._handle_VariableExpression + elif isinstance(expression, DrtProposition): + factory = self._handle_DrtProposition + else: + raise Exception(expression.__class__.__name__) + + (right, bottom) = factory(expression, command, x, y) + + # cache the values + expression._drawing_width = right - x + expression._drawing_height = bottom - y + + return (right, bottom) + + def _handle_VariableExpression(self, expression, command, x, y): + return command("%s" % expression, x, y) + + def _handle_NegatedExpression(self, expression, command, x, y): + # Find the width of the negation symbol + right = self._visit_command(DrtTokens.NOT, x, y)[0] + + # Handle term + (right, bottom) = self._handle(expression.term, command, right, y) + + # Handle variables now that we know the y-coordinate + command( + DrtTokens.NOT, + x, + self._get_centered_top(y, bottom - y, self._get_text_height()), + ) + + return (right, bottom) + + def _handle_DRS(self, expression, command, x, y): + left = x + self.BUFFER # indent the left side + bottom = y + self.BUFFER # indent the top + + # Handle Discourse Referents + if expression.refs: + refs = ' '.join("%s" % r for r in expression.refs) + else: + refs = ' ' + (max_right, bottom) = command(refs, left, bottom) + bottom += self.BUFFER * 2 + + # Handle Conditions + if expression.conds: + for cond in expression.conds: + (right, bottom) = self._handle(cond, command, left, bottom) + max_right = max(max_right, right) + bottom += self.BUFFER + else: + bottom += self._get_text_height() + self.BUFFER + + # Handle Box + max_right += self.BUFFER + 
return command((max_right, bottom), x, y) + + def _handle_ApplicationExpression(self, expression, command, x, y): + function, args = expression.uncurry() + if not isinstance(function, DrtAbstractVariableExpression): + # It's not a predicate expression ("P(x,y)"), so leave arguments curried + function = expression.function + args = [expression.argument] + + # Get the max bottom of any element on the line + function_bottom = self._visit(function, x, y)[1] + max_bottom = max( + [function_bottom] + [self._visit(arg, x, y)[1] for arg in args] + ) + + line_height = max_bottom - y + + # Handle 'function' + function_drawing_top = self._get_centered_top( + y, line_height, function._drawing_height + ) + right = self._handle(function, command, x, function_drawing_top)[0] + + # Handle open paren + centred_string_top = self._get_centered_top( + y, line_height, self._get_text_height() + ) + right = command(DrtTokens.OPEN, right, centred_string_top)[0] + + # Handle each arg + for (i, arg) in enumerate(args): + arg_drawing_top = self._get_centered_top( + y, line_height, arg._drawing_height + ) + right = self._handle(arg, command, right, arg_drawing_top)[0] + + if i + 1 < len(args): + # since it's not the last arg, add a comma + right = command(DrtTokens.COMMA + ' ', right, centred_string_top)[0] + + # Handle close paren + right = command(DrtTokens.CLOSE, right, centred_string_top)[0] + + return (right, max_bottom) + + def _handle_LambdaExpression(self, expression, command, x, y): + # Find the width of the lambda symbol and abstracted variables + variables = DrtTokens.LAMBDA + "%s" % expression.variable + DrtTokens.DOT + right = self._visit_command(variables, x, y)[0] + + # Handle term + (right, bottom) = self._handle(expression.term, command, right, y) + + # Handle variables now that we know the y-coordinate + command( + variables, x, self._get_centered_top(y, bottom - y, self._get_text_height()) + ) + + return (right, bottom) + + def _handle_BinaryExpression(self, expression, command, x, y): + # Get the full height of the line, based on the operands + first_height = self._visit(expression.first, 0, 0)[1] + second_height = self._visit(expression.second, 0, 0)[1] + line_height = max(first_height, second_height) + + # Handle open paren + centred_string_top = self._get_centered_top( + y, line_height, self._get_text_height() + ) + right = command(DrtTokens.OPEN, x, centred_string_top)[0] + + # Handle the first operand + first_height = expression.first._drawing_height + (right, first_bottom) = self._handle( + expression.first, + command, + right, + self._get_centered_top(y, line_height, first_height), + ) + + # Handle the operator + right = command(' %s ' % expression.getOp(), right, centred_string_top)[0] + + # Handle the second operand + second_height = expression.second._drawing_height + (right, second_bottom) = self._handle( + expression.second, + command, + right, + self._get_centered_top(y, line_height, second_height), + ) + + # Handle close paren + right = command(DrtTokens.CLOSE, right, centred_string_top)[0] + + return (right, max(first_bottom, second_bottom)) + + def _handle_DrtProposition(self, expression, command, x, y): + # Find the width of the negation symbol + right = command(expression.variable, x, y)[0] + + # Handle term + (right, bottom) = self._handle(expression.term, command, right, y) + + return (right, bottom) + + def _get_centered_top(self, top, full_height, item_height): + """Get the y-coordinate of the point that a figure should start at if + its height is 'item_height' and it needs 
to be centered in an area that + starts at 'top' and is 'full_height' tall.""" + return top + (full_height - item_height) / 2 + + +def demo(): + print('=' * 20 + 'TEST PARSE' + '=' * 20) + dexpr = DrtExpression.fromstring + print(dexpr(r'([x,y],[sees(x,y)])')) + print(dexpr(r'([x],[man(x), walks(x)])')) + print(dexpr(r'\x.\y.([],[sees(x,y)])')) + print(dexpr(r'\x.([],[walks(x)])(john)')) + print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))')) + print(dexpr(r'(([],[walks(x)]) -> ([],[runs(x)]))')) + print(dexpr(r'([x],[PRO(x), sees(John,x)])')) + print(dexpr(r'([x],[man(x), -([],[walks(x)])])')) + print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])')) + + print('=' * 20 + 'Test fol()' + '=' * 20) + print(dexpr(r'([x,y],[sees(x,y)])').fol()) + + print('=' * 20 + 'Test alpha conversion and lambda expression equality' + '=' * 20) + e1 = dexpr(r'\x.([],[P(x)])') + print(e1) + e2 = e1.alpha_convert(Variable('z')) + print(e2) + print(e1 == e2) + + print('=' * 20 + 'Test resolve_anaphora()' + '=' * 20) + print(resolve_anaphora(dexpr(r'([x,y,z],[dog(x), cat(y), walks(z), PRO(z)])'))) + print( + resolve_anaphora(dexpr(r'([],[(([x],[dog(x)]) -> ([y],[walks(y), PRO(y)]))])')) + ) + print(resolve_anaphora(dexpr(r'(([x,y],[]) + ([],[PRO(x)]))'))) + + print('=' * 20 + 'Test pretty_print()' + '=' * 20) + dexpr(r"([],[])").pretty_print() + dexpr( + r"([],[([x],[big(x), dog(x)]) -> ([],[bark(x)]) -([x],[walk(x)])])" + ).pretty_print() + dexpr(r"([x,y],[x=y]) + ([z],[dog(z), walk(z)])").pretty_print() + dexpr(r"([],[([x],[]) | ([y],[]) | ([z],[dog(z), walk(z)])])").pretty_print() + dexpr(r"\P.\Q.(([x],[]) + P(x) + Q(x))(\x.([],[dog(x)]))").pretty_print() + + +def test_draw(): + try: + from six.moves.tkinter import Tk + except ImportError: + from nose import SkipTest + + raise SkipTest("tkinter is required, but it's not available.") + + expressions = [ + r'x', + r'([],[])', + r'([x],[])', + r'([x],[man(x)])', + r'([x,y],[sees(x,y)])', + r'([x],[man(x), walks(x)])', + r'\x.([],[man(x), walks(x)])', + r'\x y.([],[sees(x,y)])', + r'([],[(([],[walks(x)]) + ([],[runs(x)]))])', + r'([x],[man(x), -([],[walks(x)])])', + r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])', + ] + + for e in expressions: + d = DrtExpression.fromstring(e) + d.draw() + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/drt_glue_demo.py b/venv.bak/lib/python3.7/site-packages/nltk/sem/drt_glue_demo.py new file mode 100644 index 0000000..4a45325 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sem/drt_glue_demo.py @@ -0,0 +1,561 @@ +# Natural Language Toolkit: GUI Demo for Glue Semantics with Discourse +# Representation Theory (DRT) as meaning language +# +# Author: Dan Garrette +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT + +try: + from six.moves.tkinter import ( + Button, + Frame, + IntVar, + Label, + Listbox, + Menu, + Scrollbar, + Tk, + ) + from six.moves.tkinter_font import Font + from nltk.draw.util import CanvasFrame, ShowText + +except ImportError: + """Ignore ImportError because tkinter might not be available.""" + +from nltk.util import in_idle +from nltk.tag import RegexpTagger +from nltk.parse import MaltParser +from nltk.sem.logic import Variable +from nltk.sem.drt import DrsDrawer, DrtVariableExpression +from nltk.sem.glue import DrtGlue + + +class DrtGlueDemo(object): + def __init__(self, examples): + # Set up the main window. + self._top = Tk() + self._top.title('DRT Glue Demo') + + # Set up key bindings. 
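+        # Descriptive note: _init_bindings() attaches the keyboard shortcuts
+        # ('n'/'p' for next/previous readings, plus the quit keys listed as
+        # menu accelerators) to the callbacks defined further down this class.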
+ self._init_bindings() + + # Initialize the fonts.self._error = None + self._init_fonts(self._top) + + self._examples = examples + self._readingCache = [None for example in examples] + + # The user can hide the grammar. + self._show_grammar = IntVar(self._top) + self._show_grammar.set(1) + + # Set the data to None + self._curExample = -1 + self._readings = [] + self._drs = None + self._drsWidget = None + self._error = None + + self._init_glue() + + # Create the basic frames. + self._init_menubar(self._top) + self._init_buttons(self._top) + self._init_exampleListbox(self._top) + self._init_readingListbox(self._top) + self._init_canvas(self._top) + + # Resize callback + self._canvas.bind('', self._configure) + + ######################################### + ## Initialization Helpers + ######################################### + + def _init_glue(self): + tagger = RegexpTagger( + [ + ('^(David|Mary|John)$', 'NNP'), + ( + '^(walks|sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$', + 'VB', + ), + ('^(go|order|vanish|find|approach)$', 'VB'), + ('^(a)$', 'ex_quant'), + ('^(every)$', 'univ_quant'), + ('^(sandwich|man|dog|pizza|unicorn|cat|senator)$', 'NN'), + ('^(big|gray|former)$', 'JJ'), + ('^(him|himself)$', 'PRP'), + ] + ) + + depparser = MaltParser(tagger=tagger) + self._glue = DrtGlue(depparser=depparser, remove_duplicates=False) + + def _init_fonts(self, root): + # See: + self._sysfont = Font(font=Button()["font"]) + root.option_add("*Font", self._sysfont) + + # TWhat's our font size (default=same as sysfont) + self._size = IntVar(root) + self._size.set(self._sysfont.cget('size')) + + self._boldfont = Font(family='helvetica', weight='bold', size=self._size.get()) + self._font = Font(family='helvetica', size=self._size.get()) + if self._size.get() < 0: + big = self._size.get() - 2 + else: + big = self._size.get() + 2 + self._bigfont = Font(family='helvetica', weight='bold', size=big) + + def _init_exampleListbox(self, parent): + self._exampleFrame = listframe = Frame(parent) + self._exampleFrame.pack(fill='both', side='left', padx=2) + self._exampleList_label = Label( + self._exampleFrame, font=self._boldfont, text='Examples' + ) + self._exampleList_label.pack() + self._exampleList = Listbox( + self._exampleFrame, + selectmode='single', + relief='groove', + background='white', + foreground='#909090', + font=self._font, + selectforeground='#004040', + selectbackground='#c0f0c0', + ) + + self._exampleList.pack(side='right', fill='both', expand=1) + + for example in self._examples: + self._exampleList.insert('end', (' %s' % example)) + self._exampleList.config(height=min(len(self._examples), 25), width=40) + + # Add a scrollbar if there are more than 25 examples. + if len(self._examples) > 25: + listscroll = Scrollbar(self._exampleFrame, orient='vertical') + self._exampleList.config(yscrollcommand=listscroll.set) + listscroll.config(command=self._exampleList.yview) + listscroll.pack(side='left', fill='y') + + # If they select a example, apply it. 
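+        # Descriptive note: selecting an example fires _exampleList_select,
+        # which parses the sentence with the glue parser (caching the result),
+        # repopulates the readings listbox, and redraws the DRS canvas.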
+ self._exampleList.bind('<>', self._exampleList_select) + + def _init_readingListbox(self, parent): + self._readingFrame = listframe = Frame(parent) + self._readingFrame.pack(fill='both', side='left', padx=2) + self._readingList_label = Label( + self._readingFrame, font=self._boldfont, text='Readings' + ) + self._readingList_label.pack() + self._readingList = Listbox( + self._readingFrame, + selectmode='single', + relief='groove', + background='white', + foreground='#909090', + font=self._font, + selectforeground='#004040', + selectbackground='#c0f0c0', + ) + + self._readingList.pack(side='right', fill='both', expand=1) + + # Add a scrollbar if there are more than 25 examples. + listscroll = Scrollbar(self._readingFrame, orient='vertical') + self._readingList.config(yscrollcommand=listscroll.set) + listscroll.config(command=self._readingList.yview) + listscroll.pack(side='right', fill='y') + + self._populate_readingListbox() + + def _populate_readingListbox(self): + # Populate the listbox with integers + self._readingList.delete(0, 'end') + for i in range(len(self._readings)): + self._readingList.insert('end', (' %s' % (i + 1))) + self._readingList.config(height=min(len(self._readings), 25), width=5) + + # If they select a example, apply it. + self._readingList.bind('<>', self._readingList_select) + + def _init_bindings(self): + # Key bindings are a good thing. + self._top.bind('', self.destroy) + self._top.bind('', self.destroy) + self._top.bind('', self.destroy) + self._top.bind('n', self.next) + self._top.bind('', self.next) + self._top.bind('p', self.prev) + self._top.bind('', self.prev) + + def _init_buttons(self, parent): + # Set up the frames. + self._buttonframe = buttonframe = Frame(parent) + buttonframe.pack(fill='none', side='bottom', padx=3, pady=2) + Button( + buttonframe, + text='Prev', + background='#90c0d0', + foreground='black', + command=self.prev, + ).pack(side='left') + Button( + buttonframe, + text='Next', + background='#90c0d0', + foreground='black', + command=self.next, + ).pack(side='left') + + def _configure(self, event): + self._autostep = 0 + (x1, y1, x2, y2) = self._cframe.scrollregion() + y2 = event.height - 6 + self._canvas['scrollregion'] = '%d %d %d %d' % (x1, y1, x2, y2) + self._redraw() + + def _init_canvas(self, parent): + self._cframe = CanvasFrame( + parent, + background='white', + # width=525, height=250, + closeenough=10, + border=2, + relief='sunken', + ) + self._cframe.pack(expand=1, fill='both', side='top', pady=2) + canvas = self._canvas = self._cframe.canvas() + + # Initially, there's no tree or text + self._tree = None + self._textwidgets = [] + self._textline = None + + def _init_menubar(self, parent): + menubar = Menu(parent) + + filemenu = Menu(menubar, tearoff=0) + filemenu.add_command( + label='Exit', underline=1, command=self.destroy, accelerator='q' + ) + menubar.add_cascade(label='File', underline=0, menu=filemenu) + + actionmenu = Menu(menubar, tearoff=0) + actionmenu.add_command( + label='Next', underline=0, command=self.next, accelerator='n, Space' + ) + actionmenu.add_command( + label='Previous', underline=0, command=self.prev, accelerator='p, Backspace' + ) + menubar.add_cascade(label='Action', underline=0, menu=actionmenu) + + optionmenu = Menu(menubar, tearoff=0) + optionmenu.add_checkbutton( + label='Remove Duplicates', + underline=0, + variable=self._glue.remove_duplicates, + command=self._toggle_remove_duplicates, + accelerator='r', + ) + menubar.add_cascade(label='Options', underline=0, menu=optionmenu) + + viewmenu = 
Menu(menubar, tearoff=0) + viewmenu.add_radiobutton( + label='Tiny', + variable=self._size, + underline=0, + value=10, + command=self.resize, + ) + viewmenu.add_radiobutton( + label='Small', + variable=self._size, + underline=0, + value=12, + command=self.resize, + ) + viewmenu.add_radiobutton( + label='Medium', + variable=self._size, + underline=0, + value=14, + command=self.resize, + ) + viewmenu.add_radiobutton( + label='Large', + variable=self._size, + underline=0, + value=18, + command=self.resize, + ) + viewmenu.add_radiobutton( + label='Huge', + variable=self._size, + underline=0, + value=24, + command=self.resize, + ) + menubar.add_cascade(label='View', underline=0, menu=viewmenu) + + helpmenu = Menu(menubar, tearoff=0) + helpmenu.add_command(label='About', underline=0, command=self.about) + menubar.add_cascade(label='Help', underline=0, menu=helpmenu) + + parent.config(menu=menubar) + + ######################################### + ## Main draw procedure + ######################################### + + def _redraw(self): + canvas = self._canvas + + # Delete the old DRS, widgets, etc. + if self._drsWidget is not None: + self._drsWidget.clear() + + if self._drs: + self._drsWidget = DrsWidget(self._canvas, self._drs) + self._drsWidget.draw() + + if self._error: + self._drsWidget = DrsWidget(self._canvas, self._error) + self._drsWidget.draw() + + ######################################### + ## Button Callbacks + ######################################### + + def destroy(self, *e): + self._autostep = 0 + if self._top is None: + return + self._top.destroy() + self._top = None + + def prev(self, *e): + selection = self._readingList.curselection() + readingListSize = self._readingList.size() + + # there are readings + if readingListSize > 0: + # if one reading is currently selected + if len(selection) == 1: + index = int(selection[0]) + + # if it's on (or before) the first item + if index <= 0: + self._select_previous_example() + else: + self._readingList_store_selection(index - 1) + + else: + # select its first reading + self._readingList_store_selection(readingListSize - 1) + + else: + self._select_previous_example() + + def _select_previous_example(self): + # if the current example is not the first example + if self._curExample > 0: + self._exampleList_store_selection(self._curExample - 1) + else: + # go to the last example + self._exampleList_store_selection(len(self._examples) - 1) + + def next(self, *e): + selection = self._readingList.curselection() + readingListSize = self._readingList.size() + + # if there are readings + if readingListSize > 0: + # if one reading is currently selected + if len(selection) == 1: + index = int(selection[0]) + + # if it's on (or past) the last item + if index >= (readingListSize - 1): + self._select_next_example() + else: + self._readingList_store_selection(index + 1) + + else: + # select its first reading + self._readingList_store_selection(0) + + else: + self._select_next_example() + + def _select_next_example(self): + # if the current example is not the last example + if self._curExample < len(self._examples) - 1: + self._exampleList_store_selection(self._curExample + 1) + else: + # go to the first example + self._exampleList_store_selection(0) + + def about(self, *e): + ABOUT = ( + "NLTK Discourse Representation Theory (DRT) Glue Semantics Demo\n" + + "Written by Daniel H. 
Garrette" + ) + TITLE = 'About: NLTK DRT Glue Demo' + try: + from six.moves.tkinter_messagebox import Message + + Message(message=ABOUT, title=TITLE).show() + except: + ShowText(self._top, TITLE, ABOUT) + + def postscript(self, *e): + self._autostep = 0 + self._cframe.print_to_file() + + def mainloop(self, *args, **kwargs): + """ + Enter the Tkinter mainloop. This function must be called if + this demo is created from a non-interactive program (e.g. + from a secript); otherwise, the demo will close as soon as + the script completes. + """ + if in_idle(): + return + self._top.mainloop(*args, **kwargs) + + def resize(self, size=None): + if size is not None: + self._size.set(size) + size = self._size.get() + self._font.configure(size=-(abs(size))) + self._boldfont.configure(size=-(abs(size))) + self._sysfont.configure(size=-(abs(size))) + self._bigfont.configure(size=-(abs(size + 2))) + self._redraw() + + def _toggle_remove_duplicates(self): + self._glue.remove_duplicates = not self._glue.remove_duplicates + + self._exampleList.selection_clear(0, 'end') + self._readings = [] + self._populate_readingListbox() + self._readingCache = [None for ex in self._examples] + self._curExample = -1 + self._error = None + + self._drs = None + self._redraw() + + def _exampleList_select(self, event): + selection = self._exampleList.curselection() + if len(selection) != 1: + return + self._exampleList_store_selection(int(selection[0])) + + def _exampleList_store_selection(self, index): + self._curExample = index + example = self._examples[index] + + self._exampleList.selection_clear(0, 'end') + if example: + cache = self._readingCache[index] + if cache: + if isinstance(cache, list): + self._readings = cache + self._error = None + else: + self._readings = [] + self._error = cache + else: + try: + self._readings = self._glue.parse_to_meaning(example) + self._error = None + self._readingCache[index] = self._readings + except Exception as e: + self._readings = [] + self._error = DrtVariableExpression(Variable('Error: ' + str(e))) + self._readingCache[index] = self._error + + # add a star to the end of the example + self._exampleList.delete(index) + self._exampleList.insert(index, (' %s *' % example)) + self._exampleList.config( + height=min(len(self._examples), 25), width=40 + ) + + self._populate_readingListbox() + + self._exampleList.selection_set(index) + + self._drs = None + self._redraw() + + def _readingList_select(self, event): + selection = self._readingList.curselection() + if len(selection) != 1: + return + self._readingList_store_selection(int(selection[0])) + + def _readingList_store_selection(self, index): + reading = self._readings[index] + + self._readingList.selection_clear(0, 'end') + if reading: + self._readingList.selection_set(index) + + self._drs = reading.simplify().normalize().resolve_anaphora() + + self._redraw() + + +class DrsWidget(object): + def __init__(self, canvas, drs, **attribs): + self._drs = drs + self._canvas = canvas + canvas.font = Font( + font=canvas.itemcget(canvas.create_text(0, 0, text=''), 'font') + ) + canvas._BUFFER = 3 + self.bbox = (0, 0, 0, 0) + + def draw(self): + (right, bottom) = DrsDrawer(self._drs, canvas=self._canvas).draw() + self.bbox = (0, 0, right + 1, bottom + 1) + + def clear(self): + self._canvas.create_rectangle(self.bbox, fill="white", width="0") + + +def demo(): + examples = [ + 'John walks', + 'David sees Mary', + 'David eats a sandwich', + 'every man chases a dog', + # 'every man believes a dog yawns', + # 'John gives David a sandwich', + 'John 
chases himself', + # 'John persuades David to order a pizza', + # 'John tries to go', + # 'John tries to find a unicorn', + # 'John seems to vanish', + # 'a unicorn seems to approach', + # 'every big cat leaves', + # 'every gray cat leaves', + # 'every big gray cat leaves', + # 'a former senator leaves', + # 'John likes a cat', + # 'John likes every cat', + # 'he walks', + # 'John walks and he leaves' + ] + DrtGlueDemo(examples).mainloop() + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/evaluate.py b/venv.bak/lib/python3.7/site-packages/nltk/sem/evaluate.py new file mode 100644 index 0000000..adc0716 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sem/evaluate.py @@ -0,0 +1,835 @@ +# Natural Language Toolkit: Models for first-order languages with lambda +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ewan Klein , +# URL: +# For license information, see LICENSE.TXT + +# TODO: +# - fix tracing +# - fix iterator-based approach to existentials + +""" +This module provides data structures for representing first-order +models. +""" +from __future__ import print_function, unicode_literals + +from pprint import pformat +import inspect +import textwrap +import re +import sys + +from six import string_types + +from nltk.decorators import decorator # this used in code that is commented out +from nltk.compat import python_2_unicode_compatible + +from nltk.sem.logic import ( + AbstractVariableExpression, + AllExpression, + Expression, + AndExpression, + ApplicationExpression, + EqualityExpression, + ExistsExpression, + IffExpression, + ImpExpression, + IndividualVariableExpression, + LambdaExpression, + NegatedExpression, + OrExpression, + Variable, + is_indvar, +) + + +class Error(Exception): + pass + + +class Undefined(Error): + pass + + +def trace(f, *args, **kw): + if sys.version_info[0] >= 3: + argspec = inspect.getfullargspec(f) + else: + argspec = inspect.getargspec(f) + d = dict(zip(argspec[0], args)) + if d.pop('trace', None): + print() + for item in d.items(): + print("%s => %s" % item) + return f(*args, **kw) + + +def is_rel(s): + """ + Check whether a set represents a relation (of any arity). + + :param s: a set containing tuples of str elements + :type s: set + :rtype: bool + """ + # we have the empty relation, i.e. set() + if len(s) == 0: + return True + # all the elements are tuples of the same length + elif all(isinstance(el, tuple) for el in s) and len(max(s)) == len(min(s)): + return True + else: + raise ValueError("Set %r contains sequences of different lengths" % s) + + +def set2rel(s): + """ + Convert a set containing individuals (strings or numbers) into a set of + unary tuples. Any tuples of strings already in the set are passed through + unchanged. + + For example: + - set(['a', 'b']) => set([('a',), ('b',)]) + - set([3, 27]) => set([('3',), ('27',)]) + + :type s: set + :rtype: set of tuple of str + """ + new = set() + for elem in s: + if isinstance(elem, string_types): + new.add((elem,)) + elif isinstance(elem, int): + new.add((str(elem))) + else: + new.add(elem) + return new + + +def arity(rel): + """ + Check the arity of a relation. + :type rel: set of tuples + :rtype: int of tuple of str + """ + if len(rel) == 0: + return 0 + return len(list(rel)[0]) + + +@python_2_unicode_compatible +class Valuation(dict): + """ + A dictionary which represents a model-theoretic Valuation of non-logical constants. 
+ Keys are strings representing the constants to be interpreted, and values correspond + to individuals (represented as strings) and n-ary relations (represented as sets of tuples + of strings). + + An instance of ``Valuation`` will raise a KeyError exception (i.e., + just behave like a standard dictionary) if indexed with an expression that + is not in its list of symbols. + """ + + def __init__(self, xs): + """ + :param xs: a list of (symbol, value) pairs. + """ + super(Valuation, self).__init__() + for (sym, val) in xs: + if isinstance(val, string_types) or isinstance(val, bool): + self[sym] = val + elif isinstance(val, set): + self[sym] = set2rel(val) + else: + msg = textwrap.fill( + "Error in initializing Valuation. " + "Unrecognized value for symbol '%s':\n%s" % (sym, val), + width=66, + ) + + raise ValueError(msg) + + def __getitem__(self, key): + if key in self: + return dict.__getitem__(self, key) + else: + raise Undefined("Unknown expression: '%s'" % key) + + def __str__(self): + return pformat(self) + + @property + def domain(self): + """Set-theoretic domain of the value-space of a Valuation.""" + dom = [] + for val in self.values(): + if isinstance(val, string_types): + dom.append(val) + elif not isinstance(val, bool): + dom.extend( + [elem for tuple_ in val for elem in tuple_ if elem is not None] + ) + return set(dom) + + @property + def symbols(self): + """The non-logical constants which the Valuation recognizes.""" + return sorted(self.keys()) + + @classmethod + def fromstring(cls, s): + return read_valuation(s) + + +########################################## +# REs used by the _read_valuation function +########################################## +_VAL_SPLIT_RE = re.compile(r'\s*=+>\s*') +_ELEMENT_SPLIT_RE = re.compile(r'\s*,\s*') +_TUPLES_RE = re.compile( + r"""\s* + (\([^)]+\)) # tuple-expression + \s*""", + re.VERBOSE, +) + + +def _read_valuation_line(s): + """ + Read a line in a valuation file. + + Lines are expected to be of the form:: + + noosa => n + girl => {g1, g2} + chase => {(b1, g1), (b2, g1), (g1, d1), (g2, d2)} + + :param s: input line + :type s: str + :return: a pair (symbol, value) + :rtype: tuple + """ + pieces = _VAL_SPLIT_RE.split(s) + symbol = pieces[0] + value = pieces[1] + # check whether the value is meant to be a set + if value.startswith('{'): + value = value[1:-1] + tuple_strings = _TUPLES_RE.findall(value) + # are the set elements tuples? + if tuple_strings: + set_elements = [] + for ts in tuple_strings: + ts = ts[1:-1] + element = tuple(_ELEMENT_SPLIT_RE.split(ts)) + set_elements.append(element) + else: + set_elements = _ELEMENT_SPLIT_RE.split(value) + value = set(set_elements) + return symbol, value + + +def read_valuation(s, encoding=None): + """ + Convert a valuation string into a valuation. + + :param s: a valuation string + :type s: str + :param encoding: the encoding of the input string, if it is binary + :type encoding: str + :return: a ``nltk.sem`` valuation + :rtype: Valuation + """ + if encoding is not None: + s = s.decode(encoding) + statements = [] + for linenum, line in enumerate(s.splitlines()): + line = line.strip() + if line.startswith('#') or line == '': + continue + try: + statements.append(_read_valuation_line(line)) + except ValueError: + raise ValueError('Unable to parse line %s: %s' % (linenum, line)) + return Valuation(statements) + + +@python_2_unicode_compatible +class Assignment(dict): + """ + A dictionary which represents an assignment of values to variables. + + An assigment can only assign values from its domain. 
+ + If an unknown expression *a* is passed to a model *M*\ 's + interpretation function *i*, *i* will first check whether *M*\ 's + valuation assigns an interpretation to *a* as a constant, and if + this fails, *i* will delegate the interpretation of *a* to + *g*. *g* only assigns values to individual variables (i.e., + members of the class ``IndividualVariableExpression`` in the ``logic`` + module. If a variable is not assigned a value by *g*, it will raise + an ``Undefined`` exception. + + A variable *Assignment* is a mapping from individual variables to + entities in the domain. Individual variables are usually indicated + with the letters ``'x'``, ``'y'``, ``'w'`` and ``'z'``, optionally + followed by an integer (e.g., ``'x0'``, ``'y332'``). Assignments are + created using the ``Assignment`` constructor, which also takes the + domain as a parameter. + + >>> from nltk.sem.evaluate import Assignment + >>> dom = set(['u1', 'u2', 'u3', 'u4']) + >>> g3 = Assignment(dom, [('x', 'u1'), ('y', 'u2')]) + >>> g3 == {'x': 'u1', 'y': 'u2'} + True + + There is also a ``print`` format for assignments which uses a notation + closer to that in logic textbooks: + + >>> print(g3) + g[u1/x][u2/y] + + It is also possible to update an assignment using the ``add`` method: + + >>> dom = set(['u1', 'u2', 'u3', 'u4']) + >>> g4 = Assignment(dom) + >>> g4.add('x', 'u1') + {'x': 'u1'} + + With no arguments, ``purge()`` is equivalent to ``clear()`` on a dictionary: + + >>> g4.purge() + >>> g4 + {} + + :param domain: the domain of discourse + :type domain: set + :param assign: a list of (varname, value) associations + :type assign: list + """ + + def __init__(self, domain, assign=None): + super(Assignment, self).__init__() + self.domain = domain + if assign: + for (var, val) in assign: + assert val in self.domain, "'%s' is not in the domain: %s" % ( + val, + self.domain, + ) + assert is_indvar(var), ( + "Wrong format for an Individual Variable: '%s'" % var + ) + self[var] = val + self.variant = None + self._addvariant() + + def __getitem__(self, key): + if key in self: + return dict.__getitem__(self, key) + else: + raise Undefined("Not recognized as a variable: '%s'" % key) + + def copy(self): + new = Assignment(self.domain) + new.update(self) + return new + + def purge(self, var=None): + """ + Remove one or all keys (i.e. logic variables) from an + assignment, and update ``self.variant``. + + :param var: a Variable acting as a key for the assignment. + """ + if var: + del self[var] + else: + self.clear() + self._addvariant() + return None + + def __str__(self): + """ + Pretty printing for assignments. {'x', 'u'} appears as 'g[u/x]' + """ + gstring = "g" + # Deterministic output for unit testing. + variant = sorted(self.variant) + for (val, var) in variant: + gstring += "[%s/%s]" % (val, var) + return gstring + + def _addvariant(self): + """ + Create a more pretty-printable version of the assignment. + """ + list_ = [] + for item in self.items(): + pair = (item[1], item[0]) + list_.append(pair) + self.variant = list_ + return None + + def add(self, var, val): + """ + Add a new variable-value pair to the assignment, and update + ``self.variant``. + + """ + assert val in self.domain, "%s is not in the domain %s" % (val, self.domain) + assert is_indvar(var), "Wrong format for an Individual Variable: '%s'" % var + self[var] = val + self._addvariant() + return self + + +@python_2_unicode_compatible +class Model(object): + """ + A first order model is a domain *D* of discourse and a valuation *V*. 
+ + A domain *D* is a set, and a valuation *V* is a map that associates + expressions with values in the model. + The domain of *V* should be a subset of *D*. + + Construct a new ``Model``. + + :type domain: set + :param domain: A set of entities representing the domain of discourse of the model. + :type valuation: Valuation + :param valuation: the valuation of the model. + :param prop: If this is set, then we are building a propositional\ + model and don't require the domain of *V* to be subset of *D*. + """ + + def __init__(self, domain, valuation): + assert isinstance(domain, set) + self.domain = domain + self.valuation = valuation + if not domain.issuperset(valuation.domain): + raise Error( + "The valuation domain, %s, must be a subset of the model's domain, %s" + % (valuation.domain, domain) + ) + + def __repr__(self): + return "(%r, %r)" % (self.domain, self.valuation) + + def __str__(self): + return "Domain = %s,\nValuation = \n%s" % (self.domain, self.valuation) + + def evaluate(self, expr, g, trace=None): + """ + Read input expressions, and provide a handler for ``satisfy`` + that blocks further propagation of the ``Undefined`` error. + :param expr: An ``Expression`` of ``logic``. + :type g: Assignment + :param g: an assignment to individual variables. + :rtype: bool or 'Undefined' + """ + try: + parsed = Expression.fromstring(expr) + value = self.satisfy(parsed, g, trace=trace) + if trace: + print() + print("'%s' evaluates to %s under M, %s" % (expr, value, g)) + return value + except Undefined: + if trace: + print() + print("'%s' is undefined under M, %s" % (expr, g)) + return 'Undefined' + + def satisfy(self, parsed, g, trace=None): + """ + Recursive interpretation function for a formula of first-order logic. + + Raises an ``Undefined`` error when ``parsed`` is an atomic string + but is not a symbol or an individual variable. + + :return: Returns a truth value or ``Undefined`` if ``parsed`` is\ + complex, and calls the interpretation function ``i`` if ``parsed``\ + is atomic. + + :param parsed: An expression of ``logic``. + :type g: Assignment + :param g: an assignment to individual variables. 
+ """ + + if isinstance(parsed, ApplicationExpression): + function, arguments = parsed.uncurry() + if isinstance(function, AbstractVariableExpression): + # It's a predicate expression ("P(x,y)"), so used uncurried arguments + funval = self.satisfy(function, g) + argvals = tuple(self.satisfy(arg, g) for arg in arguments) + return argvals in funval + else: + # It must be a lambda expression, so use curried form + funval = self.satisfy(parsed.function, g) + argval = self.satisfy(parsed.argument, g) + return funval[argval] + elif isinstance(parsed, NegatedExpression): + return not self.satisfy(parsed.term, g) + elif isinstance(parsed, AndExpression): + return self.satisfy(parsed.first, g) and self.satisfy(parsed.second, g) + elif isinstance(parsed, OrExpression): + return self.satisfy(parsed.first, g) or self.satisfy(parsed.second, g) + elif isinstance(parsed, ImpExpression): + return (not self.satisfy(parsed.first, g)) or self.satisfy(parsed.second, g) + elif isinstance(parsed, IffExpression): + return self.satisfy(parsed.first, g) == self.satisfy(parsed.second, g) + elif isinstance(parsed, EqualityExpression): + return self.satisfy(parsed.first, g) == self.satisfy(parsed.second, g) + elif isinstance(parsed, AllExpression): + new_g = g.copy() + for u in self.domain: + new_g.add(parsed.variable.name, u) + if not self.satisfy(parsed.term, new_g): + return False + return True + elif isinstance(parsed, ExistsExpression): + new_g = g.copy() + for u in self.domain: + new_g.add(parsed.variable.name, u) + if self.satisfy(parsed.term, new_g): + return True + return False + elif isinstance(parsed, LambdaExpression): + cf = {} + var = parsed.variable.name + for u in self.domain: + val = self.satisfy(parsed.term, g.add(var, u)) + # NB the dict would be a lot smaller if we do this: + # if val: cf[u] = val + # But then need to deal with cases where f(a) should yield + # a function rather than just False. + cf[u] = val + return cf + else: + return self.i(parsed, g, trace) + + # @decorator(trace_eval) + def i(self, parsed, g, trace=False): + """ + An interpretation function. + + Assuming that ``parsed`` is atomic: + + - if ``parsed`` is a non-logical constant, calls the valuation *V* + - else if ``parsed`` is an individual variable, calls assignment *g* + - else returns ``Undefined``. + + :param parsed: an ``Expression`` of ``logic``. + :type g: Assignment + :param g: an assignment to individual variables. + :return: a semantic value + """ + # If parsed is a propositional letter 'p', 'q', etc, it could be in valuation.symbols + # and also be an IndividualVariableExpression. We want to catch this first case. + # So there is a procedural consequence to the ordering of clauses here: + if parsed.variable.name in self.valuation.symbols: + return self.valuation[parsed.variable.name] + elif isinstance(parsed, IndividualVariableExpression): + return g[parsed.variable.name] + + else: + raise Undefined("Can't find a value for %s" % parsed) + + def satisfiers(self, parsed, varex, g, trace=None, nesting=0): + """ + Generate the entities from the model's domain that satisfy an open formula. + + :param parsed: an open formula + :type parsed: Expression + :param varex: the relevant free individual variable in ``parsed``. + :type varex: VariableExpression or str + :param g: a variable assignment + :type g: Assignment + :return: a set of the entities that satisfy ``parsed``. 
+ """ + + spacer = ' ' + indent = spacer + (spacer * nesting) + candidates = [] + + if isinstance(varex, string_types): + var = Variable(varex) + else: + var = varex + + if var in parsed.free(): + if trace: + print() + print( + (spacer * nesting) + + "Open formula is '%s' with assignment %s" % (parsed, g) + ) + for u in self.domain: + new_g = g.copy() + new_g.add(var.name, u) + if trace and trace > 1: + lowtrace = trace - 1 + else: + lowtrace = 0 + value = self.satisfy(parsed, new_g, lowtrace) + + if trace: + print(indent + "(trying assignment %s)" % new_g) + + # parsed == False under g[u/var]? + if value == False: + if trace: + print( + indent + "value of '%s' under %s is False" % (parsed, new_g) + ) + + # so g[u/var] is a satisfying assignment + else: + candidates.append(u) + if trace: + print( + indent + + "value of '%s' under %s is %s" % (parsed, new_g, value) + ) + + result = set(c for c in candidates) + # var isn't free in parsed + else: + raise Undefined("%s is not free in %s" % (var.name, parsed)) + + return result + + +# ////////////////////////////////////////////////////////////////////// +# Demo.. +# ////////////////////////////////////////////////////////////////////// +# number of spacer chars +mult = 30 + +# Demo 1: Propositional Logic +################# +def propdemo(trace=None): + """Example of a propositional model.""" + + global val1, dom1, m1, g1 + val1 = Valuation([('P', True), ('Q', True), ('R', False)]) + dom1 = set([]) + m1 = Model(dom1, val1) + g1 = Assignment(dom1) + + print() + print('*' * mult) + print("Propositional Formulas Demo") + print('*' * mult) + print('(Propositional constants treated as nullary predicates)') + print() + print("Model m1:\n", m1) + print('*' * mult) + sentences = [ + '(P & Q)', + '(P & R)', + '- P', + '- R', + '- - P', + '- (P & R)', + '(P | R)', + '(R | P)', + '(R | R)', + '(- P | R)', + '(P | - P)', + '(P -> Q)', + '(P -> R)', + '(R -> P)', + '(P <-> P)', + '(R <-> R)', + '(P <-> R)', + ] + + for sent in sentences: + if trace: + print() + m1.evaluate(sent, g1, trace) + else: + print("The value of '%s' is: %s" % (sent, m1.evaluate(sent, g1))) + + +# Demo 2: FOL Model +############# + + +def folmodel(quiet=False, trace=None): + """Example of a first-order model.""" + + global val2, v2, dom2, m2, g2 + + v2 = [ + ('adam', 'b1'), + ('betty', 'g1'), + ('fido', 'd1'), + ('girl', set(['g1', 'g2'])), + ('boy', set(['b1', 'b2'])), + ('dog', set(['d1'])), + ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')])), + ] + val2 = Valuation(v2) + dom2 = val2.domain + m2 = Model(dom2, val2) + g2 = Assignment(dom2, [('x', 'b1'), ('y', 'g2')]) + + if not quiet: + print() + print('*' * mult) + print("Models Demo") + print("*" * mult) + print("Model m2:\n", "-" * 14, "\n", m2) + print("Variable assignment = ", g2) + + exprs = ['adam', 'boy', 'love', 'walks', 'x', 'y', 'z'] + parsed_exprs = [Expression.fromstring(e) for e in exprs] + + print() + for parsed in parsed_exprs: + try: + print( + "The interpretation of '%s' in m2 is %s" + % (parsed, m2.i(parsed, g2)) + ) + except Undefined: + print("The interpretation of '%s' in m2 is Undefined" % parsed) + + applications = [ + ('boy', ('adam')), + ('walks', ('adam',)), + ('love', ('adam', 'y')), + ('love', ('y', 'adam')), + ] + + for (fun, args) in applications: + try: + funval = m2.i(Expression.fromstring(fun), g2) + argsval = tuple(m2.i(Expression.fromstring(arg), g2) for arg in args) + print("%s(%s) evaluates to %s" % (fun, args, argsval in funval)) + except Undefined: + print("%s(%s) 
evaluates to Undefined" % (fun, args)) + + +# Demo 3: FOL +######### + + +def foldemo(trace=None): + """ + Interpretation of closed expressions in a first-order model. + """ + folmodel(quiet=True) + + print() + print('*' * mult) + print("FOL Formulas Demo") + print('*' * mult) + + formulas = [ + 'love (adam, betty)', + '(adam = mia)', + '\\x. (boy(x) | girl(x))', + '\\x. boy(x)(adam)', + '\\x y. love(x, y)', + '\\x y. love(x, y)(adam)(betty)', + '\\x y. love(x, y)(adam, betty)', + '\\x y. (boy(x) & love(x, y))', + '\\x. exists y. (boy(x) & love(x, y))', + 'exists z1. boy(z1)', + 'exists x. (boy(x) & -(x = adam))', + 'exists x. (boy(x) & all y. love(y, x))', + 'all x. (boy(x) | girl(x))', + 'all x. (girl(x) -> exists y. boy(y) & love(x, y))', # Every girl loves exists boy. + 'exists x. (boy(x) & all y. (girl(y) -> love(y, x)))', # There is exists boy that every girl loves. + 'exists x. (boy(x) & all y. (girl(y) -> love(x, y)))', # exists boy loves every girl. + 'all x. (dog(x) -> - girl(x))', + 'exists x. exists y. (love(x, y) & love(x, y))', + ] + + for fmla in formulas: + g2.purge() + if trace: + m2.evaluate(fmla, g2, trace) + else: + print("The value of '%s' is: %s" % (fmla, m2.evaluate(fmla, g2))) + + +# Demo 3: Satisfaction +############# + + +def satdemo(trace=None): + """Satisfiers of an open formula in a first order model.""" + + print() + print('*' * mult) + print("Satisfiers Demo") + print('*' * mult) + + folmodel(quiet=True) + + formulas = [ + 'boy(x)', + '(x = x)', + '(boy(x) | girl(x))', + '(boy(x) & girl(x))', + 'love(adam, x)', + 'love(x, adam)', + '-(x = adam)', + 'exists z22. love(x, z22)', + 'exists y. love(y, x)', + 'all y. (girl(y) -> love(x, y))', + 'all y. (girl(y) -> love(y, x))', + 'all y. (girl(y) -> (boy(x) & love(y, x)))', + '(boy(x) & all y. (girl(y) -> love(x, y)))', + '(boy(x) & all y. (girl(y) -> love(y, x)))', + '(boy(x) & exists y. (girl(y) & love(y, x)))', + '(girl(x) -> dog(x))', + 'all y. (dog(y) -> (x = y))', + 'exists y. love(y, x)', + 'exists y. (love(adam, y) & love(y, x))', + ] + + if trace: + print(m2) + + for fmla in formulas: + print(fmla) + Expression.fromstring(fmla) + + parsed = [Expression.fromstring(fmla) for fmla in formulas] + + for p in parsed: + g2.purge() + print("The satisfiers of '%s' are: %s" % (p, m2.satisfiers(p, 'x', g2, trace))) + + +def demo(num=0, trace=None): + """ + Run exists demos. 
+ + - num = 1: propositional logic demo + - num = 2: first order model demo (only if trace is set) + - num = 3: first order sentences demo + - num = 4: satisfaction of open formulas demo + - any other value: run all the demos + + :param trace: trace = 1, or trace = 2 for more verbose tracing + """ + demos = {1: propdemo, 2: folmodel, 3: foldemo, 4: satdemo} + + try: + demos[num](trace=trace) + except KeyError: + for num in demos: + demos[num](trace=trace) + + +if __name__ == "__main__": + demo(2, trace=0) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/glue.py b/venv.bak/lib/python3.7/site-packages/nltk/sem/glue.py new file mode 100644 index 0000000..9fd3cab --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sem/glue.py @@ -0,0 +1,838 @@ +# Natural Language Toolkit: Glue Semantics +# +# Author: Dan Garrette +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function, division, unicode_literals + +import os +from itertools import chain + +from six import string_types + +import nltk +from nltk.internals import Counter +from nltk.tag import UnigramTagger, BigramTagger, TrigramTagger, RegexpTagger +from nltk.sem.logic import ( + Expression, + Variable, + VariableExpression, + LambdaExpression, + AbstractVariableExpression, +) +from nltk.compat import python_2_unicode_compatible +from nltk.sem import drt +from nltk.sem import linearlogic + +SPEC_SEMTYPES = { + 'a': 'ex_quant', + 'an': 'ex_quant', + 'every': 'univ_quant', + 'the': 'def_art', + 'no': 'no_quant', + 'default': 'ex_quant', +} + +OPTIONAL_RELATIONSHIPS = ['nmod', 'vmod', 'punct'] + + +@python_2_unicode_compatible +class GlueFormula(object): + def __init__(self, meaning, glue, indices=None): + if not indices: + indices = set() + + if isinstance(meaning, string_types): + self.meaning = Expression.fromstring(meaning) + elif isinstance(meaning, Expression): + self.meaning = meaning + else: + raise RuntimeError( + 'Meaning term neither string or expression: %s, %s' + % (meaning, meaning.__class__) + ) + + if isinstance(glue, string_types): + self.glue = linearlogic.LinearLogicParser().parse(glue) + elif isinstance(glue, linearlogic.Expression): + self.glue = glue + else: + raise RuntimeError( + 'Glue term neither string or expression: %s, %s' + % (glue, glue.__class__) + ) + + self.indices = indices + + def applyto(self, arg): + """ self = (\\x.(walk x), (subj -o f)) + arg = (john , subj) + returns ((walk john), f) + """ + if self.indices & arg.indices: # if the sets are NOT disjoint + raise linearlogic.LinearLogicApplicationException( + "'%s' applied to '%s'. Indices are not disjoint." 
% (self, arg) + ) + else: # if the sets ARE disjoint + return_indices = self.indices | arg.indices + + try: + return_glue = linearlogic.ApplicationExpression( + self.glue, arg.glue, arg.indices + ) + except linearlogic.LinearLogicApplicationException: + raise linearlogic.LinearLogicApplicationException( + "'%s' applied to '%s'" % (self.simplify(), arg.simplify()) + ) + + arg_meaning_abstracted = arg.meaning + if return_indices: + for dep in self.glue.simplify().antecedent.dependencies[ + ::-1 + ]: # if self.glue is (A -o B), dep is in A.dependencies + arg_meaning_abstracted = self.make_LambdaExpression( + Variable('v%s' % dep), arg_meaning_abstracted + ) + return_meaning = self.meaning.applyto(arg_meaning_abstracted) + + return self.__class__(return_meaning, return_glue, return_indices) + + def make_VariableExpression(self, name): + return VariableExpression(name) + + def make_LambdaExpression(self, variable, term): + return LambdaExpression(variable, term) + + def lambda_abstract(self, other): + assert isinstance(other, GlueFormula) + assert isinstance(other.meaning, AbstractVariableExpression) + return self.__class__( + self.make_LambdaExpression(other.meaning.variable, self.meaning), + linearlogic.ImpExpression(other.glue, self.glue), + ) + + def compile(self, counter=None): + """From Iddo Lev's PhD Dissertation p108-109""" + if not counter: + counter = Counter() + (compiled_glue, new_forms) = self.glue.simplify().compile_pos( + counter, self.__class__ + ) + return new_forms + [ + self.__class__(self.meaning, compiled_glue, set([counter.get()])) + ] + + def simplify(self): + return self.__class__( + self.meaning.simplify(), self.glue.simplify(), self.indices + ) + + def __eq__(self, other): + return ( + self.__class__ == other.__class__ + and self.meaning == other.meaning + and self.glue == other.glue + ) + + def __ne__(self, other): + return not self == other + + # sorting for use in doctests which must be deterministic + def __lt__(self, other): + return str(self) < str(other) + + def __str__(self): + assert isinstance(self.indices, set) + accum = '%s : %s' % (self.meaning, self.glue) + if self.indices: + accum += ' : {' + ', '.join(str(index) for index in self.indices) + '}' + return accum + + def __repr__(self): + return "%s" % self + + +@python_2_unicode_compatible +class GlueDict(dict): + def __init__(self, filename, encoding=None): + self.filename = filename + self.file_encoding = encoding + self.read_file() + + def read_file(self, empty_first=True): + if empty_first: + self.clear() + + try: + contents = nltk.data.load( + self.filename, format='text', encoding=self.file_encoding + ) + # TODO: the above can't handle zip files, but this should anyway be fixed in nltk.data.load() + except LookupError as e: + try: + contents = nltk.data.load( + 'file:' + self.filename, format='text', encoding=self.file_encoding + ) + except LookupError: + raise e + lines = contents.splitlines() + + for line in lines: # example: 'n : (\\x.( x), (v-or))' + # lambdacalc -^ linear logic -^ + line = line.strip() # remove trailing newline + if not len(line): + continue # skip empty lines + if line[0] == '#': + continue # skip commented out lines + + parts = line.split( + ' : ', 2 + ) # ['verb', '(\\x.( x), ( subj -o f ))', '[subj]'] + + glue_formulas = [] + paren_count = 0 + tuple_start = 0 + tuple_comma = 0 + + relationships = None + + if len(parts) > 1: + for (i, c) in enumerate(parts[1]): + if c == '(': + if paren_count == 0: # if it's the first '(' of a tuple + tuple_start = i + 1 # then save the index 
+ paren_count += 1 + elif c == ')': + paren_count -= 1 + if paren_count == 0: # if it's the last ')' of a tuple + meaning_term = parts[1][ + tuple_start:tuple_comma + ] # '\\x.( x)' + glue_term = parts[1][tuple_comma + 1 : i] # '(v-r)' + glue_formulas.append( + [meaning_term, glue_term] + ) # add the GlueFormula to the list + elif c == ',': + if ( + paren_count == 1 + ): # if it's a comma separating the parts of the tuple + tuple_comma = i # then save the index + elif c == '#': # skip comments at the ends of lines + if ( + paren_count != 0 + ): # if the line hasn't parsed correctly so far + raise RuntimeError( + 'Formula syntax is incorrect for entry ' + line + ) + break # break to the next line + + if len(parts) > 2: # if there is a relationship entry at the end + rel_start = parts[2].index('[') + 1 + rel_end = parts[2].index(']') + if rel_start == rel_end: + relationships = frozenset() + else: + relationships = frozenset( + r.strip() for r in parts[2][rel_start:rel_end].split(',') + ) + + try: + start_inheritance = parts[0].index('(') + end_inheritance = parts[0].index(')') + sem = parts[0][:start_inheritance].strip() + supertype = parts[0][start_inheritance + 1 : end_inheritance] + except: + sem = parts[0].strip() + supertype = None + + if sem not in self: + self[sem] = {} + + if ( + relationships is None + ): # if not specified for a specific relationship set + # add all relationship entries for parents + if supertype: + for rels in self[supertype]: + if rels not in self[sem]: + self[sem][rels] = [] + glue = self[supertype][rels] + self[sem][rels].extend(glue) + self[sem][rels].extend( + glue_formulas + ) # add the glue formulas to every rel entry + else: + if None not in self[sem]: + self[sem][None] = [] + self[sem][None].extend( + glue_formulas + ) # add the glue formulas to every rel entry + else: + if relationships not in self[sem]: + self[sem][relationships] = [] + if supertype: + self[sem][relationships].extend(self[supertype][relationships]) + self[sem][relationships].extend( + glue_formulas + ) # add the glue entry to the dictionary + + def __str__(self): + accum = '' + for pos in self: + str_pos = "%s" % pos + for relset in self[pos]: + i = 1 + for gf in self[pos][relset]: + if i == 1: + accum += str_pos + ': ' + else: + accum += ' ' * (len(str_pos) + 2) + accum += "%s" % gf + if relset and i == len(self[pos][relset]): + accum += ' : %s' % relset + accum += '\n' + i += 1 + return accum + + def to_glueformula_list(self, depgraph, node=None, counter=None, verbose=False): + if node is None: + # TODO: should it be depgraph.root? Is this code tested? 
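+            # No start node was given: start from node 0 of the dependency
+            # graph, take its first dependent as the root of the parse, and
+            # restart the recursion from there with a fresh index Counter.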
+ top = depgraph.nodes[0] + depList = list(chain(*top['deps'].values())) + root = depgraph.nodes[depList[0]] + + return self.to_glueformula_list(depgraph, root, Counter(), verbose) + + glueformulas = self.lookup(node, depgraph, counter) + for dep_idx in chain(*node['deps'].values()): + dep = depgraph.nodes[dep_idx] + glueformulas.extend( + self.to_glueformula_list(depgraph, dep, counter, verbose) + ) + return glueformulas + + def lookup(self, node, depgraph, counter): + semtype_names = self.get_semtypes(node) + + semtype = None + for name in semtype_names: + if name in self: + semtype = self[name] + break + if semtype is None: + # raise KeyError, "There is no GlueDict entry for sem type '%s' (for '%s')" % (sem, word) + return [] + + self.add_missing_dependencies(node, depgraph) + + lookup = self._lookup_semtype_option(semtype, node, depgraph) + + if not len(lookup): + raise KeyError( + "There is no GlueDict entry for sem type of '%s' " + "with tag '%s', and rel '%s'" % (node['word'], node['tag'], node['rel']) + ) + + return self.get_glueformulas_from_semtype_entry( + lookup, node['word'], node, depgraph, counter + ) + + def add_missing_dependencies(self, node, depgraph): + rel = node['rel'].lower() + + if rel == 'main': + headnode = depgraph.nodes[node['head']] + subj = self.lookup_unique('subj', headnode, depgraph) + relation = subj['rel'] + node['deps'].setdefault(relation, []) + node['deps'][relation].append(subj['address']) + # node['deps'].append(subj['address']) + + def _lookup_semtype_option(self, semtype, node, depgraph): + relationships = frozenset( + depgraph.nodes[dep]['rel'].lower() + for dep in chain(*node['deps'].values()) + if depgraph.nodes[dep]['rel'].lower() not in OPTIONAL_RELATIONSHIPS + ) + + try: + lookup = semtype[relationships] + except KeyError: + # An exact match is not found, so find the best match where + # 'best' is defined as the glue entry whose relationship set has the + # most relations of any possible relationship set that is a subset + # of the actual depgraph + best_match = frozenset() + for relset_option in set(semtype) - set([None]): + if ( + len(relset_option) > len(best_match) + and relset_option < relationships + ): + best_match = relset_option + if not best_match: + if None in semtype: + best_match = None + else: + return None + lookup = semtype[best_match] + + return lookup + + def get_semtypes(self, node): + """ + Based on the node, return a list of plausible semtypes in order of + plausibility. 
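+
+        For example (an illustrative reading of the branches below, not an
+        exhaustive specification): a 'spec' node whose word is 'every' yields
+        ['univ_quant'], an 'nmod' node tagged 'JJ' yields ['JJ', 'nmod'], and
+        any other node yields a singleton list containing its tag.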
+ """ + rel = node['rel'].lower() + word = node['word'].lower() + + if rel == 'spec': + if word in SPEC_SEMTYPES: + return [SPEC_SEMTYPES[word]] + else: + return [SPEC_SEMTYPES['default']] + elif rel in ['nmod', 'vmod']: + return [node['tag'], rel] + else: + return [node['tag']] + + def get_glueformulas_from_semtype_entry( + self, lookup, word, node, depgraph, counter + ): + glueformulas = [] + + glueFormulaFactory = self.get_GlueFormula_factory() + for meaning, glue in lookup: + gf = glueFormulaFactory(self.get_meaning_formula(meaning, word), glue) + if not len(glueformulas): + gf.word = word + else: + gf.word = '%s%s' % (word, len(glueformulas) + 1) + + gf.glue = self.initialize_labels(gf.glue, node, depgraph, counter.get()) + + glueformulas.append(gf) + return glueformulas + + def get_meaning_formula(self, generic, word): + """ + :param generic: A meaning formula string containing the + parameter "" + :param word: The actual word to be replace "" + """ + word = word.replace('.', '') + return generic.replace('', word) + + def initialize_labels(self, expr, node, depgraph, unique_index): + if isinstance(expr, linearlogic.AtomicExpression): + name = self.find_label_name(expr.name.lower(), node, depgraph, unique_index) + if name[0].isupper(): + return linearlogic.VariableExpression(name) + else: + return linearlogic.ConstantExpression(name) + else: + return linearlogic.ImpExpression( + self.initialize_labels(expr.antecedent, node, depgraph, unique_index), + self.initialize_labels(expr.consequent, node, depgraph, unique_index), + ) + + def find_label_name(self, name, node, depgraph, unique_index): + try: + dot = name.index('.') + + before_dot = name[:dot] + after_dot = name[dot + 1 :] + if before_dot == 'super': + return self.find_label_name( + after_dot, depgraph.nodes[node['head']], depgraph, unique_index + ) + else: + return self.find_label_name( + after_dot, + self.lookup_unique(before_dot, node, depgraph), + depgraph, + unique_index, + ) + except ValueError: + lbl = self.get_label(node) + if name == 'f': + return lbl + elif name == 'v': + return '%sv' % lbl + elif name == 'r': + return '%sr' % lbl + elif name == 'super': + return self.get_label(depgraph.nodes[node['head']]) + elif name == 'var': + return '%s%s' % (lbl.upper(), unique_index) + elif name == 'a': + return self.get_label(self.lookup_unique('conja', node, depgraph)) + elif name == 'b': + return self.get_label(self.lookup_unique('conjb', node, depgraph)) + else: + return self.get_label(self.lookup_unique(name, node, depgraph)) + + def get_label(self, node): + """ + Pick an alphabetic character as identifier for an entity in the model. + + :param value: where to index into the list of characters + :type value: int + """ + value = node['address'] + + letter = [ + 'f', + 'g', + 'h', + 'i', + 'j', + 'k', + 'l', + 'm', + 'n', + 'o', + 'p', + 'q', + 'r', + 's', + 't', + 'u', + 'v', + 'w', + 'x', + 'y', + 'z', + 'a', + 'b', + 'c', + 'd', + 'e', + ][value - 1] + num = int(value) // 26 + if num > 0: + return letter + str(num) + else: + return letter + + def lookup_unique(self, rel, node, depgraph): + """ + Lookup 'key'. There should be exactly one item in the associated relation. 
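+
+        :raise KeyError: if the node has no dependent with relation ``rel``,
+            or has more than one (both cases are checked immediately below).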
+ """ + deps = [ + depgraph.nodes[dep] + for dep in chain(*node['deps'].values()) + if depgraph.nodes[dep]['rel'].lower() == rel.lower() + ] + + if len(deps) == 0: + raise KeyError("'%s' doesn't contain a feature '%s'" % (node['word'], rel)) + elif len(deps) > 1: + raise KeyError( + "'%s' should only have one feature '%s'" % (node['word'], rel) + ) + else: + return deps[0] + + def get_GlueFormula_factory(self): + return GlueFormula + + +class Glue(object): + def __init__( + self, semtype_file=None, remove_duplicates=False, depparser=None, verbose=False + ): + self.verbose = verbose + self.remove_duplicates = remove_duplicates + self.depparser = depparser + + from nltk import Prover9 + + self.prover = Prover9() + + if semtype_file: + self.semtype_file = semtype_file + else: + self.semtype_file = os.path.join( + 'grammars', 'sample_grammars', 'glue.semtype' + ) + + def train_depparser(self, depgraphs=None): + if depgraphs: + self.depparser.train(depgraphs) + else: + self.depparser.train_from_file( + nltk.data.find( + os.path.join('grammars', 'sample_grammars', 'glue_train.conll') + ) + ) + + def parse_to_meaning(self, sentence): + readings = [] + for agenda in self.parse_to_compiled(sentence): + readings.extend(self.get_readings(agenda)) + return readings + + def get_readings(self, agenda): + readings = [] + agenda_length = len(agenda) + atomics = dict() + nonatomics = dict() + while agenda: # is not empty + cur = agenda.pop() + glue_simp = cur.glue.simplify() + if isinstance( + glue_simp, linearlogic.ImpExpression + ): # if cur.glue is non-atomic + for key in atomics: + try: + if isinstance(cur.glue, linearlogic.ApplicationExpression): + bindings = cur.glue.bindings + else: + bindings = linearlogic.BindingDict() + glue_simp.antecedent.unify(key, bindings) + for atomic in atomics[key]: + if not ( + cur.indices & atomic.indices + ): # if the sets of indices are disjoint + try: + agenda.append(cur.applyto(atomic)) + except linearlogic.LinearLogicApplicationException: + pass + except linearlogic.UnificationException: + pass + try: + nonatomics[glue_simp.antecedent].append(cur) + except KeyError: + nonatomics[glue_simp.antecedent] = [cur] + + else: # else cur.glue is atomic + for key in nonatomics: + for nonatomic in nonatomics[key]: + try: + if isinstance( + nonatomic.glue, linearlogic.ApplicationExpression + ): + bindings = nonatomic.glue.bindings + else: + bindings = linearlogic.BindingDict() + glue_simp.unify(key, bindings) + if not ( + cur.indices & nonatomic.indices + ): # if the sets of indices are disjoint + try: + agenda.append(nonatomic.applyto(cur)) + except linearlogic.LinearLogicApplicationException: + pass + except linearlogic.UnificationException: + pass + try: + atomics[glue_simp].append(cur) + except KeyError: + atomics[glue_simp] = [cur] + + for entry in atomics: + for gf in atomics[entry]: + if len(gf.indices) == agenda_length: + self._add_to_reading_list(gf, readings) + for entry in nonatomics: + for gf in nonatomics[entry]: + if len(gf.indices) == agenda_length: + self._add_to_reading_list(gf, readings) + return readings + + def _add_to_reading_list(self, glueformula, reading_list): + add_reading = True + if self.remove_duplicates: + for reading in reading_list: + try: + if reading.equiv(glueformula.meaning, self.prover): + add_reading = False + break + except Exception as e: + # if there is an exception, the syntax of the formula + # may not be understandable by the prover, so don't + # throw out the reading. 
+ print('Error when checking logical equality of statements', e) + + if add_reading: + reading_list.append(glueformula.meaning) + + def parse_to_compiled(self, sentence): + gfls = [self.depgraph_to_glue(dg) for dg in self.dep_parse(sentence)] + return [self.gfl_to_compiled(gfl) for gfl in gfls] + + def dep_parse(self, sentence): + """ + Return a dependency graph for the sentence. + + :param sentence: the sentence to be parsed + :type sentence: list(str) + :rtype: DependencyGraph + """ + + # Lazy-initialize the depparser + if self.depparser is None: + from nltk.parse import MaltParser + + self.depparser = MaltParser(tagger=self.get_pos_tagger()) + if not self.depparser._trained: + self.train_depparser() + return self.depparser.parse(sentence, verbose=self.verbose) + + def depgraph_to_glue(self, depgraph): + return self.get_glue_dict().to_glueformula_list(depgraph) + + def get_glue_dict(self): + return GlueDict(self.semtype_file) + + def gfl_to_compiled(self, gfl): + index_counter = Counter() + return_list = [] + for gf in gfl: + return_list.extend(gf.compile(index_counter)) + + if self.verbose: + print('Compiled Glue Premises:') + for cgf in return_list: + print(cgf) + + return return_list + + def get_pos_tagger(self): + from nltk.corpus import brown + + regexp_tagger = RegexpTagger( + [ + (r'^-?[0-9]+(.[0-9]+)?$', 'CD'), # cardinal numbers + (r'(The|the|A|a|An|an)$', 'AT'), # articles + (r'.*able$', 'JJ'), # adjectives + (r'.*ness$', 'NN'), # nouns formed from adjectives + (r'.*ly$', 'RB'), # adverbs + (r'.*s$', 'NNS'), # plural nouns + (r'.*ing$', 'VBG'), # gerunds + (r'.*ed$', 'VBD'), # past tense verbs + (r'.*', 'NN'), # nouns (default) + ] + ) + brown_train = brown.tagged_sents(categories='news') + unigram_tagger = UnigramTagger(brown_train, backoff=regexp_tagger) + bigram_tagger = BigramTagger(brown_train, backoff=unigram_tagger) + trigram_tagger = TrigramTagger(brown_train, backoff=bigram_tagger) + + # Override particular words + main_tagger = RegexpTagger( + [(r'(A|a|An|an)$', 'ex_quant'), (r'(Every|every|All|all)$', 'univ_quant')], + backoff=trigram_tagger, + ) + + return main_tagger + + +class DrtGlueFormula(GlueFormula): + def __init__(self, meaning, glue, indices=None): + if not indices: + indices = set() + + if isinstance(meaning, string_types): + self.meaning = drt.DrtExpression.fromstring(meaning) + elif isinstance(meaning, drt.DrtExpression): + self.meaning = meaning + else: + raise RuntimeError( + 'Meaning term neither string or expression: %s, %s' + % (meaning, meaning.__class__) + ) + + if isinstance(glue, string_types): + self.glue = linearlogic.LinearLogicParser().parse(glue) + elif isinstance(glue, linearlogic.Expression): + self.glue = glue + else: + raise RuntimeError( + 'Glue term neither string or expression: %s, %s' + % (glue, glue.__class__) + ) + + self.indices = indices + + def make_VariableExpression(self, name): + return drt.DrtVariableExpression(name) + + def make_LambdaExpression(self, variable, term): + return drt.DrtLambdaExpression(variable, term) + + +class DrtGlueDict(GlueDict): + def get_GlueFormula_factory(self): + return DrtGlueFormula + + +class DrtGlue(Glue): + def __init__( + self, semtype_file=None, remove_duplicates=False, depparser=None, verbose=False + ): + if not semtype_file: + semtype_file = os.path.join( + 'grammars', 'sample_grammars', 'drt_glue.semtype' + ) + Glue.__init__(self, semtype_file, remove_duplicates, depparser, verbose) + + def get_glue_dict(self): + return DrtGlueDict(self.semtype_file) + + +def demo(show_example=-1): + from 
nltk.parse import MaltParser + + examples = [ + 'David sees Mary', + 'David eats a sandwich', + 'every man chases a dog', + 'every man believes a dog sleeps', + 'John gives David a sandwich', + 'John chases himself', + ] + # 'John persuades David to order a pizza', + # 'John tries to go', + # 'John tries to find a unicorn', + # 'John seems to vanish', + # 'a unicorn seems to approach', + # 'every big cat leaves', + # 'every gray cat leaves', + # 'every big gray cat leaves', + # 'a former senator leaves', + + print('============== DEMO ==============') + + tagger = RegexpTagger( + [ + ('^(David|Mary|John)$', 'NNP'), + ( + '^(sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$', + 'VB', + ), + ('^(go|order|vanish|find|approach)$', 'VB'), + ('^(a)$', 'ex_quant'), + ('^(every)$', 'univ_quant'), + ('^(sandwich|man|dog|pizza|unicorn|cat|senator)$', 'NN'), + ('^(big|gray|former)$', 'JJ'), + ('^(him|himself)$', 'PRP'), + ] + ) + + depparser = MaltParser(tagger=tagger) + glue = Glue(depparser=depparser, verbose=False) + + for (i, sentence) in enumerate(examples): + if i == show_example or show_example == -1: + print('[[[Example %s]]] %s' % (i, sentence)) + for reading in glue.parse_to_meaning(sentence.split()): + print(reading.simplify()) + print('') + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/hole.py b/venv.bak/lib/python3.7/site-packages/nltk/sem/hole.py new file mode 100644 index 0000000..32852b8 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sem/hole.py @@ -0,0 +1,402 @@ +# Natural Language Toolkit: Logic +# +# Author: Peter Wang +# Updated by: Dan Garrette +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT + +""" +An implementation of the Hole Semantics model, following Blackburn and Bos, +Representation and Inference for Natural Language (CSLI, 2005). + +The semantic representations are built by the grammar hole.fcfg. +This module contains driver code to read in sentences and parse them +according to a hole semantics grammar. + +After parsing, the semantic representation is in the form of an underspecified +representation that is not easy to read. We use a "plugging" algorithm to +convert that representation into first-order logic formulas. +""" +from __future__ import print_function, unicode_literals + +from functools import reduce + +from six import itervalues + +from nltk import compat +from nltk.parse import load_parser + +from nltk.sem.skolemize import skolemize +from nltk.sem.logic import ( + AllExpression, + AndExpression, + ApplicationExpression, + ExistsExpression, + IffExpression, + ImpExpression, + LambdaExpression, + NegatedExpression, + OrExpression, +) + + +# Note that in this code there may be multiple types of trees being referred to: +# +# 1. parse trees +# 2. the underspecified representation +# 3. first-order logic formula trees +# 4. 
the search space when plugging (search tree) +# + + +class Constants(object): + ALL = 'ALL' + EXISTS = 'EXISTS' + NOT = 'NOT' + AND = 'AND' + OR = 'OR' + IMP = 'IMP' + IFF = 'IFF' + PRED = 'PRED' + LEQ = 'LEQ' + HOLE = 'HOLE' + LABEL = 'LABEL' + + MAP = { + ALL: lambda v, e: AllExpression(v.variable, e), + EXISTS: lambda v, e: ExistsExpression(v.variable, e), + NOT: NegatedExpression, + AND: AndExpression, + OR: OrExpression, + IMP: ImpExpression, + IFF: IffExpression, + PRED: ApplicationExpression, + } + + +class HoleSemantics(object): + """ + This class holds the broken-down components of a hole semantics, i.e. it + extracts the holes, labels, logic formula fragments and constraints out of + a big conjunction of such as produced by the hole semantics grammar. It + then provides some operations on the semantics dealing with holes, labels + and finding legal ways to plug holes with labels. + """ + + def __init__(self, usr): + """ + Constructor. `usr' is a ``sem.Expression`` representing an + Underspecified Representation Structure (USR). A USR has the following + special predicates: + ALL(l,v,n), + EXISTS(l,v,n), + AND(l,n,n), + OR(l,n,n), + IMP(l,n,n), + IFF(l,n,n), + PRED(l,v,n,v[,v]*) where the brackets and star indicate zero or more repetitions, + LEQ(n,n), + HOLE(n), + LABEL(n) + where l is the label of the node described by the predicate, n is either + a label or a hole, and v is a variable. + """ + self.holes = set() + self.labels = set() + self.fragments = {} # mapping of label -> formula fragment + self.constraints = set() # set of Constraints + self._break_down(usr) + self.top_most_labels = self._find_top_most_labels() + self.top_hole = self._find_top_hole() + + def is_node(self, x): + """ + Return true if x is a node (label or hole) in this semantic + representation. + """ + return x in (self.labels | self.holes) + + def _break_down(self, usr): + """ + Extract holes, labels, formula fragments and constraints from the hole + semantics underspecified representation (USR). + """ + if isinstance(usr, AndExpression): + self._break_down(usr.first) + self._break_down(usr.second) + elif isinstance(usr, ApplicationExpression): + func, args = usr.uncurry() + if func.variable.name == Constants.LEQ: + self.constraints.add(Constraint(args[0], args[1])) + elif func.variable.name == Constants.HOLE: + self.holes.add(args[0]) + elif func.variable.name == Constants.LABEL: + self.labels.add(args[0]) + else: + label = args[0] + assert label not in self.fragments + self.fragments[label] = (func, args[1:]) + else: + raise ValueError(usr.label()) + + def _find_top_nodes(self, node_list): + top_nodes = node_list.copy() + for f in itervalues(self.fragments): + # the label is the first argument of the predicate + args = f[1] + for arg in args: + if arg in node_list: + top_nodes.discard(arg) + return top_nodes + + def _find_top_most_labels(self): + """ + Return the set of labels which are not referenced directly as part of + another formula fragment. These will be the top-most labels for the + subtree that they are part of. + """ + return self._find_top_nodes(self.labels) + + def _find_top_hole(self): + """ + Return the hole that will be the top of the formula tree. + """ + top_holes = self._find_top_nodes(self.holes) + assert len(top_holes) == 1 # it must be unique + return top_holes.pop() + + def pluggings(self): + """ + Calculate and return all the legal pluggings (mappings of labels to + holes) of this semantics given the constraints. 
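+
+        Each returned plugging is a dict mapping holes to labels, as built up
+        by ``_plug_hole`` below (``plug_acc[hole] = label``).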
+ """ + record = [] + self._plug_nodes([(self.top_hole, [])], self.top_most_labels, {}, record) + return record + + def _plug_nodes(self, queue, potential_labels, plug_acc, record): + """ + Plug the nodes in `queue' with the labels in `potential_labels'. + + Each element of `queue' is a tuple of the node to plug and the list of + ancestor holes from the root of the graph to that node. + + `potential_labels' is a set of the labels which are still available for + plugging. + + `plug_acc' is the incomplete mapping of holes to labels made on the + current branch of the search tree so far. + + `record' is a list of all the complete pluggings that we have found in + total so far. It is the only parameter that is destructively updated. + """ + if queue != []: + (node, ancestors) = queue[0] + if node in self.holes: + # The node is a hole, try to plug it. + self._plug_hole( + node, ancestors, queue[1:], potential_labels, plug_acc, record + ) + else: + assert node in self.labels + # The node is a label. Replace it in the queue by the holes and + # labels in the formula fragment named by that label. + args = self.fragments[node][1] + head = [(a, ancestors) for a in args if self.is_node(a)] + self._plug_nodes(head + queue[1:], potential_labels, plug_acc, record) + else: + raise Exception('queue empty') + + def _plug_hole(self, hole, ancestors0, queue, potential_labels0, plug_acc0, record): + """ + Try all possible ways of plugging a single hole. + See _plug_nodes for the meanings of the parameters. + """ + # Add the current hole we're trying to plug into the list of ancestors. + assert hole not in ancestors0 + ancestors = [hole] + ancestors0 + + # Try each potential label in this hole in turn. + for l in potential_labels0: + # Is the label valid in this hole? + if self._violates_constraints(l, ancestors): + continue + + plug_acc = plug_acc0.copy() + plug_acc[hole] = l + potential_labels = potential_labels0.copy() + potential_labels.remove(l) + + if len(potential_labels) == 0: + # No more potential labels. That must mean all the holes have + # been filled so we have found a legal plugging so remember it. + # + # Note that the queue might not be empty because there might + # be labels on there that point to formula fragments with + # no holes in them. _sanity_check_plugging will make sure + # all holes are filled. + self._sanity_check_plugging(plug_acc, self.top_hole, []) + record.append(plug_acc) + else: + # Recursively try to fill in the rest of the holes in the + # queue. The label we just plugged into the hole could have + # holes of its own so at the end of the queue. Putting it on + # the end of the queue gives us a breadth-first search, so that + # all the holes at level i of the formula tree are filled + # before filling level i+1. + # A depth-first search would work as well since the trees must + # be finite but the bookkeeping would be harder. + self._plug_nodes( + queue + [(l, ancestors)], potential_labels, plug_acc, record + ) + + def _violates_constraints(self, label, ancestors): + """ + Return True if the `label' cannot be placed underneath the holes given + by the set `ancestors' because it would violate the constraints imposed + on it. + """ + for c in self.constraints: + if c.lhs == label: + if c.rhs not in ancestors: + return True + return False + + def _sanity_check_plugging(self, plugging, node, ancestors): + """ + Make sure that a given plugging is legal. We recursively go through + each node and make sure that no constraints are violated. 
+ We also check that all holes have been filled. + """ + if node in self.holes: + ancestors = [node] + ancestors + label = plugging[node] + else: + label = node + assert label in self.labels + for c in self.constraints: + if c.lhs == label: + assert c.rhs in ancestors + args = self.fragments[label][1] + for arg in args: + if self.is_node(arg): + self._sanity_check_plugging(plugging, arg, [label] + ancestors) + + def formula_tree(self, plugging): + """ + Return the first-order logic formula tree for this underspecified + representation using the plugging given. + """ + return self._formula_tree(plugging, self.top_hole) + + def _formula_tree(self, plugging, node): + if node in plugging: + return self._formula_tree(plugging, plugging[node]) + elif node in self.fragments: + pred, args = self.fragments[node] + children = [self._formula_tree(plugging, arg) for arg in args] + return reduce(Constants.MAP[pred.variable.name], children) + else: + return node + + +@compat.python_2_unicode_compatible +class Constraint(object): + """ + This class represents a constraint of the form (L =< N), + where L is a label and N is a node (a label or a hole). + """ + + def __init__(self, lhs, rhs): + self.lhs = lhs + self.rhs = rhs + + def __eq__(self, other): + if self.__class__ == other.__class__: + return self.lhs == other.lhs and self.rhs == other.rhs + else: + return False + + def __ne__(self, other): + return not (self == other) + + def __hash__(self): + return hash(repr(self)) + + def __repr__(self): + return '(%s < %s)' % (self.lhs, self.rhs) + + +def hole_readings(sentence, grammar_filename=None, verbose=False): + if not grammar_filename: + grammar_filename = 'grammars/sample_grammars/hole.fcfg' + + if verbose: + print('Reading grammar file', grammar_filename) + + parser = load_parser(grammar_filename) + + # Parse the sentence. + tokens = sentence.split() + trees = list(parser.parse(tokens)) + if verbose: + print('Got %d different parses' % len(trees)) + + all_readings = [] + for tree in trees: + # Get the semantic feature from the top of the parse tree. + sem = tree.label()['SEM'].simplify() + + # Print the raw semantic representation. + if verbose: + print('Raw: ', sem) + + # Skolemize away all quantifiers. All variables become unique. + while isinstance(sem, LambdaExpression): + sem = sem.term + skolemized = skolemize(sem) + + if verbose: + print('Skolemized:', skolemized) + + # Break the hole semantics representation down into its components + # i.e. holes, labels, formula fragments and constraints. + hole_sem = HoleSemantics(skolemized) + + # Maybe show the details of the semantic representation. + if verbose: + print('Holes: ', hole_sem.holes) + print('Labels: ', hole_sem.labels) + print('Constraints: ', hole_sem.constraints) + print('Top hole: ', hole_sem.top_hole) + print('Top labels: ', hole_sem.top_most_labels) + print('Fragments:') + for l, f in hole_sem.fragments.items(): + print('\t%s: %s' % (l, f)) + + # Find all the possible ways to plug the formulas together. + pluggings = hole_sem.pluggings() + + # Build FOL formula trees using the pluggings. + readings = list(map(hole_sem.formula_tree, pluggings)) + + # Print out the formulas in a textual format. + if verbose: + for i, r in enumerate(readings): + print() + print('%d. 
%s' % (i, r)) + print() + + all_readings.extend(readings) + + return all_readings + + +if __name__ == '__main__': + for r in hole_readings('a dog barks'): + print(r) + print() + for r in hole_readings('every girl chases a dog'): + print(r) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/lfg.py b/venv.bak/lib/python3.7/site-packages/nltk/sem/lfg.py new file mode 100644 index 0000000..9b6957e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sem/lfg.py @@ -0,0 +1,261 @@ +# Natural Language Toolkit: Lexical Functional Grammar +# +# Author: Dan Garrette +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function, division, unicode_literals + +from itertools import chain + +from nltk.internals import Counter +from nltk.compat import python_2_unicode_compatible + + +@python_2_unicode_compatible +class FStructure(dict): + def safeappend(self, key, item): + """ + Append 'item' to the list at 'key'. If no list exists for 'key', then + construct one. + """ + if key not in self: + self[key] = [] + self[key].append(item) + + def __setitem__(self, key, value): + dict.__setitem__(self, key.lower(), value) + + def __getitem__(self, key): + return dict.__getitem__(self, key.lower()) + + def __contains__(self, key): + return dict.__contains__(self, key.lower()) + + def to_glueformula_list(self, glue_dict): + depgraph = self.to_depgraph() + return glue_dict.to_glueformula_list(depgraph) + + def to_depgraph(self, rel=None): + from nltk.parse.dependencygraph import DependencyGraph + + depgraph = DependencyGraph() + nodes = depgraph.nodes + + self._to_depgraph(nodes, 0, 'ROOT') + + # Add all the dependencies for all the nodes + for address, node in nodes.items(): + for n2 in (n for n in nodes.values() if n['rel'] != 'TOP'): + if n2['head'] == address: + relation = n2['rel'] + node['deps'].setdefault(relation, []) + node['deps'][relation].append(n2['address']) + + depgraph.root = nodes[1] + + return depgraph + + def _to_depgraph(self, nodes, head, rel): + index = len(nodes) + + nodes[index].update( + { + 'address': index, + 'word': self.pred[0], + 'tag': self.pred[1], + 'head': head, + 'rel': rel, + } + ) + + for feature in sorted(self): + for item in sorted(self[feature]): + if isinstance(item, FStructure): + item._to_depgraph(nodes, index, feature) + elif isinstance(item, tuple): + new_index = len(nodes) + nodes[new_index].update( + { + 'address': new_index, + 'word': item[0], + 'tag': item[1], + 'head': index, + 'rel': feature, + } + ) + elif isinstance(item, list): + for n in item: + n._to_depgraph(nodes, index, feature) + else: + raise Exception( + 'feature %s is not an FStruct, a list, or a tuple' % feature + ) + + @staticmethod + def read_depgraph(depgraph): + return FStructure._read_depgraph(depgraph.root, depgraph) + + @staticmethod + def _read_depgraph(node, depgraph, label_counter=None, parent=None): + if not label_counter: + label_counter = Counter() + + if node['rel'].lower() in ['spec', 'punct']: + # the value of a 'spec' entry is a word, not an FStructure + return (node['word'], node['tag']) + + else: + fstruct = FStructure() + fstruct.pred = None + fstruct.label = FStructure._make_label(label_counter.get()) + + fstruct.parent = parent + + word, tag = node['word'], node['tag'] + if tag[:2] == 'VB': + if tag[2:3] == 'D': + fstruct.safeappend('tense', ('PAST', 'tense')) + fstruct.pred = (word, tag[:2]) + + if not fstruct.pred: + fstruct.pred = (word, tag) + + children = [depgraph.nodes[idx] for idx in 
chain(*node['deps'].values())] + for child in children: + fstruct.safeappend( + child['rel'], + FStructure._read_depgraph(child, depgraph, label_counter, fstruct), + ) + + return fstruct + + @staticmethod + def _make_label(value): + """ + Pick an alphabetic character as identifier for an entity in the model. + + :param value: where to index into the list of characters + :type value: int + """ + letter = [ + 'f', + 'g', + 'h', + 'i', + 'j', + 'k', + 'l', + 'm', + 'n', + 'o', + 'p', + 'q', + 'r', + 's', + 't', + 'u', + 'v', + 'w', + 'x', + 'y', + 'z', + 'a', + 'b', + 'c', + 'd', + 'e', + ][value - 1] + num = int(value) // 26 + if num > 0: + return letter + str(num) + else: + return letter + + def __repr__(self): + return self.__unicode__().replace('\n', '') + + def __str__(self): + return self.pretty_format() + + def pretty_format(self, indent=3): + try: + accum = '%s:[' % self.label + except NameError: + accum = '[' + try: + accum += 'pred \'%s\'' % (self.pred[0]) + except NameError: + pass + + for feature in sorted(self): + for item in self[feature]: + if isinstance(item, FStructure): + next_indent = indent + len(feature) + 3 + len(self.label) + accum += '\n%s%s %s' % ( + ' ' * (indent), + feature, + item.pretty_format(next_indent), + ) + elif isinstance(item, tuple): + accum += '\n%s%s \'%s\'' % (' ' * (indent), feature, item[0]) + elif isinstance(item, list): + accum += '\n%s%s {%s}' % ( + ' ' * (indent), + feature, + ('\n%s' % (' ' * (indent + len(feature) + 2))).join(item), + ) + else: # ERROR + raise Exception( + 'feature %s is not an FStruct, a list, or a tuple' % feature + ) + return accum + ']' + + +def demo_read_depgraph(): + from nltk.parse.dependencygraph import DependencyGraph + + dg1 = DependencyGraph( + """\ +Esso NNP 2 SUB +said VBD 0 ROOT +the DT 5 NMOD +Whiting NNP 5 NMOD +field NN 6 SUB +started VBD 2 VMOD +production NN 6 OBJ +Tuesday NNP 6 VMOD +""" + ) + dg2 = DependencyGraph( + """\ +John NNP 2 SUB +sees VBP 0 ROOT +Mary NNP 2 OBJ +""" + ) + dg3 = DependencyGraph( + """\ +a DT 2 SPEC +man NN 3 SUBJ +walks VB 0 ROOT +""" + ) + dg4 = DependencyGraph( + """\ +every DT 2 SPEC +girl NN 3 SUBJ +chases VB 0 ROOT +a DT 5 SPEC +dog NN 3 OBJ +""" + ) + + depgraphs = [dg1, dg2, dg3, dg4] + for dg in depgraphs: + print(FStructure.read_depgraph(dg)) + + +if __name__ == '__main__': + demo_read_depgraph() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/linearlogic.py b/venv.bak/lib/python3.7/site-packages/nltk/sem/linearlogic.py new file mode 100644 index 0000000..2725980 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sem/linearlogic.py @@ -0,0 +1,495 @@ +# Natural Language Toolkit: Linear Logic +# +# Author: Dan Garrette +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals + +from six import string_types + +from nltk.internals import Counter +from nltk.compat import python_2_unicode_compatible +from nltk.sem.logic import LogicParser, APP + +_counter = Counter() + + +class Tokens(object): + # Punctuation + OPEN = '(' + CLOSE = ')' + + # Operations + IMP = '-o' + + PUNCT = [OPEN, CLOSE] + TOKENS = PUNCT + [IMP] + + +class LinearLogicParser(LogicParser): + """A linear logic expression parser.""" + + def __init__(self): + LogicParser.__init__(self) + + self.operator_precedence = {APP: 1, Tokens.IMP: 2, None: 3} + self.right_associated_operations += [Tokens.IMP] + + def get_all_symbols(self): + return Tokens.TOKENS + + def handle(self, tok, context): + if tok not 
in Tokens.TOKENS: + return self.handle_variable(tok, context) + elif tok == Tokens.OPEN: + return self.handle_open(tok, context) + + def get_BooleanExpression_factory(self, tok): + if tok == Tokens.IMP: + return ImpExpression + else: + return None + + def make_BooleanExpression(self, factory, first, second): + return factory(first, second) + + def attempt_ApplicationExpression(self, expression, context): + """Attempt to make an application expression. If the next tokens + are an argument in parens, then the argument expression is a + function being applied to the arguments. Otherwise, return the + argument expression.""" + if self.has_priority(APP, context): + if self.inRange(0) and self.token(0) == Tokens.OPEN: + self.token() # swallow then open paren + argument = self.process_next_expression(APP) + self.assertNextToken(Tokens.CLOSE) + expression = ApplicationExpression(expression, argument, None) + return expression + + def make_VariableExpression(self, name): + if name[0].isupper(): + return VariableExpression(name) + else: + return ConstantExpression(name) + + +@python_2_unicode_compatible +class Expression(object): + + _linear_logic_parser = LinearLogicParser() + + @classmethod + def fromstring(cls, s): + return cls._linear_logic_parser.parse(s) + + def applyto(self, other, other_indices=None): + return ApplicationExpression(self, other, other_indices) + + def __call__(self, other): + return self.applyto(other) + + def __repr__(self): + return '<%s %s>' % (self.__class__.__name__, self) + + +@python_2_unicode_compatible +class AtomicExpression(Expression): + def __init__(self, name, dependencies=None): + """ + :param name: str for the constant name + :param dependencies: list of int for the indices on which this atom is dependent + """ + assert isinstance(name, string_types) + self.name = name + + if not dependencies: + dependencies = [] + self.dependencies = dependencies + + def simplify(self, bindings=None): + """ + If 'self' is bound by 'bindings', return the atomic to which it is bound. + Otherwise, return self. 
+ + :param bindings: ``BindingDict`` A dictionary of bindings used to simplify + :return: ``AtomicExpression`` + """ + if bindings and self in bindings: + return bindings[self] + else: + return self + + def compile_pos(self, index_counter, glueFormulaFactory): + """ + From Iddo Lev's PhD Dissertation p108-109 + + :param index_counter: ``Counter`` for unique indices + :param glueFormulaFactory: ``GlueFormula`` for creating new glue formulas + :return: (``Expression``,set) for the compiled linear logic and any newly created glue formulas + """ + self.dependencies = [] + return (self, []) + + def compile_neg(self, index_counter, glueFormulaFactory): + """ + From Iddo Lev's PhD Dissertation p108-109 + + :param index_counter: ``Counter`` for unique indices + :param glueFormulaFactory: ``GlueFormula`` for creating new glue formulas + :return: (``Expression``,set) for the compiled linear logic and any newly created glue formulas + """ + self.dependencies = [] + return (self, []) + + def initialize_labels(self, fstruct): + self.name = fstruct.initialize_label(self.name.lower()) + + def __eq__(self, other): + return self.__class__ == other.__class__ and self.name == other.name + + def __ne__(self, other): + return not self == other + + def __str__(self): + accum = self.name + if self.dependencies: + accum += "%s" % self.dependencies + return accum + + def __hash__(self): + return hash(self.name) + + +class ConstantExpression(AtomicExpression): + def unify(self, other, bindings): + """ + If 'other' is a constant, then it must be equal to 'self'. If 'other' is a variable, + then it must not be bound to anything other than 'self'. + + :param other: ``Expression`` + :param bindings: ``BindingDict`` A dictionary of all current bindings + :return: ``BindingDict`` A new combined dictionary of of 'bindings' and any new binding + :raise UnificationException: If 'self' and 'other' cannot be unified in the context of 'bindings' + """ + assert isinstance(other, Expression) + if isinstance(other, VariableExpression): + try: + return bindings + BindingDict([(other, self)]) + except VariableBindingException: + pass + elif self == other: + return bindings + raise UnificationException(self, other, bindings) + + +class VariableExpression(AtomicExpression): + def unify(self, other, bindings): + """ + 'self' must not be bound to anything other than 'other'. + + :param other: ``Expression`` + :param bindings: ``BindingDict`` A dictionary of all current bindings + :return: ``BindingDict`` A new combined dictionary of of 'bindings' and the new binding + :raise UnificationException: If 'self' and 'other' cannot be unified in the context of 'bindings' + """ + assert isinstance(other, Expression) + try: + if self == other: + return bindings + else: + return bindings + BindingDict([(self, other)]) + except VariableBindingException: + raise UnificationException(self, other, bindings) + + +@python_2_unicode_compatible +class ImpExpression(Expression): + def __init__(self, antecedent, consequent): + """ + :param antecedent: ``Expression`` for the antecedent + :param consequent: ``Expression`` for the consequent + """ + assert isinstance(antecedent, Expression) + assert isinstance(consequent, Expression) + self.antecedent = antecedent + self.consequent = consequent + + def simplify(self, bindings=None): + return self.__class__( + self.antecedent.simplify(bindings), self.consequent.simplify(bindings) + ) + + def unify(self, other, bindings): + """ + Both the antecedent and consequent of 'self' and 'other' must unify. 
+ + :param other: ``ImpExpression`` + :param bindings: ``BindingDict`` A dictionary of all current bindings + :return: ``BindingDict`` A new combined dictionary of of 'bindings' and any new bindings + :raise UnificationException: If 'self' and 'other' cannot be unified in the context of 'bindings' + """ + assert isinstance(other, ImpExpression) + try: + return ( + bindings + + self.antecedent.unify(other.antecedent, bindings) + + self.consequent.unify(other.consequent, bindings) + ) + except VariableBindingException: + raise UnificationException(self, other, bindings) + + def compile_pos(self, index_counter, glueFormulaFactory): + """ + From Iddo Lev's PhD Dissertation p108-109 + + :param index_counter: ``Counter`` for unique indices + :param glueFormulaFactory: ``GlueFormula`` for creating new glue formulas + :return: (``Expression``,set) for the compiled linear logic and any newly created glue formulas + """ + (a, a_new) = self.antecedent.compile_neg(index_counter, glueFormulaFactory) + (c, c_new) = self.consequent.compile_pos(index_counter, glueFormulaFactory) + return (ImpExpression(a, c), a_new + c_new) + + def compile_neg(self, index_counter, glueFormulaFactory): + """ + From Iddo Lev's PhD Dissertation p108-109 + + :param index_counter: ``Counter`` for unique indices + :param glueFormulaFactory: ``GlueFormula`` for creating new glue formulas + :return: (``Expression``,list of ``GlueFormula``) for the compiled linear logic and any newly created glue formulas + """ + (a, a_new) = self.antecedent.compile_pos(index_counter, glueFormulaFactory) + (c, c_new) = self.consequent.compile_neg(index_counter, glueFormulaFactory) + fresh_index = index_counter.get() + c.dependencies.append(fresh_index) + new_v = glueFormulaFactory('v%s' % fresh_index, a, set([fresh_index])) + return (c, a_new + c_new + [new_v]) + + def initialize_labels(self, fstruct): + self.antecedent.initialize_labels(fstruct) + self.consequent.initialize_labels(fstruct) + + def __eq__(self, other): + return ( + self.__class__ == other.__class__ + and self.antecedent == other.antecedent + and self.consequent == other.consequent + ) + + def __ne__(self, other): + return not self == other + + def __str__(self): + return "%s%s %s %s%s" % ( + Tokens.OPEN, + self.antecedent, + Tokens.IMP, + self.consequent, + Tokens.CLOSE, + ) + + def __hash__(self): + return hash( + '%s%s%s' % (hash(self.antecedent), Tokens.IMP, hash(self.consequent)) + ) + + +@python_2_unicode_compatible +class ApplicationExpression(Expression): + def __init__(self, function, argument, argument_indices=None): + """ + :param function: ``Expression`` for the function + :param argument: ``Expression`` for the argument + :param argument_indices: set for the indices of the glue formula from which the argument came + :raise LinearLogicApplicationException: If 'function' cannot be applied to 'argument' given 'argument_indices'. + """ + function_simp = function.simplify() + argument_simp = argument.simplify() + + assert isinstance(function_simp, ImpExpression) + assert isinstance(argument_simp, Expression) + + bindings = BindingDict() + + try: + if isinstance(function, ApplicationExpression): + bindings += function.bindings + if isinstance(argument, ApplicationExpression): + bindings += argument.bindings + bindings += function_simp.antecedent.unify(argument_simp, bindings) + except UnificationException as e: + raise LinearLogicApplicationException( + 'Cannot apply %s to %s. 
%s' % (function_simp, argument_simp, e) + ) + + # If you are running it on complied premises, more conditions apply + if argument_indices: + # A.dependencies of (A -o (B -o C)) must be a proper subset of argument_indices + if not set(function_simp.antecedent.dependencies) < argument_indices: + raise LinearLogicApplicationException( + 'Dependencies unfulfilled when attempting to apply Linear Logic formula %s to %s' + % (function_simp, argument_simp) + ) + if set(function_simp.antecedent.dependencies) == argument_indices: + raise LinearLogicApplicationException( + 'Dependencies not a proper subset of indices when attempting to apply Linear Logic formula %s to %s' + % (function_simp, argument_simp) + ) + + self.function = function + self.argument = argument + self.bindings = bindings + + def simplify(self, bindings=None): + """ + Since function is an implication, return its consequent. There should be + no need to check that the application is valid since the checking is done + by the constructor. + + :param bindings: ``BindingDict`` A dictionary of bindings used to simplify + :return: ``Expression`` + """ + if not bindings: + bindings = self.bindings + + return self.function.simplify(bindings).consequent + + def __eq__(self, other): + return ( + self.__class__ == other.__class__ + and self.function == other.function + and self.argument == other.argument + ) + + def __ne__(self, other): + return not self == other + + def __str__(self): + return "%s" % self.function + Tokens.OPEN + "%s" % self.argument + Tokens.CLOSE + + def __hash__(self): + return hash( + '%s%s%s' % (hash(self.antecedent), Tokens.OPEN, hash(self.consequent)) + ) + + +@python_2_unicode_compatible +class BindingDict(object): + def __init__(self, bindings=None): + """ + :param bindings: + list [(``VariableExpression``, ``AtomicExpression``)] to initialize the dictionary + dict {``VariableExpression``: ``AtomicExpression``} to initialize the dictionary + """ + self.d = {} + + if isinstance(bindings, dict): + bindings = bindings.items() + + if bindings: + for (v, b) in bindings: + self[v] = b + + def __setitem__(self, variable, binding): + """ + A binding is consistent with the dict if its variable is not already bound, OR if its + variable is already bound to its argument. 
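+        For example (illustrative): binding ``X`` to ``f`` twice is accepted,
+        but binding ``X`` to ``f`` and then to ``g`` raises
+        ``VariableBindingException``.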
+ + :param variable: ``VariableExpression`` The variable bind + :param binding: ``Expression`` The expression to which 'variable' should be bound + :raise VariableBindingException: If the variable cannot be bound in this dictionary + """ + assert isinstance(variable, VariableExpression) + assert isinstance(binding, Expression) + + assert variable != binding + + existing = self.d.get(variable, None) + + if not existing or binding == existing: + self.d[variable] = binding + else: + raise VariableBindingException( + 'Variable %s already bound to another value' % (variable) + ) + + def __getitem__(self, variable): + """ + Return the expression to which 'variable' is bound + """ + assert isinstance(variable, VariableExpression) + + intermediate = self.d[variable] + while intermediate: + try: + intermediate = self.d[intermediate] + except KeyError: + return intermediate + + def __contains__(self, item): + return item in self.d + + def __add__(self, other): + """ + :param other: ``BindingDict`` The dict with which to combine self + :return: ``BindingDict`` A new dict containing all the elements of both parameters + :raise VariableBindingException: If the parameter dictionaries are not consistent with each other + """ + try: + combined = BindingDict() + for v in self.d: + combined[v] = self.d[v] + for v in other.d: + combined[v] = other.d[v] + return combined + except VariableBindingException: + raise VariableBindingException( + 'Attempting to add two contradicting' + ' VariableBindingsLists: %s, %s' % (self, other) + ) + + def __ne__(self, other): + return not self == other + + def __eq__(self, other): + if not isinstance(other, BindingDict): + raise TypeError + return self.d == other.d + + def __str__(self): + return '{' + ', '.join('%s: %s' % (v, self.d[v]) for v in self.d) + '}' + + def __repr__(self): + return 'BindingDict: %s' % self + + +class VariableBindingException(Exception): + pass + + +class UnificationException(Exception): + def __init__(self, a, b, bindings): + Exception.__init__(self, 'Cannot unify %s with %s given %s' % (a, b, bindings)) + + +class LinearLogicApplicationException(Exception): + pass + + +def demo(): + lexpr = Expression.fromstring + + print(lexpr(r'f')) + print(lexpr(r'(g -o f)')) + print(lexpr(r'((g -o G) -o G)')) + print(lexpr(r'g -o h -o f')) + print(lexpr(r'(g -o f)(g)').simplify()) + print(lexpr(r'(H -o f)(g)').simplify()) + print(lexpr(r'((g -o G) -o G)((g -o f))').simplify()) + print(lexpr(r'(H -o H)((g -o f))').simplify()) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/logic.py b/venv.bak/lib/python3.7/site-packages/nltk/sem/logic.py new file mode 100644 index 0000000..fe5f73b --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sem/logic.py @@ -0,0 +1,2072 @@ +# Natural Language Toolkit: Logic +# +# Author: Dan Garrette +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT + +""" +A version of first order predicate logic, built on +top of the typed lambda calculus. 
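+
+A minimal usage sketch (an illustrative addition; it relies only on the
+``Expression.fromstring`` entry point and the small helper functions defined
+in this module)::
+
+    e = Expression.fromstring('all x.(boy(x) -> exists y.love(x, y))')
+    e.free()          # -> set(): every individual variable here is bound
+    boolean_ops()     # prints the symbols for negation, conjunction, etc.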
+""" +from __future__ import print_function, unicode_literals + +import re +import operator +from collections import defaultdict +from functools import reduce, total_ordering + +from six import string_types + +from nltk.util import Trie +from nltk.internals import Counter +from nltk.compat import python_2_unicode_compatible + +APP = 'APP' + +_counter = Counter() + + +class Tokens(object): + LAMBDA = '\\' + LAMBDA_LIST = ['\\'] + + # Quantifiers + EXISTS = 'exists' + EXISTS_LIST = ['some', 'exists', 'exist'] + ALL = 'all' + ALL_LIST = ['all', 'forall'] + + # Punctuation + DOT = '.' + OPEN = '(' + CLOSE = ')' + COMMA = ',' + + # Operations + NOT = '-' + NOT_LIST = ['not', '-', '!'] + AND = '&' + AND_LIST = ['and', '&', '^'] + OR = '|' + OR_LIST = ['or', '|'] + IMP = '->' + IMP_LIST = ['implies', '->', '=>'] + IFF = '<->' + IFF_LIST = ['iff', '<->', '<=>'] + EQ = '=' + EQ_LIST = ['=', '=='] + NEQ = '!=' + NEQ_LIST = ['!='] + + # Collections of tokens + BINOPS = AND_LIST + OR_LIST + IMP_LIST + IFF_LIST + QUANTS = EXISTS_LIST + ALL_LIST + PUNCT = [DOT, OPEN, CLOSE, COMMA] + + TOKENS = BINOPS + EQ_LIST + NEQ_LIST + QUANTS + LAMBDA_LIST + PUNCT + NOT_LIST + + # Special + SYMBOLS = [x for x in TOKENS if re.match(r'^[-\\.(),!&^|>=<]*$', x)] + + +def boolean_ops(): + """ + Boolean operators + """ + names = ["negation", "conjunction", "disjunction", "implication", "equivalence"] + for pair in zip(names, [Tokens.NOT, Tokens.AND, Tokens.OR, Tokens.IMP, Tokens.IFF]): + print("%-15s\t%s" % pair) + + +def equality_preds(): + """ + Equality predicates + """ + names = ["equality", "inequality"] + for pair in zip(names, [Tokens.EQ, Tokens.NEQ]): + print("%-15s\t%s" % pair) + + +def binding_ops(): + """ + Binding operators + """ + names = ["existential", "universal", "lambda"] + for pair in zip(names, [Tokens.EXISTS, Tokens.ALL, Tokens.LAMBDA]): + print("%-15s\t%s" % pair) + + +@python_2_unicode_compatible +class LogicParser(object): + """A lambda calculus expression parser.""" + + def __init__(self, type_check=False): + """ + :param type_check: bool should type checking be performed? + to their types. + """ + assert isinstance(type_check, bool) + + self._currentIndex = 0 + self._buffer = [] + self.type_check = type_check + + """A list of tuples of quote characters. The 4-tuple is comprised + of the start character, the end character, the escape character, and + a boolean indicating whether the quotes should be included in the + result. Quotes are used to signify that a token should be treated as + atomic, ignoring any special characters within the token. The escape + character allows the quote end character to be used within the quote. + If True, the boolean indicates that the final token should contain the + quote and escape characters. + This method exists to be overridden""" + self.quote_chars = [] + + self.operator_precedence = dict( + [(x, 1) for x in Tokens.LAMBDA_LIST] + + [(x, 2) for x in Tokens.NOT_LIST] + + [(APP, 3)] + + [(x, 4) for x in Tokens.EQ_LIST + Tokens.NEQ_LIST] + + [(x, 5) for x in Tokens.QUANTS] + + [(x, 6) for x in Tokens.AND_LIST] + + [(x, 7) for x in Tokens.OR_LIST] + + [(x, 8) for x in Tokens.IMP_LIST] + + [(x, 9) for x in Tokens.IFF_LIST] + + [(None, 10)] + ) + self.right_associated_operations = [APP] + + def parse(self, data, signature=None): + """ + Parse the expression. 
+ + :param data: str for the input to be parsed + :param signature: ``dict`` that maps variable names to type + strings + :returns: a parsed Expression + """ + data = data.rstrip() + + self._currentIndex = 0 + self._buffer, mapping = self.process(data) + + try: + result = self.process_next_expression(None) + if self.inRange(0): + raise UnexpectedTokenException(self._currentIndex + 1, self.token(0)) + except LogicalExpressionException as e: + msg = '%s\n%s\n%s^' % (e, data, ' ' * mapping[e.index - 1]) + raise LogicalExpressionException(None, msg) + + if self.type_check: + result.typecheck(signature) + + return result + + def process(self, data): + """Split the data into tokens""" + out = [] + mapping = {} + tokenTrie = Trie(self.get_all_symbols()) + token = '' + data_idx = 0 + token_start_idx = data_idx + while data_idx < len(data): + cur_data_idx = data_idx + quoted_token, data_idx = self.process_quoted_token(data_idx, data) + if quoted_token: + if not token: + token_start_idx = cur_data_idx + token += quoted_token + continue + + st = tokenTrie + c = data[data_idx] + symbol = '' + while c in st: + symbol += c + st = st[c] + if len(data) - data_idx > len(symbol): + c = data[data_idx + len(symbol)] + else: + break + if Trie.LEAF in st: + # token is a complete symbol + if token: + mapping[len(out)] = token_start_idx + out.append(token) + token = '' + mapping[len(out)] = data_idx + out.append(symbol) + data_idx += len(symbol) + else: + if data[data_idx] in ' \t\n': # any whitespace + if token: + mapping[len(out)] = token_start_idx + out.append(token) + token = '' + else: + if not token: + token_start_idx = data_idx + token += data[data_idx] + data_idx += 1 + if token: + mapping[len(out)] = token_start_idx + out.append(token) + mapping[len(out)] = len(data) + mapping[len(out) + 1] = len(data) + 1 + return out, mapping + + def process_quoted_token(self, data_idx, data): + token = '' + c = data[data_idx] + i = data_idx + for start, end, escape, incl_quotes in self.quote_chars: + if c == start: + if incl_quotes: + token += c + i += 1 + while data[i] != end: + if data[i] == escape: + if incl_quotes: + token += data[i] + i += 1 + if len(data) == i: # if there are no more chars + raise LogicalExpressionException( + None, + "End of input reached. " + "Escape character [%s] found at end." % escape, + ) + token += data[i] + else: + token += data[i] + i += 1 + if len(data) == i: + raise LogicalExpressionException( + None, "End of input reached. " "Expected: [%s]" % end + ) + if incl_quotes: + token += data[i] + i += 1 + if not token: + raise LogicalExpressionException(None, 'Empty quoted token found') + break + return token, i + + def get_all_symbols(self): + """This method exists to be overridden""" + return Tokens.SYMBOLS + + def inRange(self, location): + """Return TRUE if the given location is within the buffer""" + return self._currentIndex + location < len(self._buffer) + + def token(self, location=None): + """Get the next waiting token. 
If a location is given, then + return the token at currentIndex+location without advancing + currentIndex; setting it gives lookahead/lookback capability.""" + try: + if location is None: + tok = self._buffer[self._currentIndex] + self._currentIndex += 1 + else: + tok = self._buffer[self._currentIndex + location] + return tok + except IndexError: + raise ExpectedMoreTokensException(self._currentIndex + 1) + + def isvariable(self, tok): + return tok not in Tokens.TOKENS + + def process_next_expression(self, context): + """Parse the next complete expression from the stream and return it.""" + try: + tok = self.token() + except ExpectedMoreTokensException: + raise ExpectedMoreTokensException( + self._currentIndex + 1, message='Expression expected.' + ) + + accum = self.handle(tok, context) + + if not accum: + raise UnexpectedTokenException( + self._currentIndex, tok, message='Expression expected.' + ) + + return self.attempt_adjuncts(accum, context) + + def handle(self, tok, context): + """This method is intended to be overridden for logics that + use different operators or expressions""" + if self.isvariable(tok): + return self.handle_variable(tok, context) + + elif tok in Tokens.NOT_LIST: + return self.handle_negation(tok, context) + + elif tok in Tokens.LAMBDA_LIST: + return self.handle_lambda(tok, context) + + elif tok in Tokens.QUANTS: + return self.handle_quant(tok, context) + + elif tok == Tokens.OPEN: + return self.handle_open(tok, context) + + def attempt_adjuncts(self, expression, context): + cur_idx = None + while cur_idx != self._currentIndex: # while adjuncts are added + cur_idx = self._currentIndex + expression = self.attempt_EqualityExpression(expression, context) + expression = self.attempt_ApplicationExpression(expression, context) + expression = self.attempt_BooleanExpression(expression, context) + return expression + + def handle_negation(self, tok, context): + return self.make_NegatedExpression(self.process_next_expression(Tokens.NOT)) + + def make_NegatedExpression(self, expression): + return NegatedExpression(expression) + + def handle_variable(self, tok, context): + # It's either: 1) a predicate expression: sees(x,y) + # 2) an application expression: P(x) + # 3) a solo variable: john OR x + accum = self.make_VariableExpression(tok) + if self.inRange(0) and self.token(0) == Tokens.OPEN: + # The predicate has arguments + if not isinstance(accum, FunctionVariableExpression) and not isinstance( + accum, ConstantExpression + ): + raise LogicalExpressionException( + self._currentIndex, + "'%s' is an illegal predicate name. " + "Individual variables may not be used as " + "predicates." % tok, + ) + self.token() # swallow the Open Paren + + # curry the arguments + accum = self.make_ApplicationExpression( + accum, self.process_next_expression(APP) + ) + while self.inRange(0) and self.token(0) == Tokens.COMMA: + self.token() # swallow the comma + accum = self.make_ApplicationExpression( + accum, self.process_next_expression(APP) + ) + self.assertNextToken(Tokens.CLOSE) + return accum + + def get_next_token_variable(self, description): + try: + tok = self.token() + except ExpectedMoreTokensException as e: + raise ExpectedMoreTokensException(e.index, 'Variable expected.') + if isinstance(self.make_VariableExpression(tok), ConstantExpression): + raise LogicalExpressionException( + self._currentIndex, + "'%s' is an illegal variable name. " + "Constants may not be %s." 
% (tok, description), + ) + return Variable(tok) + + def handle_lambda(self, tok, context): + # Expression is a lambda expression + if not self.inRange(0): + raise ExpectedMoreTokensException( + self._currentIndex + 2, + message="Variable and Expression expected following lambda operator.", + ) + vars = [self.get_next_token_variable('abstracted')] + while True: + if not self.inRange(0) or ( + self.token(0) == Tokens.DOT and not self.inRange(1) + ): + raise ExpectedMoreTokensException( + self._currentIndex + 2, message="Expression expected." + ) + if not self.isvariable(self.token(0)): + break + # Support expressions like: \x y.M == \x.\y.M + vars.append(self.get_next_token_variable('abstracted')) + if self.inRange(0) and self.token(0) == Tokens.DOT: + self.token() # swallow the dot + + accum = self.process_next_expression(tok) + while vars: + accum = self.make_LambdaExpression(vars.pop(), accum) + return accum + + def handle_quant(self, tok, context): + # Expression is a quantified expression: some x.M + factory = self.get_QuantifiedExpression_factory(tok) + + if not self.inRange(0): + raise ExpectedMoreTokensException( + self._currentIndex + 2, + message="Variable and Expression expected following quantifier '%s'." + % tok, + ) + vars = [self.get_next_token_variable('quantified')] + while True: + if not self.inRange(0) or ( + self.token(0) == Tokens.DOT and not self.inRange(1) + ): + raise ExpectedMoreTokensException( + self._currentIndex + 2, message="Expression expected." + ) + if not self.isvariable(self.token(0)): + break + # Support expressions like: some x y.M == some x.some y.M + vars.append(self.get_next_token_variable('quantified')) + if self.inRange(0) and self.token(0) == Tokens.DOT: + self.token() # swallow the dot + + accum = self.process_next_expression(tok) + while vars: + accum = self.make_QuanifiedExpression(factory, vars.pop(), accum) + return accum + + def get_QuantifiedExpression_factory(self, tok): + """This method serves as a hook for other logic parsers that + have different quantifiers""" + if tok in Tokens.EXISTS_LIST: + return ExistsExpression + elif tok in Tokens.ALL_LIST: + return AllExpression + else: + self.assertToken(tok, Tokens.QUANTS) + + def make_QuanifiedExpression(self, factory, variable, term): + return factory(variable, term) + + def handle_open(self, tok, context): + # Expression is in parens + accum = self.process_next_expression(None) + self.assertNextToken(Tokens.CLOSE) + return accum + + def attempt_EqualityExpression(self, expression, context): + """Attempt to make an equality expression. If the next token is an + equality operator, then an EqualityExpression will be returned. + Otherwise, the parameter will be returned.""" + if self.inRange(0): + tok = self.token(0) + if tok in Tokens.EQ_LIST + Tokens.NEQ_LIST and self.has_priority( + tok, context + ): + self.token() # swallow the "=" or "!=" + expression = self.make_EqualityExpression( + expression, self.process_next_expression(tok) + ) + if tok in Tokens.NEQ_LIST: + expression = self.make_NegatedExpression(expression) + return expression + + def make_EqualityExpression(self, first, second): + """This method serves as a hook for other logic parsers that + have different equality expression classes""" + return EqualityExpression(first, second) + + def attempt_BooleanExpression(self, expression, context): + """Attempt to make a boolean expression. If the next token is a boolean + operator, then a BooleanExpression will be returned. 
Otherwise, the + parameter will be returned.""" + while self.inRange(0): + tok = self.token(0) + factory = self.get_BooleanExpression_factory(tok) + if factory and self.has_priority(tok, context): + self.token() # swallow the operator + expression = self.make_BooleanExpression( + factory, expression, self.process_next_expression(tok) + ) + else: + break + return expression + + def get_BooleanExpression_factory(self, tok): + """This method serves as a hook for other logic parsers that + have different boolean operators""" + if tok in Tokens.AND_LIST: + return AndExpression + elif tok in Tokens.OR_LIST: + return OrExpression + elif tok in Tokens.IMP_LIST: + return ImpExpression + elif tok in Tokens.IFF_LIST: + return IffExpression + else: + return None + + def make_BooleanExpression(self, factory, first, second): + return factory(first, second) + + def attempt_ApplicationExpression(self, expression, context): + """Attempt to make an application expression. The next tokens are + a list of arguments in parens, then the argument expression is a + function being applied to the arguments. Otherwise, return the + argument expression.""" + if self.has_priority(APP, context): + if self.inRange(0) and self.token(0) == Tokens.OPEN: + if ( + not isinstance(expression, LambdaExpression) + and not isinstance(expression, ApplicationExpression) + and not isinstance(expression, FunctionVariableExpression) + and not isinstance(expression, ConstantExpression) + ): + raise LogicalExpressionException( + self._currentIndex, + ("The function '%s" % expression) + + "' is not a Lambda Expression, an " + "Application Expression, or a " + "functional predicate, so it may " + "not take arguments.", + ) + self.token() # swallow then open paren + # curry the arguments + accum = self.make_ApplicationExpression( + expression, self.process_next_expression(APP) + ) + while self.inRange(0) and self.token(0) == Tokens.COMMA: + self.token() # swallow the comma + accum = self.make_ApplicationExpression( + accum, self.process_next_expression(APP) + ) + self.assertNextToken(Tokens.CLOSE) + return accum + return expression + + def make_ApplicationExpression(self, function, argument): + return ApplicationExpression(function, argument) + + def make_VariableExpression(self, name): + return VariableExpression(Variable(name)) + + def make_LambdaExpression(self, variable, term): + return LambdaExpression(variable, term) + + def has_priority(self, operation, context): + return self.operator_precedence[operation] < self.operator_precedence[ + context + ] or ( + operation in self.right_associated_operations + and self.operator_precedence[operation] == self.operator_precedence[context] + ) + + def assertNextToken(self, expected): + try: + tok = self.token() + except ExpectedMoreTokensException as e: + raise ExpectedMoreTokensException( + e.index, message="Expected token '%s'." 
% expected + ) + + if isinstance(expected, list): + if tok not in expected: + raise UnexpectedTokenException(self._currentIndex, tok, expected) + else: + if tok != expected: + raise UnexpectedTokenException(self._currentIndex, tok, expected) + + def assertToken(self, tok, expected): + if isinstance(expected, list): + if tok not in expected: + raise UnexpectedTokenException(self._currentIndex, tok, expected) + else: + if tok != expected: + raise UnexpectedTokenException(self._currentIndex, tok, expected) + + def __repr__(self): + if self.inRange(0): + msg = 'Next token: ' + self.token(0) + else: + msg = 'No more tokens' + return '<' + self.__class__.__name__ + ': ' + msg + '>' + + +def read_logic(s, logic_parser=None, encoding=None): + """ + Convert a file of First Order Formulas into a list of {Expression}s. + + :param s: the contents of the file + :type s: str + :param logic_parser: The parser to be used to parse the logical expression + :type logic_parser: LogicParser + :param encoding: the encoding of the input string, if it is binary + :type encoding: str + :return: a list of parsed formulas. + :rtype: list(Expression) + """ + if encoding is not None: + s = s.decode(encoding) + if logic_parser is None: + logic_parser = LogicParser() + + statements = [] + for linenum, line in enumerate(s.splitlines()): + line = line.strip() + if line.startswith('#') or line == '': + continue + try: + statements.append(logic_parser.parse(line)) + except LogicalExpressionException: + raise ValueError('Unable to parse line %s: %s' % (linenum, line)) + return statements + + +@total_ordering +@python_2_unicode_compatible +class Variable(object): + def __init__(self, name): + """ + :param name: the name of the variable + """ + assert isinstance(name, string_types), "%s is not a string" % name + self.name = name + + def __eq__(self, other): + return isinstance(other, Variable) and self.name == other.name + + def __ne__(self, other): + return not self == other + + def __lt__(self, other): + if not isinstance(other, Variable): + raise TypeError + return self.name < other.name + + def substitute_bindings(self, bindings): + return bindings.get(self, self) + + def __hash__(self): + return hash(self.name) + + def __str__(self): + return self.name + + def __repr__(self): + return "Variable('%s')" % self.name + + +def unique_variable(pattern=None, ignore=None): + """ + Return a new, unique variable. + + :param pattern: ``Variable`` that is being replaced. The new variable must + be the same type. + :param term: a set of ``Variable`` objects that should not be returned from + this function. 
+ :rtype: Variable + """ + if pattern is not None: + if is_indvar(pattern.name): + prefix = 'z' + elif is_funcvar(pattern.name): + prefix = 'F' + elif is_eventvar(pattern.name): + prefix = 'e0' + else: + assert False, "Cannot generate a unique constant" + else: + prefix = 'z' + + v = Variable("%s%s" % (prefix, _counter.get())) + while ignore is not None and v in ignore: + v = Variable("%s%s" % (prefix, _counter.get())) + return v + + +def skolem_function(univ_scope=None): + """ + Return a skolem function over the variables in univ_scope + param univ_scope + """ + skolem = VariableExpression(Variable('F%s' % _counter.get())) + if univ_scope: + for v in list(univ_scope): + skolem = skolem(VariableExpression(v)) + return skolem + + +@python_2_unicode_compatible +class Type(object): + def __repr__(self): + return "%s" % self + + def __hash__(self): + return hash("%s" % self) + + @classmethod + def fromstring(cls, s): + return read_type(s) + + +@python_2_unicode_compatible +class ComplexType(Type): + def __init__(self, first, second): + assert isinstance(first, Type), "%s is not a Type" % first + assert isinstance(second, Type), "%s is not a Type" % second + self.first = first + self.second = second + + def __eq__(self, other): + return ( + isinstance(other, ComplexType) + and self.first == other.first + and self.second == other.second + ) + + def __ne__(self, other): + return not self == other + + __hash__ = Type.__hash__ + + def matches(self, other): + if isinstance(other, ComplexType): + return self.first.matches(other.first) and self.second.matches(other.second) + else: + return self == ANY_TYPE + + def resolve(self, other): + if other == ANY_TYPE: + return self + elif isinstance(other, ComplexType): + f = self.first.resolve(other.first) + s = self.second.resolve(other.second) + if f and s: + return ComplexType(f, s) + else: + return None + elif self == ANY_TYPE: + return other + else: + return None + + def __str__(self): + if self == ANY_TYPE: + return "%s" % ANY_TYPE + else: + return '<%s,%s>' % (self.first, self.second) + + def str(self): + if self == ANY_TYPE: + return ANY_TYPE.str() + else: + return '(%s -> %s)' % (self.first.str(), self.second.str()) + + +class BasicType(Type): + def __eq__(self, other): + return isinstance(other, BasicType) and ("%s" % self) == ("%s" % other) + + def __ne__(self, other): + return not self == other + + __hash__ = Type.__hash__ + + def matches(self, other): + return other == ANY_TYPE or self == other + + def resolve(self, other): + if self.matches(other): + return self + else: + return None + + +@python_2_unicode_compatible +class EntityType(BasicType): + def __str__(self): + return 'e' + + def str(self): + return 'IND' + + +@python_2_unicode_compatible +class TruthValueType(BasicType): + def __str__(self): + return 't' + + def str(self): + return 'BOOL' + + +@python_2_unicode_compatible +class EventType(BasicType): + def __str__(self): + return 'v' + + def str(self): + return 'EVENT' + + +@python_2_unicode_compatible +class AnyType(BasicType, ComplexType): + def __init__(self): + pass + + @property + def first(self): + return self + + @property + def second(self): + return self + + def __eq__(self, other): + return isinstance(other, AnyType) or other.__eq__(self) + + def __ne__(self, other): + return not self == other + + __hash__ = Type.__hash__ + + def matches(self, other): + return True + + def resolve(self, other): + return other + + def __str__(self): + return '?' 
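    # Illustrative sketch (not part of the original source): how the type
    # classes above are intended to combine.  ANY_TYPE (the AnyType singleton
    # defined just below) matches everything, and complex types resolve
    # component-wise, e.g.:
    #
    #     t_et = ComplexType(EntityType(), TruthValueType())       # printed as <e,t>
    #     t_et.matches(AnyType())                                  # True
    #     ComplexType(AnyType(), TruthValueType()).resolve(t_et)   # <e,t>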
+ + def str(self): + return 'ANY' + + +TRUTH_TYPE = TruthValueType() +ENTITY_TYPE = EntityType() +EVENT_TYPE = EventType() +ANY_TYPE = AnyType() + + +def read_type(type_string): + assert isinstance(type_string, string_types) + type_string = type_string.replace(' ', '') # remove spaces + + if type_string[0] == '<': + assert type_string[-1] == '>' + paren_count = 0 + for i, char in enumerate(type_string): + if char == '<': + paren_count += 1 + elif char == '>': + paren_count -= 1 + assert paren_count > 0 + elif char == ',': + if paren_count == 1: + break + return ComplexType( + read_type(type_string[1:i]), read_type(type_string[i + 1 : -1]) + ) + elif type_string[0] == "%s" % ENTITY_TYPE: + return ENTITY_TYPE + elif type_string[0] == "%s" % TRUTH_TYPE: + return TRUTH_TYPE + elif type_string[0] == "%s" % ANY_TYPE: + return ANY_TYPE + else: + raise LogicalExpressionException(None, "Unexpected character: '%s'." % type_string[0]) + + +class TypeException(Exception): + def __init__(self, msg): + super(TypeException, self).__init__(msg) + + +class InconsistentTypeHierarchyException(TypeException): + def __init__(self, variable, expression=None): + if expression: + msg = ( + "The variable '%s' was found in multiple places with different" + " types in '%s'." % (variable, expression) + ) + else: + msg = ( + "The variable '%s' was found in multiple places with different" + " types." % (variable) + ) + super(InconsistentTypeHierarchyException, self).__init__(msg) + + +class TypeResolutionException(TypeException): + def __init__(self, expression, other_type): + super(TypeResolutionException, self).__init__( + "The type of '%s', '%s', cannot be resolved with type '%s'" + % (expression, expression.type, other_type) + ) + + +class IllegalTypeException(TypeException): + def __init__(self, expression, other_type, allowed_type): + super(IllegalTypeException, self).__init__( + "Cannot set type of %s '%s' to '%s'; must match type '%s'." + % (expression.__class__.__name__, expression, other_type, allowed_type) + ) + + +def typecheck(expressions, signature=None): + """ + Ensure correct typing across a collection of ``Expression`` objects. + :param expressions: a collection of expressions + :param signature: dict that maps variable names to types (or string + representations of types) + """ + # typecheck and create master signature + for expression in expressions: + signature = expression.typecheck(signature) + # apply master signature to all expressions + for expression in expressions[:-1]: + expression.typecheck(signature) + return signature + + +class SubstituteBindingsI(object): + """ + An interface for classes that can perform substitutions for + variables. + """ + + def substitute_bindings(self, bindings): + """ + :return: The object that is obtained by replacing + each variable bound by ``bindings`` with its values. + Aliases are already resolved. (maybe?) + :rtype: (any) + """ + raise NotImplementedError() + + def variables(self): + """ + :return: A list of all variables in this object. 
+ """ + raise NotImplementedError() + + +@python_2_unicode_compatible +class Expression(SubstituteBindingsI): + """This is the base abstract object for all logical expressions""" + + _logic_parser = LogicParser() + _type_checking_logic_parser = LogicParser(type_check=True) + + @classmethod + def fromstring(cls, s, type_check=False, signature=None): + if type_check: + return cls._type_checking_logic_parser.parse(s, signature) + else: + return cls._logic_parser.parse(s, signature) + + def __call__(self, other, *additional): + accum = self.applyto(other) + for a in additional: + accum = accum(a) + return accum + + def applyto(self, other): + assert isinstance(other, Expression), "%s is not an Expression" % other + return ApplicationExpression(self, other) + + def __neg__(self): + return NegatedExpression(self) + + def negate(self): + """If this is a negated expression, remove the negation. + Otherwise add a negation.""" + return -self + + def __and__(self, other): + if not isinstance(other, Expression): + raise TypeError("%s is not an Expression" % other) + return AndExpression(self, other) + + def __or__(self, other): + if not isinstance(other, Expression): + raise TypeError("%s is not an Expression" % other) + return OrExpression(self, other) + + def __gt__(self, other): + if not isinstance(other, Expression): + raise TypeError("%s is not an Expression" % other) + return ImpExpression(self, other) + + def __lt__(self, other): + if not isinstance(other, Expression): + raise TypeError("%s is not an Expression" % other) + return IffExpression(self, other) + + def __eq__(self, other): + raise NotImplementedError() + + def __ne__(self, other): + return not self == other + + def equiv(self, other, prover=None): + """ + Check for logical equivalence. + Pass the expression (self <-> other) to the theorem prover. + If the prover says it is valid, then the self and other are equal. + + :param other: an ``Expression`` to check equality against + :param prover: a ``nltk.inference.api.Prover`` + """ + assert isinstance(other, Expression), "%s is not an Expression" % other + + if prover is None: + from nltk.inference import Prover9 + + prover = Prover9() + bicond = IffExpression(self.simplify(), other.simplify()) + return prover.prove(bicond) + + def __hash__(self): + return hash(repr(self)) + + def substitute_bindings(self, bindings): + expr = self + for var in expr.variables(): + if var in bindings: + val = bindings[var] + if isinstance(val, Variable): + val = self.make_VariableExpression(val) + elif not isinstance(val, Expression): + raise ValueError( + 'Can not substitute a non-expression ' + 'value into an expression: %r' % (val,) + ) + # Substitute bindings in the target value. + val = val.substitute_bindings(bindings) + # Replace var w/ the target value. + expr = expr.replace(var, val) + return expr.simplify() + + def typecheck(self, signature=None): + """ + Infer and check types. Raise exceptions if necessary. 
+ + :param signature: dict that maps variable names to types (or string + representations of types) + :return: the signature, plus any additional type mappings + """ + sig = defaultdict(list) + if signature: + for key in signature: + val = signature[key] + varEx = VariableExpression(Variable(key)) + if isinstance(val, Type): + varEx.type = val + else: + varEx.type = read_type(val) + sig[key].append(varEx) + + self._set_type(signature=sig) + + return dict((key, sig[key][0].type) for key in sig) + + def findtype(self, variable): + """ + Find the type of the given variable as it is used in this expression. + For example, finding the type of "P" in "P(x) & Q(x,y)" yields "" + + :param variable: Variable + """ + raise NotImplementedError() + + def _set_type(self, other_type=ANY_TYPE, signature=None): + """ + Set the type of this expression to be the given type. Raise type + exceptions where applicable. + + :param other_type: Type + :param signature: dict(str -> list(AbstractVariableExpression)) + """ + raise NotImplementedError() + + def replace(self, variable, expression, replace_bound=False, alpha_convert=True): + """ + Replace every instance of 'variable' with 'expression' + :param variable: ``Variable`` The variable to replace + :param expression: ``Expression`` The expression with which to replace it + :param replace_bound: bool Should bound variables be replaced? + :param alpha_convert: bool Alpha convert automatically to avoid name clashes? + """ + assert isinstance(variable, Variable), "%s is not a Variable" % variable + assert isinstance(expression, Expression), ( + "%s is not an Expression" % expression + ) + + return self.visit_structured( + lambda e: e.replace(variable, expression, replace_bound, alpha_convert), + self.__class__, + ) + + def normalize(self, newvars=None): + """Rename auto-generated unique variables""" + + def get_indiv_vars(e): + if isinstance(e, IndividualVariableExpression): + return set([e]) + elif isinstance(e, AbstractVariableExpression): + return set() + else: + return e.visit( + get_indiv_vars, lambda parts: reduce(operator.or_, parts, set()) + ) + + result = self + for i, e in enumerate(sorted(get_indiv_vars(self), key=lambda e: e.variable)): + if isinstance(e, EventVariableExpression): + newVar = e.__class__(Variable('e0%s' % (i + 1))) + elif isinstance(e, IndividualVariableExpression): + newVar = e.__class__(Variable('z%s' % (i + 1))) + else: + newVar = e + result = result.replace(e.variable, newVar, True) + return result + + def visit(self, function, combinator): + """ + Recursively visit subexpressions. Apply 'function' to each + subexpression and pass the result of each function application + to the 'combinator' for aggregation: + + return combinator(map(function, self.subexpressions)) + + Bound variables are neither applied upon by the function nor given to + the combinator. + :param function: ``Function`` to call on each subexpression + :param combinator: ``Function,R>`` to combine the results of the + function calls + :return: result of combination ``R`` + """ + raise NotImplementedError() + + def visit_structured(self, function, combinator): + """ + Recursively visit subexpressions. Apply 'function' to each + subexpression and pass the result of each function application + to the 'combinator' for aggregation. The combinator must have + the same signature as the constructor. The function is not + applied to bound variables, but they are passed to the + combinator. 
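        A short indicative sketch of ``visit`` (``visit_structured`` is used
        analogously, but its combinator must accept the constructor's
        signature):

            >>> e = Expression.fromstring('man(x) & walks(x)')
            >>> e.visit(str, ', '.join)
            'man(x), walks(x)'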
+ :param function: ``Function`` to call on each subexpression + :param combinator: ``Function`` with the same signature as the + constructor, to combine the results of the function calls + :return: result of combination + """ + return self.visit(function, lambda parts: combinator(*parts)) + + def __repr__(self): + return '<%s %s>' % (self.__class__.__name__, self) + + def __str__(self): + return self.str() + + def variables(self): + """ + Return a set of all the variables for binding substitution. + The variables returned include all free (non-bound) individual + variables and any variable starting with '?' or '@'. + :return: set of ``Variable`` objects + """ + return self.free() | set( + p for p in self.predicates() | self.constants() if re.match('^[?@]', p.name) + ) + + def free(self): + """ + Return a set of all the free (non-bound) variables. This includes + both individual and predicate variables, but not constants. + :return: set of ``Variable`` objects + """ + return self.visit( + lambda e: e.free(), lambda parts: reduce(operator.or_, parts, set()) + ) + + def constants(self): + """ + Return a set of individual constants (non-predicates). + :return: set of ``Variable`` objects + """ + return self.visit( + lambda e: e.constants(), lambda parts: reduce(operator.or_, parts, set()) + ) + + def predicates(self): + """ + Return a set of predicates (constants, not variables). + :return: set of ``Variable`` objects + """ + return self.visit( + lambda e: e.predicates(), lambda parts: reduce(operator.or_, parts, set()) + ) + + def simplify(self): + """ + :return: beta-converted version of this expression + """ + return self.visit_structured(lambda e: e.simplify(), self.__class__) + + def make_VariableExpression(self, variable): + return VariableExpression(variable) + + +@python_2_unicode_compatible +class ApplicationExpression(Expression): + r""" + This class is used to represent two related types of logical expressions. + + The first is a Predicate Expression, such as "P(x,y)". A predicate + expression is comprised of a ``FunctionVariableExpression`` or + ``ConstantExpression`` as the predicate and a list of Expressions as the + arguments. + + The second is a an application of one expression to another, such as + "(\x.dog(x))(fido)". + + The reason Predicate Expressions are treated as Application Expressions is + that the Variable Expression predicate of the expression may be replaced + with another Expression, such as a LambdaExpression, which would mean that + the Predicate should be thought of as being applied to the arguments. + + The logical expression reader will always curry arguments in a application expression. + So, "\x y.see(x,y)(john,mary)" will be represented internally as + "((\x y.(see(x))(y))(john))(mary)". This simplifies the internals since + there will always be exactly one argument in an application. + + The str() method will usually print the curried forms of application + expressions. The one exception is when the the application expression is + really a predicate expression (ie, underlying function is an + ``AbstractVariableExpression``). This means that the example from above + will be returned as "(\x y.see(x,y)(john))(mary)". 
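    A short indicative sketch of the behaviour described above (outputs are
    illustrative):

        >>> e = Expression.fromstring('sees(john,mary)')
        >>> e.is_atom()
        True
        >>> print(e.pred)
        sees
        >>> e.args
        [<ConstantExpression john>, <ConstantExpression mary>]
        >>> print(Expression.fromstring(r'\x y.sees(x,y)(john,mary)').simplify())
        sees(john,mary)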
+ """ + + def __init__(self, function, argument): + """ + :param function: ``Expression``, for the function expression + :param argument: ``Expression``, for the argument + """ + assert isinstance(function, Expression), "%s is not an Expression" % function + assert isinstance(argument, Expression), "%s is not an Expression" % argument + self.function = function + self.argument = argument + + def simplify(self): + function = self.function.simplify() + argument = self.argument.simplify() + if isinstance(function, LambdaExpression): + return function.term.replace(function.variable, argument).simplify() + else: + return self.__class__(function, argument) + + @property + def type(self): + if isinstance(self.function.type, ComplexType): + return self.function.type.second + else: + return ANY_TYPE + + def _set_type(self, other_type=ANY_TYPE, signature=None): + """:see Expression._set_type()""" + assert isinstance(other_type, Type) + + if signature is None: + signature = defaultdict(list) + + self.argument._set_type(ANY_TYPE, signature) + try: + self.function._set_type( + ComplexType(self.argument.type, other_type), signature + ) + except TypeResolutionException: + raise TypeException( + "The function '%s' is of type '%s' and cannot be applied " + "to '%s' of type '%s'. Its argument must match type '%s'." + % ( + self.function, + self.function.type, + self.argument, + self.argument.type, + self.function.type.first, + ) + ) + + def findtype(self, variable): + """:see Expression.findtype()""" + assert isinstance(variable, Variable), "%s is not a Variable" % variable + if self.is_atom(): + function, args = self.uncurry() + else: + # It's not a predicate expression ("P(x,y)"), so leave args curried + function = self.function + args = [self.argument] + + found = [arg.findtype(variable) for arg in [function] + args] + + unique = [] + for f in found: + if f != ANY_TYPE: + if unique: + for u in unique: + if f.matches(u): + break + else: + unique.append(f) + + if len(unique) == 1: + return list(unique)[0] + else: + return ANY_TYPE + + def constants(self): + """:see: Expression.constants()""" + if isinstance(self.function, AbstractVariableExpression): + function_constants = set() + else: + function_constants = self.function.constants() + return function_constants | self.argument.constants() + + def predicates(self): + """:see: Expression.predicates()""" + if isinstance(self.function, ConstantExpression): + function_preds = set([self.function.variable]) + else: + function_preds = self.function.predicates() + return function_preds | self.argument.predicates() + + def visit(self, function, combinator): + """:see: Expression.visit()""" + return combinator([function(self.function), function(self.argument)]) + + def __eq__(self, other): + return ( + isinstance(other, ApplicationExpression) + and self.function == other.function + and self.argument == other.argument + ) + + def __ne__(self, other): + return not self == other + + __hash__ = Expression.__hash__ + + def __str__(self): + # uncurry the arguments and find the base function + if self.is_atom(): + function, args = self.uncurry() + arg_str = ','.join("%s" % arg for arg in args) + else: + # Leave arguments curried + function = self.function + arg_str = "%s" % self.argument + + function_str = "%s" % function + parenthesize_function = False + if isinstance(function, LambdaExpression): + if isinstance(function.term, ApplicationExpression): + if not isinstance(function.term.function, AbstractVariableExpression): + parenthesize_function = True + elif not 
isinstance(function.term, BooleanExpression): + parenthesize_function = True + elif isinstance(function, ApplicationExpression): + parenthesize_function = True + + if parenthesize_function: + function_str = Tokens.OPEN + function_str + Tokens.CLOSE + + return function_str + Tokens.OPEN + arg_str + Tokens.CLOSE + + def uncurry(self): + """ + Uncurry this application expression + + return: A tuple (base-function, arg-list) + """ + function = self.function + args = [self.argument] + while isinstance(function, ApplicationExpression): + # (\x.\y.sees(x,y)(john))(mary) + args.insert(0, function.argument) + function = function.function + return (function, args) + + @property + def pred(self): + """ + Return uncurried base-function. + If this is an atom, then the result will be a variable expression. + Otherwise, it will be a lambda expression. + """ + return self.uncurry()[0] + + @property + def args(self): + """ + Return uncurried arg-list + """ + return self.uncurry()[1] + + def is_atom(self): + """ + Is this expression an atom (as opposed to a lambda expression applied + to a term)? + """ + return isinstance(self.pred, AbstractVariableExpression) + + +@total_ordering +@python_2_unicode_compatible +class AbstractVariableExpression(Expression): + """This class represents a variable to be used as a predicate or entity""" + + def __init__(self, variable): + """ + :param variable: ``Variable``, for the variable + """ + assert isinstance(variable, Variable), "%s is not a Variable" % variable + self.variable = variable + + def simplify(self): + return self + + def replace(self, variable, expression, replace_bound=False, alpha_convert=True): + """:see: Expression.replace()""" + assert isinstance(variable, Variable), "%s is not an Variable" % variable + assert isinstance(expression, Expression), ( + "%s is not an Expression" % expression + ) + if self.variable == variable: + return expression + else: + return self + + def _set_type(self, other_type=ANY_TYPE, signature=None): + """:see Expression._set_type()""" + assert isinstance(other_type, Type) + + if signature is None: + signature = defaultdict(list) + + resolution = other_type + for varEx in signature[self.variable.name]: + resolution = varEx.type.resolve(resolution) + if not resolution: + raise InconsistentTypeHierarchyException(self) + + signature[self.variable.name].append(self) + for varEx in signature[self.variable.name]: + varEx.type = resolution + + def findtype(self, variable): + """:see Expression.findtype()""" + assert isinstance(variable, Variable), "%s is not a Variable" % variable + if self.variable == variable: + return self.type + else: + return ANY_TYPE + + def predicates(self): + """:see: Expression.predicates()""" + return set() + + def __eq__(self, other): + """Allow equality between instances of ``AbstractVariableExpression`` + subtypes.""" + return ( + isinstance(other, AbstractVariableExpression) + and self.variable == other.variable + ) + + def __ne__(self, other): + return not self == other + + def __lt__(self, other): + if not isinstance(other, AbstractVariableExpression): + raise TypeError + return self.variable < other.variable + + __hash__ = Expression.__hash__ + + def __str__(self): + return "%s" % self.variable + + +class IndividualVariableExpression(AbstractVariableExpression): + """This class represents variables that take the form of a single lowercase + character (other than 'e') followed by zero or more digits.""" + + def _set_type(self, other_type=ANY_TYPE, signature=None): + """:see Expression._set_type()""" + 
assert isinstance(other_type, Type) + + if signature is None: + signature = defaultdict(list) + + if not other_type.matches(ENTITY_TYPE): + raise IllegalTypeException(self, other_type, ENTITY_TYPE) + + signature[self.variable.name].append(self) + + def _get_type(self): + return ENTITY_TYPE + + type = property(_get_type, _set_type) + + def free(self): + """:see: Expression.free()""" + return set([self.variable]) + + def constants(self): + """:see: Expression.constants()""" + return set() + + +class FunctionVariableExpression(AbstractVariableExpression): + """This class represents variables that take the form of a single uppercase + character followed by zero or more digits.""" + + type = ANY_TYPE + + def free(self): + """:see: Expression.free()""" + return set([self.variable]) + + def constants(self): + """:see: Expression.constants()""" + return set() + + +class EventVariableExpression(IndividualVariableExpression): + """This class represents variables that take the form of a single lowercase + 'e' character followed by zero or more digits.""" + + type = EVENT_TYPE + + +class ConstantExpression(AbstractVariableExpression): + """This class represents variables that do not take the form of a single + character followed by zero or more digits.""" + + type = ENTITY_TYPE + + def _set_type(self, other_type=ANY_TYPE, signature=None): + """:see Expression._set_type()""" + assert isinstance(other_type, Type) + + if signature is None: + signature = defaultdict(list) + + if other_type == ANY_TYPE: + # entity type by default, for individuals + resolution = ENTITY_TYPE + else: + resolution = other_type + if self.type != ENTITY_TYPE: + resolution = resolution.resolve(self.type) + + for varEx in signature[self.variable.name]: + resolution = varEx.type.resolve(resolution) + if not resolution: + raise InconsistentTypeHierarchyException(self) + + signature[self.variable.name].append(self) + for varEx in signature[self.variable.name]: + varEx.type = resolution + + def free(self): + """:see: Expression.free()""" + return set() + + def constants(self): + """:see: Expression.constants()""" + return set([self.variable]) + + +def VariableExpression(variable): + """ + This is a factory method that instantiates and returns a subtype of + ``AbstractVariableExpression`` appropriate for the given variable. + """ + assert isinstance(variable, Variable), "%s is not a Variable" % variable + if is_indvar(variable.name): + return IndividualVariableExpression(variable) + elif is_funcvar(variable.name): + return FunctionVariableExpression(variable) + elif is_eventvar(variable.name): + return EventVariableExpression(variable) + else: + return ConstantExpression(variable) + + +class VariableBinderExpression(Expression): + """This an abstract class for any Expression that binds a variable in an + Expression. 
This includes LambdaExpressions and Quantified Expressions""" + + def __init__(self, variable, term): + """ + :param variable: ``Variable``, for the variable + :param term: ``Expression``, for the term + """ + assert isinstance(variable, Variable), "%s is not a Variable" % variable + assert isinstance(term, Expression), "%s is not an Expression" % term + self.variable = variable + self.term = term + + def replace(self, variable, expression, replace_bound=False, alpha_convert=True): + """:see: Expression.replace()""" + assert isinstance(variable, Variable), "%s is not a Variable" % variable + assert isinstance(expression, Expression), ( + "%s is not an Expression" % expression + ) + # if the bound variable is the thing being replaced + if self.variable == variable: + if replace_bound: + assert isinstance(expression, AbstractVariableExpression), ( + "%s is not a AbstractVariableExpression" % expression + ) + return self.__class__( + expression.variable, + self.term.replace(variable, expression, True, alpha_convert), + ) + else: + return self + else: + # if the bound variable appears in the expression, then it must + # be alpha converted to avoid a conflict + if alpha_convert and self.variable in expression.free(): + self = self.alpha_convert(unique_variable(pattern=self.variable)) + + # replace in the term + return self.__class__( + self.variable, + self.term.replace(variable, expression, replace_bound, alpha_convert), + ) + + def alpha_convert(self, newvar): + """Rename all occurrences of the variable introduced by this variable + binder in the expression to ``newvar``. + :param newvar: ``Variable``, for the new variable + """ + assert isinstance(newvar, Variable), "%s is not a Variable" % newvar + return self.__class__( + newvar, self.term.replace(self.variable, VariableExpression(newvar), True) + ) + + def free(self): + """:see: Expression.free()""" + return self.term.free() - set([self.variable]) + + def findtype(self, variable): + """:see Expression.findtype()""" + assert isinstance(variable, Variable), "%s is not a Variable" % variable + if variable == self.variable: + return ANY_TYPE + else: + return self.term.findtype(variable) + + def visit(self, function, combinator): + """:see: Expression.visit()""" + return combinator([function(self.term)]) + + def visit_structured(self, function, combinator): + """:see: Expression.visit_structured()""" + return combinator(self.variable, function(self.term)) + + def __eq__(self, other): + r"""Defines equality modulo alphabetic variance. If we are comparing + \x.M and \y.N, then check equality of M and N[x/y].""" + if isinstance(self, other.__class__) or isinstance(other, self.__class__): + if self.variable == other.variable: + return self.term == other.term + else: + # Comparing \x.M and \y.N. Relabel y in N with x and continue. 
+ varex = VariableExpression(self.variable) + return self.term == other.term.replace(other.variable, varex) + else: + return False + + def __ne__(self, other): + return not self == other + + __hash__ = Expression.__hash__ + + +@python_2_unicode_compatible +class LambdaExpression(VariableBinderExpression): + @property + def type(self): + return ComplexType(self.term.findtype(self.variable), self.term.type) + + def _set_type(self, other_type=ANY_TYPE, signature=None): + """:see Expression._set_type()""" + assert isinstance(other_type, Type) + + if signature is None: + signature = defaultdict(list) + + self.term._set_type(other_type.second, signature) + if not self.type.resolve(other_type): + raise TypeResolutionException(self, other_type) + + def __str__(self): + variables = [self.variable] + term = self.term + while term.__class__ == self.__class__: + variables.append(term.variable) + term = term.term + return ( + Tokens.LAMBDA + + ' '.join("%s" % v for v in variables) + + Tokens.DOT + + "%s" % term + ) + + +@python_2_unicode_compatible +class QuantifiedExpression(VariableBinderExpression): + @property + def type(self): + return TRUTH_TYPE + + def _set_type(self, other_type=ANY_TYPE, signature=None): + """:see Expression._set_type()""" + assert isinstance(other_type, Type) + + if signature is None: + signature = defaultdict(list) + + if not other_type.matches(TRUTH_TYPE): + raise IllegalTypeException(self, other_type, TRUTH_TYPE) + self.term._set_type(TRUTH_TYPE, signature) + + def __str__(self): + variables = [self.variable] + term = self.term + while term.__class__ == self.__class__: + variables.append(term.variable) + term = term.term + return ( + self.getQuantifier() + + ' ' + + ' '.join("%s" % v for v in variables) + + Tokens.DOT + + "%s" % term + ) + + +class ExistsExpression(QuantifiedExpression): + def getQuantifier(self): + return Tokens.EXISTS + + +class AllExpression(QuantifiedExpression): + def getQuantifier(self): + return Tokens.ALL + + +@python_2_unicode_compatible +class NegatedExpression(Expression): + def __init__(self, term): + assert isinstance(term, Expression), "%s is not an Expression" % term + self.term = term + + @property + def type(self): + return TRUTH_TYPE + + def _set_type(self, other_type=ANY_TYPE, signature=None): + """:see Expression._set_type()""" + assert isinstance(other_type, Type) + + if signature is None: + signature = defaultdict(list) + + if not other_type.matches(TRUTH_TYPE): + raise IllegalTypeException(self, other_type, TRUTH_TYPE) + self.term._set_type(TRUTH_TYPE, signature) + + def findtype(self, variable): + assert isinstance(variable, Variable), "%s is not a Variable" % variable + return self.term.findtype(variable) + + def visit(self, function, combinator): + """:see: Expression.visit()""" + return combinator([function(self.term)]) + + def negate(self): + """:see: Expression.negate()""" + return self.term + + def __eq__(self, other): + return isinstance(other, NegatedExpression) and self.term == other.term + + def __ne__(self, other): + return not self == other + + __hash__ = Expression.__hash__ + + def __str__(self): + return Tokens.NOT + "%s" % self.term + + +@python_2_unicode_compatible +class BinaryExpression(Expression): + def __init__(self, first, second): + assert isinstance(first, Expression), "%s is not an Expression" % first + assert isinstance(second, Expression), "%s is not an Expression" % second + self.first = first + self.second = second + + @property + def type(self): + return TRUTH_TYPE + + def findtype(self, variable): + 
""":see Expression.findtype()""" + assert isinstance(variable, Variable), "%s is not a Variable" % variable + f = self.first.findtype(variable) + s = self.second.findtype(variable) + if f == s or s == ANY_TYPE: + return f + elif f == ANY_TYPE: + return s + else: + return ANY_TYPE + + def visit(self, function, combinator): + """:see: Expression.visit()""" + return combinator([function(self.first), function(self.second)]) + + def __eq__(self, other): + return ( + (isinstance(self, other.__class__) or isinstance(other, self.__class__)) + and self.first == other.first + and self.second == other.second + ) + + def __ne__(self, other): + return not self == other + + __hash__ = Expression.__hash__ + + def __str__(self): + first = self._str_subex(self.first) + second = self._str_subex(self.second) + return Tokens.OPEN + first + ' ' + self.getOp() + ' ' + second + Tokens.CLOSE + + def _str_subex(self, subex): + return "%s" % subex + + +class BooleanExpression(BinaryExpression): + def _set_type(self, other_type=ANY_TYPE, signature=None): + """:see Expression._set_type()""" + assert isinstance(other_type, Type) + + if signature is None: + signature = defaultdict(list) + + if not other_type.matches(TRUTH_TYPE): + raise IllegalTypeException(self, other_type, TRUTH_TYPE) + self.first._set_type(TRUTH_TYPE, signature) + self.second._set_type(TRUTH_TYPE, signature) + + +class AndExpression(BooleanExpression): + """This class represents conjunctions""" + + def getOp(self): + return Tokens.AND + + def _str_subex(self, subex): + s = "%s" % subex + if isinstance(subex, AndExpression): + return s[1:-1] + return s + + +class OrExpression(BooleanExpression): + """This class represents disjunctions""" + + def getOp(self): + return Tokens.OR + + def _str_subex(self, subex): + s = "%s" % subex + if isinstance(subex, OrExpression): + return s[1:-1] + return s + + +class ImpExpression(BooleanExpression): + """This class represents implications""" + + def getOp(self): + return Tokens.IMP + + +class IffExpression(BooleanExpression): + """This class represents biconditionals""" + + def getOp(self): + return Tokens.IFF + + +class EqualityExpression(BinaryExpression): + """This class represents equality expressions like "(x = y)".""" + + def _set_type(self, other_type=ANY_TYPE, signature=None): + """:see Expression._set_type()""" + assert isinstance(other_type, Type) + + if signature is None: + signature = defaultdict(list) + + if not other_type.matches(TRUTH_TYPE): + raise IllegalTypeException(self, other_type, TRUTH_TYPE) + self.first._set_type(ENTITY_TYPE, signature) + self.second._set_type(ENTITY_TYPE, signature) + + def getOp(self): + return Tokens.EQ + + +### Utilities + + +class LogicalExpressionException(Exception): + def __init__(self, index, message): + self.index = index + Exception.__init__(self, message) + + +class UnexpectedTokenException(LogicalExpressionException): + def __init__(self, index, unexpected=None, expected=None, message=None): + if unexpected and expected: + msg = "Unexpected token: '%s'. " "Expected token '%s'." % ( + unexpected, + expected, + ) + elif unexpected: + msg = "Unexpected token: '%s'." % unexpected + if message: + msg += ' ' + message + else: + msg = "Expected token '%s'." % expected + LogicalExpressionException.__init__(self, index, msg) + + +class ExpectedMoreTokensException(LogicalExpressionException): + def __init__(self, index, message=None): + if not message: + message = 'More tokens expected.' + LogicalExpressionException.__init__( + self, index, 'End of input found. 
' + message + ) + + +def is_indvar(expr): + """ + An individual variable must be a single lowercase character other than 'e', + followed by zero or more digits. + + :param expr: str + :return: bool True if expr is of the correct form + """ + assert isinstance(expr, string_types), "%s is not a string" % expr + return re.match(r'^[a-df-z]\d*$', expr) is not None + + +def is_funcvar(expr): + """ + A function variable must be a single uppercase character followed by + zero or more digits. + + :param expr: str + :return: bool True if expr is of the correct form + """ + assert isinstance(expr, string_types), "%s is not a string" % expr + return re.match(r'^[A-Z]\d*$', expr) is not None + + +def is_eventvar(expr): + """ + An event variable must be a single lowercase 'e' character followed by + zero or more digits. + + :param expr: str + :return: bool True if expr is of the correct form + """ + assert isinstance(expr, string_types), "%s is not a string" % expr + return re.match(r'^e\d*$', expr) is not None + + +def demo(): + lexpr = Expression.fromstring + print('=' * 20 + 'Test reader' + '=' * 20) + print(lexpr(r'john')) + print(lexpr(r'man(x)')) + print(lexpr(r'-man(x)')) + print(lexpr(r'(man(x) & tall(x) & walks(x))')) + print(lexpr(r'exists x.(man(x) & tall(x) & walks(x))')) + print(lexpr(r'\x.man(x)')) + print(lexpr(r'\x.man(x)(john)')) + print(lexpr(r'\x y.sees(x,y)')) + print(lexpr(r'\x y.sees(x,y)(a,b)')) + print(lexpr(r'(\x.exists y.walks(x,y))(x)')) + print(lexpr(r'exists x.x = y')) + print(lexpr(r'exists x.(x = y)')) + print(lexpr('P(x) & x=y & P(y)')) + print(lexpr(r'\P Q.exists x.(P(x) & Q(x))')) + print(lexpr(r'man(x) <-> tall(x)')) + + print('=' * 20 + 'Test simplify' + '=' * 20) + print(lexpr(r'\x.\y.sees(x,y)(john)(mary)').simplify()) + print(lexpr(r'\x.\y.sees(x,y)(john, mary)').simplify()) + print(lexpr(r'all x.(man(x) & (\x.exists y.walks(x,y))(x))').simplify()) + print(lexpr(r'(\P.\Q.exists x.(P(x) & Q(x)))(\x.dog(x))(\x.bark(x))').simplify()) + + print('=' * 20 + 'Test alpha conversion and binder expression equality' + '=' * 20) + e1 = lexpr('exists x.P(x)') + print(e1) + e2 = e1.alpha_convert(Variable('z')) + print(e2) + print(e1 == e2) + + +def demo_errors(): + print('=' * 20 + 'Test reader errors' + '=' * 20) + demoException('(P(x) & Q(x)') + demoException('((P(x) &) & Q(x))') + demoException('P(x) -> ') + demoException('P(x') + demoException('P(x,') + demoException('P(x,)') + demoException('exists') + demoException('exists x.') + demoException('\\') + demoException('\\ x y.') + demoException('P(x)Q(x)') + demoException('(P(x)Q(x)') + demoException('exists x -> y') + + +def demoException(s): + try: + Expression.fromstring(s) + except LogicalExpressionException as e: + print("%s: %s" % (e.__class__.__name__, e)) + + +def printtype(ex): + print("%s : %s" % (ex.str(), ex.type)) + + +if __name__ == '__main__': + demo() +# demo_errors() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/relextract.py b/venv.bak/lib/python3.7/site-packages/nltk/sem/relextract.py new file mode 100644 index 0000000..5837f84 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sem/relextract.py @@ -0,0 +1,548 @@ +# Natural Language Toolkit: Relation Extraction +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ewan Klein +# URL: +# For license information, see LICENSE.TXT + +""" +Code for extracting relational triples from the ieer and conll2002 corpora. + +Relations are stored internally as dictionaries ('reldicts'). + +The two serialization outputs are "rtuple" and "clause". 
+ +- An rtuple is a tuple of the form ``(subj, filler, obj)``, + where ``subj`` and ``obj`` are pairs of Named Entity mentions, and ``filler`` is the string of words + occurring between ``sub`` and ``obj`` (with no intervening NEs). Strings are printed via ``repr()`` to + circumvent locale variations in rendering utf-8 encoded strings. +- A clause is an atom of the form ``relsym(subjsym, objsym)``, + where the relation, subject and object have been canonicalized to single strings. +""" +from __future__ import print_function + +# todo: get a more general solution to canonicalized symbols for clauses -- maybe use xmlcharrefs? + +from collections import defaultdict +import re + +from six.moves import html_entities + +# Dictionary that associates corpora with NE classes +NE_CLASSES = { + 'ieer': [ + 'LOCATION', + 'ORGANIZATION', + 'PERSON', + 'DURATION', + 'DATE', + 'CARDINAL', + 'PERCENT', + 'MONEY', + 'MEASURE', + ], + 'conll2002': ['LOC', 'PER', 'ORG'], + 'ace': [ + 'LOCATION', + 'ORGANIZATION', + 'PERSON', + 'DURATION', + 'DATE', + 'CARDINAL', + 'PERCENT', + 'MONEY', + 'MEASURE', + 'FACILITY', + 'GPE', + ], +} + +# Allow abbreviated class labels +short2long = dict(LOC='LOCATION', ORG='ORGANIZATION', PER='PERSON') +long2short = dict(LOCATION='LOC', ORGANIZATION='ORG', PERSON='PER') + + +def _expand(type): + """ + Expand an NE class name. + :type type: str + :rtype: str + """ + try: + return short2long[type] + except KeyError: + return type + + +def class_abbrev(type): + """ + Abbreviate an NE class name. + :type type: str + :rtype: str + """ + try: + return long2short[type] + except KeyError: + return type + + +def _join(lst, sep=' ', untag=False): + """ + Join a list into a string, turning tags tuples into tag strings or just words. + :param untag: if ``True``, omit the tag from tagged input strings. + :type lst: list + :rtype: str + """ + try: + return sep.join(lst) + except TypeError: + if untag: + return sep.join(tup[0] for tup in lst) + from nltk.tag import tuple2str + + return sep.join(tuple2str(tup) for tup in lst) + + +def descape_entity(m, defs=html_entities.entitydefs): + """ + Translate one entity to its ISO Latin value. + Inspired by example from effbot.org + + + """ + # s = 'mcglashan_&_sarrail' + # l = ['mcglashan', '&', 'sarrail'] + # pattern = re.compile("&(\w+?);") + # new = list2sym(l) + # s = pattern.sub(descape_entity, s) + # print s, new + try: + return defs[m.group(1)] + + except KeyError: + return m.group(0) # use as is + + +def list2sym(lst): + """ + Convert a list of strings into a canonical symbol. + :type lst: list + :return: a Unicode string without whitespace + :rtype: unicode + """ + sym = _join(lst, '_', untag=True) + sym = sym.lower() + ENT = re.compile("&(\w+?);") + sym = ENT.sub(descape_entity, sym) + sym = sym.replace('.', '') + return sym + + +def tree2semi_rel(tree): + """ + Group a chunk structure into a list of 'semi-relations' of the form (list(str), ``Tree``). + + In order to facilitate the construction of (``Tree``, string, ``Tree``) triples, this + identifies pairs whose first member is a list (possibly empty) of terminal + strings, and whose second member is a ``Tree`` of the form (NE_label, terminals). 
+ + :param tree: a chunk tree + :return: a list of pairs (list(str), ``Tree``) + :rtype: list of tuple + """ + + from nltk.tree import Tree + + semi_rels = [] + semi_rel = [[], None] + + for dtr in tree: + if not isinstance(dtr, Tree): + semi_rel[0].append(dtr) + else: + # dtr is a Tree + semi_rel[1] = dtr + semi_rels.append(semi_rel) + semi_rel = [[], None] + return semi_rels + + +def semi_rel2reldict(pairs, window=5, trace=False): + """ + Converts the pairs generated by ``tree2semi_rel`` into a 'reldict': a dictionary which + stores information about the subject and object NEs plus the filler between them. + Additionally, a left and right context of length =< window are captured (within + a given input sentence). + + :param pairs: a pair of list(str) and ``Tree``, as generated by + :param window: a threshold for the number of items to include in the left and right context + :type window: int + :return: 'relation' dictionaries whose keys are 'lcon', 'subjclass', 'subjtext', 'subjsym', 'filler', objclass', objtext', 'objsym' and 'rcon' + :rtype: list(defaultdict) + """ + result = [] + while len(pairs) > 2: + reldict = defaultdict(str) + reldict['lcon'] = _join(pairs[0][0][-window:]) + reldict['subjclass'] = pairs[0][1].label() + reldict['subjtext'] = _join(pairs[0][1].leaves()) + reldict['subjsym'] = list2sym(pairs[0][1].leaves()) + reldict['filler'] = _join(pairs[1][0]) + reldict['untagged_filler'] = _join(pairs[1][0], untag=True) + reldict['objclass'] = pairs[1][1].label() + reldict['objtext'] = _join(pairs[1][1].leaves()) + reldict['objsym'] = list2sym(pairs[1][1].leaves()) + reldict['rcon'] = _join(pairs[2][0][:window]) + if trace: + print( + "(%s(%s, %s)" + % ( + reldict['untagged_filler'], + reldict['subjclass'], + reldict['objclass'], + ) + ) + result.append(reldict) + pairs = pairs[1:] + return result + + +def extract_rels(subjclass, objclass, doc, corpus='ace', pattern=None, window=10): + """ + Filter the output of ``semi_rel2reldict`` according to specified NE classes and a filler pattern. + + The parameters ``subjclass`` and ``objclass`` can be used to restrict the + Named Entities to particular types (any of 'LOCATION', 'ORGANIZATION', + 'PERSON', 'DURATION', 'DATE', 'CARDINAL', 'PERCENT', 'MONEY', 'MEASURE'). + + :param subjclass: the class of the subject Named Entity. + :type subjclass: str + :param objclass: the class of the object Named Entity. + :type objclass: str + :param doc: input document + :type doc: ieer document or a list of chunk trees + :param corpus: name of the corpus to take as input; possible values are + 'ieer' and 'conll2002' + :type corpus: str + :param pattern: a regular expression for filtering the fillers of + retrieved triples. 
+ :type pattern: SRE_Pattern + :param window: filters out fillers which exceed this threshold + :type window: int + :return: see ``mk_reldicts`` + :rtype: list(defaultdict) + """ + + if subjclass and subjclass not in NE_CLASSES[corpus]: + if _expand(subjclass) in NE_CLASSES[corpus]: + subjclass = _expand(subjclass) + else: + raise ValueError( + "your value for the subject type has not been recognized: %s" + % subjclass + ) + if objclass and objclass not in NE_CLASSES[corpus]: + if _expand(objclass) in NE_CLASSES[corpus]: + objclass = _expand(objclass) + else: + raise ValueError( + "your value for the object type has not been recognized: %s" % objclass + ) + + if corpus == 'ace' or corpus == 'conll2002': + pairs = tree2semi_rel(doc) + elif corpus == 'ieer': + pairs = tree2semi_rel(doc.text) + tree2semi_rel(doc.headline) + else: + raise ValueError("corpus type not recognized") + + reldicts = semi_rel2reldict(pairs) + + relfilter = lambda x: ( + x['subjclass'] == subjclass + and len(x['filler'].split()) <= window + and pattern.match(x['filler']) + and x['objclass'] == objclass + ) + + return list(filter(relfilter, reldicts)) + + +def rtuple(reldict, lcon=False, rcon=False): + """ + Pretty print the reldict as an rtuple. + :param reldict: a relation dictionary + :type reldict: defaultdict + """ + items = [ + class_abbrev(reldict['subjclass']), + reldict['subjtext'], + reldict['filler'], + class_abbrev(reldict['objclass']), + reldict['objtext'], + ] + format = '[%s: %r] %r [%s: %r]' + if lcon: + items = [reldict['lcon']] + items + format = '...%r)' + format + if rcon: + items.append(reldict['rcon']) + format = format + '(%r...' + printargs = tuple(items) + return format % printargs + + +def clause(reldict, relsym): + """ + Print the relation in clausal form. + :param reldict: a relation dictionary + :type reldict: defaultdict + :param relsym: a label for the relation + :type relsym: str + """ + items = (relsym, reldict['subjsym'], reldict['objsym']) + return "%s(%r, %r)" % items + + +####################################################### +# Demos of relation extraction with regular expressions +####################################################### + +############################################ +# Example of in(ORG, LOC) +############################################ +def in_demo(trace=0, sql=True): + """ + Select pairs of organizations and locations whose mentions occur with an + intervening occurrence of the preposition "in". + + If the sql parameter is set to True, then the entity pairs are loaded into + an in-memory database, and subsequently pulled out using an SQL "SELECT" + query. 
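+ + Illustrative call (a sketch; it assumes the ``ieer`` corpus is installed, and passing ``sql=False`` simply skips the database step): + + >>> in_demo(trace=0, sql=False) # doctest: +SKIP + + Each extracted relation is printed as a clause of the form ``IN(subj, obj)``.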
+ """ + from nltk.corpus import ieer + + if sql: + try: + import sqlite3 + + connection = sqlite3.connect(":memory:") + connection.text_factory = sqlite3.OptimizedUnicode + cur = connection.cursor() + cur.execute( + """create table Locations + (OrgName text, LocationName text, DocID text)""" + ) + except ImportError: + import warnings + + warnings.warn("Cannot import sqlite; sql flag will be ignored.") + + IN = re.compile(r'.*\bin\b(?!\b.+ing)') + + print() + print("IEER: in(ORG, LOC) -- just the clauses:") + print("=" * 45) + + for file in ieer.fileids(): + for doc in ieer.parsed_docs(file): + if trace: + print(doc.docno) + print("=" * 15) + for rel in extract_rels('ORG', 'LOC', doc, corpus='ieer', pattern=IN): + print(clause(rel, relsym='IN')) + if sql: + try: + rtuple = (rel['subjtext'], rel['objtext'], doc.docno) + cur.execute( + """insert into Locations + values (?, ?, ?)""", + rtuple, + ) + connection.commit() + except NameError: + pass + + if sql: + try: + cur.execute( + """select OrgName from Locations + where LocationName = 'Atlanta'""" + ) + print() + print("Extract data from SQL table: ORGs in Atlanta") + print("-" * 15) + for row in cur: + print(row) + except NameError: + pass + + +############################################ +# Example of has_role(PER, LOC) +############################################ + + +def roles_demo(trace=0): + from nltk.corpus import ieer + + roles = """ + (.*( # assorted roles + analyst| + chair(wo)?man| + commissioner| + counsel| + director| + economist| + editor| + executive| + foreman| + governor| + head| + lawyer| + leader| + librarian).*)| + manager| + partner| + president| + producer| + professor| + researcher| + spokes(wo)?man| + writer| + ,\sof\sthe?\s* # "X, of (the) Y" + """ + ROLES = re.compile(roles, re.VERBOSE) + + print() + print("IEER: has_role(PER, ORG) -- raw rtuples:") + print("=" * 45) + + for file in ieer.fileids(): + for doc in ieer.parsed_docs(file): + lcon = rcon = False + if trace: + print(doc.docno) + print("=" * 15) + lcon = rcon = True + for rel in extract_rels('PER', 'ORG', doc, corpus='ieer', pattern=ROLES): + print(rtuple(rel, lcon=lcon, rcon=rcon)) + + +############################################## +### Show what's in the IEER Headlines +############################################## + + +def ieer_headlines(): + + from nltk.corpus import ieer + from nltk.tree import Tree + + print("IEER: First 20 Headlines") + print("=" * 45) + + trees = [ + (doc.docno, doc.headline) + for file in ieer.fileids() + for doc in ieer.parsed_docs(file) + ] + for tree in trees[:20]: + print() + print("%s:\n%s" % tree) + + +############################################# +## Dutch CONLL2002: take_on_role(PER, ORG +############################################# + + +def conllned(trace=1): + """ + Find the copula+'van' relation ('of') in the Dutch tagged training corpus + from CoNLL 2002. 
+ """ + + from nltk.corpus import conll2002 + + vnv = """ + ( + is/V| # 3rd sing present and + was/V| # past forms of the verb zijn ('be') + werd/V| # and also present + wordt/V # past of worden ('become) + ) + .* # followed by anything + van/Prep # followed by van ('of') + """ + VAN = re.compile(vnv, re.VERBOSE) + + print() + print("Dutch CoNLL2002: van(PER, ORG) -- raw rtuples with context:") + print("=" * 45) + + for doc in conll2002.chunked_sents('ned.train'): + lcon = rcon = False + if trace: + lcon = rcon = True + for rel in extract_rels( + 'PER', 'ORG', doc, corpus='conll2002', pattern=VAN, window=10 + ): + print(rtuple(rel, lcon=lcon, rcon=rcon)) + + +############################################# +## Spanish CONLL2002: (PER, ORG) +############################################# + + +def conllesp(): + from nltk.corpus import conll2002 + + de = """ + .* + ( + de/SP| + del/SP + ) + """ + DE = re.compile(de, re.VERBOSE) + + print() + print("Spanish CoNLL2002: de(ORG, LOC) -- just the first 10 clauses:") + print("=" * 45) + rels = [ + rel + for doc in conll2002.chunked_sents('esp.train') + for rel in extract_rels('ORG', 'LOC', doc, corpus='conll2002', pattern=DE) + ] + for r in rels[:10]: + print(clause(r, relsym='DE')) + print() + + +def ne_chunked(): + print() + print("1500 Sentences from Penn Treebank, as processed by NLTK NE Chunker") + print("=" * 45) + ROLE = re.compile( + r'.*(chairman|president|trader|scientist|economist|analyst|partner).*' + ) + rels = [] + for i, sent in enumerate(nltk.corpus.treebank.tagged_sents()[:1500]): + sent = nltk.ne_chunk(sent) + rels = extract_rels('PER', 'ORG', sent, corpus='ace', pattern=ROLE, window=7) + for rel in rels: + print('{0:<5}{1}'.format(i, rtuple(rel))) + + +if __name__ == '__main__': + import nltk + from nltk.sem import relextract + + in_demo(trace=0) + roles_demo(trace=0) + conllned() + conllesp() + ieer_headlines() + ne_chunked() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/skolemize.py b/venv.bak/lib/python3.7/site-packages/nltk/sem/skolemize.py new file mode 100644 index 0000000..73eaf4b --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sem/skolemize.py @@ -0,0 +1,148 @@ +# Natural Language Toolkit: Semantic Interpretation +# +# Author: Ewan Klein +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT + +from nltk.sem.logic import ( + AllExpression, + AndExpression, + ApplicationExpression, + EqualityExpression, + ExistsExpression, + IffExpression, + ImpExpression, + NegatedExpression, + OrExpression, + VariableExpression, + skolem_function, + unique_variable, +) + + +def skolemize(expression, univ_scope=None, used_variables=None): + """ + Skolemize the expression and convert to conjunctive normal form (CNF) + """ + if univ_scope is None: + univ_scope = set() + if used_variables is None: + used_variables = set() + + if isinstance(expression, AllExpression): + term = skolemize( + expression.term, + univ_scope | set([expression.variable]), + used_variables | set([expression.variable]), + ) + return term.replace( + expression.variable, + VariableExpression(unique_variable(ignore=used_variables)), + ) + elif isinstance(expression, AndExpression): + return skolemize(expression.first, univ_scope, used_variables) & skolemize( + expression.second, univ_scope, used_variables + ) + elif isinstance(expression, OrExpression): + return to_cnf( + skolemize(expression.first, univ_scope, used_variables), + skolemize(expression.second, univ_scope, used_variables), + ) + elif 
isinstance(expression, ImpExpression): + return to_cnf( + skolemize(-expression.first, univ_scope, used_variables), + skolemize(expression.second, univ_scope, used_variables), + ) + elif isinstance(expression, IffExpression): + return to_cnf( + skolemize(-expression.first, univ_scope, used_variables), + skolemize(expression.second, univ_scope, used_variables), + ) & to_cnf( + skolemize(expression.first, univ_scope, used_variables), + skolemize(-expression.second, univ_scope, used_variables), + ) + elif isinstance(expression, EqualityExpression): + return expression + elif isinstance(expression, NegatedExpression): + negated = expression.term + if isinstance(negated, AllExpression): + term = skolemize( + -negated.term, univ_scope, used_variables | set([negated.variable]) + ) + if univ_scope: + return term.replace(negated.variable, skolem_function(univ_scope)) + else: + skolem_constant = VariableExpression( + unique_variable(ignore=used_variables) + ) + return term.replace(negated.variable, skolem_constant) + elif isinstance(negated, AndExpression): + return to_cnf( + skolemize(-negated.first, univ_scope, used_variables), + skolemize(-negated.second, univ_scope, used_variables), + ) + elif isinstance(negated, OrExpression): + return skolemize(-negated.first, univ_scope, used_variables) & skolemize( + -negated.second, univ_scope, used_variables + ) + elif isinstance(negated, ImpExpression): + return skolemize(negated.first, univ_scope, used_variables) & skolemize( + -negated.second, univ_scope, used_variables + ) + elif isinstance(negated, IffExpression): + return to_cnf( + skolemize(-negated.first, univ_scope, used_variables), + skolemize(-negated.second, univ_scope, used_variables), + ) & to_cnf( + skolemize(negated.first, univ_scope, used_variables), + skolemize(negated.second, univ_scope, used_variables), + ) + elif isinstance(negated, EqualityExpression): + return expression + elif isinstance(negated, NegatedExpression): + return skolemize(negated.term, univ_scope, used_variables) + elif isinstance(negated, ExistsExpression): + term = skolemize( + -negated.term, + univ_scope | set([negated.variable]), + used_variables | set([negated.variable]), + ) + return term.replace( + negated.variable, + VariableExpression(unique_variable(ignore=used_variables)), + ) + elif isinstance(negated, ApplicationExpression): + return expression + else: + raise Exception('\'%s\' cannot be skolemized' % expression) + elif isinstance(expression, ExistsExpression): + term = skolemize( + expression.term, univ_scope, used_variables | set([expression.variable]) + ) + if univ_scope: + return term.replace(expression.variable, skolem_function(univ_scope)) + else: + skolem_constant = VariableExpression(unique_variable(ignore=used_variables)) + return term.replace(expression.variable, skolem_constant) + elif isinstance(expression, ApplicationExpression): + return expression + else: + raise Exception('\'%s\' cannot be skolemized' % expression) + + +def to_cnf(first, second): + """ + Convert this split disjunction to conjunctive normal form (CNF) + """ + if isinstance(first, AndExpression): + r_first = to_cnf(first.first, second) + r_second = to_cnf(first.second, second) + return r_first & r_second + elif isinstance(second, AndExpression): + r_first = to_cnf(first, second.first) + r_second = to_cnf(first, second.second) + return r_first & r_second + else: + return first | second diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sem/util.py b/venv.bak/lib/python3.7/site-packages/nltk/sem/util.py new file mode 100644 
index 0000000..bac3884 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sem/util.py @@ -0,0 +1,310 @@ +# Natural Language Toolkit: Semantic Interpretation +# +# Author: Ewan Klein +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT + +""" +Utility functions for batch-processing sentences: parsing and +extraction of the semantic representation of the root node of the the +syntax tree, followed by evaluation of the semantic representation in +a first-order model. +""" +from __future__ import print_function, unicode_literals + +import codecs +from nltk.sem import evaluate + + +############################################################## +## Utility functions for connecting parse output to semantics +############################################################## + + +def parse_sents(inputs, grammar, trace=0): + """ + Convert input sentences into syntactic trees. + + :param inputs: sentences to be parsed + :type inputs: list(str) + :param grammar: ``FeatureGrammar`` or name of feature-based grammar + :type grammar: nltk.grammar.FeatureGrammar + :rtype: list(nltk.tree.Tree) or dict(list(str)): list(Tree) + :return: a mapping from input sentences to a list of ``Tree``s + """ + # put imports here to avoid circult dependencies + from nltk.grammar import FeatureGrammar + from nltk.parse import FeatureChartParser, load_parser + + if isinstance(grammar, FeatureGrammar): + cp = FeatureChartParser(grammar) + else: + cp = load_parser(grammar, trace=trace) + parses = [] + for sent in inputs: + tokens = sent.split() # use a tokenizer? + syntrees = list(cp.parse(tokens)) + parses.append(syntrees) + return parses + + +def root_semrep(syntree, semkey='SEM'): + """ + Find the semantic representation at the root of a tree. + + :param syntree: a parse ``Tree`` + :param semkey: the feature label to use for the root semantics in the tree + :return: the semantic representation at the root of a ``Tree`` + :rtype: sem.Expression + """ + from nltk.grammar import FeatStructNonterminal + + node = syntree.label() + assert isinstance(node, FeatStructNonterminal) + try: + return node[semkey] + except KeyError: + print(node, end=' ') + print("has no specification for the feature %s" % semkey) + raise + + +def interpret_sents(inputs, grammar, semkey='SEM', trace=0): + """ + Add the semantic representation to each syntactic parse tree + of each input sentence. + + :param inputs: a list of sentences + :type inputs: list(str) + :param grammar: ``FeatureGrammar`` or name of feature-based grammar + :type grammar: nltk.grammar.FeatureGrammar + :return: a mapping from sentences to lists of pairs (parse-tree, semantic-representations) + :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression))) + """ + return [ + [(syn, root_semrep(syn, semkey)) for syn in syntrees] + for syntrees in parse_sents(inputs, grammar, trace=trace) + ] + + +def evaluate_sents(inputs, grammar, model, assignment, trace=0): + """ + Add the truth-in-a-model value to each semantic representation + for each syntactic parse of each input sentences. 
+ + :param inputs: a list of sentences + :type inputs: list(str) + :param grammar: ``FeatureGrammar`` or name of feature-based grammar + :type grammar: nltk.grammar.FeatureGrammar + :return: a mapping from sentences to lists of triples (parse-tree, semantic-representations, evaluation-in-model) + :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression, bool or dict(str): bool))) + """ + return [ + [ + (syn, sem, model.evaluate("%s" % sem, assignment, trace=trace)) + for (syn, sem) in interpretations + ] + for interpretations in interpret_sents(inputs, grammar) + ] + + +def demo_model0(): + global m0, g0 + # Initialize a valuation of non-logical constants.""" + v = [ + ('john', 'b1'), + ('mary', 'g1'), + ('suzie', 'g2'), + ('fido', 'd1'), + ('tess', 'd2'), + ('noosa', 'n'), + ('girl', set(['g1', 'g2'])), + ('boy', set(['b1', 'b2'])), + ('dog', set(['d1', 'd2'])), + ('bark', set(['d1', 'd2'])), + ('walk', set(['b1', 'g2', 'd1'])), + ('chase', set([('b1', 'g1'), ('b2', 'g1'), ('g1', 'd1'), ('g2', 'd2')])), + ( + 'see', + set([('b1', 'g1'), ('b2', 'd2'), ('g1', 'b1'), ('d2', 'b1'), ('g2', 'n')]), + ), + ('in', set([('b1', 'n'), ('b2', 'n'), ('d2', 'n')])), + ('with', set([('b1', 'g1'), ('g1', 'b1'), ('d1', 'b1'), ('b1', 'd1')])), + ] + # Read in the data from ``v`` + val = evaluate.Valuation(v) + # Bind ``dom`` to the ``domain`` property of ``val`` + dom = val.domain + # Initialize a model with parameters ``dom`` and ``val``. + m0 = evaluate.Model(dom, val) + # Initialize a variable assignment with parameter ``dom`` + g0 = evaluate.Assignment(dom) + + +def read_sents(filename, encoding='utf8'): + with codecs.open(filename, 'r', encoding) as fp: + sents = [l.rstrip() for l in fp] + + # get rid of blank lines + sents = [l for l in sents if len(l) > 0] + sents = [l for l in sents if not l[0] == '#'] + return sents + + +def demo_legacy_grammar(): + """ + Check that interpret_sents() is compatible with legacy grammars that use + a lowercase 'sem' feature. + + Define 'test.fcfg' to be the following + + """ + from nltk.grammar import FeatureGrammar + + g = FeatureGrammar.fromstring( + """ + % start S + S[sem=] -> 'hello' + """ + ) + print("Reading grammar: %s" % g) + print("*" * 20) + for reading in interpret_sents(['hello'], g, semkey='sem'): + syn, sem = reading[0] + print() + print("output: ", sem) + + +def demo(): + import sys + from optparse import OptionParser + + description = """ + Parse and evaluate some sentences. 
+ """ + + opts = OptionParser(description=description) + + opts.set_defaults( + evaluate=True, + beta=True, + syntrace=0, + semtrace=0, + demo='default', + grammar='', + sentences='', + ) + + opts.add_option( + "-d", + "--demo", + dest="demo", + help="choose demo D; omit this for the default demo, or specify 'chat80'", + metavar="D", + ) + opts.add_option( + "-g", "--gram", dest="grammar", help="read in grammar G", metavar="G" + ) + opts.add_option( + "-m", + "--model", + dest="model", + help="import model M (omit '.py' suffix)", + metavar="M", + ) + opts.add_option( + "-s", + "--sentences", + dest="sentences", + help="read in a file of test sentences S", + metavar="S", + ) + opts.add_option( + "-e", + "--no-eval", + action="store_false", + dest="evaluate", + help="just do a syntactic analysis", + ) + opts.add_option( + "-b", + "--no-beta-reduction", + action="store_false", + dest="beta", + help="don't carry out beta-reduction", + ) + opts.add_option( + "-t", + "--syntrace", + action="count", + dest="syntrace", + help="set syntactic tracing on; requires '-e' option", + ) + opts.add_option( + "-T", + "--semtrace", + action="count", + dest="semtrace", + help="set semantic tracing on", + ) + + (options, args) = opts.parse_args() + + SPACER = '-' * 30 + + demo_model0() + + sents = [ + 'Fido sees a boy with Mary', + 'John sees Mary', + 'every girl chases a dog', + 'every boy chases a girl', + 'John walks with a girl in Noosa', + 'who walks', + ] + + gramfile = 'grammars/sample_grammars/sem2.fcfg' + + if options.sentences: + sentsfile = options.sentences + if options.grammar: + gramfile = options.grammar + if options.model: + exec("import %s as model" % options.model) + + if sents is None: + sents = read_sents(sentsfile) + + # Set model and assignment + model = m0 + g = g0 + + if options.evaluate: + evaluations = evaluate_sents(sents, gramfile, model, g, trace=options.semtrace) + else: + semreps = interpret_sents(sents, gramfile, trace=options.syntrace) + + for i, sent in enumerate(sents): + n = 1 + print('\nSentence: %s' % sent) + print(SPACER) + if options.evaluate: + + for (syntree, semrep, value) in evaluations[i]: + if isinstance(value, dict): + value = set(value.keys()) + print('%d: %s' % (n, semrep)) + print(value) + n += 1 + else: + + for (syntree, semrep) in semreps[i]: + print('%d: %s' % (n, semrep)) + n += 1 + + +if __name__ == "__main__": + demo() + demo_legacy_grammar() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sentiment/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/sentiment/__init__.py new file mode 100644 index 0000000..8bb7c07 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sentiment/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Sentiment Analysis +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ewan Klein +# URL: +# For license information, see LICENSE.TXT + +""" +NLTK Sentiment Analysis Package + +""" +from nltk.sentiment.sentiment_analyzer import SentimentAnalyzer +from nltk.sentiment.vader import SentimentIntensityAnalyzer diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sentiment/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sentiment/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..9bc760b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sentiment/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sentiment/__pycache__/sentiment_analyzer.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/nltk/sentiment/__pycache__/sentiment_analyzer.cpython-37.pyc new file mode 100644 index 0000000..25b9c7b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sentiment/__pycache__/sentiment_analyzer.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sentiment/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sentiment/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000..c7f3f06 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sentiment/__pycache__/util.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sentiment/__pycache__/vader.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/sentiment/__pycache__/vader.cpython-37.pyc new file mode 100644 index 0000000..9441fe9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/sentiment/__pycache__/vader.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sentiment/sentiment_analyzer.py b/venv.bak/lib/python3.7/site-packages/nltk/sentiment/sentiment_analyzer.py new file mode 100644 index 0000000..4abbc5e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sentiment/sentiment_analyzer.py @@ -0,0 +1,248 @@ +# coding: utf-8 +# +# Natural Language Toolkit: Sentiment Analyzer +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Pierpaolo Pantone <24alsecondo@gmail.com> +# URL: +# For license information, see LICENSE.TXT + +""" +A SentimentAnalyzer is a tool to implement and facilitate Sentiment Analysis tasks +using NLTK features and classifiers, especially for teaching and demonstrative +purposes. +""" + +from __future__ import print_function +from collections import defaultdict + +from nltk.classify.util import apply_features, accuracy as eval_accuracy +from nltk.collocations import BigramCollocationFinder +from nltk.metrics import ( + BigramAssocMeasures, + precision as eval_precision, + recall as eval_recall, + f_measure as eval_f_measure, +) + +from nltk.probability import FreqDist + +from nltk.sentiment.util import save_file, timer + + +class SentimentAnalyzer(object): + """ + A Sentiment Analysis tool based on machine learning approaches. + """ + + def __init__(self, classifier=None): + self.feat_extractors = defaultdict(list) + self.classifier = classifier + + def all_words(self, documents, labeled=None): + """ + Return all words/tokens from the documents (with duplicates). + :param documents: a list of (words, label) tuples. + :param labeled: if `True`, assume that each document is represented by a + (words, label) tuple: (list(str), str). If `False`, each document is + considered as being a simple list of strings: list(str). + :rtype: list(str) + :return: A list of all words/tokens in `documents`. + """ + all_words = [] + if labeled is None: + labeled = documents and isinstance(documents[0], tuple) + if labeled == True: + for words, sentiment in documents: + all_words.extend(words) + elif labeled == False: + for words in documents: + all_words.extend(words) + return all_words + + def apply_features(self, documents, labeled=None): + """ + Apply all feature extractor functions to the documents. This is a wrapper + around `nltk.classify.util.apply_features`. + + If `labeled=False`, return featuresets as: + [feature_func(doc) for doc in documents] + If `labeled=True`, return featuresets as: + [(feature_func(tok), label) for (tok, label) in toks] + + :param documents: a list of documents. 
`If labeled=True`, the method expects + a list of (words, label) tuples. + :rtype: LazyMap + """ + return apply_features(self.extract_features, documents, labeled) + + def unigram_word_feats(self, words, top_n=None, min_freq=0): + """ + Return most common top_n word features. + + :param words: a list of words/tokens. + :param top_n: number of best words/tokens to use, sorted by frequency. + :rtype: list(str) + :return: A list of `top_n` words/tokens (with no duplicates) sorted by + frequency. + """ + # Stopwords are not removed + unigram_feats_freqs = FreqDist(word for word in words) + return [ + w + for w, f in unigram_feats_freqs.most_common(top_n) + if unigram_feats_freqs[w] > min_freq + ] + + def bigram_collocation_feats( + self, documents, top_n=None, min_freq=3, assoc_measure=BigramAssocMeasures.pmi + ): + """ + Return `top_n` bigram features (using `assoc_measure`). + Note that this method is based on bigram collocations measures, and not + on simple bigram frequency. + + :param documents: a list (or iterable) of tokens. + :param top_n: number of best words/tokens to use, sorted by association + measure. + :param assoc_measure: bigram association measure to use as score function. + :param min_freq: the minimum number of occurrencies of bigrams to take + into consideration. + + :return: `top_n` ngrams scored by the given association measure. + """ + finder = BigramCollocationFinder.from_documents(documents) + finder.apply_freq_filter(min_freq) + return finder.nbest(assoc_measure, top_n) + + def classify(self, instance): + """ + Classify a single instance applying the features that have already been + stored in the SentimentAnalyzer. + + :param instance: a list (or iterable) of tokens. + :return: the classification result given by applying the classifier. + """ + instance_feats = self.apply_features([instance], labeled=False) + return self.classifier.classify(instance_feats[0]) + + def add_feat_extractor(self, function, **kwargs): + """ + Add a new function to extract features from a document. This function will + be used in extract_features(). + Important: in this step our kwargs are only representing additional parameters, + and NOT the document we have to parse. The document will always be the first + parameter in the parameter list, and it will be added in the extract_features() + function. + + :param function: the extractor function to add to the list of feature extractors. + :param kwargs: additional parameters required by the `function` function. + """ + self.feat_extractors[function].append(kwargs) + + def extract_features(self, document): + """ + Apply extractor functions (and their parameters) to the present document. + We pass `document` as the first parameter of the extractor functions. + If we want to use the same extractor function multiple times, we have to + add it to the extractors with `add_feat_extractor` using multiple sets of + parameters (one for each call of the extractor function). + + :param document: the document that will be passed as argument to the + feature extractor functions. + :return: A dictionary of populated features extracted from the document. 
+ :rtype: dict + """ + all_features = {} + for extractor in self.feat_extractors: + for param_set in self.feat_extractors[extractor]: + feats = extractor(document, **param_set) + all_features.update(feats) + return all_features + + def train(self, trainer, training_set, save_classifier=None, **kwargs): + """ + Train classifier on the training set, optionally saving the output in the + file specified by `save_classifier`. + Additional arguments depend on the specific trainer used. For example, + a MaxentClassifier can use `max_iter` parameter to specify the number + of iterations, while a NaiveBayesClassifier cannot. + + :param trainer: `train` method of a classifier. + E.g.: NaiveBayesClassifier.train + :param training_set: the training set to be passed as argument to the + classifier `train` method. + :param save_classifier: the filename of the file where the classifier + will be stored (optional). + :param kwargs: additional parameters that will be passed as arguments to + the classifier `train` function. + :return: A classifier instance trained on the training set. + :rtype: + """ + print("Training classifier") + self.classifier = trainer(training_set, **kwargs) + if save_classifier: + save_file(self.classifier, save_classifier) + + return self.classifier + + def evaluate( + self, + test_set, + classifier=None, + accuracy=True, + f_measure=True, + precision=True, + recall=True, + verbose=False, + ): + """ + Evaluate and print classifier performance on the test set. + + :param test_set: A list of (tokens, label) tuples to use as gold set. + :param classifier: a classifier instance (previously trained). + :param accuracy: if `True`, evaluate classifier accuracy. + :param f_measure: if `True`, evaluate classifier f_measure. + :param precision: if `True`, evaluate classifier precision. + :param recall: if `True`, evaluate classifier recall. + :return: evaluation results. 
+ :rtype: dict(str): float + """ + if classifier is None: + classifier = self.classifier + print("Evaluating {0} results...".format(type(classifier).__name__)) + metrics_results = {} + if accuracy == True: + accuracy_score = eval_accuracy(classifier, test_set) + metrics_results['Accuracy'] = accuracy_score + + gold_results = defaultdict(set) + test_results = defaultdict(set) + labels = set() + for i, (feats, label) in enumerate(test_set): + labels.add(label) + gold_results[label].add(i) + observed = classifier.classify(feats) + test_results[observed].add(i) + + for label in labels: + if precision == True: + precision_score = eval_precision( + gold_results[label], test_results[label] + ) + metrics_results['Precision [{0}]'.format(label)] = precision_score + if recall == True: + recall_score = eval_recall(gold_results[label], test_results[label]) + metrics_results['Recall [{0}]'.format(label)] = recall_score + if f_measure == True: + f_measure_score = eval_f_measure( + gold_results[label], test_results[label] + ) + metrics_results['F-measure [{0}]'.format(label)] = f_measure_score + + # Print evaluation results (in alphabetical order) + if verbose == True: + for result in sorted(metrics_results): + print('{0}: {1}'.format(result, metrics_results[result])) + + return metrics_results diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sentiment/util.py b/venv.bak/lib/python3.7/site-packages/nltk/sentiment/util.py new file mode 100644 index 0000000..334c7b7 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sentiment/util.py @@ -0,0 +1,938 @@ +# coding: utf-8 +# +# Natural Language Toolkit: Sentiment Analyzer +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Pierpaolo Pantone <24alsecondo@gmail.com> +# URL: +# For license information, see LICENSE.TXT + +""" +Utility methods for Sentiment Analysis. +""" +from __future__ import division + +import codecs +import csv +import json +import pickle +import random +import re +import sys +import time +from copy import deepcopy +from itertools import tee + +import nltk +from nltk.corpus import CategorizedPlaintextCorpusReader +from nltk.data import load +from nltk.tokenize.casual import EMOTICON_RE + +# //////////////////////////////////////////////////////////// +# { Regular expressions +# //////////////////////////////////////////////////////////// + +# Regular expression for negation by Christopher Potts +NEGATION = r""" + (?: + ^(?:never|no|nothing|nowhere|noone|none|not| + havent|hasnt|hadnt|cant|couldnt|shouldnt| + wont|wouldnt|dont|doesnt|didnt|isnt|arent|aint + )$ + ) + | + n't""" + +NEGATION_RE = re.compile(NEGATION, re.VERBOSE) + +CLAUSE_PUNCT = r'^[.:;!?]$' +CLAUSE_PUNCT_RE = re.compile(CLAUSE_PUNCT) + +# Happy and sad emoticons + +HAPPY = set( + [ + ':-)', + ':)', + ';)', + ':o)', + ':]', + ':3', + ':c)', + ':>', + '=]', + '8)', + '=)', + ':}', + ':^)', + ':-D', + ':D', + '8-D', + '8D', + 'x-D', + 'xD', + 'X-D', + 'XD', + '=-D', + '=D', + '=-3', + '=3', + ':-))', + ":'-)", + ":')", + ':*', + ':^*', + '>:P', + ':-P', + ':P', + 'X-P', + 'x-p', + 'xp', + 'XP', + ':-p', + ':p', + '=p', + ':-b', + ':b', + '>:)', + '>;)', + '>:-)', + '<3', + ] +) + +SAD = set( + [ + ':L', + ':-/', + '>:/', + ':S', + '>:[', + ':@', + ':-(', + ':[', + ':-||', + '=L', + ':<', + ':-[', + ':-<', + '=\\', + '=/', + '>:(', + ':(', + '>.<', + ":'-(", + ":'(", + ':\\', + ':-c', + ':c', + ':{', + '>:\\', + ';(', + ] +) + + +def timer(method): + """ + A timer decorator to measure execution performance of methods. 
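+ + Intended usage (a sketch; ``slow_step`` below is a hypothetical name): + + >>> @timer + ... def slow_step(): # hypothetical; any callable works + ... pass + + Each call to ``slow_step()`` should then report its elapsed time. Note, however, that the ``'[TIMER] {0}(): {:.3f} seconds'`` format string used below for runs under ten seconds mixes explicit and automatic field numbering, which ``str.format`` rejects with ``ValueError``.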
+ """ + + def timed(*args, **kw): + start = time.time() + result = method(*args, **kw) + end = time.time() + tot_time = end - start + hours = tot_time // 3600 + mins = tot_time // 60 % 60 + # in Python 2.x round() will return a float, so we convert it to int + secs = int(round(tot_time % 60)) + if hours == 0 and mins == 0 and secs < 10: + print('[TIMER] {0}(): {:.3f} seconds'.format(method.__name__, tot_time)) + else: + print( + '[TIMER] {0}(): {1}h {2}m {3}s'.format( + method.__name__, hours, mins, secs + ) + ) + return result + + return timed + + +def pairwise(iterable): + """s -> (s0,s1), (s1,s2), (s2, s3), ...""" + a, b = tee(iterable) + next(b, None) + return zip(a, b) + + +# //////////////////////////////////////////////////////////// +# { Feature extractor functions +# //////////////////////////////////////////////////////////// +""" +Feature extractor functions are declared outside the SentimentAnalyzer class. +Users should have the possibility to create their own feature extractors +without modifying SentimentAnalyzer. +""" + + +def extract_unigram_feats(document, unigrams, handle_negation=False): + """ + Populate a dictionary of unigram features, reflecting the presence/absence in + the document of each of the tokens in `unigrams`. + + :param document: a list of words/tokens. + :param unigrams: a list of words/tokens whose presence/absence has to be + checked in `document`. + :param handle_negation: if `handle_negation == True` apply `mark_negation` + method to `document` before checking for unigram presence/absence. + :return: a dictionary of unigram features {unigram : boolean}. + + >>> words = ['ice', 'police', 'riot'] + >>> document = 'ice is melting due to global warming'.split() + >>> sorted(extract_unigram_feats(document, words).items()) + [('contains(ice)', True), ('contains(police)', False), ('contains(riot)', False)] + """ + features = {} + if handle_negation: + document = mark_negation(document) + for word in unigrams: + features['contains({0})'.format(word)] = word in set(document) + return features + + +def extract_bigram_feats(document, bigrams): + """ + Populate a dictionary of bigram features, reflecting the presence/absence in + the document of each of the tokens in `bigrams`. This extractor function only + considers contiguous bigrams obtained by `nltk.bigrams`. + + :param document: a list of words/tokens. + :param unigrams: a list of bigrams whose presence/absence has to be + checked in `document`. + :return: a dictionary of bigram features {bigram : boolean}. + + >>> bigrams = [('global', 'warming'), ('police', 'prevented'), ('love', 'you')] + >>> document = 'ice is melting due to global warming'.split() + >>> sorted(extract_bigram_feats(document, bigrams).items()) + [('contains(global - warming)', True), ('contains(love - you)', False), + ('contains(police - prevented)', False)] + """ + features = {} + for bigr in bigrams: + features['contains({0} - {1})'.format(bigr[0], bigr[1])] = bigr in nltk.bigrams( + document + ) + return features + + +# //////////////////////////////////////////////////////////// +# { Helper Functions +# //////////////////////////////////////////////////////////// + + +def mark_negation(document, double_neg_flip=False, shallow=False): + """ + Append _NEG suffix to words that appear in the scope between a negation + and a punctuation mark. + + :param document: a list of words/tokens, or a tuple (words, label). + :param shallow: if True, the method will modify the original document in place. 
+ :param double_neg_flip: if True, double negation is considered affirmation + (we activate/deactivate negation scope everytime we find a negation). + :return: if `shallow == True` the method will modify the original document + and return it. If `shallow == False` the method will return a modified + document, leaving the original unmodified. + + >>> sent = "I didn't like this movie . It was bad .".split() + >>> mark_negation(sent) + ['I', "didn't", 'like_NEG', 'this_NEG', 'movie_NEG', '.', 'It', 'was', 'bad', '.'] + """ + if not shallow: + document = deepcopy(document) + # check if the document is labeled. If so, do not consider the label. + labeled = document and isinstance(document[0], (tuple, list)) + if labeled: + doc = document[0] + else: + doc = document + neg_scope = False + for i, word in enumerate(doc): + if NEGATION_RE.search(word): + if not neg_scope or (neg_scope and double_neg_flip): + neg_scope = not neg_scope + continue + else: + doc[i] += '_NEG' + elif neg_scope and CLAUSE_PUNCT_RE.search(word): + neg_scope = not neg_scope + elif neg_scope and not CLAUSE_PUNCT_RE.search(word): + doc[i] += '_NEG' + + return document + + +def output_markdown(filename, **kwargs): + """ + Write the output of an analysis to a file. + """ + with codecs.open(filename, 'at') as outfile: + text = '\n*** \n\n' + text += '{0} \n\n'.format(time.strftime("%d/%m/%Y, %H:%M")) + for k in sorted(kwargs): + if isinstance(kwargs[k], dict): + dictionary = kwargs[k] + text += ' - **{0}:**\n'.format(k) + for entry in sorted(dictionary): + text += ' - {0}: {1} \n'.format(entry, dictionary[entry]) + elif isinstance(kwargs[k], list): + text += ' - **{0}:**\n'.format(k) + for entry in kwargs[k]: + text += ' - {0}\n'.format(entry) + else: + text += ' - **{0}:** {1} \n'.format(k, kwargs[k]) + outfile.write(text) + + +def save_file(content, filename): + """ + Store `content` in `filename`. Can be used to store a SentimentAnalyzer. + """ + print("Saving", filename) + with codecs.open(filename, 'wb') as storage_file: + # The protocol=2 parameter is for python2 compatibility + pickle.dump(content, storage_file, protocol=2) + + +def split_train_test(all_instances, n=None): + """ + Randomly split `n` instances of the dataset into train and test sets. + + :param all_instances: a list of instances (e.g. documents) that will be split. + :param n: the number of instances to consider (in case we want to use only a + subset). + :return: two lists of instances. Train set is 8/10 of the total and test set + is 2/10 of the total. + """ + random.seed(12345) + random.shuffle(all_instances) + if not n or n > len(all_instances): + n = len(all_instances) + train_set = all_instances[: int(0.8 * n)] + test_set = all_instances[int(0.8 * n) : n] + + return train_set, test_set + + +def _show_plot(x_values, y_values, x_labels=None, y_labels=None): + try: + import matplotlib.pyplot as plt + except ImportError: + raise ImportError( + 'The plot function requires matplotlib to be installed.' 
+ 'See http://matplotlib.org/' + ) + + plt.locator_params(axis='y', nbins=3) + axes = plt.axes() + axes.yaxis.grid() + plt.plot(x_values, y_values, 'ro', color='red') + plt.ylim(ymin=-1.2, ymax=1.2) + plt.tight_layout(pad=5) + if x_labels: + plt.xticks(x_values, x_labels, rotation='vertical') + if y_labels: + plt.yticks([-1, 0, 1], y_labels, rotation='horizontal') + # Pad margins so that markers are not clipped by the axes + plt.margins(0.2) + plt.show() + + +# //////////////////////////////////////////////////////////// +# { Parsing and conversion functions +# //////////////////////////////////////////////////////////// + + +def json2csv_preprocess( + json_file, + outfile, + fields, + encoding='utf8', + errors='replace', + gzip_compress=False, + skip_retweets=True, + skip_tongue_tweets=True, + skip_ambiguous_tweets=True, + strip_off_emoticons=True, + remove_duplicates=True, + limit=None, +): + """ + Convert json file to csv file, preprocessing each row to obtain a suitable + dataset for tweets Semantic Analysis. + + :param json_file: the original json file containing tweets. + :param outfile: the output csv filename. + :param fields: a list of fields that will be extracted from the json file and + kept in the output csv file. + :param encoding: the encoding of the files. + :param errors: the error handling strategy for the output writer. + :param gzip_compress: if True, create a compressed GZIP file. + + :param skip_retweets: if True, remove retweets. + :param skip_tongue_tweets: if True, remove tweets containing ":P" and ":-P" + emoticons. + :param skip_ambiguous_tweets: if True, remove tweets containing both happy + and sad emoticons. + :param strip_off_emoticons: if True, strip off emoticons from all tweets. + :param remove_duplicates: if True, remove tweets appearing more than once. + :param limit: an integer to set the number of tweets to convert. After the + limit is reached the conversion will stop. It can be useful to create + subsets of the original tweets json data. + """ + with codecs.open(json_file, encoding=encoding) as fp: + (writer, outf) = outf_writer_compat(outfile, encoding, errors, gzip_compress) + # write the list of fields as header + writer.writerow(fields) + + if remove_duplicates == True: + tweets_cache = [] + i = 0 + for line in fp: + tweet = json.loads(line) + row = extract_fields(tweet, fields) + try: + text = row[fields.index('text')] + # Remove retweets + if skip_retweets == True: + if re.search(r'\bRT\b', text): + continue + # Remove tweets containing ":P" and ":-P" emoticons + if skip_tongue_tweets == True: + if re.search(r'\:\-?P\b', text): + continue + # Remove tweets containing both happy and sad emoticons + if skip_ambiguous_tweets == True: + all_emoticons = EMOTICON_RE.findall(text) + if all_emoticons: + if (set(all_emoticons) & HAPPY) and (set(all_emoticons) & SAD): + continue + # Strip off emoticons from all tweets + if strip_off_emoticons == True: + row[fields.index('text')] = re.sub( + r'(?!\n)\s+', ' ', EMOTICON_RE.sub('', text) + ) + # Remove duplicate tweets + if remove_duplicates == True: + if row[fields.index('text')] in tweets_cache: + continue + else: + tweets_cache.append(row[fields.index('text')]) + except ValueError: + pass + writer.writerow(row) + i += 1 + if limit and i >= limit: + break + outf.close() + + +def parse_tweets_set( + filename, label, word_tokenizer=None, sent_tokenizer=None, skip_header=True +): + """ + Parse csv file containing tweets and output data a list of (text, label) tuples. 
+ + :param filename: the input csv filename. + :param label: the label to be appended to each tweet contained in the csv file. + :param word_tokenizer: the tokenizer instance that will be used to tokenize + each sentence into tokens (e.g. WordPunctTokenizer() or BlanklineTokenizer()). + If no word_tokenizer is specified, tweets will not be tokenized. + :param sent_tokenizer: the tokenizer that will be used to split each tweet into + sentences. + :param skip_header: if True, skip the first line of the csv file (which usually + contains headers). + + :return: a list of (text, label) tuples. + """ + tweets = [] + if not sent_tokenizer: + sent_tokenizer = load('tokenizers/punkt/english.pickle') + + # If we use Python3.x we can proceed using the 'rt' flag + if sys.version_info[0] == 3: + with codecs.open(filename, 'rt') as csvfile: + reader = csv.reader(csvfile) + if skip_header == True: + next(reader, None) # skip the header + i = 0 + for tweet_id, text in reader: + # text = text[1] + i += 1 + sys.stdout.write('Loaded {0} tweets\r'.format(i)) + # Apply sentence and word tokenizer to text + if word_tokenizer: + tweet = [ + w + for sent in sent_tokenizer.tokenize(text) + for w in word_tokenizer.tokenize(sent) + ] + else: + tweet = text + tweets.append((tweet, label)) + # If we use Python2.x we need to handle encoding problems + elif sys.version_info[0] < 3: + with codecs.open(filename) as csvfile: + reader = csv.reader(csvfile) + if skip_header == True: + next(reader, None) # skip the header + i = 0 + for row in reader: + unicode_row = [x.decode('utf8') for x in row] + text = unicode_row[1] + i += 1 + sys.stdout.write('Loaded {0} tweets\r'.format(i)) + # Apply sentence and word tokenizer to text + if word_tokenizer: + tweet = [ + w.encode('utf8') + for sent in sent_tokenizer.tokenize(text) + for w in word_tokenizer.tokenize(sent) + ] + else: + tweet = text + tweets.append((tweet, label)) + print("Loaded {0} tweets".format(i)) + return tweets + + +# //////////////////////////////////////////////////////////// +# { Demos +# //////////////////////////////////////////////////////////// + + +def demo_tweets(trainer, n_instances=None, output=None): + """ + Train and test Naive Bayes classifier on 10000 tweets, tokenized using + TweetTokenizer. + Features are composed of: + - 1000 most frequent unigrams + - 100 top bigrams (using BigramAssocMeasures.pmi) + + :param trainer: `train` method of a classifier. + :param n_instances: the number of total tweets that have to be used for + training and testing. Tweets will be equally split between positive and + negative. + :param output: the output file where results have to be reported. 
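+ + Illustrative call (a sketch; the instance count is arbitrary and the ``twitter_samples`` corpus must be available): + + >>> from nltk.classify import NaiveBayesClassifier + >>> demo_tweets(NaiveBayesClassifier.train, n_instances=8000) # doctest: +SKIP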
+ """ + from nltk.tokenize import TweetTokenizer + from nltk.sentiment import SentimentAnalyzer + from nltk.corpus import twitter_samples, stopwords + + # Different customizations for the TweetTokenizer + tokenizer = TweetTokenizer(preserve_case=False) + # tokenizer = TweetTokenizer(preserve_case=True, strip_handles=True) + # tokenizer = TweetTokenizer(reduce_len=True, strip_handles=True) + + if n_instances is not None: + n_instances = int(n_instances / 2) + + fields = ['id', 'text'] + positive_json = twitter_samples.abspath("positive_tweets.json") + positive_csv = 'positive_tweets.csv' + json2csv_preprocess(positive_json, positive_csv, fields, limit=n_instances) + + negative_json = twitter_samples.abspath("negative_tweets.json") + negative_csv = 'negative_tweets.csv' + json2csv_preprocess(negative_json, negative_csv, fields, limit=n_instances) + + neg_docs = parse_tweets_set(negative_csv, label='neg', word_tokenizer=tokenizer) + pos_docs = parse_tweets_set(positive_csv, label='pos', word_tokenizer=tokenizer) + + # We separately split subjective and objective instances to keep a balanced + # uniform class distribution in both train and test sets. + train_pos_docs, test_pos_docs = split_train_test(pos_docs) + train_neg_docs, test_neg_docs = split_train_test(neg_docs) + + training_tweets = train_pos_docs + train_neg_docs + testing_tweets = test_pos_docs + test_neg_docs + + sentim_analyzer = SentimentAnalyzer() + # stopwords = stopwords.words('english') + # all_words = [word for word in sentim_analyzer.all_words(training_tweets) if word.lower() not in stopwords] + all_words = [word for word in sentim_analyzer.all_words(training_tweets)] + + # Add simple unigram word features + unigram_feats = sentim_analyzer.unigram_word_feats(all_words, top_n=1000) + sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats) + + # Add bigram collocation features + bigram_collocs_feats = sentim_analyzer.bigram_collocation_feats( + [tweet[0] for tweet in training_tweets], top_n=100, min_freq=12 + ) + sentim_analyzer.add_feat_extractor( + extract_bigram_feats, bigrams=bigram_collocs_feats + ) + + training_set = sentim_analyzer.apply_features(training_tweets) + test_set = sentim_analyzer.apply_features(testing_tweets) + + classifier = sentim_analyzer.train(trainer, training_set) + # classifier = sentim_analyzer.train(trainer, training_set, max_iter=4) + try: + classifier.show_most_informative_features() + except AttributeError: + print( + 'Your classifier does not provide a show_most_informative_features() method.' + ) + results = sentim_analyzer.evaluate(test_set) + + if output: + extr = [f.__name__ for f in sentim_analyzer.feat_extractors] + output_markdown( + output, + Dataset='labeled_tweets', + Classifier=type(classifier).__name__, + Tokenizer=tokenizer.__class__.__name__, + Feats=extr, + Results=results, + Instances=n_instances, + ) + + +def demo_movie_reviews(trainer, n_instances=None, output=None): + """ + Train classifier on all instances of the Movie Reviews dataset. + The corpus has been preprocessed using the default sentence tokenizer and + WordPunctTokenizer. + Features are composed of: + - most frequent unigrams + + :param trainer: `train` method of a classifier. + :param n_instances: the number of total reviews that have to be used for + training and testing. Reviews will be equally split between positive and + negative. + :param output: the output file where results have to be reported. 
+ """ + from nltk.corpus import movie_reviews + from nltk.sentiment import SentimentAnalyzer + + if n_instances is not None: + n_instances = int(n_instances / 2) + + pos_docs = [ + (list(movie_reviews.words(pos_id)), 'pos') + for pos_id in movie_reviews.fileids('pos')[:n_instances] + ] + neg_docs = [ + (list(movie_reviews.words(neg_id)), 'neg') + for neg_id in movie_reviews.fileids('neg')[:n_instances] + ] + # We separately split positive and negative instances to keep a balanced + # uniform class distribution in both train and test sets. + train_pos_docs, test_pos_docs = split_train_test(pos_docs) + train_neg_docs, test_neg_docs = split_train_test(neg_docs) + + training_docs = train_pos_docs + train_neg_docs + testing_docs = test_pos_docs + test_neg_docs + + sentim_analyzer = SentimentAnalyzer() + all_words = sentim_analyzer.all_words(training_docs) + + # Add simple unigram word features + unigram_feats = sentim_analyzer.unigram_word_feats(all_words, min_freq=4) + sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats) + # Apply features to obtain a feature-value representation of our datasets + training_set = sentim_analyzer.apply_features(training_docs) + test_set = sentim_analyzer.apply_features(testing_docs) + + classifier = sentim_analyzer.train(trainer, training_set) + try: + classifier.show_most_informative_features() + except AttributeError: + print( + 'Your classifier does not provide a show_most_informative_features() method.' + ) + results = sentim_analyzer.evaluate(test_set) + + if output: + extr = [f.__name__ for f in sentim_analyzer.feat_extractors] + output_markdown( + output, + Dataset='Movie_reviews', + Classifier=type(classifier).__name__, + Tokenizer='WordPunctTokenizer', + Feats=extr, + Results=results, + Instances=n_instances, + ) + + +def demo_subjectivity(trainer, save_analyzer=False, n_instances=None, output=None): + """ + Train and test a classifier on instances of the Subjective Dataset by Pang and + Lee. The dataset is made of 5000 subjective and 5000 objective sentences. + All tokens (words and punctuation marks) are separated by a whitespace, so + we use the basic WhitespaceTokenizer to parse the data. + + :param trainer: `train` method of a classifier. + :param save_analyzer: if `True`, store the SentimentAnalyzer in a pickle file. + :param n_instances: the number of total sentences that have to be used for + training and testing. Sentences will be equally split between positive + and negative. + :param output: the output file where results have to be reported. + """ + from nltk.sentiment import SentimentAnalyzer + from nltk.corpus import subjectivity + + if n_instances is not None: + n_instances = int(n_instances / 2) + + subj_docs = [ + (sent, 'subj') for sent in subjectivity.sents(categories='subj')[:n_instances] + ] + obj_docs = [ + (sent, 'obj') for sent in subjectivity.sents(categories='obj')[:n_instances] + ] + + # We separately split subjective and objective instances to keep a balanced + # uniform class distribution in both train and test sets. 
+ train_subj_docs, test_subj_docs = split_train_test(subj_docs) + train_obj_docs, test_obj_docs = split_train_test(obj_docs) + + training_docs = train_subj_docs + train_obj_docs + testing_docs = test_subj_docs + test_obj_docs + + sentim_analyzer = SentimentAnalyzer() + all_words_neg = sentim_analyzer.all_words( + [mark_negation(doc) for doc in training_docs] + ) + + # Add simple unigram word features handling negation + unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4) + sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats) + + # Apply features to obtain a feature-value representation of our datasets + training_set = sentim_analyzer.apply_features(training_docs) + test_set = sentim_analyzer.apply_features(testing_docs) + + classifier = sentim_analyzer.train(trainer, training_set) + try: + classifier.show_most_informative_features() + except AttributeError: + print( + 'Your classifier does not provide a show_most_informative_features() method.' + ) + results = sentim_analyzer.evaluate(test_set) + + if save_analyzer == True: + save_file(sentim_analyzer, 'sa_subjectivity.pickle') + + if output: + extr = [f.__name__ for f in sentim_analyzer.feat_extractors] + output_markdown( + output, + Dataset='subjectivity', + Classifier=type(classifier).__name__, + Tokenizer='WhitespaceTokenizer', + Feats=extr, + Instances=n_instances, + Results=results, + ) + + return sentim_analyzer + + +def demo_sent_subjectivity(text): + """ + Classify a single sentence as subjective or objective using a stored + SentimentAnalyzer. + + :param text: a sentence whose subjectivity has to be classified. + """ + from nltk.classify import NaiveBayesClassifier + from nltk.tokenize import regexp + + word_tokenizer = regexp.WhitespaceTokenizer() + try: + sentim_analyzer = load('sa_subjectivity.pickle') + except LookupError: + print('Cannot find the sentiment analyzer you want to load.') + print('Training a new one using NaiveBayesClassifier.') + sentim_analyzer = demo_subjectivity(NaiveBayesClassifier.train, True) + + # Tokenize and convert to lower case + tokenized_text = [word.lower() for word in word_tokenizer.tokenize(text)] + print(sentim_analyzer.classify(tokenized_text)) + + +def demo_liu_hu_lexicon(sentence, plot=False): + """ + Basic example of sentiment classification using Liu and Hu opinion lexicon. + This function simply counts the number of positive, negative and neutral words + in the sentence and classifies it depending on which polarity is more represented. + Words that do not appear in the lexicon are considered as neutral. + + :param sentence: a sentence whose polarity has to be classified. + :param plot: if True, plot a visual representation of the sentence polarity. 
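+
+ A minimal usage sketch, assuming the ``opinion_lexicon`` corpus has already
+ been downloaded (e.g. via ``nltk.download('opinion_lexicon')``); the
+ sentence is the same illustrative example used in this module's demo block,
+ and the printed label depends entirely on the lexicon counts:
+
+ >>> demo_liu_hu_lexicon("This movie was actually neither that funny, nor super witty.")  # doctest: +SKIP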
+ """ + from nltk.corpus import opinion_lexicon + from nltk.tokenize import treebank + + tokenizer = treebank.TreebankWordTokenizer() + pos_words = 0 + neg_words = 0 + tokenized_sent = [word.lower() for word in tokenizer.tokenize(sentence)] + + x = list(range(len(tokenized_sent))) # x axis for the plot + y = [] + + for word in tokenized_sent: + if word in opinion_lexicon.positive(): + pos_words += 1 + y.append(1) # positive + elif word in opinion_lexicon.negative(): + neg_words += 1 + y.append(-1) # negative + else: + y.append(0) # neutral + + if pos_words > neg_words: + print('Positive') + elif pos_words < neg_words: + print('Negative') + elif pos_words == neg_words: + print('Neutral') + + if plot == True: + _show_plot( + x, y, x_labels=tokenized_sent, y_labels=['Negative', 'Neutral', 'Positive'] + ) + + +def demo_vader_instance(text): + """ + Output polarity scores for a text using Vader approach. + + :param text: a text whose polarity has to be evaluated. + """ + from nltk.sentiment import SentimentIntensityAnalyzer + + vader_analyzer = SentimentIntensityAnalyzer() + print(vader_analyzer.polarity_scores(text)) + + +def demo_vader_tweets(n_instances=None, output=None): + """ + Classify 10000 positive and negative tweets using Vader approach. + + :param n_instances: the number of total tweets that have to be classified. + :param output: the output file where results have to be reported. + """ + from collections import defaultdict + from nltk.corpus import twitter_samples + from nltk.sentiment import SentimentIntensityAnalyzer + from nltk.metrics import ( + accuracy as eval_accuracy, + precision as eval_precision, + recall as eval_recall, + f_measure as eval_f_measure, + ) + + if n_instances is not None: + n_instances = int(n_instances / 2) + + fields = ['id', 'text'] + positive_json = twitter_samples.abspath("positive_tweets.json") + positive_csv = 'positive_tweets.csv' + json2csv_preprocess( + positive_json, + positive_csv, + fields, + strip_off_emoticons=False, + limit=n_instances, + ) + + negative_json = twitter_samples.abspath("negative_tweets.json") + negative_csv = 'negative_tweets.csv' + json2csv_preprocess( + negative_json, + negative_csv, + fields, + strip_off_emoticons=False, + limit=n_instances, + ) + + pos_docs = parse_tweets_set(positive_csv, label='pos') + neg_docs = parse_tweets_set(negative_csv, label='neg') + + # We separately split subjective and objective instances to keep a balanced + # uniform class distribution in both train and test sets. 
+ train_pos_docs, test_pos_docs = split_train_test(pos_docs) + train_neg_docs, test_neg_docs = split_train_test(neg_docs) + + training_tweets = train_pos_docs + train_neg_docs + testing_tweets = test_pos_docs + test_neg_docs + + vader_analyzer = SentimentIntensityAnalyzer() + + gold_results = defaultdict(set) + test_results = defaultdict(set) + acc_gold_results = [] + acc_test_results = [] + labels = set() + num = 0 + for i, (text, label) in enumerate(testing_tweets): + labels.add(label) + gold_results[label].add(i) + acc_gold_results.append(label) + score = vader_analyzer.polarity_scores(text)['compound'] + if score > 0: + observed = 'pos' + else: + observed = 'neg' + num += 1 + acc_test_results.append(observed) + test_results[observed].add(i) + metrics_results = {} + for label in labels: + accuracy_score = eval_accuracy(acc_gold_results, acc_test_results) + metrics_results['Accuracy'] = accuracy_score + precision_score = eval_precision(gold_results[label], test_results[label]) + metrics_results['Precision [{0}]'.format(label)] = precision_score + recall_score = eval_recall(gold_results[label], test_results[label]) + metrics_results['Recall [{0}]'.format(label)] = recall_score + f_measure_score = eval_f_measure(gold_results[label], test_results[label]) + metrics_results['F-measure [{0}]'.format(label)] = f_measure_score + + for result in sorted(metrics_results): + print('{0}: {1}'.format(result, metrics_results[result])) + + if output: + output_markdown( + output, + Approach='Vader', + Dataset='labeled_tweets', + Instances=n_instances, + Results=metrics_results, + ) + + +if __name__ == '__main__': + from nltk.classify import NaiveBayesClassifier, MaxentClassifier + from nltk.classify.scikitlearn import SklearnClassifier + from sklearn.svm import LinearSVC + from nltk.twitter.common import outf_writer_compat, extract_fields + + naive_bayes = NaiveBayesClassifier.train + svm = SklearnClassifier(LinearSVC()).train + maxent = MaxentClassifier.train + + demo_tweets(naive_bayes) + # demo_movie_reviews(svm) + # demo_subjectivity(svm) + # demo_sent_subjectivity("she's an artist , but hasn't picked up a brush in a year . ") + # demo_liu_hu_lexicon("This movie was actually neither that funny, nor super witty.", plot=True) + # demo_vader_instance("This movie was actually neither that funny, nor super witty.") + # demo_vader_tweets() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/sentiment/vader.py b/venv.bak/lib/python3.7/site-packages/nltk/sentiment/vader.py new file mode 100644 index 0000000..da9fab7 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/sentiment/vader.py @@ -0,0 +1,626 @@ +# coding: utf-8 +# Natural Language Toolkit: vader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: C.J. Hutto +# Ewan Klein (modifications) +# Pierpaolo Pantone <24alsecondo@gmail.com> (modifications) +# George Berry (modifications) +# URL: +# For license information, see LICENSE.TXT +# +# Modifications to the original VADER code have been made in order to +# integrate it into NLTK. These have involved changes to +# ensure Python 3 compatibility, and refactoring to achieve greater modularity. + +""" +If you use the VADER sentiment analysis tools, please cite: + +Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for +Sentiment Analysis of Social Media Text. Eighth International Conference on +Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014. 
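+
+A short usage sketch (the ``vader_lexicon`` resource must be installed first,
+e.g. via ``nltk.download('vader_lexicon')``; the example sentence is only an
+illustration):
+
+ >>> from nltk.sentiment.vader import SentimentIntensityAnalyzer
+ >>> sia = SentimentIntensityAnalyzer()
+ >>> sia.polarity_scores("VADER is smart, handsome, and funny!")  # doctest: +SKIP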
+""" + +import math +import re +import string +from itertools import product +import nltk.data +from .util import pairwise + +##Constants## + +# (empirically derived mean sentiment intensity rating increase for booster words) +B_INCR = 0.293 +B_DECR = -0.293 + +# (empirically derived mean sentiment intensity rating increase for using +# ALLCAPs to emphasize a word) +C_INCR = 0.733 + +N_SCALAR = -0.74 + +# for removing punctuation +REGEX_REMOVE_PUNCTUATION = re.compile('[{0}]'.format(re.escape(string.punctuation))) + +PUNC_LIST = [ + ".", + "!", + "?", + ",", + ";", + ":", + "-", + "'", + "\"", + "!!", + "!!!", + "??", + "???", + "?!?", + "!?!", + "?!?!", + "!?!?", +] +NEGATE = { + "aint", + "arent", + "cannot", + "cant", + "couldnt", + "darent", + "didnt", + "doesnt", + "ain't", + "aren't", + "can't", + "couldn't", + "daren't", + "didn't", + "doesn't", + "dont", + "hadnt", + "hasnt", + "havent", + "isnt", + "mightnt", + "mustnt", + "neither", + "don't", + "hadn't", + "hasn't", + "haven't", + "isn't", + "mightn't", + "mustn't", + "neednt", + "needn't", + "never", + "none", + "nope", + "nor", + "not", + "nothing", + "nowhere", + "oughtnt", + "shant", + "shouldnt", + "uhuh", + "wasnt", + "werent", + "oughtn't", + "shan't", + "shouldn't", + "uh-uh", + "wasn't", + "weren't", + "without", + "wont", + "wouldnt", + "won't", + "wouldn't", + "rarely", + "seldom", + "despite", +} + +# booster/dampener 'intensifiers' or 'degree adverbs' +# http://en.wiktionary.org/wiki/Category:English_degree_adverbs + +BOOSTER_DICT = { + "absolutely": B_INCR, + "amazingly": B_INCR, + "awfully": B_INCR, + "completely": B_INCR, + "considerably": B_INCR, + "decidedly": B_INCR, + "deeply": B_INCR, + "effing": B_INCR, + "enormously": B_INCR, + "entirely": B_INCR, + "especially": B_INCR, + "exceptionally": B_INCR, + "extremely": B_INCR, + "fabulously": B_INCR, + "flipping": B_INCR, + "flippin": B_INCR, + "fricking": B_INCR, + "frickin": B_INCR, + "frigging": B_INCR, + "friggin": B_INCR, + "fully": B_INCR, + "fucking": B_INCR, + "greatly": B_INCR, + "hella": B_INCR, + "highly": B_INCR, + "hugely": B_INCR, + "incredibly": B_INCR, + "intensely": B_INCR, + "majorly": B_INCR, + "more": B_INCR, + "most": B_INCR, + "particularly": B_INCR, + "purely": B_INCR, + "quite": B_INCR, + "really": B_INCR, + "remarkably": B_INCR, + "so": B_INCR, + "substantially": B_INCR, + "thoroughly": B_INCR, + "totally": B_INCR, + "tremendously": B_INCR, + "uber": B_INCR, + "unbelievably": B_INCR, + "unusually": B_INCR, + "utterly": B_INCR, + "very": B_INCR, + "almost": B_DECR, + "barely": B_DECR, + "hardly": B_DECR, + "just enough": B_DECR, + "kind of": B_DECR, + "kinda": B_DECR, + "kindof": B_DECR, + "kind-of": B_DECR, + "less": B_DECR, + "little": B_DECR, + "marginally": B_DECR, + "occasionally": B_DECR, + "partly": B_DECR, + "scarcely": B_DECR, + "slightly": B_DECR, + "somewhat": B_DECR, + "sort of": B_DECR, + "sorta": B_DECR, + "sortof": B_DECR, + "sort-of": B_DECR, +} + +# check for special case idioms using a sentiment-laden keyword known to SAGE +SPECIAL_CASE_IDIOMS = { + "the shit": 3, + "the bomb": 3, + "bad ass": 1.5, + "yeah right": -2, + "cut the mustard": 2, + "kiss of death": -1.5, + "hand to mouth": -2, +} + + +##Static methods## + + +def negated(input_words, include_nt=True): + """ + Determine if input contains negation words + """ + neg_words = NEGATE + if any(word.lower() in neg_words for word in input_words): + return True + if include_nt: + if any("n't" in word.lower() for word in input_words): + return True + for first, second in 
pairwise(input_words): + if second.lower() == "least" and first.lower() != 'at': + return True + return False + + +def normalize(score, alpha=15): + """ + Normalize the score to be between -1 and 1 using an alpha that + approximates the max expected value + """ + norm_score = score / math.sqrt((score * score) + alpha) + return norm_score + + +def allcap_differential(words): + """ + Check whether just some words in the input are ALL CAPS + + :param list words: The words to inspect + :returns: `True` if some but not all items in `words` are ALL CAPS + """ + is_different = False + allcap_words = 0 + for word in words: + if word.isupper(): + allcap_words += 1 + cap_differential = len(words) - allcap_words + if 0 < cap_differential < len(words): + is_different = True + return is_different + + +def scalar_inc_dec(word, valence, is_cap_diff): + """ + Check if the preceding words increase, decrease, or negate/nullify the + valence + """ + scalar = 0.0 + word_lower = word.lower() + if word_lower in BOOSTER_DICT: + scalar = BOOSTER_DICT[word_lower] + if valence < 0: + scalar *= -1 + # check if booster/dampener word is in ALLCAPS (while others aren't) + if word.isupper() and is_cap_diff: + if valence > 0: + scalar += C_INCR + else: + scalar -= C_INCR + return scalar + + +class SentiText(object): + """ + Identify sentiment-relevant string-level properties of input text. + """ + + def __init__(self, text): + if not isinstance(text, str): + text = str(text.encode('utf-8')) + self.text = text + self.words_and_emoticons = self._words_and_emoticons() + # doesn't separate words from\ + # adjacent punctuation (keeps emoticons & contractions) + self.is_cap_diff = allcap_differential(self.words_and_emoticons) + + def _words_plus_punc(self): + """ + Returns mapping of form: + { + 'cat,': 'cat', + ',cat': 'cat', + } + """ + no_punc_text = REGEX_REMOVE_PUNCTUATION.sub('', self.text) + # removes punctuation (but loses emoticons & contractions) + words_only = no_punc_text.split() + # remove singletons + words_only = set(w for w in words_only if len(w) > 1) + # the product gives ('cat', ',') and (',', 'cat') + punc_before = {''.join(p): p[1] for p in product(PUNC_LIST, words_only)} + punc_after = {''.join(p): p[0] for p in product(words_only, PUNC_LIST)} + words_punc_dict = punc_before + words_punc_dict.update(punc_after) + return words_punc_dict + + def _words_and_emoticons(self): + """ + Removes leading and trailing puncutation + Leaves contractions and most emoticons + Does not preserve punc-plus-letter emoticons (e.g. :D) + """ + wes = self.text.split() + words_punc_dict = self._words_plus_punc() + wes = [we for we in wes if len(we) > 1] + for i, we in enumerate(wes): + if we in words_punc_dict: + wes[i] = words_punc_dict[we] + return wes + + +class SentimentIntensityAnalyzer(object): + """ + Give a sentiment intensity score to sentences. + """ + + def __init__( + self, lexicon_file="sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt" + ): + self.lexicon_file = nltk.data.load(lexicon_file) + self.lexicon = self.make_lex_dict() + + def make_lex_dict(self): + """ + Convert lexicon file to a dictionary + """ + lex_dict = {} + for line in self.lexicon_file.split('\n'): + (word, measure) = line.strip().split('\t')[0:2] + lex_dict[word] = float(measure) + return lex_dict + + def polarity_scores(self, text): + """ + Return a float for sentiment strength based on the input text. + Positive values are positive valence, negative value are negative + valence. 
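+
+ The scores are returned as a dict with the keys ``neg``, ``neu``, ``pos``
+ and ``compound`` (built by ``score_valence`` below). A minimal, illustrative
+ call:
+
+ >>> SentimentIntensityAnalyzer().polarity_scores("Not bad at all")  # doctest: +SKIP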
+ """ + sentitext = SentiText(text) + # text, words_and_emoticons, is_cap_diff = self.preprocess(text) + + sentiments = [] + words_and_emoticons = sentitext.words_and_emoticons + for item in words_and_emoticons: + valence = 0 + i = words_and_emoticons.index(item) + if ( + i < len(words_and_emoticons) - 1 + and item.lower() == "kind" + and words_and_emoticons[i + 1].lower() == "of" + ) or item.lower() in BOOSTER_DICT: + sentiments.append(valence) + continue + + sentiments = self.sentiment_valence(valence, sentitext, item, i, sentiments) + + sentiments = self._but_check(words_and_emoticons, sentiments) + + return self.score_valence(sentiments, text) + + def sentiment_valence(self, valence, sentitext, item, i, sentiments): + is_cap_diff = sentitext.is_cap_diff + words_and_emoticons = sentitext.words_and_emoticons + item_lowercase = item.lower() + if item_lowercase in self.lexicon: + # get the sentiment valence + valence = self.lexicon[item_lowercase] + + # check if sentiment laden word is in ALL CAPS (while others aren't) + if item.isupper() and is_cap_diff: + if valence > 0: + valence += C_INCR + else: + valence -= C_INCR + + for start_i in range(0, 3): + if ( + i > start_i + and words_and_emoticons[i - (start_i + 1)].lower() + not in self.lexicon + ): + # dampen the scalar modifier of preceding words and emoticons + # (excluding the ones that immediately preceed the item) based + # on their distance from the current item. + s = scalar_inc_dec( + words_and_emoticons[i - (start_i + 1)], valence, is_cap_diff + ) + if start_i == 1 and s != 0: + s = s * 0.95 + if start_i == 2 and s != 0: + s = s * 0.9 + valence = valence + s + valence = self._never_check( + valence, words_and_emoticons, start_i, i + ) + if start_i == 2: + valence = self._idioms_check(valence, words_and_emoticons, i) + + # future work: consider other sentiment-laden idioms + # other_idioms = + # {"back handed": -2, "blow smoke": -2, "blowing smoke": -2, + # "upper hand": 1, "break a leg": 2, + # "cooking with gas": 2, "in the black": 2, "in the red": -2, + # "on the ball": 2,"under the weather": -2} + + valence = self._least_check(valence, words_and_emoticons, i) + + sentiments.append(valence) + return sentiments + + def _least_check(self, valence, words_and_emoticons, i): + # check for negation case using "least" + if ( + i > 1 + and words_and_emoticons[i - 1].lower() not in self.lexicon + and words_and_emoticons[i - 1].lower() == "least" + ): + if ( + words_and_emoticons[i - 2].lower() != "at" + and words_and_emoticons[i - 2].lower() != "very" + ): + valence = valence * N_SCALAR + elif ( + i > 0 + and words_and_emoticons[i - 1].lower() not in self.lexicon + and words_and_emoticons[i - 1].lower() == "least" + ): + valence = valence * N_SCALAR + return valence + + def _but_check(self, words_and_emoticons, sentiments): + # check for modification in sentiment due to contrastive conjunction 'but' + if 'but' in words_and_emoticons or 'BUT' in words_and_emoticons: + try: + bi = words_and_emoticons.index('but') + except ValueError: + bi = words_and_emoticons.index('BUT') + for sentiment in sentiments: + si = sentiments.index(sentiment) + if si < bi: + sentiments.pop(si) + sentiments.insert(si, sentiment * 0.5) + elif si > bi: + sentiments.pop(si) + sentiments.insert(si, sentiment * 1.5) + return sentiments + + def _idioms_check(self, valence, words_and_emoticons, i): + onezero = "{0} {1}".format(words_and_emoticons[i - 1], words_and_emoticons[i]) + + twoonezero = "{0} {1} {2}".format( + words_and_emoticons[i - 2], + 
words_and_emoticons[i - 1], + words_and_emoticons[i], + ) + + twoone = "{0} {1}".format( + words_and_emoticons[i - 2], words_and_emoticons[i - 1] + ) + + threetwoone = "{0} {1} {2}".format( + words_and_emoticons[i - 3], + words_and_emoticons[i - 2], + words_and_emoticons[i - 1], + ) + + threetwo = "{0} {1}".format( + words_and_emoticons[i - 3], words_and_emoticons[i - 2] + ) + + sequences = [onezero, twoonezero, twoone, threetwoone, threetwo] + + for seq in sequences: + if seq in SPECIAL_CASE_IDIOMS: + valence = SPECIAL_CASE_IDIOMS[seq] + break + + if len(words_and_emoticons) - 1 > i: + zeroone = "{0} {1}".format( + words_and_emoticons[i], words_and_emoticons[i + 1] + ) + if zeroone in SPECIAL_CASE_IDIOMS: + valence = SPECIAL_CASE_IDIOMS[zeroone] + if len(words_and_emoticons) - 1 > i + 1: + zeroonetwo = "{0} {1} {2}".format( + words_and_emoticons[i], + words_and_emoticons[i + 1], + words_and_emoticons[i + 2], + ) + if zeroonetwo in SPECIAL_CASE_IDIOMS: + valence = SPECIAL_CASE_IDIOMS[zeroonetwo] + + # check for booster/dampener bi-grams such as 'sort of' or 'kind of' + if threetwo in BOOSTER_DICT or twoone in BOOSTER_DICT: + valence = valence + B_DECR + return valence + + def _never_check(self, valence, words_and_emoticons, start_i, i): + if start_i == 0: + if negated([words_and_emoticons[i - 1]]): + valence = valence * N_SCALAR + if start_i == 1: + if words_and_emoticons[i - 2] == "never" and ( + words_and_emoticons[i - 1] == "so" + or words_and_emoticons[i - 1] == "this" + ): + valence = valence * 1.5 + elif negated([words_and_emoticons[i - (start_i + 1)]]): + valence = valence * N_SCALAR + if start_i == 2: + if ( + words_and_emoticons[i - 3] == "never" + and ( + words_and_emoticons[i - 2] == "so" + or words_and_emoticons[i - 2] == "this" + ) + or ( + words_and_emoticons[i - 1] == "so" + or words_and_emoticons[i - 1] == "this" + ) + ): + valence = valence * 1.25 + elif negated([words_and_emoticons[i - (start_i + 1)]]): + valence = valence * N_SCALAR + return valence + + def _punctuation_emphasis(self, sum_s, text): + # add emphasis from exclamation points and question marks + ep_amplifier = self._amplify_ep(text) + qm_amplifier = self._amplify_qm(text) + punct_emph_amplifier = ep_amplifier + qm_amplifier + return punct_emph_amplifier + + def _amplify_ep(self, text): + # check for added emphasis resulting from exclamation points (up to 4 of them) + ep_count = text.count("!") + if ep_count > 4: + ep_count = 4 + # (empirically derived mean sentiment intensity rating increase for + # exclamation points) + ep_amplifier = ep_count * 0.292 + return ep_amplifier + + def _amplify_qm(self, text): + # check for added emphasis resulting from question marks (2 or 3+) + qm_count = text.count("?") + qm_amplifier = 0 + if qm_count > 1: + if qm_count <= 3: + # (empirically derived mean sentiment intensity rating increase for + # question marks) + qm_amplifier = qm_count * 0.18 + else: + qm_amplifier = 0.96 + return qm_amplifier + + def _sift_sentiment_scores(self, sentiments): + # want separate positive versus negative sentiment scores + pos_sum = 0.0 + neg_sum = 0.0 + neu_count = 0 + for sentiment_score in sentiments: + if sentiment_score > 0: + pos_sum += ( + float(sentiment_score) + 1 + ) # compensates for neutral words that are counted as 1 + if sentiment_score < 0: + neg_sum += ( + float(sentiment_score) - 1 + ) # when used with math.fabs(), compensates for neutrals + if sentiment_score == 0: + neu_count += 1 + return pos_sum, neg_sum, neu_count + + def score_valence(self, sentiments, text): + if 
sentiments: + sum_s = float(sum(sentiments)) + # compute and add emphasis from punctuation in text + punct_emph_amplifier = self._punctuation_emphasis(sum_s, text) + if sum_s > 0: + sum_s += punct_emph_amplifier + elif sum_s < 0: + sum_s -= punct_emph_amplifier + + compound = normalize(sum_s) + # discriminate between positive, negative and neutral sentiment scores + pos_sum, neg_sum, neu_count = self._sift_sentiment_scores(sentiments) + + if pos_sum > math.fabs(neg_sum): + pos_sum += punct_emph_amplifier + elif pos_sum < math.fabs(neg_sum): + neg_sum -= punct_emph_amplifier + + total = pos_sum + math.fabs(neg_sum) + neu_count + pos = math.fabs(pos_sum / total) + neg = math.fabs(neg_sum / total) + neu = math.fabs(neu_count / total) + + else: + compound = 0.0 + pos = 0.0 + neg = 0.0 + neu = 0.0 + + sentiment_dict = { + "neg": round(neg, 3), + "neu": round(neu, 3), + "pos": round(pos, 3), + "compound": round(compound, 4), + } + + return sentiment_dict diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/stem/__init__.py new file mode 100644 index 0000000..d31603f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/stem/__init__.py @@ -0,0 +1,32 @@ +# Natural Language Toolkit: Stemmers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Trevor Cohn +# Edward Loper +# Steven Bird +# URL: +# For license information, see LICENSE.TXT + +""" +NLTK Stemmers + +Interfaces used to remove morphological affixes from words, leaving +only the word stem. Stemming algorithms aim to remove those affixes +required for eg. grammatical role, tense, derivational morphology +leaving only the stem of the word. This is a difficult problem due to +irregular words (eg. common verbs in English), complicated +morphological rules, and part-of-speech and sense ambiguities +(eg. ``ceil-`` is not the stem of ``ceiling``). + +StemmerI defines a standard interface for stemmers. 
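+
+A brief usage sketch; the expected stems follow the Porter algorithm and the
+Lancaster doctest examples found elsewhere in this package:
+
+ >>> from nltk.stem import PorterStemmer, LancasterStemmer
+ >>> PorterStemmer().stem('running')
+ 'run'
+ >>> LancasterStemmer().stem('maximum')
+ 'maxim'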
+""" + +from nltk.stem.api import StemmerI +from nltk.stem.regexp import RegexpStemmer +from nltk.stem.lancaster import LancasterStemmer +from nltk.stem.isri import ISRIStemmer +from nltk.stem.porter import PorterStemmer +from nltk.stem.snowball import SnowballStemmer +from nltk.stem.wordnet import WordNetLemmatizer +from nltk.stem.rslp import RSLPStemmer +from nltk.stem.cistem import Cistem diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..e13fdce Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/api.cpython-37.pyc new file mode 100644 index 0000000..cecd952 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/api.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/arlstem.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/arlstem.cpython-37.pyc new file mode 100644 index 0000000..5f4a276 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/arlstem.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/cistem.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/cistem.cpython-37.pyc new file mode 100644 index 0000000..ecbce5c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/cistem.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/isri.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/isri.cpython-37.pyc new file mode 100644 index 0000000..cd09f69 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/isri.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/lancaster.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/lancaster.cpython-37.pyc new file mode 100644 index 0000000..e4c0f2e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/lancaster.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/porter.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/porter.cpython-37.pyc new file mode 100644 index 0000000..4faa979 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/porter.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/regexp.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/regexp.cpython-37.pyc new file mode 100644 index 0000000..30f1a11 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/regexp.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/rslp.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/rslp.cpython-37.pyc new file mode 100644 index 0000000..154dc72 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/rslp.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/snowball.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/snowball.cpython-37.pyc new file mode 100644 index 0000000..2976d93 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/snowball.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000..b6d104d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/util.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/wordnet.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/wordnet.cpython-37.pyc new file mode 100644 index 0000000..d323a69 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/stem/__pycache__/wordnet.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/api.py b/venv.bak/lib/python3.7/site-packages/nltk/stem/api.py new file mode 100644 index 0000000..aa3b326 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/stem/api.py @@ -0,0 +1,29 @@ +# Natural Language Toolkit: Stemmer Interface +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Trevor Cohn +# Edward Loper +# Steven Bird +# URL: +# For license information, see LICENSE.TXT + +from abc import ABCMeta, abstractmethod +from six import add_metaclass + + +@add_metaclass(ABCMeta) +class StemmerI(object): + """ + A processing interface for removing morphological affixes from + words. This process is known as stemming. + + """ + + @abstractmethod + def stem(self, token): + """ + Strip affixes from the token and return the stem. + + :param token: The token that should be stemmed. + :type token: str + """ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/arlstem.py b/venv.bak/lib/python3.7/site-packages/nltk/stem/arlstem.py new file mode 100644 index 0000000..d2777c0 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/stem/arlstem.py @@ -0,0 +1,363 @@ +# -*- coding: utf-8 -*- +# +# Natural Language Toolkit: ARLSTem Stemmer +# +# Copyright (C) 2001-2019 NLTK Project +# +# Author: Kheireddine Abainia (x-programer) +# Algorithms: Kheireddine Abainia +# Siham Ouamour +# Halim Sayoud +# URL: +# For license information, see LICENSE.TXT + + +""" +ARLSTem Arabic Stemmer +The details about the implementation of this algorithm are described in: +K. Abainia, S. Ouamour and H. Sayoud, A Novel Robust Arabic Light Stemmer , +Journal of Experimental & Theoretical Artificial Intelligence (JETAI'17), +Vol. 29, No. 3, 2017, pp. 557-573. +The ARLSTem is a light Arabic stemmer that is based on removing the affixes +from the word (i.e. prefixes, suffixes and infixes). It was evaluated and +compared to several other stemmers using Paice's parameters (under-stemming +index, over-stemming index and stemming weight), and the results showed that +ARLSTem is promising and producing high performances. This stemmer is not +based on any dictionary and can be used on-line effectively. +""" +from __future__ import unicode_literals +import re + +from nltk.stem.api import StemmerI + + +class ARLSTem(StemmerI): + ''' + ARLSTem stemmer : a light Arabic Stemming algorithm without any dictionary. + Department of Telecommunication & Information Processing. USTHB University, + Algiers, Algeria. + ARLSTem.stem(token) returns the Arabic stem for the input token. 
+ The ARLSTem Stemmer requires that all tokens are encoded using Unicode + encoding. + ''' + + def __init__(self): + # different Alif with hamza + self.re_hamzated_alif = re.compile(r'[\u0622\u0623\u0625]') + self.re_alifMaqsura = re.compile(r'[\u0649]') + self.re_diacritics = re.compile(r'[\u064B-\u065F]') + + # Alif Laam, Laam Laam, Fa Laam, Fa Ba + self.pr2 = ['\u0627\u0644', '\u0644\u0644', '\u0641\u0644', '\u0641\u0628'] + # Ba Alif Laam, Kaaf Alif Laam, Waaw Alif Laam + self.pr3 = ['\u0628\u0627\u0644', '\u0643\u0627\u0644', '\u0648\u0627\u0644'] + # Fa Laam Laam, Waaw Laam Laam + self.pr32 = ['\u0641\u0644\u0644', '\u0648\u0644\u0644'] + # Fa Ba Alif Laam, Waaw Ba Alif Laam, Fa Kaaf Alif Laam + self.pr4 = [ + '\u0641\u0628\u0627\u0644', + '\u0648\u0628\u0627\u0644', + '\u0641\u0643\u0627\u0644', + ] + + # Kaf Yaa, Kaf Miim + self.su2 = ['\u0643\u064A', '\u0643\u0645'] + # Ha Alif, Ha Miim + self.su22 = ['\u0647\u0627', '\u0647\u0645'] + # Kaf Miim Alif, Kaf Noon Shadda + self.su3 = ['\u0643\u0645\u0627', '\u0643\u0646\u0651'] + # Ha Miim Alif, Ha Noon Shadda + self.su32 = ['\u0647\u0645\u0627', '\u0647\u0646\u0651'] + + # Alif Noon, Ya Noon, Waaw Noon + self.pl_si2 = ['\u0627\u0646', '\u064A\u0646', '\u0648\u0646'] + # Taa Alif Noon, Taa Ya Noon + self.pl_si3 = ['\u062A\u0627\u0646', '\u062A\u064A\u0646'] + + # Alif Noon, Waaw Noon + self.verb_su2 = ['\u0627\u0646', '\u0648\u0646'] + # Siin Taa, Siin Yaa + self.verb_pr2 = ['\u0633\u062A', '\u0633\u064A'] + # Siin Alif, Siin Noon + self.verb_pr22 = ['\u0633\u0627', '\u0633\u0646'] + # Lam Noon, Lam Taa, Lam Yaa, Lam Hamza + self.verb_pr33 = [ + '\u0644\u0646', + '\u0644\u062A', + '\u0644\u064A', + '\u0644\u0623', + ] + # Taa Miim Alif, Taa Noon Shadda + self.verb_suf3 = ['\u062A\u0645\u0627', '\u062A\u0646\u0651'] + # Noon Alif, Taa Miim, Taa Alif, Waaw Alif + self.verb_suf2 = [ + '\u0646\u0627', + '\u062A\u0645', + '\u062A\u0627', + '\u0648\u0627', + ] + # Taa, Alif, Noon + self.verb_suf1 = ['\u062A', '\u0627', '\u0646'] + + def stem(self, token): + """ + call this function to get the word's stem based on ARLSTem . + """ + try: + if token is None: + raise ValueError( + "The word could not be stemmed, because \ + it is empty !" + ) + # remove Arabic diacritics and replace some letters with others + token = self.norm(token) + # strip common prefixes of the nouns + pre = self.pref(token) + if pre is not None: + token = pre + # strip the suffixes which are common to nouns and verbs + token = self.suff(token) + # transform a plural noun to a singular noun + ps = self.plur2sing(token) + if ps is None: + # transform from the feminine form to the masculine form + fm = self.fem2masc(token) + if fm is not None: + return fm + else: + if pre is None: # if the prefixes are not stripped + # strip the verb prefixes and suffixes + return self.verb(token) + else: + return ps + return token + except ValueError as e: + print(e) + + def norm(self, token): + """ + normalize the word by removing diacritics, replacing hamzated Alif + with Alif replacing AlifMaqsura with Yaa and removing Waaw at the + beginning. 
+ """ + # strip Arabic diacritics + token = self.re_diacritics.sub('', token) + # replace Hamzated Alif with Alif bare + token = self.re_hamzated_alif.sub('\u0627', token) + # replace alifMaqsura with Yaa + token = self.re_alifMaqsura.sub('\u064A', token) + # strip the Waaw from the word beginning if the remaining is 3 letters + # at least + if token.startswith('\u0648') and len(token) > 3: + token = token[1:] + return token + + def pref(self, token): + """ + remove prefixes from the words' beginning. + """ + if len(token) > 5: + for p3 in self.pr3: + if token.startswith(p3): + return token[3:] + if len(token) > 6: + for p4 in self.pr4: + if token.startswith(p4): + return token[4:] + if len(token) > 5: + for p3 in self.pr32: + if token.startswith(p3): + return token[3:] + if len(token) > 4: + for p2 in self.pr2: + if token.startswith(p2): + return token[2:] + + def suff(self, token): + """ + remove suffixes from the word's end. + """ + if token.endswith('\u0643') and len(token) > 3: + return token[:-1] + if len(token) > 4: + for s2 in self.su2: + if token.endswith(s2): + return token[:-2] + if len(token) > 5: + for s3 in self.su3: + if token.endswith(s3): + return token[:-3] + if token.endswith('\u0647') and len(token) > 3: + token = token[:-1] + return token + if len(token) > 4: + for s2 in self.su22: + if token.endswith(s2): + return token[:-2] + if len(token) > 5: + for s3 in self.su32: + if token.endswith(s3): + return token[:-3] + if token.endswith('\u0646\u0627') and len(token) > 4: + return token[:-2] + return token + + def fem2masc(self, token): + """ + transform the word from the feminine form to the masculine form. + """ + if token.endswith('\u0629') and len(token) > 3: + return token[:-1] + + def plur2sing(self, token): + """ + transform the word from the plural form to the singular form. 
+ """ + if len(token) > 4: + for ps2 in self.pl_si2: + if token.endswith(ps2): + return token[:-2] + if len(token) > 5: + for ps3 in self.pl_si3: + if token.endswith(ps3): + return token[:-3] + if len(token) > 3 and token.endswith('\u0627\u062A'): + return token[:-2] + if len(token) > 3 and token.startswith('\u0627') and token[2] == '\u0627': + return token[:2] + token[3:] + if len(token) > 4 and token.startswith('\u0627') and token[-2] == '\u0627': + return token[1:-2] + token[-1] + + def verb(self, token): + """ + stem the verb prefixes and suffixes or both + """ + vb = self.verb_t1(token) + if vb is not None: + return vb + vb = self.verb_t2(token) + if vb is not None: + return vb + vb = self.verb_t3(token) + if vb is not None: + return vb + vb = self.verb_t4(token) + if vb is not None: + return vb + vb = self.verb_t5(token) + if vb is not None: + return vb + return self.verb_t6(token) + + def verb_t1(self, token): + """ + stem the present prefixes and suffixes + """ + if len(token) > 5 and token.startswith('\u062A'): # Taa + for s2 in self.pl_si2: + if token.endswith(s2): + return token[1:-2] + if len(token) > 5 and token.startswith('\u064A'): # Yaa + for s2 in self.verb_su2: + if token.endswith(s2): + return token[1:-2] + if len(token) > 4 and token.startswith('\u0627'): # Alif + # Waaw Alif + if len(token) > 5 and token.endswith('\u0648\u0627'): + return token[1:-2] + # Yaa + if token.endswith('\u064A'): + return token[1:-1] + # Alif + if token.endswith('\u0627'): + return token[1:-1] + # Noon + if token.endswith('\u0646'): + return token[1:-1] + # ^Yaa, Noon$ + if len(token) > 4 and token.startswith('\u064A') and token.endswith('\u0646'): + return token[1:-1] + # ^Taa, Noon$ + if len(token) > 4 and token.startswith('\u062A') and token.endswith('\u0646'): + return token[1:-1] + + def verb_t2(self, token): + """ + stem the future prefixes and suffixes + """ + if len(token) > 6: + for s2 in self.pl_si2: + # ^Siin Taa + if token.startswith(self.verb_pr2[0]) and token.endswith(s2): + return token[2:-2] + # ^Siin Yaa, Alif Noon$ + if token.startswith(self.verb_pr2[1]) and token.endswith(self.pl_si2[0]): + return token[2:-2] + # ^Siin Yaa, Waaw Noon$ + if token.startswith(self.verb_pr2[1]) and token.endswith(self.pl_si2[2]): + return token[2:-2] + # ^Siin Taa, Noon$ + if ( + len(token) > 5 + and token.startswith(self.verb_pr2[0]) + and token.endswith('\u0646') + ): + return token[2:-1] + # ^Siin Yaa, Noon$ + if ( + len(token) > 5 + and token.startswith(self.verb_pr2[1]) + and token.endswith('\u0646') + ): + return token[2:-1] + + def verb_t3(self, token): + """ + stem the present suffixes + """ + if len(token) > 5: + for su3 in self.verb_suf3: + if token.endswith(su3): + return token[:-3] + if len(token) > 4: + for su2 in self.verb_suf2: + if token.endswith(su2): + return token[:-2] + if len(token) > 3: + for su1 in self.verb_suf1: + if token.endswith(su1): + return token[:-1] + + def verb_t4(self, token): + """ + stem the present prefixes + """ + if len(token) > 3: + for pr1 in self.verb_suf1: + if token.startswith(pr1): + return token[1:] + if token.startswith('\u064A'): + return token[1:] + + def verb_t5(self, token): + """ + stem the future prefixes + """ + if len(token) > 4: + for pr2 in self.verb_pr22: + if token.startswith(pr2): + return token[2:] + for pr2 in self.verb_pr2: + if token.startswith(pr2): + return token[2:] + return token + + def verb_t6(self, token): + """ + stem the order prefixes + """ + if len(token) > 4: + for pr3 in self.verb_pr33: + if token.startswith(pr3): + 
return token[2:] + return token diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/cistem.py b/venv.bak/lib/python3.7/site-packages/nltk/stem/cistem.py new file mode 100644 index 0000000..0ddbe22 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/stem/cistem.py @@ -0,0 +1,220 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: CISTEM Stemmer for German +# Copyright (C) 2001-2019 NLTK Project +# Author: Leonie Weissweiler +# Algorithm: Leonie Weissweiler +# Alexander Fraser +# URL: +# For license information, see LICENSE.TXT + +from __future__ import unicode_literals +import re +from nltk.stem.api import StemmerI +from nltk.compat import python_2_unicode_compatible + +@python_2_unicode_compatible +class Cistem(StemmerI): + """ + CISTEM Stemmer for German + + This is the official Python implementation of the CISTEM stemmer. + It is based on the paper + Leonie Weissweiler, Alexander Fraser (2017). Developing a Stemmer for German + Based on a Comparative Analysis of Publicly Available Stemmers. + In Proceedings of the German Society for Computational Linguistics and Language + Technology (GSCL) + which can be read here: + http://www.cis.lmu.de/~weissweiler/cistem/ + + In the paper, we conducted an analysis of publicly available stemmers, + developed two gold standards for German stemming and evaluated the stemmers + based on the two gold standards. We then proposed the stemmer implemented here + and show that it achieves slightly better f-measure than the other stemmers and + is thrice as fast as the Snowball stemmer for German while being about as fast + as most other stemmers. + + case_insensitive is a a boolean specifying if case-insensitive stemming + should be used. Case insensitivity improves performance only if words in the + text may be incorrectly upper case. For all-lowercase and correctly cased + text, best performance is achieved by setting case_insensitive for false. + + :param case_insensitive: if True, the stemming is case insensitive. False by default. + :type case_insensitive: bool + """ + strip_ge = re.compile(r"^ge(.{4,})") + repl_xx = re.compile(r"(.)\1") + strip_emr = re.compile(r"e[mr]$") + strip_nd = re.compile(r"nd$") + strip_t = re.compile(r"t$") + strip_esn = re.compile(r"[esn]$") + repl_xx_back = re.compile(r"(.)\*") + + def __init__(self, case_insensitive=False): + self._case_insensitive = case_insensitive + + @staticmethod + def replace_to(word): + word = word.replace("sch", "$") + word = word.replace("ei", "%") + word = word.replace("ie", "&") + word = Cistem.repl_xx.sub(r"\1*", word) + + return word + + @staticmethod + def replace_back(word): + word = Cistem.repl_xx_back.sub(r"\1\1", word) + word = word.replace("%", "ei") + word = word.replace("&", "ie") + word = word.replace("$", "sch") + + return word + + def stem(self, word): + """ + This method takes the word to be stemmed and returns the stemmed word. 
+ + :param word: the word that is to be stemmed + :type word: unicode + :return word: the stemmed word + :rtype: unicode + + >>> from nltk.stem.cistem import Cistem + >>> stemmer = Cistem() + >>> s1 = "Speicherbehältern" + >>> stemmer.stem(s1) + 'speicherbehalt' + >>> s2 = "Grenzpostens" + >>> stemmer.stem(s2) + 'grenzpost' + >>> s3 = "Ausgefeiltere" + >>> stemmer.stem(s3) + 'ausgefeilt' + >>> stemmer = Cistem(True) + >>> stemmer.stem(s1) + 'speicherbehal' + >>> stemmer.stem(s2) + 'grenzpo' + >>> stemmer.stem(s3) + 'ausgefeil' + """ + if len(word) == 0: + return word + + upper = word[0].isupper() + word = word.lower() + + word = word.replace("ü", "u") + word = word.replace("ö", "o") + word = word.replace("ä", "a") + word = word.replace("ß", "ss") + + word = Cistem.strip_ge.sub(r"\1", word) + word = Cistem.replace_to(word) + + while len(word) > 3: + if len(word) > 5: + (word, success) = Cistem.strip_emr.subn("", word) + if success != 0: + continue + + (word, success) = Cistem.strip_nd.subn("", word) + if success != 0: + continue + + if not upper or self._case_insensitive: + (word, success) = Cistem.strip_t.subn("", word) + if success != 0: + continue + + (word, success) = Cistem.strip_esn.subn("", word) + if success != 0: + continue + else: + break + + word = Cistem.replace_back(word) + + return word + + + def segment(self, word): + """ + This method works very similarly to stem (:func:'cistem.stem'). The difference is that in + addition to returning the stem, it also returns the rest that was removed at + the end. To be able to return the stem unchanged so the stem and the rest + can be concatenated to form the original word, all subsitutions that altered + the stem in any other way than by removing letters at the end were left out. + + :param word: the word that is to be stemmed + :type word: unicode + :return word: the stemmed word + :rtype: unicode + :return word: the removed suffix + :rtype: unicode + + >>> from nltk.stem.cistem import Cistem + >>> stemmer = Cistem() + >>> s1 = "Speicherbehältern" + >>> print("('" + stemmer.segment(s1)[0] + "', '" + stemmer.segment(s1)[1] + "')") + ('speicherbehält', 'ern') + >>> s2 = "Grenzpostens" + >>> stemmer.segment(s2) + ('grenzpost', 'ens') + >>> s3 = "Ausgefeiltere" + >>> stemmer.segment(s3) + ('ausgefeilt', 'ere') + >>> stemmer = Cistem(True) + >>> print("('" + stemmer.segment(s1)[0] + "', '" + stemmer.segment(s1)[1] + "')") + ('speicherbehäl', 'tern') + >>> stemmer.segment(s2) + ('grenzpo', 'stens') + >>> stemmer.segment(s3) + ('ausgefeil', 'tere') + """ + + rest_length = 0 + + if len(word) == 0: + return ("", "") + + upper = word[0].isupper() + word = word.lower() + + original = word[:] + + word = Cistem.replace_to(word) + + while len(word) > 3: + if len(word) > 5: + (word, success) = Cistem.strip_emr.subn("", word) + if success != 0: + rest_length += 2 + continue + + (word, success) = Cistem.strip_nd.subn("", word) + if success != 0: + rest_length += 2 + continue + + if not upper or self._case_insensitive: + (word, success) = Cistem.strip_t.subn("", word) + if success != 0: + rest_length += 1 + continue + + (word, success) = Cistem.strip_esn.subn("", word) + if success != 0: + rest_length += 1 + continue + else: + break + + word = Cistem.replace_back(word) + + if rest_length: + rest = original[-rest_length:] + else: + rest = "" + + return (word, rest) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/isri.py b/venv.bak/lib/python3.7/site-packages/nltk/stem/isri.py new file mode 100644 index 0000000..5e9de8a --- /dev/null +++ 
b/venv.bak/lib/python3.7/site-packages/nltk/stem/isri.py @@ -0,0 +1,397 @@ +# -*- coding: utf-8 -*- +# +# Natural Language Toolkit: The ISRI Arabic Stemmer +# +# Copyright (C) 2001-2019 NLTK Proejct +# Algorithm: Kazem Taghva, Rania Elkhoury, and Jeffrey Coombs (2005) +# Author: Hosam Algasaier +# URL: +# For license information, see LICENSE.TXT + +""" +ISRI Arabic Stemmer + +The algorithm for this stemmer is described in: + +Taghva, K., Elkoury, R., and Coombs, J. 2005. Arabic Stemming without a root dictionary. +Information Science Research Institute. University of Nevada, Las Vegas, USA. + +The Information Science Research Institute’s (ISRI) Arabic stemmer shares many features +with the Khoja stemmer. However, the main difference is that ISRI stemmer does not use root +dictionary. Also, if a root is not found, ISRI stemmer returned normalized form, rather than +returning the original unmodified word. + +Additional adjustments were made to improve the algorithm: + +1- Adding 60 stop words. +2- Adding the pattern (تفاعيل) to ISRI pattern set. +3- The step 2 in the original algorithm was normalizing all hamza. This step is discarded because it +increases the word ambiguities and changes the original root. + +""" +from __future__ import unicode_literals +import re + +from nltk.stem.api import StemmerI + + +class ISRIStemmer(StemmerI): + ''' + ISRI Arabic stemmer based on algorithm: Arabic Stemming without a root dictionary. + Information Science Research Institute. University of Nevada, Las Vegas, USA. + + A few minor modifications have been made to ISRI basic algorithm. + See the source code of this module for more information. + + isri.stem(token) returns Arabic root for the given token. + + The ISRI Stemmer requires that all tokens have Unicode string types. + If you use Python IDLE on Arabic Windows you have to decode text first + using Arabic '1256' coding. 
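+
+ A minimal usage sketch (``some_arabic_token`` is a placeholder for any
+ Unicode Arabic word, not a value defined here):
+
+ >>> from nltk.stem.isri import ISRIStemmer
+ >>> ISRIStemmer().stem(some_arabic_token)  # doctest: +SKIP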
+ ''' + + def __init__(self): + # length three prefixes + self.p3 = [ + '\u0643\u0627\u0644', + '\u0628\u0627\u0644', + '\u0648\u0644\u0644', + '\u0648\u0627\u0644', + ] + + # length two prefixes + self.p2 = ['\u0627\u0644', '\u0644\u0644'] + + # length one prefixes + self.p1 = [ + '\u0644', + '\u0628', + '\u0641', + '\u0633', + '\u0648', + '\u064a', + '\u062a', + '\u0646', + '\u0627', + ] + + # length three suffixes + self.s3 = [ + '\u062a\u0645\u0644', + '\u0647\u0645\u0644', + '\u062a\u0627\u0646', + '\u062a\u064a\u0646', + '\u0643\u0645\u0644', + ] + + # length two suffixes + self.s2 = [ + '\u0648\u0646', + '\u0627\u062a', + '\u0627\u0646', + '\u064a\u0646', + '\u062a\u0646', + '\u0643\u0645', + '\u0647\u0646', + '\u0646\u0627', + '\u064a\u0627', + '\u0647\u0627', + '\u062a\u0645', + '\u0643\u0646', + '\u0646\u064a', + '\u0648\u0627', + '\u0645\u0627', + '\u0647\u0645', + ] + + # length one suffixes + self.s1 = ['\u0629', '\u0647', '\u064a', '\u0643', '\u062a', '\u0627', '\u0646'] + + # groups of length four patterns + self.pr4 = { + 0: ['\u0645'], + 1: ['\u0627'], + 2: ['\u0627', '\u0648', '\u064A'], + 3: ['\u0629'], + } + + # Groups of length five patterns and length three roots + self.pr53 = { + 0: ['\u0627', '\u062a'], + 1: ['\u0627', '\u064a', '\u0648'], + 2: ['\u0627', '\u062a', '\u0645'], + 3: ['\u0645', '\u064a', '\u062a'], + 4: ['\u0645', '\u062a'], + 5: ['\u0627', '\u0648'], + 6: ['\u0627', '\u0645'], + } + + self.re_short_vowels = re.compile(r'[\u064B-\u0652]') + self.re_hamza = re.compile(r'[\u0621\u0624\u0626]') + self.re_initial_hamza = re.compile(r'^[\u0622\u0623\u0625]') + + self.stop_words = [ + '\u064a\u0643\u0648\u0646', + '\u0648\u0644\u064a\u0633', + '\u0648\u0643\u0627\u0646', + '\u0643\u0630\u0644\u0643', + '\u0627\u0644\u062a\u064a', + '\u0648\u0628\u064a\u0646', + '\u0639\u0644\u064a\u0647\u0627', + '\u0645\u0633\u0627\u0621', + '\u0627\u0644\u0630\u064a', + '\u0648\u0643\u0627\u0646\u062a', + '\u0648\u0644\u0643\u0646', + '\u0648\u0627\u0644\u062a\u064a', + '\u062a\u0643\u0648\u0646', + '\u0627\u0644\u064a\u0648\u0645', + '\u0627\u0644\u0644\u0630\u064a\u0646', + '\u0639\u0644\u064a\u0647', + '\u0643\u0627\u0646\u062a', + '\u0644\u0630\u0644\u0643', + '\u0623\u0645\u0627\u0645', + '\u0647\u0646\u0627\u0643', + '\u0645\u0646\u0647\u0627', + '\u0645\u0627\u0632\u0627\u0644', + '\u0644\u0627\u0632\u0627\u0644', + '\u0644\u0627\u064a\u0632\u0627\u0644', + '\u0645\u0627\u064a\u0632\u0627\u0644', + '\u0627\u0635\u0628\u062d', + '\u0623\u0635\u0628\u062d', + '\u0623\u0645\u0633\u0649', + '\u0627\u0645\u0633\u0649', + '\u0623\u0636\u062d\u0649', + '\u0627\u0636\u062d\u0649', + '\u0645\u0627\u0628\u0631\u062d', + '\u0645\u0627\u0641\u062a\u0626', + '\u0645\u0627\u0627\u0646\u0641\u0643', + '\u0644\u0627\u0633\u064a\u0645\u0627', + '\u0648\u0644\u0627\u064a\u0632\u0627\u0644', + '\u0627\u0644\u062d\u0627\u0644\u064a', + '\u0627\u0644\u064a\u0647\u0627', + '\u0627\u0644\u0630\u064a\u0646', + '\u0641\u0627\u0646\u0647', + '\u0648\u0627\u0644\u0630\u064a', + '\u0648\u0647\u0630\u0627', + '\u0644\u0647\u0630\u0627', + '\u0641\u0643\u0627\u0646', + '\u0633\u062a\u0643\u0648\u0646', + '\u0627\u0644\u064a\u0647', + '\u064a\u0645\u0643\u0646', + '\u0628\u0647\u0630\u0627', + '\u0627\u0644\u0630\u0649', + ] + + def stem(self, token): + """ + Stemming a word token using the ISRI stemmer. 
+ """ + token = self.norm( + token, 1 + ) # remove diacritics which representing Arabic short vowels + if token in self.stop_words: + return token # exclude stop words from being processed + token = self.pre32( + token + ) # remove length three and length two prefixes in this order + token = self.suf32( + token + ) # remove length three and length two suffixes in this order + token = self.waw( + token + ) # remove connective ‘و’ if it precedes a word beginning with ‘و’ + token = self.norm(token, 2) # normalize initial hamza to bare alif + # if 4 <= word length <= 7, then stem; otherwise, no stemming + if len(token) == 4: # length 4 word + token = self.pro_w4(token) + elif len(token) == 5: # length 5 word + token = self.pro_w53(token) + token = self.end_w5(token) + elif len(token) == 6: # length 6 word + token = self.pro_w6(token) + token = self.end_w6(token) + elif len(token) == 7: # length 7 word + token = self.suf1(token) + if len(token) == 7: + token = self.pre1(token) + if len(token) == 6: + token = self.pro_w6(token) + token = self.end_w6(token) + return token + + def norm(self, word, num=3): + """ + normalization: + num=1 normalize diacritics + num=2 normalize initial hamza + num=3 both 1&2 + """ + if num == 1: + word = self.re_short_vowels.sub('', word) + elif num == 2: + word = self.re_initial_hamza.sub('\u0627', word) + elif num == 3: + word = self.re_short_vowels.sub('', word) + word = self.re_initial_hamza.sub('\u0627', word) + return word + + def pre32(self, word): + """remove length three and length two prefixes in this order""" + if len(word) >= 6: + for pre3 in self.p3: + if word.startswith(pre3): + return word[3:] + if len(word) >= 5: + for pre2 in self.p2: + if word.startswith(pre2): + return word[2:] + return word + + def suf32(self, word): + """remove length three and length two suffixes in this order""" + if len(word) >= 6: + for suf3 in self.s3: + if word.endswith(suf3): + return word[:-3] + if len(word) >= 5: + for suf2 in self.s2: + if word.endswith(suf2): + return word[:-2] + return word + + def waw(self, word): + """remove connective ‘و’ if it precedes a word beginning with ‘و’ """ + if len(word) >= 4 and word[:2] == '\u0648\u0648': + word = word[1:] + return word + + def pro_w4(self, word): + """process length four patterns and extract length three roots""" + if word[0] in self.pr4[0]: # مفعل + word = word[1:] + elif word[1] in self.pr4[1]: # فاعل + word = word[:1] + word[2:] + elif word[2] in self.pr4[2]: # فعال - فعول - فعيل + word = word[:2] + word[3] + elif word[3] in self.pr4[3]: # فعلة + word = word[:-1] + else: + word = self.suf1(word) # do - normalize short sufix + if len(word) == 4: + word = self.pre1(word) # do - normalize short prefix + return word + + def pro_w53(self, word): + """process length five patterns and extract length three roots""" + if word[2] in self.pr53[0] and word[0] == '\u0627': # افتعل - افاعل + word = word[1] + word[3:] + elif word[3] in self.pr53[1] and word[0] == '\u0645': # مفعول - مفعال - مفعيل + word = word[1:3] + word[4] + elif word[0] in self.pr53[2] and word[4] == '\u0629': # مفعلة - تفعلة - افعلة + word = word[1:4] + elif word[0] in self.pr53[3] and word[2] == '\u062a': # مفتعل - يفتعل - تفتعل + word = word[1] + word[3:] + elif word[0] in self.pr53[4] and word[2] == '\u0627': # مفاعل - تفاعل + word = word[1] + word[3:] + elif word[2] in self.pr53[5] and word[4] == '\u0629': # فعولة - فعالة + word = word[:2] + word[3] + elif word[0] in self.pr53[6] and word[1] == '\u0646': # انفعل - منفعل + word = word[2:] + elif word[3] == 
'\u0627' and word[0] == '\u0627': # افعال + word = word[1:3] + word[4] + elif word[4] == '\u0646' and word[3] == '\u0627': # فعلان + word = word[:3] + elif word[3] == '\u064a' and word[0] == '\u062a': # تفعيل + word = word[1:3] + word[4] + elif word[3] == '\u0648' and word[1] == '\u0627': # فاعول + word = word[0] + word[2] + word[4] + elif word[2] == '\u0627' and word[1] == '\u0648': # فواعل + word = word[0] + word[3:] + elif word[3] == '\u0626' and word[2] == '\u0627': # فعائل + word = word[:2] + word[4] + elif word[4] == '\u0629' and word[1] == '\u0627': # فاعلة + word = word[0] + word[2:4] + elif word[4] == '\u064a' and word[2] == '\u0627': # فعالي + word = word[:2] + word[3] + else: + word = self.suf1(word) # do - normalize short sufix + if len(word) == 5: + word = self.pre1(word) # do - normalize short prefix + return word + + def pro_w54(self, word): + """process length five patterns and extract length four roots""" + if word[0] in self.pr53[2]: # تفعلل - افعلل - مفعلل + word = word[1:] + elif word[4] == '\u0629': # فعللة + word = word[:4] + elif word[2] == '\u0627': # فعالل + word = word[:2] + word[3:] + return word + + def end_w5(self, word): + """ending step (word of length five)""" + if len(word) == 4: + word = self.pro_w4(word) + elif len(word) == 5: + word = self.pro_w54(word) + return word + + def pro_w6(self, word): + """process length six patterns and extract length three roots""" + if word.startswith('\u0627\u0633\u062a') or word.startswith( + '\u0645\u0633\u062a' + ): # مستفعل - استفعل + word = word[3:] + elif ( + word[0] == '\u0645' and word[3] == '\u0627' and word[5] == '\u0629' + ): # مفعالة + word = word[1:3] + word[4] + elif ( + word[0] == '\u0627' and word[2] == '\u062a' and word[4] == '\u0627' + ): # افتعال + word = word[1] + word[3] + word[5] + elif ( + word[0] == '\u0627' and word[3] == '\u0648' and word[2] == word[4] + ): # افعوعل + word = word[1] + word[4:] + elif ( + word[0] == '\u062a' and word[2] == '\u0627' and word[4] == '\u064a' + ): # تفاعيل new pattern + word = word[1] + word[3] + word[5] + else: + word = self.suf1(word) # do - normalize short sufix + if len(word) == 6: + word = self.pre1(word) # do - normalize short prefix + return word + + def pro_w64(self, word): + """process length six patterns and extract length four roots""" + if word[0] == '\u0627' and word[4] == '\u0627': # افعلال + word = word[1:4] + word[5] + elif word.startswith('\u0645\u062a'): # متفعلل + word = word[2:] + return word + + def end_w6(self, word): + """ending step (word of length six)""" + if len(word) == 5: + word = self.pro_w53(word) + word = self.end_w5(word) + elif len(word) == 6: + word = self.pro_w64(word) + return word + + def suf1(self, word): + """normalize short sufix""" + for sf1 in self.s1: + if word.endswith(sf1): + return word[:-1] + return word + + def pre1(self, word): + """normalize short prefix""" + for sp1 in self.p1: + if word.startswith(sp1): + return word[1:] + return word diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/lancaster.py b/venv.bak/lib/python3.7/site-packages/nltk/stem/lancaster.py new file mode 100644 index 0000000..919a1a6 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/stem/lancaster.py @@ -0,0 +1,352 @@ +# Natural Language Toolkit: Stemmers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Tomcavage +# URL: +# For license information, see LICENSE.TXT + +""" +A word stemmer based on the Lancaster (Paice/Husk) stemming algorithm. +Paice, Chris D. "Another Stemmer." ACM SIGIR Forum 24.3 (1990): 56-61. 
+""" +from __future__ import unicode_literals +import re + +from nltk.stem.api import StemmerI +from nltk.compat import python_2_unicode_compatible + + +@python_2_unicode_compatible +class LancasterStemmer(StemmerI): + """ + Lancaster Stemmer + + >>> from nltk.stem.lancaster import LancasterStemmer + >>> st = LancasterStemmer() + >>> st.stem('maximum') # Remove "-um" when word is intact + 'maxim' + >>> st.stem('presumably') # Don't remove "-um" when word is not intact + 'presum' + >>> st.stem('multiply') # No action taken if word ends with "-ply" + 'multiply' + >>> st.stem('provision') # Replace "-sion" with "-j" to trigger "j" set of rules + 'provid' + >>> st.stem('owed') # Word starting with vowel must contain at least 2 letters + 'ow' + >>> st.stem('ear') # ditto + 'ear' + >>> st.stem('saying') # Words starting with consonant must contain at least 3 + 'say' + >>> st.stem('crying') # letters and one of those letters must be a vowel + 'cry' + >>> st.stem('string') # ditto + 'string' + >>> st.stem('meant') # ditto + 'meant' + >>> st.stem('cement') # ditto + 'cem' + >>> st_pre = LancasterStemmer(strip_prefix_flag=True) + >>> st_pre.stem('kilometer') # Test Prefix + 'met' + >>> st_custom = LancasterStemmer(rule_tuple=("ssen4>", "s1t.")) + >>> st_custom.stem("ness") # Change s to t + 'nest' + """ + + # The rule list is static since it doesn't change between instances + default_rule_tuple = ( + "ai*2.", # -ia > - if intact + "a*1.", # -a > - if intact + "bb1.", # -bb > -b + "city3s.", # -ytic > -ys + "ci2>", # -ic > - + "cn1t>", # -nc > -nt + "dd1.", # -dd > -d + "dei3y>", # -ied > -y + "deec2ss.", # -ceed >", -cess + "dee1.", # -eed > -ee + "de2>", # -ed > - + "dooh4>", # -hood > - + "e1>", # -e > - + "feil1v.", # -lief > -liev + "fi2>", # -if > - + "gni3>", # -ing > - + "gai3y.", # -iag > -y + "ga2>", # -ag > - + "gg1.", # -gg > -g + "ht*2.", # -th > - if intact + "hsiug5ct.", # -guish > -ct + "hsi3>", # -ish > - + "i*1.", # -i > - if intact + "i1y>", # -i > -y + "ji1d.", # -ij > -id -- see nois4j> & vis3j> + "juf1s.", # -fuj > -fus + "ju1d.", # -uj > -ud + "jo1d.", # -oj > -od + "jeh1r.", # -hej > -her + "jrev1t.", # -verj > -vert + "jsim2t.", # -misj > -mit + "jn1d.", # -nj > -nd + "j1s.", # -j > -s + "lbaifi6.", # -ifiabl > - + "lbai4y.", # -iabl > -y + "lba3>", # -abl > - + "lbi3.", # -ibl > - + "lib2l>", # -bil > -bl + "lc1.", # -cl > c + "lufi4y.", # -iful > -y + "luf3>", # -ful > - + "lu2.", # -ul > - + "lai3>", # -ial > - + "lau3>", # -ual > - + "la2>", # -al > - + "ll1.", # -ll > -l + "mui3.", # -ium > - + "mu*2.", # -um > - if intact + "msi3>", # -ism > - + "mm1.", # -mm > -m + "nois4j>", # -sion > -j + "noix4ct.", # -xion > -ct + "noi3>", # -ion > - + "nai3>", # -ian > - + "na2>", # -an > - + "nee0.", # protect -een + "ne2>", # -en > - + "nn1.", # -nn > -n + "pihs4>", # -ship > - + "pp1.", # -pp > -p + "re2>", # -er > - + "rae0.", # protect -ear + "ra2.", # -ar > - + "ro2>", # -or > - + "ru2>", # -ur > - + "rr1.", # -rr > -r + "rt1>", # -tr > -t + "rei3y>", # -ier > -y + "sei3y>", # -ies > -y + "sis2.", # -sis > -s + "si2>", # -is > - + "ssen4>", # -ness > - + "ss0.", # protect -ss + "suo3>", # -ous > - + "su*2.", # -us > - if intact + "s*1>", # -s > - if intact + "s0.", # -s > -s + "tacilp4y.", # -plicat > -ply + "ta2>", # -at > - + "tnem4>", # -ment > - + "tne3>", # -ent > - + "tna3>", # -ant > - + "tpir2b.", # -ript > -rib + "tpro2b.", # -orpt > -orb + "tcud1.", # -duct > -duc + "tpmus2.", # -sumpt > -sum + "tpec2iv.", # -cept > -ceiv + "tulo2v.", # -olut > -olv + "tsis0.", # 
protect -sist + "tsi3>", # -ist > - + "tt1.", # -tt > -t + "uqi3.", # -iqu > - + "ugo1.", # -ogu > -og + "vis3j>", # -siv > -j + "vie0.", # protect -eiv + "vi2>", # -iv > - + "ylb1>", # -bly > -bl + "yli3y>", # -ily > -y + "ylp0.", # protect -ply + "yl2>", # -ly > - + "ygo1.", # -ogy > -og + "yhp1.", # -phy > -ph + "ymo1.", # -omy > -om + "ypo1.", # -opy > -op + "yti3>", # -ity > - + "yte3>", # -ety > - + "ytl2.", # -lty > -l + "yrtsi5.", # -istry > - + "yra3>", # -ary > - + "yro3>", # -ory > - + "yfi3.", # -ify > - + "ycn2t>", # -ncy > -nt + "yca3>", # -acy > - + "zi2>", # -iz > - + "zy1s.", # -yz > -ys + ) + + def __init__(self, rule_tuple=None, strip_prefix_flag=False): + """Create an instance of the Lancaster stemmer. + """ + # Setup an empty rule dictionary - this will be filled in later + self.rule_dictionary = {} + # Check if a user wants to strip prefix + self._strip_prefix = strip_prefix_flag + # Check if a user wants to use his/her own rule tuples. + self._rule_tuple = rule_tuple if rule_tuple else self.default_rule_tuple + + def parseRules(self, rule_tuple=None): + """Validate the set of rules used in this stemmer. + + If this function is called as an individual method, without using stem + method, rule_tuple argument will be compiled into self.rule_dictionary. + If this function is called within stem, self._rule_tuple will be used. + + """ + # If there is no argument for the function, use class' own rule tuple. + rule_tuple = rule_tuple if rule_tuple else self._rule_tuple + valid_rule = re.compile("^[a-z]+\*?\d[a-z]*[>\.]?$") + # Empty any old rules from the rule set before adding new ones + self.rule_dictionary = {} + + for rule in rule_tuple: + if not valid_rule.match(rule): + raise ValueError("The rule {0} is invalid".format(rule)) + first_letter = rule[0:1] + if first_letter in self.rule_dictionary: + self.rule_dictionary[first_letter].append(rule) + else: + self.rule_dictionary[first_letter] = [rule] + + def stem(self, word): + """Stem a word using the Lancaster stemmer. + """ + # Lower-case the word, since all the rules are lower-cased + word = word.lower() + word = self.__stripPrefix(word) if self._strip_prefix else word + + # Save a copy of the original word + intact_word = word + + # If rule dictionary is empty, parse rule tuple. 
+ if not self.rule_dictionary: + self.parseRules() + + return self.__doStemming(word, intact_word) + + def __doStemming(self, word, intact_word): + """Perform the actual word stemming + """ + + valid_rule = re.compile("^([a-z]+)(\*?)(\d)([a-z]*)([>\.]?)$") + + proceed = True + + while proceed: + + # Find the position of the last letter of the word to be stemmed + last_letter_position = self.__getLastLetter(word) + + # Only stem the word if it has a last letter and a rule matching that last letter + if ( + last_letter_position < 0 + or word[last_letter_position] not in self.rule_dictionary + ): + proceed = False + + else: + rule_was_applied = False + + # Go through each rule that matches the word's final letter + for rule in self.rule_dictionary[word[last_letter_position]]: + rule_match = valid_rule.match(rule) + if rule_match: + ( + ending_string, + intact_flag, + remove_total, + append_string, + cont_flag, + ) = rule_match.groups() + + # Convert the number of chars to remove when stemming + # from a string to an integer + remove_total = int(remove_total) + + # Proceed if word's ending matches rule's word ending + if word.endswith(ending_string[::-1]): + if intact_flag: + if word == intact_word and self.__isAcceptable( + word, remove_total + ): + word = self.__applyRule( + word, remove_total, append_string + ) + rule_was_applied = True + if cont_flag == '.': + proceed = False + break + elif self.__isAcceptable(word, remove_total): + word = self.__applyRule( + word, remove_total, append_string + ) + rule_was_applied = True + if cont_flag == '.': + proceed = False + break + # If no rules apply, the word doesn't need any more stemming + if rule_was_applied == False: + proceed = False + return word + + def __getLastLetter(self, word): + """Get the zero-based index of the last alphabetic character in this string + """ + last_letter = -1 + for position in range(len(word)): + if word[position].isalpha(): + last_letter = position + else: + break + return last_letter + + def __isAcceptable(self, word, remove_total): + """Determine if the word is acceptable for stemming. + """ + word_is_acceptable = False + # If the word starts with a vowel, it must be at least 2 + # characters long to be stemmed + if word[0] in "aeiouy": + if len(word) - remove_total >= 2: + word_is_acceptable = True + # If the word starts with a consonant, it must be at least 3 + # characters long (including one vowel) to be stemmed + elif len(word) - remove_total >= 3: + if word[1] in "aeiouy": + word_is_acceptable = True + elif word[2] in "aeiouy": + word_is_acceptable = True + return word_is_acceptable + + def __applyRule(self, word, remove_total, append_string): + """Apply the stemming rule to the word + """ + # Remove letters from the end of the word + new_word_length = len(word) - remove_total + word = word[0:new_word_length] + + # And add new letters to the end of the truncated word + if append_string: + word += append_string + return word + + def __stripPrefix(self, word): + """Remove prefix from a word. + + This function originally taken from Whoosh. 
+ + """ + for prefix in ( + "kilo", + "micro", + "milli", + "intra", + "ultra", + "mega", + "nano", + "pico", + "pseudo", + ): + if word.startswith(prefix): + return word[len(prefix) :] + return word + + def __repr__(self): + return '' diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/porter.py b/venv.bak/lib/python3.7/site-packages/nltk/stem/porter.py new file mode 100644 index 0000000..bfc5dd8 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/stem/porter.py @@ -0,0 +1,713 @@ +""" +Porter Stemmer + +This is the Porter stemming algorithm. It follows the algorithm +presented in + +Porter, M. "An algorithm for suffix stripping." Program 14.3 (1980): 130-137. + +with some optional deviations that can be turned on or off with the +`mode` argument to the constructor. + +Martin Porter, the algorithm's inventor, maintains a web page about the +algorithm at + + http://www.tartarus.org/~martin/PorterStemmer/ + +which includes another Python implementation and other implementations +in many languages. +""" + +from __future__ import print_function, unicode_literals + +__docformat__ = 'plaintext' + +import re + +from nltk.stem.api import StemmerI +from nltk.compat import python_2_unicode_compatible + + +@python_2_unicode_compatible +class PorterStemmer(StemmerI): + """ + A word stemmer based on the Porter stemming algorithm. + + Porter, M. "An algorithm for suffix stripping." + Program 14.3 (1980): 130-137. + + See http://www.tartarus.org/~martin/PorterStemmer/ for the homepage + of the algorithm. + + Martin Porter has endorsed several modifications to the Porter + algorithm since writing his original paper, and those extensions are + included in the implementations on his website. Additionally, others + have proposed further improvements to the algorithm, including NLTK + contributors. There are thus three modes that can be selected by + passing the appropriate constant to the class constructor's `mode` + attribute: + + PorterStemmer.ORIGINAL_ALGORITHM + - Implementation that is faithful to the original paper. + + Note that Martin Porter has deprecated this version of the + algorithm. Martin distributes implementations of the Porter + Stemmer in many languages, hosted at: + + http://www.tartarus.org/~martin/PorterStemmer/ + + and all of these implementations include his extensions. He + strongly recommends against using the original, published + version of the algorithm; only use this mode if you clearly + understand why you are choosing to do so. + + PorterStemmer.MARTIN_EXTENSIONS + - Implementation that only uses the modifications to the + algorithm that are included in the implementations on Martin + Porter's website. He has declared Porter frozen, so the + behaviour of those implementations should never change. + + PorterStemmer.NLTK_EXTENSIONS (default) + - Implementation that includes further improvements devised by + NLTK contributors or taken from other modified implementations + found on the web. + + For the best stemming, you should use the default NLTK_EXTENSIONS + version. However, if you need to get the same results as either the + original algorithm or one of Martin Porter's hosted versions for + compatibility with an existing implementation or dataset, you can use + one of the other modes instead. 
+ """ + + # Modes the Stemmer can be instantiated in + NLTK_EXTENSIONS = 'NLTK_EXTENSIONS' + MARTIN_EXTENSIONS = 'MARTIN_EXTENSIONS' + ORIGINAL_ALGORITHM = 'ORIGINAL_ALGORITHM' + + def __init__(self, mode=NLTK_EXTENSIONS): + if mode not in ( + self.NLTK_EXTENSIONS, + self.MARTIN_EXTENSIONS, + self.ORIGINAL_ALGORITHM, + ): + raise ValueError( + "Mode must be one of PorterStemmer.NLTK_EXTENSIONS, " + "PorterStemmer.MARTIN_EXTENSIONS, or " + "PorterStemmer.ORIGINAL_ALGORITHM" + ) + + self.mode = mode + + if self.mode == self.NLTK_EXTENSIONS: + # This is a table of irregular forms. It is quite short, + # but still reflects the errors actually drawn to Martin + # Porter's attention over a 20 year period! + irregular_forms = { + "sky": ["sky", "skies"], + "die": ["dying"], + "lie": ["lying"], + "tie": ["tying"], + "news": ["news"], + "inning": ["innings", "inning"], + "outing": ["outings", "outing"], + "canning": ["cannings", "canning"], + "howe": ["howe"], + "proceed": ["proceed"], + "exceed": ["exceed"], + "succeed": ["succeed"], + } + + self.pool = {} + for key in irregular_forms: + for val in irregular_forms[key]: + self.pool[val] = key + + self.vowels = frozenset(['a', 'e', 'i', 'o', 'u']) + + def _is_consonant(self, word, i): + """Returns True if word[i] is a consonant, False otherwise + + A consonant is defined in the paper as follows: + + A consonant in a word is a letter other than A, E, I, O or + U, and other than Y preceded by a consonant. (The fact that + the term `consonant' is defined to some extent in terms of + itself does not make it ambiguous.) So in TOY the consonants + are T and Y, and in SYZYGY they are S, Z and G. If a letter + is not a consonant it is a vowel. + """ + if word[i] in self.vowels: + return False + if word[i] == 'y': + if i == 0: + return True + else: + return not self._is_consonant(word, i - 1) + return True + + def _measure(self, stem): + """Returns the 'measure' of stem, per definition in the paper + + From the paper: + + A consonant will be denoted by c, a vowel by v. A list + ccc... of length greater than 0 will be denoted by C, and a + list vvv... of length greater than 0 will be denoted by V. + Any word, or part of a word, therefore has one of the four + forms: + + CVCV ... C + CVCV ... V + VCVC ... C + VCVC ... V + + These may all be represented by the single form + + [C]VCVC ... [V] + + where the square brackets denote arbitrary presence of their + contents. Using (VC){m} to denote VC repeated m times, this + may again be written as + + [C](VC){m}[V]. + + m will be called the \measure\ of any word or word part when + represented in this form. The case m = 0 covers the null + word. Here are some examples: + + m=0 TR, EE, TREE, Y, BY. + m=1 TROUBLE, OATS, TREES, IVY. + m=2 TROUBLES, PRIVATE, OATEN, ORRERY. + """ + cv_sequence = '' + + # Construct a string of 'c's and 'v's representing whether each + # character in `stem` is a consonant or a vowel. + # e.g. 
'falafel' becomes 'cvcvcvc', + # 'architecture' becomes 'vcccvcvccvcv' + for i in range(len(stem)): + if self._is_consonant(stem, i): + cv_sequence += 'c' + else: + cv_sequence += 'v' + + # Count the number of 'vc' occurences, which is equivalent to + # the number of 'VC' occurrences in Porter's reduced form in the + # docstring above, which is in turn equivalent to `m` + return cv_sequence.count('vc') + + def _has_positive_measure(self, stem): + return self._measure(stem) > 0 + + def _contains_vowel(self, stem): + """Returns True if stem contains a vowel, else False""" + for i in range(len(stem)): + if not self._is_consonant(stem, i): + return True + return False + + def _ends_double_consonant(self, word): + """Implements condition *d from the paper + + Returns True if word ends with a double consonant + """ + return ( + len(word) >= 2 + and word[-1] == word[-2] + and self._is_consonant(word, len(word) - 1) + ) + + def _ends_cvc(self, word): + """Implements condition *o from the paper + + From the paper: + + *o - the stem ends cvc, where the second c is not W, X or Y + (e.g. -WIL, -HOP). + """ + return ( + len(word) >= 3 + and self._is_consonant(word, len(word) - 3) + and not self._is_consonant(word, len(word) - 2) + and self._is_consonant(word, len(word) - 1) + and word[-1] not in ('w', 'x', 'y') + ) or ( + self.mode == self.NLTK_EXTENSIONS + and len(word) == 2 + and not self._is_consonant(word, 0) + and self._is_consonant(word, 1) + ) + + def _replace_suffix(self, word, suffix, replacement): + """Replaces `suffix` of `word` with `replacement""" + assert word.endswith(suffix), "Given word doesn't end with given suffix" + if suffix == '': + return word + replacement + else: + return word[: -len(suffix)] + replacement + + def _apply_rule_list(self, word, rules): + """Applies the first applicable suffix-removal rule to the word + + Takes a word and a list of suffix-removal rules represented as + 3-tuples, with the first element being the suffix to remove, + the second element being the string to replace it with, and the + final element being the condition for the rule to be applicable, + or None if the rule is unconditional. 
+ """ + for rule in rules: + suffix, replacement, condition = rule + if suffix == '*d' and self._ends_double_consonant(word): + stem = word[:-2] + if condition is None or condition(stem): + return stem + replacement + else: + # Don't try any further rules + return word + if word.endswith(suffix): + stem = self._replace_suffix(word, suffix, '') + if condition is None or condition(stem): + return stem + replacement + else: + # Don't try any further rules + return word + + return word + + def _step1a(self, word): + """Implements Step 1a from "An algorithm for suffix stripping" + + From the paper: + + SSES -> SS caresses -> caress + IES -> I ponies -> poni + ties -> ti + SS -> SS caress -> caress + S -> cats -> cat + """ + # this NLTK-only rule extends the original algorithm, so + # that 'flies'->'fli' but 'dies'->'die' etc + if self.mode == self.NLTK_EXTENSIONS: + if word.endswith('ies') and len(word) == 4: + return self._replace_suffix(word, 'ies', 'ie') + + return self._apply_rule_list( + word, + [ + ('sses', 'ss', None), # SSES -> SS + ('ies', 'i', None), # IES -> I + ('ss', 'ss', None), # SS -> SS + ('s', '', None), # S -> + ], + ) + + def _step1b(self, word): + """Implements Step 1b from "An algorithm for suffix stripping" + + From the paper: + + (m>0) EED -> EE feed -> feed + agreed -> agree + (*v*) ED -> plastered -> plaster + bled -> bled + (*v*) ING -> motoring -> motor + sing -> sing + + If the second or third of the rules in Step 1b is successful, + the following is done: + + AT -> ATE conflat(ed) -> conflate + BL -> BLE troubl(ed) -> trouble + IZ -> IZE siz(ed) -> size + (*d and not (*L or *S or *Z)) + -> single letter + hopp(ing) -> hop + tann(ed) -> tan + fall(ing) -> fall + hiss(ing) -> hiss + fizz(ed) -> fizz + (m=1 and *o) -> E fail(ing) -> fail + fil(ing) -> file + + The rule to map to a single letter causes the removal of one of + the double letter pair. The -E is put back on -AT, -BL and -IZ, + so that the suffixes -ATE, -BLE and -IZE can be recognised + later. This E may be removed in step 4. 
+ """ + # this NLTK-only block extends the original algorithm, so that + # 'spied'->'spi' but 'died'->'die' etc + if self.mode == self.NLTK_EXTENSIONS: + if word.endswith('ied'): + if len(word) == 4: + return self._replace_suffix(word, 'ied', 'ie') + else: + return self._replace_suffix(word, 'ied', 'i') + + # (m>0) EED -> EE + if word.endswith('eed'): + stem = self._replace_suffix(word, 'eed', '') + if self._measure(stem) > 0: + return stem + 'ee' + else: + return word + + rule_2_or_3_succeeded = False + + for suffix in ['ed', 'ing']: + if word.endswith(suffix): + intermediate_stem = self._replace_suffix(word, suffix, '') + if self._contains_vowel(intermediate_stem): + rule_2_or_3_succeeded = True + break + + if not rule_2_or_3_succeeded: + return word + + return self._apply_rule_list( + intermediate_stem, + [ + ('at', 'ate', None), # AT -> ATE + ('bl', 'ble', None), # BL -> BLE + ('iz', 'ize', None), # IZ -> IZE + # (*d and not (*L or *S or *Z)) + # -> single letter + ( + '*d', + intermediate_stem[-1], + lambda stem: intermediate_stem[-1] not in ('l', 's', 'z'), + ), + # (m=1 and *o) -> E + ( + '', + 'e', + lambda stem: (self._measure(stem) == 1 and self._ends_cvc(stem)), + ), + ], + ) + + def _step1c(self, word): + """Implements Step 1c from "An algorithm for suffix stripping" + + From the paper: + + Step 1c + + (*v*) Y -> I happy -> happi + sky -> sky + """ + + def nltk_condition(stem): + """ + This has been modified from the original Porter algorithm so + that y->i is only done when y is preceded by a consonant, + but not if the stem is only a single consonant, i.e. + + (*c and not c) Y -> I + + So 'happy' -> 'happi', but + 'enjoy' -> 'enjoy' etc + + This is a much better rule. Formerly 'enjoy'->'enjoi' and + 'enjoyment'->'enjoy'. Step 1c is perhaps done too soon; but + with this modification that no longer really matters. + + Also, the removal of the contains_vowel(z) condition means + that 'spy', 'fly', 'try' ... stem to 'spi', 'fli', 'tri' and + conflate with 'spied', 'tried', 'flies' ... 
+ """ + return len(stem) > 1 and self._is_consonant(stem, len(stem) - 1) + + def original_condition(stem): + return self._contains_vowel(stem) + + return self._apply_rule_list( + word, + [ + ( + 'y', + 'i', + nltk_condition + if self.mode == self.NLTK_EXTENSIONS + else original_condition, + ) + ], + ) + + def _step2(self, word): + """Implements Step 2 from "An algorithm for suffix stripping" + + From the paper: + + Step 2 + + (m>0) ATIONAL -> ATE relational -> relate + (m>0) TIONAL -> TION conditional -> condition + rational -> rational + (m>0) ENCI -> ENCE valenci -> valence + (m>0) ANCI -> ANCE hesitanci -> hesitance + (m>0) IZER -> IZE digitizer -> digitize + (m>0) ABLI -> ABLE conformabli -> conformable + (m>0) ALLI -> AL radicalli -> radical + (m>0) ENTLI -> ENT differentli -> different + (m>0) ELI -> E vileli - > vile + (m>0) OUSLI -> OUS analogousli -> analogous + (m>0) IZATION -> IZE vietnamization -> vietnamize + (m>0) ATION -> ATE predication -> predicate + (m>0) ATOR -> ATE operator -> operate + (m>0) ALISM -> AL feudalism -> feudal + (m>0) IVENESS -> IVE decisiveness -> decisive + (m>0) FULNESS -> FUL hopefulness -> hopeful + (m>0) OUSNESS -> OUS callousness -> callous + (m>0) ALITI -> AL formaliti -> formal + (m>0) IVITI -> IVE sensitiviti -> sensitive + (m>0) BILITI -> BLE sensibiliti -> sensible + """ + + if self.mode == self.NLTK_EXTENSIONS: + # Instead of applying the ALLI -> AL rule after '(a)bli' per + # the published algorithm, instead we apply it first, and, + # if it succeeds, run the result through step2 again. + if word.endswith('alli') and self._has_positive_measure( + self._replace_suffix(word, 'alli', '') + ): + return self._step2(self._replace_suffix(word, 'alli', 'al')) + + bli_rule = ('bli', 'ble', self._has_positive_measure) + abli_rule = ('abli', 'able', self._has_positive_measure) + + rules = [ + ('ational', 'ate', self._has_positive_measure), + ('tional', 'tion', self._has_positive_measure), + ('enci', 'ence', self._has_positive_measure), + ('anci', 'ance', self._has_positive_measure), + ('izer', 'ize', self._has_positive_measure), + abli_rule if self.mode == self.ORIGINAL_ALGORITHM else bli_rule, + ('alli', 'al', self._has_positive_measure), + ('entli', 'ent', self._has_positive_measure), + ('eli', 'e', self._has_positive_measure), + ('ousli', 'ous', self._has_positive_measure), + ('ization', 'ize', self._has_positive_measure), + ('ation', 'ate', self._has_positive_measure), + ('ator', 'ate', self._has_positive_measure), + ('alism', 'al', self._has_positive_measure), + ('iveness', 'ive', self._has_positive_measure), + ('fulness', 'ful', self._has_positive_measure), + ('ousness', 'ous', self._has_positive_measure), + ('aliti', 'al', self._has_positive_measure), + ('iviti', 'ive', self._has_positive_measure), + ('biliti', 'ble', self._has_positive_measure), + ] + + if self.mode == self.NLTK_EXTENSIONS: + rules.append(('fulli', 'ful', self._has_positive_measure)) + + # The 'l' of the 'logi' -> 'log' rule is put with the stem, + # so that short stems like 'geo' 'theo' etc work like + # 'archaeo' 'philo' etc. 
+ rules.append( + ("logi", "log", lambda stem: self._has_positive_measure(word[:-3])) + ) + + if self.mode == self.MARTIN_EXTENSIONS: + rules.append(("logi", "log", self._has_positive_measure)) + + return self._apply_rule_list(word, rules) + + def _step3(self, word): + """Implements Step 3 from "An algorithm for suffix stripping" + + From the paper: + + Step 3 + + (m>0) ICATE -> IC triplicate -> triplic + (m>0) ATIVE -> formative -> form + (m>0) ALIZE -> AL formalize -> formal + (m>0) ICITI -> IC electriciti -> electric + (m>0) ICAL -> IC electrical -> electric + (m>0) FUL -> hopeful -> hope + (m>0) NESS -> goodness -> good + """ + return self._apply_rule_list( + word, + [ + ('icate', 'ic', self._has_positive_measure), + ('ative', '', self._has_positive_measure), + ('alize', 'al', self._has_positive_measure), + ('iciti', 'ic', self._has_positive_measure), + ('ical', 'ic', self._has_positive_measure), + ('ful', '', self._has_positive_measure), + ('ness', '', self._has_positive_measure), + ], + ) + + def _step4(self, word): + """Implements Step 4 from "An algorithm for suffix stripping" + + Step 4 + + (m>1) AL -> revival -> reviv + (m>1) ANCE -> allowance -> allow + (m>1) ENCE -> inference -> infer + (m>1) ER -> airliner -> airlin + (m>1) IC -> gyroscopic -> gyroscop + (m>1) ABLE -> adjustable -> adjust + (m>1) IBLE -> defensible -> defens + (m>1) ANT -> irritant -> irrit + (m>1) EMENT -> replacement -> replac + (m>1) MENT -> adjustment -> adjust + (m>1) ENT -> dependent -> depend + (m>1 and (*S or *T)) ION -> adoption -> adopt + (m>1) OU -> homologou -> homolog + (m>1) ISM -> communism -> commun + (m>1) ATE -> activate -> activ + (m>1) ITI -> angulariti -> angular + (m>1) OUS -> homologous -> homolog + (m>1) IVE -> effective -> effect + (m>1) IZE -> bowdlerize -> bowdler + + The suffixes are now removed. All that remains is a little + tidying up. + """ + measure_gt_1 = lambda stem: self._measure(stem) > 1 + + return self._apply_rule_list( + word, + [ + ('al', '', measure_gt_1), + ('ance', '', measure_gt_1), + ('ence', '', measure_gt_1), + ('er', '', measure_gt_1), + ('ic', '', measure_gt_1), + ('able', '', measure_gt_1), + ('ible', '', measure_gt_1), + ('ant', '', measure_gt_1), + ('ement', '', measure_gt_1), + ('ment', '', measure_gt_1), + ('ent', '', measure_gt_1), + # (m>1 and (*S or *T)) ION -> + ( + 'ion', + '', + lambda stem: self._measure(stem) > 1 and stem[-1] in ('s', 't'), + ), + ('ou', '', measure_gt_1), + ('ism', '', measure_gt_1), + ('ate', '', measure_gt_1), + ('iti', '', measure_gt_1), + ('ous', '', measure_gt_1), + ('ive', '', measure_gt_1), + ('ize', '', measure_gt_1), + ], + ) + + def _step5a(self, word): + """Implements Step 5a from "An algorithm for suffix stripping" + + From the paper: + + Step 5a + + (m>1) E -> probate -> probat + rate -> rate + (m=1 and not *o) E -> cease -> ceas + """ + # Note that Martin's test vocabulary and reference + # implementations are inconsistent in how they handle the case + # where two rules both refer to a suffix that matches the word + # to be stemmed, but only the condition of the second one is + # true. + # Earlier in step2b we had the rules: + # (m>0) EED -> EE + # (*v*) ED -> + # but the examples in the paper included "feed"->"feed", even + # though (*v*) is true for "fe" and therefore the second rule + # alone would map "feed"->"fe". + # However, in THIS case, we need to handle the consecutive rules + # differently and try both conditions (obviously; the second + # rule here would be redundant otherwise). 
Martin's paper makes + # no explicit mention of the inconsistency; you have to infer it + # from the examples. + # For this reason, we can't use _apply_rule_list here. + if word.endswith('e'): + stem = self._replace_suffix(word, 'e', '') + if self._measure(stem) > 1: + return stem + if self._measure(stem) == 1 and not self._ends_cvc(stem): + return stem + return word + + def _step5b(self, word): + """Implements Step 5a from "An algorithm for suffix stripping" + + From the paper: + + Step 5b + + (m > 1 and *d and *L) -> single letter + controll -> control + roll -> roll + """ + return self._apply_rule_list( + word, [('ll', 'l', lambda stem: self._measure(word[:-1]) > 1)] + ) + + def stem(self, word): + stem = word.lower() + + if self.mode == self.NLTK_EXTENSIONS and word in self.pool: + return self.pool[word] + + if self.mode != self.ORIGINAL_ALGORITHM and len(word) <= 2: + # With this line, strings of length 1 or 2 don't go through + # the stemming process, although no mention is made of this + # in the published algorithm. + return word + + stem = self._step1a(stem) + stem = self._step1b(stem) + stem = self._step1c(stem) + stem = self._step2(stem) + stem = self._step3(stem) + stem = self._step4(stem) + stem = self._step5a(stem) + stem = self._step5b(stem) + + return stem + + def __repr__(self): + return '' + + +def demo(): + """ + A demonstration of the porter stemmer on a sample from + the Penn Treebank corpus. + """ + + from nltk.corpus import treebank + from nltk import stem + + stemmer = stem.PorterStemmer() + + orig = [] + stemmed = [] + for item in treebank.fileids()[:3]: + for (word, tag) in treebank.tagged_words(item): + orig.append(word) + stemmed.append(stemmer.stem(word)) + + # Convert the results to a string, and word-wrap them. + results = ' '.join(stemmed) + results = re.sub(r"(.{,70})\s", r'\1\n', results + ' ').rstrip() + + # Convert the original to a string, and word wrap it. + original = ' '.join(orig) + original = re.sub(r"(.{,70})\s", r'\1\n', original + ' ').rstrip() + + # Print the results. + print('-Original-'.center(70).replace(' ', '*').replace('-', ' ')) + print(original) + print('-Results-'.center(70).replace(' ', '*').replace('-', ' ')) + print(results) + print('*' * 70) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/regexp.py b/venv.bak/lib/python3.7/site-packages/nltk/stem/regexp.py new file mode 100644 index 0000000..8f6ead5 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/stem/regexp.py @@ -0,0 +1,59 @@ +# Natural Language Toolkit: Stemmers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Trevor Cohn +# Edward Loper +# Steven Bird +# URL: +# For license information, see LICENSE.TXT +from __future__ import unicode_literals +import re + +from nltk.stem.api import StemmerI +from nltk.compat import python_2_unicode_compatible + + +@python_2_unicode_compatible +class RegexpStemmer(StemmerI): + """ + A stemmer that uses regular expressions to identify morphological + affixes. Any substrings that match the regular expressions will + be removed. + + >>> from nltk.stem import RegexpStemmer + >>> st = RegexpStemmer('ing$|s$|e$|able$', min=4) + >>> st.stem('cars') + 'car' + >>> st.stem('mass') + 'mas' + >>> st.stem('was') + 'was' + >>> st.stem('bee') + 'bee' + >>> st.stem('compute') + 'comput' + >>> st.stem('advisable') + 'advis' + + :type regexp: str or regexp + :param regexp: The regular expression that should be used to + identify morphological affixes. 
+ :type min: int + :param min: The minimum length of string to stem + """ + + def __init__(self, regexp, min=0): + + if not hasattr(regexp, 'pattern'): + regexp = re.compile(regexp) + self._regexp = regexp + self._min = min + + def stem(self, word): + if len(word) < self._min: + return word + else: + return self._regexp.sub('', word) + + def __repr__(self): + return ''.format(self._regexp.pattern) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/rslp.py b/venv.bak/lib/python3.7/site-packages/nltk/stem/rslp.py new file mode 100644 index 0000000..06184ee --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/stem/rslp.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- + +# Natural Language Toolkit: RSLP Stemmer +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Tiago Tresoldi +# URL: +# For license information, see LICENSE.TXT + +# This code is based on the algorithm presented in the paper "A Stemming +# Algorithm for the Portuguese Language" by Viviane Moreira Orengo and +# Christian Huyck, which unfortunately I had no access to. The code is a +# Python version, with some minor modifications of mine, to the description +# presented at http://www.webcitation.org/5NnvdIzOb and to the C source code +# available at http://www.inf.ufrgs.br/~arcoelho/rslp/integrando_rslp.html. +# Please note that this stemmer is intended for demonstration and educational +# purposes only. Feel free to write me for any comments, including the +# development of a different and/or better stemmer for Portuguese. I also +# suggest using NLTK's mailing list for Portuguese for any discussion. + +# Este código é baseado no algoritmo apresentado no artigo "A Stemming +# Algorithm for the Portuguese Language" de Viviane Moreira Orengo e +# Christian Huyck, o qual infelizmente não tive a oportunidade de ler. O +# código é uma conversão para Python, com algumas pequenas modificações +# minhas, daquele apresentado em http://www.webcitation.org/5NnvdIzOb e do +# código para linguagem C disponível em +# http://www.inf.ufrgs.br/~arcoelho/rslp/integrando_rslp.html. Por favor, +# lembre-se de que este stemmer foi desenvolvido com finalidades unicamente +# de demonstração e didáticas. Sinta-se livre para me escrever para qualquer +# comentário, inclusive sobre o desenvolvimento de um stemmer diferente +# e/ou melhor para o português. Também sugiro utilizar-se a lista de discussão +# do NLTK para o português para qualquer debate. +from __future__ import print_function, unicode_literals +from nltk.data import load + +from nltk.stem.api import StemmerI + + +class RSLPStemmer(StemmerI): + """ + A stemmer for Portuguese. + + >>> from nltk.stem import RSLPStemmer + >>> st = RSLPStemmer() + >>> # opening lines of Erico Verissimo's "Música ao Longe" + >>> text = ''' + ... Clarissa risca com giz no quadro-negro a paisagem que os alunos + ... devem copiar . Uma casinha de porta e janela , em cima duma + ... coxilha .''' + >>> for token in text.split(): + ... print(st.stem(token)) + clariss risc com giz no quadro-negr a pais que os alun dev copi . + uma cas de port e janel , em cim dum coxilh . 
+ """ + + def __init__(self): + self._model = [] + + self._model.append(self.read_rule("step0.pt")) + self._model.append(self.read_rule("step1.pt")) + self._model.append(self.read_rule("step2.pt")) + self._model.append(self.read_rule("step3.pt")) + self._model.append(self.read_rule("step4.pt")) + self._model.append(self.read_rule("step5.pt")) + self._model.append(self.read_rule("step6.pt")) + + def read_rule(self, filename): + rules = load('nltk:stemmers/rslp/' + filename, format='raw').decode("utf8") + lines = rules.split("\n") + + lines = [line for line in lines if line != ""] # remove blank lines + lines = [line for line in lines if line[0] != "#"] # remove comments + + # NOTE: a simple but ugly hack to make this parser happy with double '\t's + lines = [line.replace("\t\t", "\t") for line in lines] + + # parse rules + rules = [] + for line in lines: + rule = [] + tokens = line.split("\t") + + # text to be searched for at the end of the string + rule.append(tokens[0][1:-1]) # remove quotes + + # minimum stem size to perform the replacement + rule.append(int(tokens[1])) + + # text to be replaced into + rule.append(tokens[2][1:-1]) # remove quotes + + # exceptions to this rule + rule.append([token[1:-1] for token in tokens[3].split(",")]) + + # append to the results + rules.append(rule) + + return rules + + def stem(self, word): + word = word.lower() + + # the word ends in 's'? apply rule for plural reduction + if word[-1] == "s": + word = self.apply_rule(word, 0) + + # the word ends in 'a'? apply rule for feminine reduction + if word[-1] == "a": + word = self.apply_rule(word, 1) + + # augmentative reduction + word = self.apply_rule(word, 3) + + # adverb reduction + word = self.apply_rule(word, 2) + + # noun reduction + prev_word = word + word = self.apply_rule(word, 4) + if word == prev_word: + # verb reduction + prev_word = word + word = self.apply_rule(word, 5) + if word == prev_word: + # vowel removal + word = self.apply_rule(word, 6) + + return word + + def apply_rule(self, word, rule_index): + rules = self._model[rule_index] + for rule in rules: + suffix_length = len(rule[0]) + if word[-suffix_length:] == rule[0]: # if suffix matches + if len(word) >= suffix_length + rule[1]: # if we have minimum size + if word not in rule[3]: # if not an exception + word = word[:-suffix_length] + rule[2] + break + + return word diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/snowball.py b/venv.bak/lib/python3.7/site-packages/nltk/stem/snowball.py new file mode 100644 index 0000000..645cf1d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/stem/snowball.py @@ -0,0 +1,5946 @@ +# -*- coding: utf-8 -*- +# +# Natural Language Toolkit: Snowball Stemmer +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Peter Michael Stahl +# Peter Ljunglof (revisions) +# Lakhdar Benzahia (co-writer) +# Assem Chelli (reviewer arabicstemmer) +# Abdelkrim Aries (reviewer arabicstemmer) +# Algorithms: Dr Martin Porter +# Assem Chelli arabic stemming algorithm +# Benzahia Lakhdar +# URL: +# For license information, see LICENSE.TXT + +""" +Snowball stemmers + +This module provides a port of the Snowball stemmers +developed by Martin Porter. + +There is also a demo function: `snowball.demo()`. 
+ +""" +from __future__ import unicode_literals, print_function + +import re +from six.moves import input + +from nltk import compat +from nltk.corpus import stopwords +from nltk.stem import porter +from nltk.stem.util import suffix_replace, prefix_replace + +from nltk.stem.api import StemmerI + + +class SnowballStemmer(StemmerI): + + """ + Snowball Stemmer + + The following languages are supported: + Arabic, Danish, Dutch, English, Finnish, French, German, + Hungarian, Italian, Norwegian, Portuguese, Romanian, Russian, + Spanish and Swedish. + + The algorithm for English is documented here: + + Porter, M. \"An algorithm for suffix stripping.\" + Program 14.3 (1980): 130-137. + + The algorithms have been developed by Martin Porter. + These stemmers are called Snowball, because Porter created + a programming language with this name for creating + new stemming algorithms. There is more information available + at http://snowball.tartarus.org/ + + The stemmer is invoked as shown below: + + >>> from nltk.stem import SnowballStemmer + >>> print(" ".join(SnowballStemmer.languages)) # See which languages are supported + arabic danish dutch english finnish french german hungarian + italian norwegian porter portuguese romanian russian + spanish swedish + >>> stemmer = SnowballStemmer("german") # Choose a language + >>> stemmer.stem("Autobahnen") # Stem a word + 'autobahn' + + Invoking the stemmers that way is useful if you do not know the + language to be stemmed at runtime. Alternatively, if you already know + the language, then you can invoke the language specific stemmer directly: + + >>> from nltk.stem.snowball import GermanStemmer + >>> stemmer = GermanStemmer() + >>> stemmer.stem("Autobahnen") + 'autobahn' + + :param language: The language whose subclass is instantiated. + :type language: str or unicode + :param ignore_stopwords: If set to True, stopwords are + not stemmed and returned unchanged. + Set to False by default. + :type ignore_stopwords: bool + :raise ValueError: If there is no stemmer for the specified + language, a ValueError is raised. + """ + + languages = ( + "arabic", + "danish", + "dutch", + "english", + "finnish", + "french", + "german", + "hungarian", + "italian", + "norwegian", + "porter", + "portuguese", + "romanian", + "russian", + "spanish", + "swedish", + ) + + def __init__(self, language, ignore_stopwords=False): + if language not in self.languages: + raise ValueError("The language '{0}' is not supported.".format(language)) + stemmerclass = globals()[language.capitalize() + "Stemmer"] + self.stemmer = stemmerclass(ignore_stopwords) + self.stem = self.stemmer.stem + self.stopwords = self.stemmer.stopwords + + def stem(self, token): + return self.stemmer.stem(self, token) + + +@compat.python_2_unicode_compatible +class _LanguageSpecificStemmer(StemmerI): + + """ + This helper subclass offers the possibility + to invoke a specific stemmer directly. + This is useful if you already know the language to be stemmed at runtime. + + Create an instance of the Snowball stemmer. + + :param ignore_stopwords: If set to True, stopwords are + not stemmed and returned unchanged. + Set to False by default. + :type ignore_stopwords: bool + """ + + def __init__(self, ignore_stopwords=False): + # The language is the name of the class, minus the final "Stemmer". 
+ language = type(self).__name__.lower() + if language.endswith("stemmer"): + language = language[:-7] + + self.stopwords = set() + if ignore_stopwords: + try: + for word in stopwords.words(language): + self.stopwords.add(word) + except IOError: + raise ValueError( + "{!r} has no list of stopwords. Please set" + " 'ignore_stopwords' to 'False'.".format(self) + ) + + def __repr__(self): + """ + Print out the string representation of the respective class. + + """ + return "<{0}>".format(type(self).__name__) + + +class PorterStemmer(_LanguageSpecificStemmer, porter.PorterStemmer): + """ + A word stemmer based on the original Porter stemming algorithm. + + Porter, M. \"An algorithm for suffix stripping.\" + Program 14.3 (1980): 130-137. + + A few minor modifications have been made to Porter's basic + algorithm. See the source code of the module + nltk.stem.porter for more information. + + """ + + def __init__(self, ignore_stopwords=False): + _LanguageSpecificStemmer.__init__(self, ignore_stopwords) + porter.PorterStemmer.__init__(self) + + +class _ScandinavianStemmer(_LanguageSpecificStemmer): + + """ + This subclass encapsulates a method for defining the string region R1. + It is used by the Danish, Norwegian, and Swedish stemmer. + + """ + + def _r1_scandinavian(self, word, vowels): + """ + Return the region R1 that is used by the Scandinavian stemmers. + + R1 is the region after the first non-vowel following a vowel, + or is the null region at the end of the word if there is no + such non-vowel. But then R1 is adjusted so that the region + before it contains at least three letters. + + :param word: The word whose region R1 is determined. + :type word: str or unicode + :param vowels: The vowels of the respective language that are + used to determine the region R1. + :type vowels: unicode + :return: the region R1 for the respective word. + :rtype: unicode + :note: This helper method is invoked by the respective stem method of + the subclasses DanishStemmer, NorwegianStemmer, and + SwedishStemmer. It is not to be invoked directly! + + """ + r1 = "" + for i in range(1, len(word)): + if word[i] not in vowels and word[i - 1] in vowels: + if len(word[: i + 1]) < 3 and len(word[: i + 1]) > 0: + r1 = word[3:] + elif len(word[: i + 1]) >= 3: + r1 = word[i + 1 :] + else: + return word + break + + return r1 + + +class _StandardStemmer(_LanguageSpecificStemmer): + + """ + This subclass encapsulates two methods for defining the standard versions + of the string regions R1, R2, and RV. + + """ + + def _r1r2_standard(self, word, vowels): + """ + Return the standard interpretations of the string regions R1 and R2. + + R1 is the region after the first non-vowel following a vowel, + or is the null region at the end of the word if there is no + such non-vowel. + + R2 is the region after the first non-vowel following a vowel + in R1, or is the null region at the end of the word if there + is no such non-vowel. + + :param word: The word whose regions R1 and R2 are determined. + :type word: str or unicode + :param vowels: The vowels of the respective language that are + used to determine the regions R1 and R2. + :type vowels: unicode + :return: (r1,r2), the regions R1 and R2 for the respective word. + :rtype: tuple + :note: This helper method is invoked by the respective stem method of + the subclasses DutchStemmer, FinnishStemmer, + FrenchStemmer, GermanStemmer, ItalianStemmer, + PortugueseStemmer, RomanianStemmer, and SpanishStemmer. + It is not to be invoked directly! 
+ :note: A detailed description of how to define R1 and R2 + can be found at http://snowball.tartarus.org/texts/r1r2.html + + """ + r1 = "" + r2 = "" + for i in range(1, len(word)): + if word[i] not in vowels and word[i - 1] in vowels: + r1 = word[i + 1 :] + break + + for i in range(1, len(r1)): + if r1[i] not in vowels and r1[i - 1] in vowels: + r2 = r1[i + 1 :] + break + + return (r1, r2) + + def _rv_standard(self, word, vowels): + """ + Return the standard interpretation of the string region RV. + + If the second letter is a consonant, RV is the region after the + next following vowel. If the first two letters are vowels, RV is + the region after the next following consonant. Otherwise, RV is + the region after the third letter. + + :param word: The word whose region RV is determined. + :type word: str or unicode + :param vowels: The vowels of the respective language that are + used to determine the region RV. + :type vowels: unicode + :return: the region RV for the respective word. + :rtype: unicode + :note: This helper method is invoked by the respective stem method of + the subclasses ItalianStemmer, PortugueseStemmer, + RomanianStemmer, and SpanishStemmer. It is not to be + invoked directly! + + """ + rv = "" + if len(word) >= 2: + if word[1] not in vowels: + for i in range(2, len(word)): + if word[i] in vowels: + rv = word[i + 1 :] + break + + elif word[0] in vowels and word[1] in vowels: + for i in range(2, len(word)): + if word[i] not in vowels: + rv = word[i + 1 :] + break + else: + rv = word[3:] + + return rv + + +class ArabicStemmer(_StandardStemmer): + """ + https://github.com/snowballstem/snowball/blob/master/algorithms/arabic/stem_Unicode.sbl (Original Algorithm) + The Snowball Arabic light Stemmer + Algorithm : Assem Chelli + Abdelkrim Aries + Lakhdar Benzahia + Nltk Version Author : Lakhdar Benzahia + """ + + # Normalize_pre stes + __vocalization = re.compile( + r'[\u064b-\u064c-\u064d-\u064e-\u064f-\u0650-\u0651-\u0652]' + ) # ً، ٌ، ٍ، َ، ُ، ِ، ّ، ْ + + __kasheeda = re.compile(r'[\u0640]') # ـ tatweel/kasheeda + + __arabic_punctuation_marks = re.compile(r'[\u060C-\u061B-\u061F]') # ؛ ، ؟ + + # Normalize_post + __last_hamzat = ('\u0623', '\u0625', '\u0622', '\u0624', '\u0626') # أ، إ، آ، ؤ، ئ + + # normalize other hamza's + __initial_hamzat = re.compile(r'^[\u0622\u0623\u0625]') # أ، إ، آ + + __waw_hamza = re.compile(r'[\u0624]') # ؤ + + __yeh_hamza = re.compile(r'[\u0626]') # ئ + + __alefat = re.compile(r'[\u0623\u0622\u0625]') # أ، إ، آ + + # Checks + __checks1 = ( + '\u0643\u0627\u0644', + '\u0628\u0627\u0644', # بال، كال + '\u0627\u0644', + '\u0644\u0644', # لل، ال + ) + + __checks2 = ('\u0629', '\u0627\u062a') # ة # female plural ات + + # Suffixes + __suffix_noun_step1a = ( + '\u064a', + '\u0643', + '\u0647', # ي، ك، ه + '\u0646\u0627', + '\u0643\u0645', + '\u0647\u0627', + '\u0647\u0646', + '\u0647\u0645', # نا، كم، ها، هن، هم + '\u0643\u0645\u0627', + '\u0647\u0645\u0627', # كما، هما + ) + + __suffix_noun_step1b = '\u0646' # ن + + __suffix_noun_step2a = ('\u0627', '\u064a', '\u0648') # ا، ي، و + + __suffix_noun_step2b = '\u0627\u062a' # ات + + __suffix_noun_step2c1 = '\u062a' # ت + + __suffix_noun_step2c2 = '\u0629' # ة + + __suffix_noun_step3 = '\u064a' # ي + + __suffix_verb_step1 = ( + '\u0647', + '\u0643', # ه، ك + '\u0646\u064a', + '\u0646\u0627', + '\u0647\u0627', + '\u0647\u0645', # ني، نا، ها، هم + '\u0647\u0646', + '\u0643\u0645', + '\u0643\u0646', # هن، كم، كن + '\u0647\u0645\u0627', + '\u0643\u0645\u0627', + '\u0643\u0645\u0648', # هما، كما، كمو + ) + + 
__suffix_verb_step2a = ( + '\u062a', + '\u0627', + '\u0646', + '\u064a', # ت، ا، ن، ي + '\u0646\u0627', + '\u062a\u0627', + '\u062a\u0646', # نا، تا، تن Past + '\u0627\u0646', + '\u0648\u0646', + '\u064a\u0646', # ان، هن، ين Present + '\u062a\u0645\u0627', # تما + ) + + __suffix_verb_step2b = ('\u0648\u0627', '\u062a\u0645') # وا، تم + + __suffix_verb_step2c = ('\u0648', '\u062a\u0645\u0648') # و # تمو + + __suffix_all_alef_maqsura = '\u0649' # ى + + # Prefixes + __prefix_step1 = ( + '\u0623', # أ + '\u0623\u0623', + '\u0623\u0622', + '\u0623\u0624', + '\u0623\u0627', + '\u0623\u0625', # أأ، أآ، أؤ، أا، أإ + ) + + __prefix_step2a = ('\u0641\u0627\u0644', '\u0648\u0627\u0644') # فال، وال + + __prefix_step2b = ('\u0641', '\u0648') # ف، و + + __prefix_step3a_noun = ( + '\u0627\u0644', + '\u0644\u0644', # لل، ال + '\u0643\u0627\u0644', + '\u0628\u0627\u0644', # بال، كال + ) + + __prefix_step3b_noun = ( + '\u0628', + '\u0643', + '\u0644', # ب، ك، ل + '\u0628\u0628', + '\u0643\u0643', # بب، كك + ) + + __prefix_step3_verb = ( + '\u0633\u064a', + '\u0633\u062a', + '\u0633\u0646', + '\u0633\u0623', + ) # سي، ست، سن، سأ + + __prefix_step4_verb = ( + '\u064a\u0633\u062a', + '\u0646\u0633\u062a', + '\u062a\u0633\u062a', + ) # يست، نست، تست + + # Suffixes added due to Conjugation Verbs + __conjugation_suffix_verb_1 = ('\u0647', '\u0643') # ه، ك + + __conjugation_suffix_verb_2 = ( + '\u0646\u064a', + '\u0646\u0627', + '\u0647\u0627', # ني، نا، ها + '\u0647\u0645', + '\u0647\u0646', + '\u0643\u0645', # هم، هن، كم + '\u0643\u0646', # كن + ) + __conjugation_suffix_verb_3 = ( + '\u0647\u0645\u0627', + '\u0643\u0645\u0627', + '\u0643\u0645\u0648', + ) # هما، كما، كمو + + __conjugation_suffix_verb_4 = ('\u0627', '\u0646', '\u064a') # ا، ن، ي + + __conjugation_suffix_verb_past = ( + '\u0646\u0627', + '\u062a\u0627', + '\u062a\u0646', + ) # نا، تا، تن + + __conjugation_suffix_verb_present = ( + '\u0627\u0646', + '\u0648\u0646', + '\u064a\u0646', + ) # ان، ون، ين + + # Suffixes added due to derivation Names + __conjugation_suffix_noun_1 = ('\u064a', '\u0643', '\u0647') # ي، ك، ه + + __conjugation_suffix_noun_2 = ( + '\u0646\u0627', + '\u0643\u0645', # نا، كم + '\u0647\u0627', + '\u0647\u0646', + '\u0647\u0645', # ها، هن، هم + ) + + __conjugation_suffix_noun_3 = ( + '\u0643\u0645\u0627', + '\u0647\u0645\u0627', + ) # كما، هما + + # Prefixes added due to derivation Names + __prefixes1 = ('\u0648\u0627', '\u0641\u0627') # فا، وا + + __articles_3len = ('\u0643\u0627\u0644', '\u0628\u0627\u0644') # بال كال + + __articles_2len = ('\u0627\u0644', '\u0644\u0644') # ال لل + + # Prepositions letters + __prepositions1 = ('\u0643', '\u0644') # ك، ل + __prepositions2 = ('\u0628\u0628', '\u0643\u0643') # بب، كك + + is_verb = True + is_noun = True + is_defined = False + + suffixes_verb_step1_success = False + suffix_verb_step2a_success = False + suffix_verb_step2b_success = False + suffix_noun_step2c2_success = False + suffix_noun_step1a_success = False + suffix_noun_step2a_success = False + suffix_noun_step2b_success = False + suffixe_noun_step1b_success = False + prefix_step2a_success = False + prefix_step3a_noun_success = False + prefix_step3b_noun_success = False + + def __normalize_pre(self, token): + """ + :param token: string + :return: normalized token type string + """ + # strip diacritics + token = self.__vocalization.sub('', token) + # strip kasheeda + token = self.__kasheeda.sub('', token) + # strip punctuation marks + token = self.__arabic_punctuation_marks.sub('', token) + return token + + def 
__normalize_post(self, token): + # normalize last hamza + for hamza in self.__last_hamzat: + if token.endswith(hamza): + token = suffix_replace(token, hamza, '\u0621') + break + # normalize other hamzat + token = self.__initial_hamzat.sub('\u0627', token) + token = self.__waw_hamza.sub('\u0648', token) + token = self.__yeh_hamza.sub('\u064a', token) + token = self.__alefat.sub('\u0627', token) + return token + + def __checks_1(self, token): + for prefix in self.__checks1: + if token.startswith(prefix): + if prefix in self.__articles_3len and len(token) > 4: + self.is_noun = True + self.is_verb = False + self.is_defined = True + break + + if prefix in self.__articles_2len and len(token) > 3: + self.is_noun = True + self.is_verb = False + self.is_defined = True + break + + def __checks_2(self, token): + for suffix in self.__checks2: + if token.endswith(suffix): + if suffix == '\u0629' and len(token) > 2: + self.is_noun = True + self.is_verb = False + break + + if suffix == '\u0627\u062a' and len(token) > 3: + self.is_noun = True + self.is_verb = False + break + + def __Suffix_Verb_Step1(self, token): + for suffix in self.__suffix_verb_step1: + if token.endswith(suffix): + if suffix in self.__conjugation_suffix_verb_1 and len(token) >= 4: + token = token[:-1] + self.suffixes_verb_step1_success = True + break + + if suffix in self.__conjugation_suffix_verb_2 and len(token) >= 5: + token = token[:-2] + self.suffixes_verb_step1_success = True + break + + if suffix in self.__conjugation_suffix_verb_3 and len(token) >= 6: + token = token[:-3] + self.suffixes_verb_step1_success = True + break + return token + + def __Suffix_Verb_Step2a(self, token): + for suffix in self.__suffix_verb_step2a: + if token.endswith(suffix) and len(token) > 3: + if suffix == '\u062a' and len(token) >= 4: + token = token[:-1] + self.suffix_verb_step2a_success = True + break + + if suffix in self.__conjugation_suffix_verb_4 and len(token) >= 4: + token = token[:-1] + self.suffix_verb_step2a_success = True + break + + if suffix in self.__conjugation_suffix_verb_past and len(token) >= 5: + token = token[:-2] # past + self.suffix_verb_step2a_success = True + break + + if suffix in self.__conjugation_suffix_verb_present and len(token) > 5: + token = token[:-2] # present + self.suffix_verb_step2a_success = True + break + + if suffix == '\u062a\u0645\u0627' and len(token) >= 6: + token = token[:-3] + self.suffix_verb_step2a_success = True + break + return token + + def __Suffix_Verb_Step2c(self, token): + for suffix in self.__suffix_verb_step2c: + if token.endswith(suffix): + if suffix == '\u062a\u0645\u0648' and len(token) >= 6: + token = token[:-3] + break + + if suffix == '\u0648' and len(token) >= 4: + token = token[:-1] + break + return token + + def __Suffix_Verb_Step2b(self, token): + for suffix in self.__suffix_verb_step2b: + if token.endswith(suffix) and len(token) >= 5: + token = token[:-2] + self.suffix_verb_step2b_success = True + break + return token + + def __Suffix_Noun_Step2c2(self, token): + for suffix in self.__suffix_noun_step2c2: + if token.endswith(suffix) and len(token) >= 3: + token = token[:-1] + self.suffix_noun_step2c2_success = True + break + return token + + def __Suffix_Noun_Step1a(self, token): + for suffix in self.__suffix_noun_step1a: + if token.endswith(suffix): + if suffix in self.__conjugation_suffix_noun_1 and len(token) >= 4: + token = token[:-1] + self.suffix_noun_step1a_success = True + break + + if suffix in self.__conjugation_suffix_noun_2 and len(token) >= 5: + token = token[:-2] + 
self.suffix_noun_step1a_success = True + break + + if suffix in self.__conjugation_suffix_noun_3 and len(token) >= 6: + token = token[:-3] + self.suffix_noun_step1a_success = True + break + return token + + def __Suffix_Noun_Step2a(self, token): + for suffix in self.__suffix_noun_step2a: + if token.endswith(suffix) and len(token) > 4: + token = token[:-1] + self.suffix_noun_step2a_success = True + break + return token + + def __Suffix_Noun_Step2b(self, token): + for suffix in self.__suffix_noun_step2b: + if token.endswith(suffix) and len(token) >= 5: + token = token[:-2] + self.suffix_noun_step2b_success = True + break + return token + + def __Suffix_Noun_Step2c1(self, token): + for suffix in self.__suffix_noun_step2c1: + if token.endswith(suffix) and len(token) >= 4: + token = token[:-1] + break + return token + + def __Suffix_Noun_Step1b(self, token): + for suffix in self.__suffix_noun_step1b: + if token.endswith(suffix) and len(token) > 5: + token = token[:-1] + self.suffixe_noun_step1b_success = True + break + return token + + def __Suffix_Noun_Step3(self, token): + for suffix in self.__suffix_noun_step3: + if token.endswith(suffix) and len(token) >= 3: + token = token[:-1] # ya' nisbiya + break + return token + + def __Suffix_All_alef_maqsura(self, token): + for suffix in self.__suffix_all_alef_maqsura: + if token.endswith(suffix): + token = suffix_replace(token, suffix, '\u064a') + return token + + def __Prefix_Step1(self, token): + for prefix in self.__prefix_step1: + if token.startswith(prefix) and len(token) > 3: + if prefix == '\u0623\u0623': + token = prefix_replace(token, prefix, '\u0623') + break + + elif prefix == '\u0623\u0622': + token = prefix_replace(token, prefix, '\u0622') + break + + elif prefix == '\u0623\u0624': + token = prefix_replace(token, prefix, '\u0624') + break + + elif prefix == '\u0623\u0627': + token = prefix_replace(token, prefix, '\u0627') + break + + elif prefix == '\u0623\u0625': + token = prefix_replace(token, prefix, '\u0625') + break + return token + + def __Prefix_Step2a(self, token): + for prefix in self.__prefix_step2a: + if token.startswith(prefix) and len(token) > 5: + token = token[len(prefix) :] + self.prefix_step2a_success = True + break + return token + + def __Prefix_Step2b(self, token): + for prefix in self.__prefix_step2b: + if token.startswith(prefix) and len(token) > 3: + if token[:2] not in self.__prefixes1: + token = token[len(prefix) :] + break + return token + + def __Prefix_Step3a_Noun(self, token): + for prefix in self.__prefix_step3a_noun: + if token.startswith(prefix): + if prefix in self.__articles_2len and len(token) > 4: + token = token[len(prefix) :] + self.prefix_step3a_noun_success = True + break + if prefix in self.__articles_3len and len(token) > 5: + token = token[len(prefix) :] + break + return token + + def __Prefix_Step3b_Noun(self, token): + for prefix in self.__prefix_step3b_noun: + if token.startswith(prefix): + if len(token) > 3: + if prefix == '\u0628': + token = token[len(prefix) :] + self.prefix_step3b_noun_success = True + break + + if prefix in self.__prepositions2: + token = prefix_replace(token, prefix, prefix[1]) + self.prefix_step3b_noun_success = True + break + + if prefix in self.__prepositions1 and len(token) > 4: + token = token[len(prefix) :] # BUG: cause confusion + self.prefix_step3b_noun_success = True + break + return token + + def __Prefix_Step3_Verb(self, token): + for prefix in self.__prefix_step3_verb: + if token.startswith(prefix) and len(token) > 4: + token = prefix_replace(token, prefix, 
prefix[1]) + break + return token + + def __Prefix_Step4_Verb(self, token): + for prefix in self.__prefix_step4_verb: + if token.startswith(prefix) and len(token) > 4: + token = prefix_replace(token, prefix, '\u0627\u0633\u062a') + self.is_verb = True + self.is_noun = False + break + return token + + def stem(self, word): + """ + Stem an Arabic word and return the stemmed form. + :param word: string + :return: string + """ + # set initial values + self.is_verb = True + self.is_noun = True + self.is_defined = False + + self.suffix_verb_step2a_success = False + self.suffix_verb_step2b_success = False + self.suffix_noun_step2c2_success = False + self.suffix_noun_step1a_success = False + self.suffix_noun_step2a_success = False + self.suffix_noun_step2b_success = False + self.suffixe_noun_step1b_success = False + self.prefix_step2a_success = False + self.prefix_step3a_noun_success = False + self.prefix_step3b_noun_success = False + + modified_word = word + # guess type and properties + # checks1 + self.__checks_1(modified_word) + # checks2 + self.__checks_2(modified_word) + # Pre_Normalization + modified_word = self.__normalize_pre(modified_word) + # Avoid stopwords + if modified_word in self.stopwords or len(modified_word) <= 2: + return modified_word + # Start stemming + if self.is_verb: + modified_word = self.__Suffix_Verb_Step1(modified_word) + if self.suffixes_verb_step1_success: + modified_word = self.__Suffix_Verb_Step2a(modified_word) + if not self.suffix_verb_step2a_success: + modified_word = self.__Suffix_Verb_Step2c(modified_word) + # or next TODO: How to deal with or next instruction + else: + modified_word = self.__Suffix_Verb_Step2b(modified_word) + if not self.suffix_verb_step2b_success: + modified_word = self.__Suffix_Verb_Step2a(modified_word) + if self.is_noun: + modified_word = self.__Suffix_Noun_Step2c2(modified_word) + if not self.suffix_noun_step2c2_success: + if not self.is_defined: + modified_word = self.__Suffix_Noun_Step1a(modified_word) + # if self.suffix_noun_step1a_success: + modified_word = self.__Suffix_Noun_Step2a(modified_word) + if not self.suffix_noun_step2a_success: + modified_word = self.__Suffix_Noun_Step2b(modified_word) + if ( + not self.suffix_noun_step2b_success + and not self.suffix_noun_step2a_success + ): + modified_word = self.__Suffix_Noun_Step2c1(modified_word) + # or next ? 
todo : how to deal with or next + else: + modified_word = self.__Suffix_Noun_Step1b(modified_word) + if self.suffixe_noun_step1b_success: + modified_word = self.__Suffix_Noun_Step2a(modified_word) + if not self.suffix_noun_step2a_success: + modified_word = self.__Suffix_Noun_Step2b(modified_word) + if ( + not self.suffix_noun_step2b_success + and not self.suffix_noun_step2a_success + ): + modified_word = self.__Suffix_Noun_Step2c1(modified_word) + else: + if not self.is_defined: + modified_word = self.__Suffix_Noun_Step2a(modified_word) + modified_word = self.__Suffix_Noun_Step2b(modified_word) + modified_word = self.__Suffix_Noun_Step3(modified_word) + if not self.is_noun and self.is_verb: + modified_word = self.__Suffix_All_alef_maqsura(modified_word) + + # prefixes + modified_word = self.__Prefix_Step1(modified_word) + modified_word = self.__Prefix_Step2a(modified_word) + if not self.prefix_step2a_success: + modified_word = self.__Prefix_Step2b(modified_word) + modified_word = self.__Prefix_Step3a_Noun(modified_word) + if not self.prefix_step3a_noun_success and self.is_noun: + modified_word = self.__Prefix_Step3b_Noun(modified_word) + else: + if not self.prefix_step3b_noun_success and self.is_verb: + modified_word = self.__Prefix_Step3_Verb(modified_word) + modified_word = self.__Prefix_Step4_Verb(modified_word) + + # post normalization stemming + modified_word = self.__normalize_post(modified_word) + stemmed_word = modified_word + return stemmed_word + + +class DanishStemmer(_ScandinavianStemmer): + + """ + The Danish Snowball stemmer. + + :cvar __vowels: The Danish vowels. + :type __vowels: unicode + :cvar __consonants: The Danish consonants. + :type __consonants: unicode + :cvar __double_consonants: The Danish double consonants. + :type __double_consonants: tuple + :cvar __s_ending: Letters that may directly appear before a word final 's'. + :type __s_ending: unicode + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. + :type __step2_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. + :type __step3_suffixes: tuple + :note: A detailed description of the Danish + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/danish/stemmer.html + + """ + + # The language's vowels and other important characters are defined. + __vowels = "aeiouy\xE6\xE5\xF8" + __consonants = "bcdfghjklmnpqrstvwxz" + __double_consonants = ( + "bb", + "cc", + "dd", + "ff", + "gg", + "hh", + "jj", + "kk", + "ll", + "mm", + "nn", + "pp", + "qq", + "rr", + "ss", + "tt", + "vv", + "ww", + "xx", + "zz", + ) + __s_ending = "abcdfghjklmnoprtvyz\xE5" + + # The different suffixes, divided into the algorithm's steps + # and organized by length, are listed in tuples. + __step1_suffixes = ( + "erendes", + "erende", + "hedens", + "ethed", + "erede", + "heden", + "heder", + "endes", + "ernes", + "erens", + "erets", + "ered", + "ende", + "erne", + "eren", + "erer", + "heds", + "enes", + "eres", + "eret", + "hed", + "ene", + "ere", + "ens", + "ers", + "ets", + "en", + "er", + "es", + "et", + "e", + "s", + ) + __step2_suffixes = ("gd", "dt", "gt", "kt") + __step3_suffixes = ("elig", "l\xF8st", "lig", "els", "ig") + + def stem(self, word): + """ + Stem a Danish word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. 
+ :rtype: unicode + + """ + # Every word is put into lower case for normalization. + word = word.lower() + + if word in self.stopwords: + return word + + # After this, the required regions are generated + # by the respective helper method. + r1 = self._r1_scandinavian(word, self.__vowels) + + # Then the actual stemming process starts. + # Every new step is explicitly indicated + # according to the descriptions on the Snowball website. + + # STEP 1 + for suffix in self.__step1_suffixes: + if r1.endswith(suffix): + if suffix == "s": + if word[-2] in self.__s_ending: + word = word[:-1] + r1 = r1[:-1] + else: + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + break + + # STEP 2 + for suffix in self.__step2_suffixes: + if r1.endswith(suffix): + word = word[:-1] + r1 = r1[:-1] + break + + # STEP 3 + if r1.endswith("igst"): + word = word[:-2] + r1 = r1[:-2] + + for suffix in self.__step3_suffixes: + if r1.endswith(suffix): + if suffix == "l\xF8st": + word = word[:-1] + r1 = r1[:-1] + else: + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + + if r1.endswith(self.__step2_suffixes): + word = word[:-1] + r1 = r1[:-1] + break + + # STEP 4: Undouble + for double_cons in self.__double_consonants: + if word.endswith(double_cons) and len(word) > 3: + word = word[:-1] + break + + return word + + +class DutchStemmer(_StandardStemmer): + + """ + The Dutch Snowball stemmer. + + :cvar __vowels: The Dutch vowels. + :type __vowels: unicode + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step3b_suffixes: Suffixes to be deleted in step 3b of the algorithm. + :type __step3b_suffixes: tuple + :note: A detailed description of the Dutch + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/dutch/stemmer.html + + """ + + __vowels = "aeiouy\xE8" + __step1_suffixes = ("heden", "ene", "en", "se", "s") + __step3b_suffixes = ("baar", "lijk", "bar", "end", "ing", "ig") + + def stem(self, word): + """ + Stem a Dutch word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. + :rtype: unicode + + """ + word = word.lower() + + if word in self.stopwords: + return word + + step2_success = False + + # Vowel accents are removed. + word = ( + word.replace("\xE4", "a") + .replace("\xE1", "a") + .replace("\xEB", "e") + .replace("\xE9", "e") + .replace("\xED", "i") + .replace("\xEF", "i") + .replace("\xF6", "o") + .replace("\xF3", "o") + .replace("\xFC", "u") + .replace("\xFA", "u") + ) + + # An initial 'y', a 'y' after a vowel, + # and an 'i' between self.__vowels is put into upper case. + # As from now these are treated as consonants. + if word.startswith("y"): + word = "".join(("Y", word[1:])) + + for i in range(1, len(word)): + if word[i - 1] in self.__vowels and word[i] == "y": + word = "".join((word[:i], "Y", word[i + 1 :])) + + for i in range(1, len(word) - 1): + if ( + word[i - 1] in self.__vowels + and word[i] == "i" + and word[i + 1] in self.__vowels + ): + word = "".join((word[:i], "I", word[i + 1 :])) + + r1, r2 = self._r1r2_standard(word, self.__vowels) + + # R1 is adjusted so that the region before it + # contains at least 3 letters. 
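+        # Hand-worked illustration of the adjustment loop below (not taken from
+        # the Snowball reference, just an example): for "enige" the plain R1
+        # would be "ige", leaving only two letters ("en") in front of it, so
+        # R1 is shortened to word[3:] == "ge" to keep at least three letters
+        # before the region.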
+ for i in range(1, len(word)): + if word[i] not in self.__vowels and word[i - 1] in self.__vowels: + if len(word[: i + 1]) < 3 and len(word[: i + 1]) > 0: + r1 = word[3:] + elif len(word[: i + 1]) == 0: + return word + break + + # STEP 1 + for suffix in self.__step1_suffixes: + if r1.endswith(suffix): + if suffix == "heden": + word = suffix_replace(word, suffix, "heid") + r1 = suffix_replace(r1, suffix, "heid") + if r2.endswith("heden"): + r2 = suffix_replace(r2, suffix, "heid") + + elif ( + suffix in ("ene", "en") + and not word.endswith("heden") + and word[-len(suffix) - 1] not in self.__vowels + and word[-len(suffix) - 3 : -len(suffix)] != "gem" + ): + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + r2 = r2[: -len(suffix)] + if word.endswith(("kk", "dd", "tt")): + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + + elif ( + suffix in ("se", "s") + and word[-len(suffix) - 1] not in self.__vowels + and word[-len(suffix) - 1] != "j" + ): + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + r2 = r2[: -len(suffix)] + break + + # STEP 2 + if r1.endswith("e") and word[-2] not in self.__vowels: + step2_success = True + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + + if word.endswith(("kk", "dd", "tt")): + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + + # STEP 3a + if r2.endswith("heid") and word[-5] != "c": + word = word[:-4] + r1 = r1[:-4] + r2 = r2[:-4] + + if ( + r1.endswith("en") + and word[-3] not in self.__vowels + and word[-5:-2] != "gem" + ): + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + if word.endswith(("kk", "dd", "tt")): + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + + # STEP 3b: Derivational suffixes + for suffix in self.__step3b_suffixes: + if r2.endswith(suffix): + if suffix in ("end", "ing"): + word = word[:-3] + r2 = r2[:-3] + + if r2.endswith("ig") and word[-3] != "e": + word = word[:-2] + else: + if word.endswith(("kk", "dd", "tt")): + word = word[:-1] + + elif suffix == "ig" and word[-3] != "e": + word = word[:-2] + + elif suffix == "lijk": + word = word[:-4] + r1 = r1[:-4] + + if r1.endswith("e") and word[-2] not in self.__vowels: + word = word[:-1] + if word.endswith(("kk", "dd", "tt")): + word = word[:-1] + + elif suffix == "baar": + word = word[:-4] + + elif suffix == "bar" and step2_success: + word = word[:-3] + break + + # STEP 4: Undouble vowel + if len(word) >= 4: + if word[-1] not in self.__vowels and word[-1] != "I": + if word[-3:-1] in ("aa", "ee", "oo", "uu"): + if word[-4] not in self.__vowels: + word = "".join((word[:-3], word[-3], word[-1])) + + # All occurrences of 'I' and 'Y' are put back into lower case. + word = word.replace("I", "i").replace("Y", "y") + + return word + + +class EnglishStemmer(_StandardStemmer): + + """ + The English Snowball stemmer. + + :cvar __vowels: The English vowels. + :type __vowels: unicode + :cvar __double_consonants: The English double consonants. + :type __double_consonants: tuple + :cvar __li_ending: Letters that may directly appear before a word final 'li'. + :type __li_ending: unicode + :cvar __step0_suffixes: Suffixes to be deleted in step 0 of the algorithm. + :type __step0_suffixes: tuple + :cvar __step1a_suffixes: Suffixes to be deleted in step 1a of the algorithm. + :type __step1a_suffixes: tuple + :cvar __step1b_suffixes: Suffixes to be deleted in step 1b of the algorithm. + :type __step1b_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. + :type __step2_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. 
+ :type __step3_suffixes: tuple + :cvar __step4_suffixes: Suffixes to be deleted in step 4 of the algorithm. + :type __step4_suffixes: tuple + :cvar __step5_suffixes: Suffixes to be deleted in step 5 of the algorithm. + :type __step5_suffixes: tuple + :cvar __special_words: A dictionary containing words + which have to be stemmed specially. + :type __special_words: dict + :note: A detailed description of the English + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/english/stemmer.html + """ + + __vowels = "aeiouy" + __double_consonants = ("bb", "dd", "ff", "gg", "mm", "nn", "pp", "rr", "tt") + __li_ending = "cdeghkmnrt" + __step0_suffixes = ("'s'", "'s", "'") + __step1a_suffixes = ("sses", "ied", "ies", "us", "ss", "s") + __step1b_suffixes = ("eedly", "ingly", "edly", "eed", "ing", "ed") + __step2_suffixes = ( + 'ization', + 'ational', + 'fulness', + 'ousness', + 'iveness', + 'tional', + 'biliti', + 'lessli', + 'entli', + 'ation', + 'alism', + 'aliti', + 'ousli', + 'iviti', + 'fulli', + 'enci', + 'anci', + 'abli', + 'izer', + 'ator', + 'alli', + 'bli', + 'ogi', + 'li', + ) + __step3_suffixes = ( + 'ational', + 'tional', + 'alize', + 'icate', + 'iciti', + 'ative', + 'ical', + 'ness', + 'ful', + ) + __step4_suffixes = ( + 'ement', + 'ance', + 'ence', + 'able', + 'ible', + 'ment', + 'ant', + 'ent', + 'ism', + 'ate', + 'iti', + 'ous', + 'ive', + 'ize', + 'ion', + 'al', + 'er', + 'ic', + ) + __step5_suffixes = ("e", "l") + __special_words = { + "skis": "ski", + "skies": "sky", + "dying": "die", + "lying": "lie", + "tying": "tie", + "idly": "idl", + "gently": "gentl", + "ugly": "ugli", + "early": "earli", + "only": "onli", + "singly": "singl", + "sky": "sky", + "news": "news", + "howe": "howe", + "atlas": "atlas", + "cosmos": "cosmos", + "bias": "bias", + "andes": "andes", + "inning": "inning", + "innings": "inning", + "outing": "outing", + "outings": "outing", + "canning": "canning", + "cannings": "canning", + "herring": "herring", + "herrings": "herring", + "earring": "earring", + "earrings": "earring", + "proceed": "proceed", + "proceeds": "proceed", + "proceeded": "proceed", + "proceeding": "proceed", + "exceed": "exceed", + "exceeds": "exceed", + "exceeded": "exceed", + "exceeding": "exceed", + "succeed": "succeed", + "succeeds": "succeed", + "succeeded": "succeed", + "succeeding": "succeed", + } + + def stem(self, word): + + """ + Stem an English word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. 
+ :rtype: unicode + + """ + word = word.lower() + + if word in self.stopwords or len(word) <= 2: + return word + + elif word in self.__special_words: + return self.__special_words[word] + + # Map the different apostrophe characters to a single consistent one + word = ( + word.replace("\u2019", "\x27") + .replace("\u2018", "\x27") + .replace("\u201B", "\x27") + ) + + if word.startswith("\x27"): + word = word[1:] + + if word.startswith("y"): + word = "".join(("Y", word[1:])) + + for i in range(1, len(word)): + if word[i - 1] in self.__vowels and word[i] == "y": + word = "".join((word[:i], "Y", word[i + 1 :])) + + step1a_vowel_found = False + step1b_vowel_found = False + + r1 = "" + r2 = "" + + if word.startswith(("gener", "commun", "arsen")): + if word.startswith(("gener", "arsen")): + r1 = word[5:] + else: + r1 = word[6:] + + for i in range(1, len(r1)): + if r1[i] not in self.__vowels and r1[i - 1] in self.__vowels: + r2 = r1[i + 1 :] + break + else: + r1, r2 = self._r1r2_standard(word, self.__vowels) + + # STEP 0 + for suffix in self.__step0_suffixes: + if word.endswith(suffix): + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + r2 = r2[: -len(suffix)] + break + + # STEP 1a + for suffix in self.__step1a_suffixes: + if word.endswith(suffix): + + if suffix == "sses": + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + elif suffix in ("ied", "ies"): + if len(word[: -len(suffix)]) > 1: + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + else: + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + + elif suffix == "s": + for letter in word[:-2]: + if letter in self.__vowels: + step1a_vowel_found = True + break + + if step1a_vowel_found: + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + break + + # STEP 1b + for suffix in self.__step1b_suffixes: + if word.endswith(suffix): + if suffix in ("eed", "eedly"): + + if r1.endswith(suffix): + word = suffix_replace(word, suffix, "ee") + + if len(r1) >= len(suffix): + r1 = suffix_replace(r1, suffix, "ee") + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = suffix_replace(r2, suffix, "ee") + else: + r2 = "" + else: + for letter in word[: -len(suffix)]: + if letter in self.__vowels: + step1b_vowel_found = True + break + + if step1b_vowel_found: + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + r2 = r2[: -len(suffix)] + + if word.endswith(("at", "bl", "iz")): + word = "".join((word, "e")) + r1 = "".join((r1, "e")) + + if len(word) > 5 or len(r1) >= 3: + r2 = "".join((r2, "e")) + + elif word.endswith(self.__double_consonants): + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + + elif ( + r1 == "" + and len(word) >= 3 + and word[-1] not in self.__vowels + and word[-1] not in "wxY" + and word[-2] in self.__vowels + and word[-3] not in self.__vowels + ) or ( + r1 == "" + and len(word) == 2 + and word[0] in self.__vowels + and word[1] not in self.__vowels + ): + + word = "".join((word, "e")) + + if len(r1) > 0: + r1 = "".join((r1, "e")) + + if len(r2) > 0: + r2 = "".join((r2, "e")) + break + + # STEP 1c + if len(word) > 2 and word[-1] in "yY" and word[-2] not in self.__vowels: + word = "".join((word[:-1], "i")) + if len(r1) >= 1: + r1 = "".join((r1[:-1], "i")) + else: + r1 = "" + + if len(r2) >= 1: + r2 = "".join((r2[:-1], "i")) + else: + r2 = "" + + # STEP 2 + for suffix in self.__step2_suffixes: + if word.endswith(suffix): + if r1.endswith(suffix): + if suffix == "tional": + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + elif suffix in ("enci", "anci", "abli"): + word = "".join((word[:-1], "e")) + + if len(r1) >= 1: + r1 = "".join((r1[:-1], 
"e")) + else: + r1 = "" + + if len(r2) >= 1: + r2 = "".join((r2[:-1], "e")) + else: + r2 = "" + + elif suffix == "entli": + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + elif suffix in ("izer", "ization"): + word = suffix_replace(word, suffix, "ize") + + if len(r1) >= len(suffix): + r1 = suffix_replace(r1, suffix, "ize") + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = suffix_replace(r2, suffix, "ize") + else: + r2 = "" + + elif suffix in ("ational", "ation", "ator"): + word = suffix_replace(word, suffix, "ate") + + if len(r1) >= len(suffix): + r1 = suffix_replace(r1, suffix, "ate") + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = suffix_replace(r2, suffix, "ate") + else: + r2 = "e" + + elif suffix in ("alism", "aliti", "alli"): + word = suffix_replace(word, suffix, "al") + + if len(r1) >= len(suffix): + r1 = suffix_replace(r1, suffix, "al") + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = suffix_replace(r2, suffix, "al") + else: + r2 = "" + + elif suffix == "fulness": + word = word[:-4] + r1 = r1[:-4] + r2 = r2[:-4] + + elif suffix in ("ousli", "ousness"): + word = suffix_replace(word, suffix, "ous") + + if len(r1) >= len(suffix): + r1 = suffix_replace(r1, suffix, "ous") + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = suffix_replace(r2, suffix, "ous") + else: + r2 = "" + + elif suffix in ("iveness", "iviti"): + word = suffix_replace(word, suffix, "ive") + + if len(r1) >= len(suffix): + r1 = suffix_replace(r1, suffix, "ive") + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = suffix_replace(r2, suffix, "ive") + else: + r2 = "e" + + elif suffix in ("biliti", "bli"): + word = suffix_replace(word, suffix, "ble") + + if len(r1) >= len(suffix): + r1 = suffix_replace(r1, suffix, "ble") + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = suffix_replace(r2, suffix, "ble") + else: + r2 = "" + + elif suffix == "ogi" and word[-4] == "l": + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + + elif suffix in ("fulli", "lessli"): + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + elif suffix == "li" and word[-3] in self.__li_ending: + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + break + + # STEP 3 + for suffix in self.__step3_suffixes: + if word.endswith(suffix): + if r1.endswith(suffix): + if suffix == "tional": + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + elif suffix == "ational": + word = suffix_replace(word, suffix, "ate") + + if len(r1) >= len(suffix): + r1 = suffix_replace(r1, suffix, "ate") + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = suffix_replace(r2, suffix, "ate") + else: + r2 = "" + + elif suffix == "alize": + word = word[:-3] + r1 = r1[:-3] + r2 = r2[:-3] + + elif suffix in ("icate", "iciti", "ical"): + word = suffix_replace(word, suffix, "ic") + + if len(r1) >= len(suffix): + r1 = suffix_replace(r1, suffix, "ic") + else: + r1 = "" + + if len(r2) >= len(suffix): + r2 = suffix_replace(r2, suffix, "ic") + else: + r2 = "" + + elif suffix in ("ful", "ness"): + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + r2 = r2[: -len(suffix)] + + elif suffix == "ative" and r2.endswith(suffix): + word = word[:-5] + r1 = r1[:-5] + r2 = r2[:-5] + break + + # STEP 4 + for suffix in self.__step4_suffixes: + if word.endswith(suffix): + if r2.endswith(suffix): + if suffix == "ion": + if word[-4] in "st": + word = word[:-3] + r1 = r1[:-3] + r2 = r2[:-3] + else: + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + r2 = r2[: -len(suffix)] + break + + # STEP 5 + if r2.endswith("l") and word[-2] == "l": + word = word[:-1] + elif 
r2.endswith("e"): + word = word[:-1] + elif r1.endswith("e"): + if len(word) >= 4 and ( + word[-2] in self.__vowels + or word[-2] in "wxY" + or word[-3] not in self.__vowels + or word[-4] in self.__vowels + ): + word = word[:-1] + + word = word.replace("Y", "y") + + return word + + +class FinnishStemmer(_StandardStemmer): + + """ + The Finnish Snowball stemmer. + + :cvar __vowels: The Finnish vowels. + :type __vowels: unicode + :cvar __restricted_vowels: A subset of the Finnish vowels. + :type __restricted_vowels: unicode + :cvar __long_vowels: The Finnish vowels in their long forms. + :type __long_vowels: tuple + :cvar __consonants: The Finnish consonants. + :type __consonants: unicode + :cvar __double_consonants: The Finnish double consonants. + :type __double_consonants: tuple + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. + :type __step2_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. + :type __step3_suffixes: tuple + :cvar __step4_suffixes: Suffixes to be deleted in step 4 of the algorithm. + :type __step4_suffixes: tuple + :note: A detailed description of the Finnish + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/finnish/stemmer.html + """ + + __vowels = "aeiouy\xE4\xF6" + __restricted_vowels = "aeiou\xE4\xF6" + __long_vowels = ("aa", "ee", "ii", "oo", "uu", "\xE4\xE4", "\xF6\xF6") + __consonants = "bcdfghjklmnpqrstvwxz" + __double_consonants = ( + "bb", + "cc", + "dd", + "ff", + "gg", + "hh", + "jj", + "kk", + "ll", + "mm", + "nn", + "pp", + "qq", + "rr", + "ss", + "tt", + "vv", + "ww", + "xx", + "zz", + ) + __step1_suffixes = ( + 'kaan', + 'k\xE4\xE4n', + 'sti', + 'kin', + 'han', + 'h\xE4n', + 'ko', + 'k\xF6', + 'pa', + 'p\xE4', + ) + __step2_suffixes = ('nsa', 'ns\xE4', 'mme', 'nne', 'si', 'ni', 'an', '\xE4n', 'en') + __step3_suffixes = ( + 'siin', + 'tten', + 'seen', + 'han', + 'hen', + 'hin', + 'hon', + 'h\xE4n', + 'h\xF6n', + 'den', + 'tta', + 'tt\xE4', + 'ssa', + 'ss\xE4', + 'sta', + 'st\xE4', + 'lla', + 'll\xE4', + 'lta', + 'lt\xE4', + 'lle', + 'ksi', + 'ine', + 'ta', + 't\xE4', + 'na', + 'n\xE4', + 'a', + '\xE4', + 'n', + ) + __step4_suffixes = ( + 'impi', + 'impa', + 'imp\xE4', + 'immi', + 'imma', + 'imm\xE4', + 'mpi', + 'mpa', + 'mp\xE4', + 'mmi', + 'mma', + 'mm\xE4', + 'eja', + 'ej\xE4', + ) + + def stem(self, word): + """ + Stem a Finnish word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. + :rtype: unicode + + """ + word = word.lower() + + if word in self.stopwords: + return word + + step3_success = False + + r1, r2 = self._r1r2_standard(word, self.__vowels) + + # STEP 1: Particles etc. 
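+        # Reading of the loop below (illustrative): an enclitic particle is
+        # stripped when it ends R1 and the letter before it is one of
+        # "ntaeiouy\xE4\xF6", so e.g. "kirjakin" loses its "kin" and becomes
+        # "kirja"; "sti" is dropped only when it also occurs in R2.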
+ for suffix in self.__step1_suffixes: + if r1.endswith(suffix): + if suffix == "sti": + if suffix in r2: + word = word[:-3] + r1 = r1[:-3] + r2 = r2[:-3] + else: + if word[-len(suffix) - 1] in "ntaeiouy\xE4\xF6": + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + r2 = r2[: -len(suffix)] + break + + # STEP 2: Possessives + for suffix in self.__step2_suffixes: + if r1.endswith(suffix): + if suffix == "si": + if word[-3] != "k": + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + elif suffix == "ni": + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + if word.endswith("kse"): + word = suffix_replace(word, "kse", "ksi") + + if r1.endswith("kse"): + r1 = suffix_replace(r1, "kse", "ksi") + + if r2.endswith("kse"): + r2 = suffix_replace(r2, "kse", "ksi") + + elif suffix == "an": + if word[-4:-2] in ("ta", "na") or word[-5:-2] in ( + "ssa", + "sta", + "lla", + "lta", + ): + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + elif suffix == "\xE4n": + if word[-4:-2] in ("t\xE4", "n\xE4") or word[-5:-2] in ( + "ss\xE4", + "st\xE4", + "ll\xE4", + "lt\xE4", + ): + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + + elif suffix == "en": + if word[-5:-2] in ("lle", "ine"): + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + else: + word = word[:-3] + r1 = r1[:-3] + r2 = r2[:-3] + break + + # STEP 3: Cases + for suffix in self.__step3_suffixes: + if r1.endswith(suffix): + if suffix in ("han", "hen", "hin", "hon", "h\xE4n", "h\xF6n"): + if ( + (suffix == "han" and word[-4] == "a") + or (suffix == "hen" and word[-4] == "e") + or (suffix == "hin" and word[-4] == "i") + or (suffix == "hon" and word[-4] == "o") + or (suffix == "h\xE4n" and word[-4] == "\xE4") + or (suffix == "h\xF6n" and word[-4] == "\xF6") + ): + word = word[:-3] + r1 = r1[:-3] + r2 = r2[:-3] + step3_success = True + + elif suffix in ("siin", "den", "tten"): + if ( + word[-len(suffix) - 1] == "i" + and word[-len(suffix) - 2] in self.__restricted_vowels + ): + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + r2 = r2[: -len(suffix)] + step3_success = True + else: + continue + + elif suffix == "seen": + if word[-6:-4] in self.__long_vowels: + word = word[:-4] + r1 = r1[:-4] + r2 = r2[:-4] + step3_success = True + else: + continue + + elif suffix in ("a", "\xE4"): + if word[-2] in self.__vowels and word[-3] in self.__consonants: + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + step3_success = True + + elif suffix in ("tta", "tt\xE4"): + if word[-4] == "e": + word = word[:-3] + r1 = r1[:-3] + r2 = r2[:-3] + step3_success = True + + elif suffix == "n": + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + step3_success = True + + if word[-2:] == "ie" or word[-2:] in self.__long_vowels: + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + else: + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + r2 = r2[: -len(suffix)] + step3_success = True + break + + # STEP 4: Other endings + for suffix in self.__step4_suffixes: + if r2.endswith(suffix): + if suffix in ("mpi", "mpa", "mp\xE4", "mmi", "mma", "mm\xE4"): + if word[-5:-3] != "po": + word = word[:-3] + r1 = r1[:-3] + r2 = r2[:-3] + else: + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + r2 = r2[: -len(suffix)] + break + + # STEP 5: Plurals + if step3_success and len(r1) >= 1 and r1[-1] in "ij": + word = word[:-1] + r1 = r1[:-1] + + elif ( + not step3_success + and len(r1) >= 2 + and r1[-1] == "t" + and r1[-2] in self.__vowels + ): + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + if r2.endswith("imma"): + word = word[:-4] + r1 = r1[:-4] + elif r2.endswith("mma") and r2[-5:-3] 
!= "po": + word = word[:-3] + r1 = r1[:-3] + + # STEP 6: Tidying up + if r1[-2:] in self.__long_vowels: + word = word[:-1] + r1 = r1[:-1] + + if len(r1) >= 2 and r1[-2] in self.__consonants and r1[-1] in "a\xE4ei": + word = word[:-1] + r1 = r1[:-1] + + if r1.endswith(("oj", "uj")): + word = word[:-1] + r1 = r1[:-1] + + if r1.endswith("jo"): + word = word[:-1] + r1 = r1[:-1] + + # If the word ends with a double consonant + # followed by zero or more vowels, the last consonant is removed. + for i in range(1, len(word)): + if word[-i] in self.__vowels: + continue + else: + if i == 1: + if word[-i - 1 :] in self.__double_consonants: + word = word[:-1] + else: + if word[-i - 1 : -i + 1] in self.__double_consonants: + word = "".join((word[:-i], word[-i + 1 :])) + break + + return word + + +class FrenchStemmer(_StandardStemmer): + + """ + The French Snowball stemmer. + + :cvar __vowels: The French vowels. + :type __vowels: unicode + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2a_suffixes: Suffixes to be deleted in step 2a of the algorithm. + :type __step2a_suffixes: tuple + :cvar __step2b_suffixes: Suffixes to be deleted in step 2b of the algorithm. + :type __step2b_suffixes: tuple + :cvar __step4_suffixes: Suffixes to be deleted in step 4 of the algorithm. + :type __step4_suffixes: tuple + :note: A detailed description of the French + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/french/stemmer.html + """ + + __vowels = "aeiouy\xE2\xE0\xEB\xE9\xEA\xE8\xEF\xEE\xF4\xFB\xF9" + __step1_suffixes = ( + 'issements', + 'issement', + 'atrices', + 'atrice', + 'ateurs', + 'ations', + 'logies', + 'usions', + 'utions', + 'ements', + 'amment', + 'emment', + 'ances', + 'iqUes', + 'ismes', + 'ables', + 'istes', + 'ateur', + 'ation', + 'logie', + 'usion', + 'ution', + 'ences', + 'ement', + 'euses', + 'ments', + 'ance', + 'iqUe', + 'isme', + 'able', + 'iste', + 'ence', + 'it\xE9s', + 'ives', + 'eaux', + 'euse', + 'ment', + 'eux', + 'it\xE9', + 'ive', + 'ifs', + 'aux', + 'if', + ) + __step2a_suffixes = ( + 'issaIent', + 'issantes', + 'iraIent', + 'issante', + 'issants', + 'issions', + 'irions', + 'issais', + 'issait', + 'issant', + 'issent', + 'issiez', + 'issons', + 'irais', + 'irait', + 'irent', + 'iriez', + 'irons', + 'iront', + 'isses', + 'issez', + '\xEEmes', + '\xEEtes', + 'irai', + 'iras', + 'irez', + 'isse', + 'ies', + 'ira', + '\xEEt', + 'ie', + 'ir', + 'is', + 'it', + 'i', + ) + __step2b_suffixes = ( + 'eraIent', + 'assions', + 'erions', + 'assent', + 'assiez', + '\xE8rent', + 'erais', + 'erait', + 'eriez', + 'erons', + 'eront', + 'aIent', + 'antes', + 'asses', + 'ions', + 'erai', + 'eras', + 'erez', + '\xE2mes', + '\xE2tes', + 'ante', + 'ants', + 'asse', + '\xE9es', + 'era', + 'iez', + 'ais', + 'ait', + 'ant', + '\xE9e', + '\xE9s', + 'er', + 'ez', + '\xE2t', + 'ai', + 'as', + '\xE9', + 'a', + ) + __step4_suffixes = ('i\xE8re', 'I\xE8re', 'ion', 'ier', 'Ier', 'e', '\xEB') + + def stem(self, word): + """ + Stem a French word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. + :rtype: unicode + + """ + word = word.lower() + + if word in self.stopwords: + return word + + step1_success = False + rv_ending_found = False + step2a_success = False + step2b_success = False + + # Every occurrence of 'u' after 'q' is put into upper case. 
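+        # Worked note for the loop below (illustrative): in "qui" the 'u'
+        # follows 'q', so the word becomes "qUi"; the upper-case marker keeps
+        # the letter out of self.__vowels during the later steps and is mapped
+        # back to lower case at the end of stem().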
+ for i in range(1, len(word)): + if word[i - 1] == "q" and word[i] == "u": + word = "".join((word[:i], "U", word[i + 1 :])) + + # Every occurrence of 'u' and 'i' + # between vowels is put into upper case. + # Every occurrence of 'y' preceded or + # followed by a vowel is also put into upper case. + for i in range(1, len(word) - 1): + if word[i - 1] in self.__vowels and word[i + 1] in self.__vowels: + if word[i] == "u": + word = "".join((word[:i], "U", word[i + 1 :])) + + elif word[i] == "i": + word = "".join((word[:i], "I", word[i + 1 :])) + + if word[i - 1] in self.__vowels or word[i + 1] in self.__vowels: + if word[i] == "y": + word = "".join((word[:i], "Y", word[i + 1 :])) + + r1, r2 = self._r1r2_standard(word, self.__vowels) + rv = self.__rv_french(word, self.__vowels) + + # STEP 1: Standard suffix removal + for suffix in self.__step1_suffixes: + if word.endswith(suffix): + if suffix == "eaux": + word = word[:-1] + step1_success = True + + elif suffix in ("euse", "euses"): + if suffix in r2: + word = word[: -len(suffix)] + step1_success = True + + elif suffix in r1: + word = suffix_replace(word, suffix, "eux") + step1_success = True + + elif suffix in ("ement", "ements") and suffix in rv: + word = word[: -len(suffix)] + step1_success = True + + if word[-2:] == "iv" and "iv" in r2: + word = word[:-2] + + if word[-2:] == "at" and "at" in r2: + word = word[:-2] + + elif word[-3:] == "eus": + if "eus" in r2: + word = word[:-3] + elif "eus" in r1: + word = "".join((word[:-1], "x")) + + elif word[-3:] in ("abl", "iqU"): + if "abl" in r2 or "iqU" in r2: + word = word[:-3] + + elif word[-3:] in ("i\xE8r", "I\xE8r"): + if "i\xE8r" in rv or "I\xE8r" in rv: + word = "".join((word[:-3], "i")) + + elif suffix == "amment" and suffix in rv: + word = suffix_replace(word, "amment", "ant") + rv = suffix_replace(rv, "amment", "ant") + rv_ending_found = True + + elif suffix == "emment" and suffix in rv: + word = suffix_replace(word, "emment", "ent") + rv_ending_found = True + + elif ( + suffix in ("ment", "ments") + and suffix in rv + and not rv.startswith(suffix) + and rv[rv.rindex(suffix) - 1] in self.__vowels + ): + word = word[: -len(suffix)] + rv = rv[: -len(suffix)] + rv_ending_found = True + + elif suffix == "aux" and suffix in r1: + word = "".join((word[:-2], "l")) + step1_success = True + + elif ( + suffix in ("issement", "issements") + and suffix in r1 + and word[-len(suffix) - 1] not in self.__vowels + ): + word = word[: -len(suffix)] + step1_success = True + + elif ( + suffix + in ( + "ance", + "iqUe", + "isme", + "able", + "iste", + "eux", + "ances", + "iqUes", + "ismes", + "ables", + "istes", + ) + and suffix in r2 + ): + word = word[: -len(suffix)] + step1_success = True + + elif ( + suffix + in ("atrice", "ateur", "ation", "atrices", "ateurs", "ations") + and suffix in r2 + ): + word = word[: -len(suffix)] + step1_success = True + + if word[-2:] == "ic": + if "ic" in r2: + word = word[:-2] + else: + word = "".join((word[:-2], "iqU")) + + elif suffix in ("logie", "logies") and suffix in r2: + word = suffix_replace(word, suffix, "log") + step1_success = True + + elif suffix in ("usion", "ution", "usions", "utions") and suffix in r2: + word = suffix_replace(word, suffix, "u") + step1_success = True + + elif suffix in ("ence", "ences") and suffix in r2: + word = suffix_replace(word, suffix, "ent") + step1_success = True + + elif suffix in ("it\xE9", "it\xE9s") and suffix in r2: + word = word[: -len(suffix)] + step1_success = True + + if word[-4:] == "abil": + if "abil" in r2: + word = 
word[:-4] + else: + word = "".join((word[:-2], "l")) + + elif word[-2:] == "ic": + if "ic" in r2: + word = word[:-2] + else: + word = "".join((word[:-2], "iqU")) + + elif word[-2:] == "iv": + if "iv" in r2: + word = word[:-2] + + elif suffix in ("if", "ive", "ifs", "ives") and suffix in r2: + word = word[: -len(suffix)] + step1_success = True + + if word[-2:] == "at" and "at" in r2: + word = word[:-2] + + if word[-2:] == "ic": + if "ic" in r2: + word = word[:-2] + else: + word = "".join((word[:-2], "iqU")) + break + + # STEP 2a: Verb suffixes beginning 'i' + if not step1_success or rv_ending_found: + for suffix in self.__step2a_suffixes: + if word.endswith(suffix): + if ( + suffix in rv + and len(rv) > len(suffix) + and rv[rv.rindex(suffix) - 1] not in self.__vowels + ): + word = word[: -len(suffix)] + step2a_success = True + break + + # STEP 2b: Other verb suffixes + if not step2a_success: + for suffix in self.__step2b_suffixes: + if rv.endswith(suffix): + if suffix == "ions" and "ions" in r2: + word = word[:-4] + step2b_success = True + + elif suffix in ( + 'eraIent', + 'erions', + '\xE8rent', + 'erais', + 'erait', + 'eriez', + 'erons', + 'eront', + 'erai', + 'eras', + 'erez', + '\xE9es', + 'era', + 'iez', + '\xE9e', + '\xE9s', + 'er', + 'ez', + '\xE9', + ): + word = word[: -len(suffix)] + step2b_success = True + + elif suffix in ( + 'assions', + 'assent', + 'assiez', + 'aIent', + 'antes', + 'asses', + '\xE2mes', + '\xE2tes', + 'ante', + 'ants', + 'asse', + 'ais', + 'ait', + 'ant', + '\xE2t', + 'ai', + 'as', + 'a', + ): + word = word[: -len(suffix)] + rv = rv[: -len(suffix)] + step2b_success = True + if rv.endswith("e"): + word = word[:-1] + break + + # STEP 3 + if step1_success or step2a_success or step2b_success: + if word[-1] == "Y": + word = "".join((word[:-1], "i")) + elif word[-1] == "\xE7": + word = "".join((word[:-1], "c")) + + # STEP 4: Residual suffixes + else: + if len(word) >= 2 and word[-1] == "s" and word[-2] not in "aiou\xE8s": + word = word[:-1] + + for suffix in self.__step4_suffixes: + if word.endswith(suffix): + if suffix in rv: + if suffix == "ion" and suffix in r2 and rv[-4] in "st": + word = word[:-3] + + elif suffix in ("ier", "i\xE8re", "Ier", "I\xE8re"): + word = suffix_replace(word, suffix, "i") + + elif suffix == "e": + word = word[:-1] + + elif suffix == "\xEB" and word[-3:-1] == "gu": + word = word[:-1] + break + + # STEP 5: Undouble + if word.endswith(("enn", "onn", "ett", "ell", "eill")): + word = word[:-1] + + # STEP 6: Un-accent + for i in range(1, len(word)): + if word[-i] not in self.__vowels: + i += 1 + else: + if i != 1 and word[-i] in ("\xE9", "\xE8"): + word = "".join((word[:-i], "e", word[-i + 1 :])) + break + + word = word.replace("I", "i").replace("U", "u").replace("Y", "y") + + return word + + def __rv_french(self, word, vowels): + """ + Return the region RV that is used by the French stemmer. + + If the word begins with two vowels, RV is the region after + the third letter. Otherwise, it is the region after the first + vowel not at the beginning of the word, or the end of the word + if these positions cannot be found. (Exceptionally, u'par', + u'col' or u'tap' at the beginning of a word is also taken to + define RV as the region to their right.) + + :param word: The French word whose region RV is determined. + :type word: str or unicode + :param vowels: The French vowels that are used to determine + the region RV. + :type vowels: unicode + :return: the region RV for the respective French word. 
+ :rtype: unicode + :note: This helper method is invoked by the stem method of + the subclass FrenchStemmer. It is not to be invoked directly! + + """ + rv = "" + if len(word) >= 2: + if word.startswith(("par", "col", "tap")) or ( + word[0] in vowels and word[1] in vowels + ): + rv = word[3:] + else: + for i in range(1, len(word)): + if word[i] in vowels: + rv = word[i + 1 :] + break + + return rv + + +class GermanStemmer(_StandardStemmer): + + """ + The German Snowball stemmer. + + :cvar __vowels: The German vowels. + :type __vowels: unicode + :cvar __s_ending: Letters that may directly appear before a word final 's'. + :type __s_ending: unicode + :cvar __st_ending: Letter that may directly appear before a word final 'st'. + :type __st_ending: unicode + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. + :type __step2_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. + :type __step3_suffixes: tuple + :note: A detailed description of the German + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/german/stemmer.html + + """ + + __vowels = "aeiouy\xE4\xF6\xFC" + __s_ending = "bdfghklmnrt" + __st_ending = "bdfghklmnt" + + __step1_suffixes = ("ern", "em", "er", "en", "es", "e", "s") + __step2_suffixes = ("est", "en", "er", "st") + __step3_suffixes = ("isch", "lich", "heit", "keit", "end", "ung", "ig", "ik") + + def stem(self, word): + """ + Stem a German word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. + :rtype: unicode + + """ + word = word.lower() + + if word in self.stopwords: + return word + + word = word.replace("\xDF", "ss") + + # Every occurrence of 'u' and 'y' + # between vowels is put into upper case. + for i in range(1, len(word) - 1): + if word[i - 1] in self.__vowels and word[i + 1] in self.__vowels: + if word[i] == "u": + word = "".join((word[:i], "U", word[i + 1 :])) + + elif word[i] == "y": + word = "".join((word[:i], "Y", word[i + 1 :])) + + r1, r2 = self._r1r2_standard(word, self.__vowels) + + # R1 is adjusted so that the region before it + # contains at least 3 letters. 
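+        # Illustrative example for the adjustment below: in "oben" the plain
+        # R1 would be "en", which leaves only two letters in front of it, so
+        # R1 is cut back to word[3:] == "n".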
+ for i in range(1, len(word)): + if word[i] not in self.__vowels and word[i - 1] in self.__vowels: + if len(word[: i + 1]) < 3 and len(word[: i + 1]) > 0: + r1 = word[3:] + elif len(word[: i + 1]) == 0: + return word + break + + # STEP 1 + for suffix in self.__step1_suffixes: + if r1.endswith(suffix): + if ( + suffix in ("en", "es", "e") + and word[-len(suffix) - 4 : -len(suffix)] == "niss" + ): + word = word[: -len(suffix) - 1] + r1 = r1[: -len(suffix) - 1] + r2 = r2[: -len(suffix) - 1] + + elif suffix == "s": + if word[-2] in self.__s_ending: + word = word[:-1] + r1 = r1[:-1] + r2 = r2[:-1] + else: + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + r2 = r2[: -len(suffix)] + break + + # STEP 2 + for suffix in self.__step2_suffixes: + if r1.endswith(suffix): + if suffix == "st": + if word[-3] in self.__st_ending and len(word[:-3]) >= 3: + word = word[:-2] + r1 = r1[:-2] + r2 = r2[:-2] + else: + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + r2 = r2[: -len(suffix)] + break + + # STEP 3: Derivational suffixes + for suffix in self.__step3_suffixes: + if r2.endswith(suffix): + if suffix in ("end", "ung"): + if ( + "ig" in r2[-len(suffix) - 2 : -len(suffix)] + and "e" not in r2[-len(suffix) - 3 : -len(suffix) - 2] + ): + word = word[: -len(suffix) - 2] + else: + word = word[: -len(suffix)] + + elif ( + suffix in ("ig", "ik", "isch") + and "e" not in r2[-len(suffix) - 1 : -len(suffix)] + ): + word = word[: -len(suffix)] + + elif suffix in ("lich", "heit"): + if ( + "er" in r1[-len(suffix) - 2 : -len(suffix)] + or "en" in r1[-len(suffix) - 2 : -len(suffix)] + ): + word = word[: -len(suffix) - 2] + else: + word = word[: -len(suffix)] + + elif suffix == "keit": + if "lich" in r2[-len(suffix) - 4 : -len(suffix)]: + word = word[: -len(suffix) - 4] + + elif "ig" in r2[-len(suffix) - 2 : -len(suffix)]: + word = word[: -len(suffix) - 2] + else: + word = word[: -len(suffix)] + break + + # Umlaut accents are removed and + # 'u' and 'y' are put back into lower case. + word = ( + word.replace("\xE4", "a") + .replace("\xF6", "o") + .replace("\xFC", "u") + .replace("U", "u") + .replace("Y", "y") + ) + + return word + + +class HungarianStemmer(_LanguageSpecificStemmer): + + """ + The Hungarian Snowball stemmer. + + :cvar __vowels: The Hungarian vowels. + :type __vowels: unicode + :cvar __digraphs: The Hungarian digraphs. + :type __digraphs: tuple + :cvar __double_consonants: The Hungarian double consonants. + :type __double_consonants: tuple + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. + :type __step2_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. + :type __step3_suffixes: tuple + :cvar __step4_suffixes: Suffixes to be deleted in step 4 of the algorithm. + :type __step4_suffixes: tuple + :cvar __step5_suffixes: Suffixes to be deleted in step 5 of the algorithm. + :type __step5_suffixes: tuple + :cvar __step6_suffixes: Suffixes to be deleted in step 6 of the algorithm. + :type __step6_suffixes: tuple + :cvar __step7_suffixes: Suffixes to be deleted in step 7 of the algorithm. + :type __step7_suffixes: tuple + :cvar __step8_suffixes: Suffixes to be deleted in step 8 of the algorithm. + :type __step8_suffixes: tuple + :cvar __step9_suffixes: Suffixes to be deleted in step 9 of the algorithm. 
+ :type __step9_suffixes: tuple + :note: A detailed description of the Hungarian + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/hungarian/stemmer.html + + """ + + __vowels = "aeiou\xF6\xFC\xE1\xE9\xED\xF3\xF5\xFA\xFB" + __digraphs = ("cs", "dz", "dzs", "gy", "ly", "ny", "ty", "zs") + __double_consonants = ( + "bb", + "cc", + "ccs", + "dd", + "ff", + "gg", + "ggy", + "jj", + "kk", + "ll", + "lly", + "mm", + "nn", + "nny", + "pp", + "rr", + "ss", + "ssz", + "tt", + "tty", + "vv", + "zz", + "zzs", + ) + + __step1_suffixes = ("al", "el") + __step2_suffixes = ( + 'k\xE9ppen', + 'onk\xE9nt', + 'enk\xE9nt', + 'ank\xE9nt', + 'k\xE9pp', + 'k\xE9nt', + 'ban', + 'ben', + 'nak', + 'nek', + 'val', + 'vel', + 't\xF3l', + 't\xF5l', + 'r\xF3l', + 'r\xF5l', + 'b\xF3l', + 'b\xF5l', + 'hoz', + 'hez', + 'h\xF6z', + 'n\xE1l', + 'n\xE9l', + '\xE9rt', + 'kor', + 'ba', + 'be', + 'ra', + 're', + 'ig', + 'at', + 'et', + 'ot', + '\xF6t', + 'ul', + '\xFCl', + 'v\xE1', + 'v\xE9', + 'en', + 'on', + 'an', + '\xF6n', + 'n', + 't', + ) + __step3_suffixes = ("\xE1nk\xE9nt", "\xE1n", "\xE9n") + __step4_suffixes = ( + 'astul', + 'est\xFCl', + '\xE1stul', + '\xE9st\xFCl', + 'stul', + 'st\xFCl', + ) + __step5_suffixes = ("\xE1", "\xE9") + __step6_suffixes = ( + 'ok\xE9', + '\xF6k\xE9', + 'ak\xE9', + 'ek\xE9', + '\xE1k\xE9', + '\xE1\xE9i', + '\xE9k\xE9', + '\xE9\xE9i', + 'k\xE9', + '\xE9i', + '\xE9\xE9', + '\xE9', + ) + __step7_suffixes = ( + '\xE1juk', + '\xE9j\xFCk', + '\xFCnk', + 'unk', + 'juk', + 'j\xFCk', + '\xE1nk', + '\xE9nk', + 'nk', + 'uk', + '\xFCk', + 'em', + 'om', + 'am', + 'od', + 'ed', + 'ad', + '\xF6d', + 'ja', + 'je', + '\xE1m', + '\xE1d', + '\xE9m', + '\xE9d', + 'm', + 'd', + 'a', + 'e', + 'o', + '\xE1', + '\xE9', + ) + __step8_suffixes = ( + 'jaitok', + 'jeitek', + 'jaink', + 'jeink', + 'aitok', + 'eitek', + '\xE1itok', + '\xE9itek', + 'jaim', + 'jeim', + 'jaid', + 'jeid', + 'eink', + 'aink', + 'itek', + 'jeik', + 'jaik', + '\xE1ink', + '\xE9ink', + 'aim', + 'eim', + 'aid', + 'eid', + 'jai', + 'jei', + 'ink', + 'aik', + 'eik', + '\xE1im', + '\xE1id', + '\xE1ik', + '\xE9im', + '\xE9id', + '\xE9ik', + 'im', + 'id', + 'ai', + 'ei', + 'ik', + '\xE1i', + '\xE9i', + 'i', + ) + __step9_suffixes = ("\xE1k", "\xE9k", "\xF6k", "ok", "ek", "ak", "k") + + def stem(self, word): + """ + Stem an Hungarian word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. 
+ :rtype: unicode + + """ + word = word.lower() + + if word in self.stopwords: + return word + + r1 = self.__r1_hungarian(word, self.__vowels, self.__digraphs) + + # STEP 1: Remove instrumental case + if r1.endswith(self.__step1_suffixes): + for double_cons in self.__double_consonants: + if word[-2 - len(double_cons) : -2] == double_cons: + word = "".join((word[:-4], word[-3])) + + if r1[-2 - len(double_cons) : -2] == double_cons: + r1 = "".join((r1[:-4], r1[-3])) + break + + # STEP 2: Remove frequent cases + for suffix in self.__step2_suffixes: + if word.endswith(suffix): + if r1.endswith(suffix): + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + + if r1.endswith("\xE1"): + word = "".join((word[:-1], "a")) + r1 = suffix_replace(r1, "\xE1", "a") + + elif r1.endswith("\xE9"): + word = "".join((word[:-1], "e")) + r1 = suffix_replace(r1, "\xE9", "e") + break + + # STEP 3: Remove special cases + for suffix in self.__step3_suffixes: + if r1.endswith(suffix): + if suffix == "\xE9n": + word = suffix_replace(word, suffix, "e") + r1 = suffix_replace(r1, suffix, "e") + else: + word = suffix_replace(word, suffix, "a") + r1 = suffix_replace(r1, suffix, "a") + break + + # STEP 4: Remove other cases + for suffix in self.__step4_suffixes: + if r1.endswith(suffix): + if suffix == "\xE1stul": + word = suffix_replace(word, suffix, "a") + r1 = suffix_replace(r1, suffix, "a") + + elif suffix == "\xE9st\xFCl": + word = suffix_replace(word, suffix, "e") + r1 = suffix_replace(r1, suffix, "e") + else: + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + break + + # STEP 5: Remove factive case + for suffix in self.__step5_suffixes: + if r1.endswith(suffix): + for double_cons in self.__double_consonants: + if word[-1 - len(double_cons) : -1] == double_cons: + word = "".join((word[:-3], word[-2])) + + if r1[-1 - len(double_cons) : -1] == double_cons: + r1 = "".join((r1[:-3], r1[-2])) + break + + # STEP 6: Remove owned + for suffix in self.__step6_suffixes: + if r1.endswith(suffix): + if suffix in ("\xE1k\xE9", "\xE1\xE9i"): + word = suffix_replace(word, suffix, "a") + r1 = suffix_replace(r1, suffix, "a") + + elif suffix in ("\xE9k\xE9", "\xE9\xE9i", "\xE9\xE9"): + word = suffix_replace(word, suffix, "e") + r1 = suffix_replace(r1, suffix, "e") + else: + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + break + + # STEP 7: Remove singular owner suffixes + for suffix in self.__step7_suffixes: + if word.endswith(suffix): + if r1.endswith(suffix): + if suffix in ("\xE1nk", "\xE1juk", "\xE1m", "\xE1d", "\xE1"): + word = suffix_replace(word, suffix, "a") + r1 = suffix_replace(r1, suffix, "a") + + elif suffix in ("\xE9nk", "\xE9j\xFCk", "\xE9m", "\xE9d", "\xE9"): + word = suffix_replace(word, suffix, "e") + r1 = suffix_replace(r1, suffix, "e") + else: + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + break + + # STEP 8: Remove plural owner suffixes + for suffix in self.__step8_suffixes: + if word.endswith(suffix): + if r1.endswith(suffix): + if suffix in ( + "\xE1im", + "\xE1id", + "\xE1i", + "\xE1ink", + "\xE1itok", + "\xE1ik", + ): + word = suffix_replace(word, suffix, "a") + r1 = suffix_replace(r1, suffix, "a") + + elif suffix in ( + "\xE9im", + "\xE9id", + "\xE9i", + "\xE9ink", + "\xE9itek", + "\xE9ik", + ): + word = suffix_replace(word, suffix, "e") + r1 = suffix_replace(r1, suffix, "e") + else: + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + break + + # STEP 9: Remove plural suffixes + for suffix in self.__step9_suffixes: + if word.endswith(suffix): + if r1.endswith(suffix): + if 
suffix == "\xE1k": + word = suffix_replace(word, suffix, "a") + elif suffix == "\xE9k": + word = suffix_replace(word, suffix, "e") + else: + word = word[: -len(suffix)] + break + + return word + + def __r1_hungarian(self, word, vowels, digraphs): + """ + Return the region R1 that is used by the Hungarian stemmer. + + If the word begins with a vowel, R1 is defined as the region + after the first consonant or digraph (= two letters stand for + one phoneme) in the word. If the word begins with a consonant, + it is defined as the region after the first vowel in the word. + If the word does not contain both a vowel and consonant, R1 + is the null region at the end of the word. + + :param word: The Hungarian word whose region R1 is determined. + :type word: str or unicode + :param vowels: The Hungarian vowels that are used to determine + the region R1. + :type vowels: unicode + :param digraphs: The digraphs that are used to determine the + region R1. + :type digraphs: tuple + :return: the region R1 for the respective word. + :rtype: unicode + :note: This helper method is invoked by the stem method of the subclass + HungarianStemmer. It is not to be invoked directly! + + """ + r1 = "" + if word[0] in vowels: + for digraph in digraphs: + if digraph in word[1:]: + r1 = word[word.index(digraph[-1]) + 1 :] + return r1 + + for i in range(1, len(word)): + if word[i] not in vowels: + r1 = word[i + 1 :] + break + else: + for i in range(1, len(word)): + if word[i] in vowels: + r1 = word[i + 1 :] + break + + return r1 + + +class ItalianStemmer(_StandardStemmer): + + """ + The Italian Snowball stemmer. + + :cvar __vowels: The Italian vowels. + :type __vowels: unicode + :cvar __step0_suffixes: Suffixes to be deleted in step 0 of the algorithm. + :type __step0_suffixes: tuple + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. 
+ :type __step2_suffixes: tuple + :note: A detailed description of the Italian + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/italian/stemmer.html + + """ + + __vowels = "aeiou\xE0\xE8\xEC\xF2\xF9" + __step0_suffixes = ( + 'gliela', + 'gliele', + 'glieli', + 'glielo', + 'gliene', + 'sene', + 'mela', + 'mele', + 'meli', + 'melo', + 'mene', + 'tela', + 'tele', + 'teli', + 'telo', + 'tene', + 'cela', + 'cele', + 'celi', + 'celo', + 'cene', + 'vela', + 'vele', + 'veli', + 'velo', + 'vene', + 'gli', + 'ci', + 'la', + 'le', + 'li', + 'lo', + 'mi', + 'ne', + 'si', + 'ti', + 'vi', + ) + __step1_suffixes = ( + 'atrice', + 'atrici', + 'azione', + 'azioni', + 'uzione', + 'uzioni', + 'usione', + 'usioni', + 'amento', + 'amenti', + 'imento', + 'imenti', + 'amente', + 'abile', + 'abili', + 'ibile', + 'ibili', + 'mente', + 'atore', + 'atori', + 'logia', + 'logie', + 'anza', + 'anze', + 'iche', + 'ichi', + 'ismo', + 'ismi', + 'ista', + 'iste', + 'isti', + 'ist\xE0', + 'ist\xE8', + 'ist\xEC', + 'ante', + 'anti', + 'enza', + 'enze', + 'ico', + 'ici', + 'ica', + 'ice', + 'oso', + 'osi', + 'osa', + 'ose', + 'it\xE0', + 'ivo', + 'ivi', + 'iva', + 'ive', + ) + __step2_suffixes = ( + 'erebbero', + 'irebbero', + 'assero', + 'assimo', + 'eranno', + 'erebbe', + 'eremmo', + 'ereste', + 'eresti', + 'essero', + 'iranno', + 'irebbe', + 'iremmo', + 'ireste', + 'iresti', + 'iscano', + 'iscono', + 'issero', + 'arono', + 'avamo', + 'avano', + 'avate', + 'eremo', + 'erete', + 'erono', + 'evamo', + 'evano', + 'evate', + 'iremo', + 'irete', + 'irono', + 'ivamo', + 'ivano', + 'ivate', + 'ammo', + 'ando', + 'asse', + 'assi', + 'emmo', + 'enda', + 'ende', + 'endi', + 'endo', + 'erai', + 'erei', + 'Yamo', + 'iamo', + 'immo', + 'irai', + 'irei', + 'isca', + 'isce', + 'isci', + 'isco', + 'ano', + 'are', + 'ata', + 'ate', + 'ati', + 'ato', + 'ava', + 'avi', + 'avo', + 'er\xE0', + 'ere', + 'er\xF2', + 'ete', + 'eva', + 'evi', + 'evo', + 'ir\xE0', + 'ire', + 'ir\xF2', + 'ita', + 'ite', + 'iti', + 'ito', + 'iva', + 'ivi', + 'ivo', + 'ono', + 'uta', + 'ute', + 'uti', + 'uto', + 'ar', + 'ir', + ) + + def stem(self, word): + """ + Stem an Italian word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. + :rtype: unicode + + """ + word = word.lower() + + if word in self.stopwords: + return word + + step1_success = False + + # All acute accents are replaced by grave accents. + word = ( + word.replace("\xE1", "\xE0") + .replace("\xE9", "\xE8") + .replace("\xED", "\xEC") + .replace("\xF3", "\xF2") + .replace("\xFA", "\xF9") + ) + + # Every occurrence of 'u' after 'q' + # is put into upper case. + for i in range(1, len(word)): + if word[i - 1] == "q" and word[i] == "u": + word = "".join((word[:i], "U", word[i + 1 :])) + + # Every occurrence of 'u' and 'i' + # between vowels is put into upper case. 
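+        # Worked note for the loop below (illustrative): in "paio" the 'i'
+        # lies between the vowels 'a' and 'o', so the word becomes "paIo";
+        # the marker keeps the letter out of self.__vowels and is restored to
+        # 'i' at the end of stem().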
+ for i in range(1, len(word) - 1): + if word[i - 1] in self.__vowels and word[i + 1] in self.__vowels: + if word[i] == "u": + word = "".join((word[:i], "U", word[i + 1 :])) + + elif word[i] == "i": + word = "".join((word[:i], "I", word[i + 1 :])) + + r1, r2 = self._r1r2_standard(word, self.__vowels) + rv = self._rv_standard(word, self.__vowels) + + # STEP 0: Attached pronoun + for suffix in self.__step0_suffixes: + if rv.endswith(suffix): + if rv[-len(suffix) - 4 : -len(suffix)] in ("ando", "endo"): + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + r2 = r2[: -len(suffix)] + rv = rv[: -len(suffix)] + + elif rv[-len(suffix) - 2 : -len(suffix)] in ("ar", "er", "ir"): + word = suffix_replace(word, suffix, "e") + r1 = suffix_replace(r1, suffix, "e") + r2 = suffix_replace(r2, suffix, "e") + rv = suffix_replace(rv, suffix, "e") + break + + # STEP 1: Standard suffix removal + for suffix in self.__step1_suffixes: + if word.endswith(suffix): + if suffix == "amente" and r1.endswith(suffix): + step1_success = True + word = word[:-6] + r2 = r2[:-6] + rv = rv[:-6] + + if r2.endswith("iv"): + word = word[:-2] + r2 = r2[:-2] + rv = rv[:-2] + + if r2.endswith("at"): + word = word[:-2] + rv = rv[:-2] + + elif r2.endswith(("os", "ic")): + word = word[:-2] + rv = rv[:-2] + + elif r2.endswith("abil"): + word = word[:-4] + rv = rv[:-4] + + elif suffix in ("amento", "amenti", "imento", "imenti") and rv.endswith( + suffix + ): + step1_success = True + word = word[:-6] + rv = rv[:-6] + + elif r2.endswith(suffix): + step1_success = True + if suffix in ("azione", "azioni", "atore", "atori"): + word = word[: -len(suffix)] + r2 = r2[: -len(suffix)] + rv = rv[: -len(suffix)] + + if r2.endswith("ic"): + word = word[:-2] + rv = rv[:-2] + + elif suffix in ("logia", "logie"): + word = word[:-2] + rv = word[:-2] + + elif suffix in ("uzione", "uzioni", "usione", "usioni"): + word = word[:-5] + rv = rv[:-5] + + elif suffix in ("enza", "enze"): + word = suffix_replace(word, suffix, "te") + rv = suffix_replace(rv, suffix, "te") + + elif suffix == "it\xE0": + word = word[:-3] + r2 = r2[:-3] + rv = rv[:-3] + + if r2.endswith(("ic", "iv")): + word = word[:-2] + rv = rv[:-2] + + elif r2.endswith("abil"): + word = word[:-4] + rv = rv[:-4] + + elif suffix in ("ivo", "ivi", "iva", "ive"): + word = word[:-3] + r2 = r2[:-3] + rv = rv[:-3] + + if r2.endswith("at"): + word = word[:-2] + r2 = r2[:-2] + rv = rv[:-2] + + if r2.endswith("ic"): + word = word[:-2] + rv = rv[:-2] + else: + word = word[: -len(suffix)] + rv = rv[: -len(suffix)] + break + + # STEP 2: Verb suffixes + if not step1_success: + for suffix in self.__step2_suffixes: + if rv.endswith(suffix): + word = word[: -len(suffix)] + rv = rv[: -len(suffix)] + break + + # STEP 3a + if rv.endswith(("a", "e", "i", "o", "\xE0", "\xE8", "\xEC", "\xF2")): + word = word[:-1] + rv = rv[:-1] + + if rv.endswith("i"): + word = word[:-1] + rv = rv[:-1] + + # STEP 3b + if rv.endswith(("ch", "gh")): + word = word[:-1] + + word = word.replace("I", "i").replace("U", "u") + + return word + + +class NorwegianStemmer(_ScandinavianStemmer): + + """ + The Norwegian Snowball stemmer. + + :cvar __vowels: The Norwegian vowels. + :type __vowels: unicode + :cvar __s_ending: Letters that may directly appear before a word final 's'. + :type __s_ending: unicode + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. 
+ :type __step2_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. + :type __step3_suffixes: tuple + :note: A detailed description of the Norwegian + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/norwegian/stemmer.html + + """ + + __vowels = "aeiouy\xE6\xE5\xF8" + __s_ending = "bcdfghjlmnoprtvyz" + __step1_suffixes = ( + "hetenes", + "hetene", + "hetens", + "heter", + "heten", + "endes", + "ande", + "ende", + "edes", + "enes", + "erte", + "ede", + "ane", + "ene", + "ens", + "ers", + "ets", + "het", + "ast", + "ert", + "en", + "ar", + "er", + "as", + "es", + "et", + "a", + "e", + "s", + ) + + __step2_suffixes = ("dt", "vt") + + __step3_suffixes = ( + "hetslov", + "eleg", + "elig", + "elov", + "slov", + "leg", + "eig", + "lig", + "els", + "lov", + "ig", + ) + + def stem(self, word): + """ + Stem a Norwegian word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. + :rtype: unicode + + """ + word = word.lower() + + if word in self.stopwords: + return word + + r1 = self._r1_scandinavian(word, self.__vowels) + + # STEP 1 + for suffix in self.__step1_suffixes: + if r1.endswith(suffix): + if suffix in ("erte", "ert"): + word = suffix_replace(word, suffix, "er") + r1 = suffix_replace(r1, suffix, "er") + + elif suffix == "s": + if word[-2] in self.__s_ending or ( + word[-2] == "k" and word[-3] not in self.__vowels + ): + word = word[:-1] + r1 = r1[:-1] + else: + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + break + + # STEP 2 + for suffix in self.__step2_suffixes: + if r1.endswith(suffix): + word = word[:-1] + r1 = r1[:-1] + break + + # STEP 3 + for suffix in self.__step3_suffixes: + if r1.endswith(suffix): + word = word[: -len(suffix)] + break + + return word + + +class PortugueseStemmer(_StandardStemmer): + + """ + The Portuguese Snowball stemmer. + + :cvar __vowels: The Portuguese vowels. + :type __vowels: unicode + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. + :type __step2_suffixes: tuple + :cvar __step4_suffixes: Suffixes to be deleted in step 4 of the algorithm. 
+ :type __step4_suffixes: tuple + :note: A detailed description of the Portuguese + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/portuguese/stemmer.html + + """ + + __vowels = "aeiou\xE1\xE9\xED\xF3\xFA\xE2\xEA\xF4" + __step1_suffixes = ( + 'amentos', + 'imentos', + 'uço~es', + 'amento', + 'imento', + 'adoras', + 'adores', + 'a\xE7o~es', + 'logias', + '\xEAncias', + 'amente', + 'idades', + 'an\xE7as', + 'ismos', + 'istas', + 'adora', + 'a\xE7a~o', + 'antes', + '\xE2ncia', + 'logia', + 'uça~o', + '\xEAncia', + 'mente', + 'idade', + 'an\xE7a', + 'ezas', + 'icos', + 'icas', + 'ismo', + '\xE1vel', + '\xEDvel', + 'ista', + 'osos', + 'osas', + 'ador', + 'ante', + 'ivas', + 'ivos', + 'iras', + 'eza', + 'ico', + 'ica', + 'oso', + 'osa', + 'iva', + 'ivo', + 'ira', + ) + __step2_suffixes = ( + 'ar\xEDamos', + 'er\xEDamos', + 'ir\xEDamos', + '\xE1ssemos', + '\xEAssemos', + '\xEDssemos', + 'ar\xEDeis', + 'er\xEDeis', + 'ir\xEDeis', + '\xE1sseis', + '\xE9sseis', + '\xEDsseis', + '\xE1ramos', + '\xE9ramos', + '\xEDramos', + '\xE1vamos', + 'aremos', + 'eremos', + 'iremos', + 'ariam', + 'eriam', + 'iriam', + 'assem', + 'essem', + 'issem', + 'ara~o', + 'era~o', + 'ira~o', + 'arias', + 'erias', + 'irias', + 'ardes', + 'erdes', + 'irdes', + 'asses', + 'esses', + 'isses', + 'astes', + 'estes', + 'istes', + '\xE1reis', + 'areis', + '\xE9reis', + 'ereis', + '\xEDreis', + 'ireis', + '\xE1veis', + '\xEDamos', + 'armos', + 'ermos', + 'irmos', + 'aria', + 'eria', + 'iria', + 'asse', + 'esse', + 'isse', + 'aste', + 'este', + 'iste', + 'arei', + 'erei', + 'irei', + 'aram', + 'eram', + 'iram', + 'avam', + 'arem', + 'erem', + 'irem', + 'ando', + 'endo', + 'indo', + 'adas', + 'idas', + 'ar\xE1s', + 'aras', + 'er\xE1s', + 'eras', + 'ir\xE1s', + 'avas', + 'ares', + 'eres', + 'ires', + '\xEDeis', + 'ados', + 'idos', + '\xE1mos', + 'amos', + 'emos', + 'imos', + 'iras', + 'ada', + 'ida', + 'ar\xE1', + 'ara', + 'er\xE1', + 'era', + 'ir\xE1', + 'ava', + 'iam', + 'ado', + 'ido', + 'ias', + 'ais', + 'eis', + 'ira', + 'ia', + 'ei', + 'am', + 'em', + 'ar', + 'er', + 'ir', + 'as', + 'es', + 'is', + 'eu', + 'iu', + 'ou', + ) + __step4_suffixes = ("os", "a", "i", "o", "\xE1", "\xED", "\xF3") + + def stem(self, word): + """ + Stem a Portuguese word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. 
+ :rtype: unicode + + """ + word = word.lower() + + if word in self.stopwords: + return word + + step1_success = False + step2_success = False + + word = ( + word.replace("\xE3", "a~") + .replace("\xF5", "o~") + .replace("q\xFC", "qu") + .replace("g\xFC", "gu") + ) + + r1, r2 = self._r1r2_standard(word, self.__vowels) + rv = self._rv_standard(word, self.__vowels) + + # STEP 1: Standard suffix removal + for suffix in self.__step1_suffixes: + if word.endswith(suffix): + if suffix == "amente" and r1.endswith(suffix): + step1_success = True + + word = word[:-6] + r2 = r2[:-6] + rv = rv[:-6] + + if r2.endswith("iv"): + word = word[:-2] + r2 = r2[:-2] + rv = rv[:-2] + + if r2.endswith("at"): + word = word[:-2] + rv = rv[:-2] + + elif r2.endswith(("os", "ic", "ad")): + word = word[:-2] + rv = rv[:-2] + + elif ( + suffix in ("ira", "iras") + and rv.endswith(suffix) + and word[-len(suffix) - 1 : -len(suffix)] == "e" + ): + step1_success = True + + word = suffix_replace(word, suffix, "ir") + rv = suffix_replace(rv, suffix, "ir") + + elif r2.endswith(suffix): + step1_success = True + + if suffix in ("logia", "logias"): + word = suffix_replace(word, suffix, "log") + rv = suffix_replace(rv, suffix, "log") + + elif suffix in ("uça~o", "uço~es"): + word = suffix_replace(word, suffix, "u") + rv = suffix_replace(rv, suffix, "u") + + elif suffix in ("\xEAncia", "\xEAncias"): + word = suffix_replace(word, suffix, "ente") + rv = suffix_replace(rv, suffix, "ente") + + elif suffix == "mente": + word = word[:-5] + r2 = r2[:-5] + rv = rv[:-5] + + if r2.endswith(("ante", "avel", "ivel")): + word = word[:-4] + rv = rv[:-4] + + elif suffix in ("idade", "idades"): + word = word[: -len(suffix)] + r2 = r2[: -len(suffix)] + rv = rv[: -len(suffix)] + + if r2.endswith(("ic", "iv")): + word = word[:-2] + rv = rv[:-2] + + elif r2.endswith("abil"): + word = word[:-4] + rv = rv[:-4] + + elif suffix in ("iva", "ivo", "ivas", "ivos"): + word = word[: -len(suffix)] + r2 = r2[: -len(suffix)] + rv = rv[: -len(suffix)] + + if r2.endswith("at"): + word = word[:-2] + rv = rv[:-2] + else: + word = word[: -len(suffix)] + rv = rv[: -len(suffix)] + break + + # STEP 2: Verb suffixes + if not step1_success: + for suffix in self.__step2_suffixes: + if rv.endswith(suffix): + step2_success = True + + word = word[: -len(suffix)] + rv = rv[: -len(suffix)] + break + + # STEP 3 + if step1_success or step2_success: + if rv.endswith("i") and word[-2] == "c": + word = word[:-1] + rv = rv[:-1] + + ### STEP 4: Residual suffix + if not step1_success and not step2_success: + for suffix in self.__step4_suffixes: + if rv.endswith(suffix): + word = word[: -len(suffix)] + rv = rv[: -len(suffix)] + break + + # STEP 5 + if rv.endswith(("e", "\xE9", "\xEA")): + word = word[:-1] + rv = rv[:-1] + + if (word.endswith("gu") and rv.endswith("u")) or ( + word.endswith("ci") and rv.endswith("i") + ): + word = word[:-1] + + elif word.endswith("\xE7"): + word = suffix_replace(word, "\xE7", "c") + + word = word.replace("a~", "\xE3").replace("o~", "\xF5") + + return word + + +class RomanianStemmer(_StandardStemmer): + + """ + The Romanian Snowball stemmer. + + :cvar __vowels: The Romanian vowels. + :type __vowels: unicode + :cvar __step0_suffixes: Suffixes to be deleted in step 0 of the algorithm. + :type __step0_suffixes: tuple + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. 
+ :type __step2_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. + :type __step3_suffixes: tuple + :note: A detailed description of the Romanian + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/romanian/stemmer.html + + """ + + __vowels = "aeiou\u0103\xE2\xEE" + __step0_suffixes = ( + 'iilor', + 'ului', + 'elor', + 'iile', + 'ilor', + 'atei', + 'a\u0163ie', + 'a\u0163ia', + 'aua', + 'ele', + 'iua', + 'iei', + 'ile', + 'ul', + 'ea', + 'ii', + ) + __step1_suffixes = ( + 'abilitate', + 'abilitati', + 'abilit\u0103\u0163i', + 'ibilitate', + 'abilit\u0103i', + 'ivitate', + 'ivitati', + 'ivit\u0103\u0163i', + 'icitate', + 'icitati', + 'icit\u0103\u0163i', + 'icatori', + 'ivit\u0103i', + 'icit\u0103i', + 'icator', + 'a\u0163iune', + 'atoare', + '\u0103toare', + 'i\u0163iune', + 'itoare', + 'iciva', + 'icive', + 'icivi', + 'iciv\u0103', + 'icala', + 'icale', + 'icali', + 'ical\u0103', + 'ativa', + 'ative', + 'ativi', + 'ativ\u0103', + 'atori', + '\u0103tori', + 'itiva', + 'itive', + 'itivi', + 'itiv\u0103', + 'itori', + 'iciv', + 'ical', + 'ativ', + 'ator', + '\u0103tor', + 'itiv', + 'itor', + ) + __step2_suffixes = ( + 'abila', + 'abile', + 'abili', + 'abil\u0103', + 'ibila', + 'ibile', + 'ibili', + 'ibil\u0103', + 'atori', + 'itate', + 'itati', + 'it\u0103\u0163i', + 'abil', + 'ibil', + 'oasa', + 'oas\u0103', + 'oase', + 'anta', + 'ante', + 'anti', + 'ant\u0103', + 'ator', + 'it\u0103i', + 'iune', + 'iuni', + 'isme', + 'ista', + 'iste', + 'isti', + 'ist\u0103', + 'i\u015Fti', + 'ata', + 'at\u0103', + 'ati', + 'ate', + 'uta', + 'ut\u0103', + 'uti', + 'ute', + 'ita', + 'it\u0103', + 'iti', + 'ite', + 'ica', + 'ice', + 'ici', + 'ic\u0103', + 'osi', + 'o\u015Fi', + 'ant', + 'iva', + 'ive', + 'ivi', + 'iv\u0103', + 'ism', + 'ist', + 'at', + 'ut', + 'it', + 'ic', + 'os', + 'iv', + ) + __step3_suffixes = ( + 'seser\u0103\u0163i', + 'aser\u0103\u0163i', + 'iser\u0103\u0163i', + '\xE2ser\u0103\u0163i', + 'user\u0103\u0163i', + 'seser\u0103m', + 'aser\u0103m', + 'iser\u0103m', + '\xE2ser\u0103m', + 'user\u0103m', + 'ser\u0103\u0163i', + 'sese\u015Fi', + 'seser\u0103', + 'easc\u0103', + 'ar\u0103\u0163i', + 'ur\u0103\u0163i', + 'ir\u0103\u0163i', + '\xE2r\u0103\u0163i', + 'ase\u015Fi', + 'aser\u0103', + 'ise\u015Fi', + 'iser\u0103', + '\xe2se\u015Fi', + '\xE2ser\u0103', + 'use\u015Fi', + 'user\u0103', + 'ser\u0103m', + 'sesem', + 'indu', + '\xE2ndu', + 'eaz\u0103', + 'e\u015Fti', + 'e\u015Fte', + '\u0103\u015Fti', + '\u0103\u015Fte', + 'ea\u0163i', + 'ia\u0163i', + 'ar\u0103m', + 'ur\u0103m', + 'ir\u0103m', + '\xE2r\u0103m', + 'asem', + 'isem', + '\xE2sem', + 'usem', + 'se\u015Fi', + 'ser\u0103', + 'sese', + 'are', + 'ere', + 'ire', + '\xE2re', + 'ind', + '\xE2nd', + 'eze', + 'ezi', + 'esc', + '\u0103sc', + 'eam', + 'eai', + 'eau', + 'iam', + 'iai', + 'iau', + 'a\u015Fi', + 'ar\u0103', + 'u\u015Fi', + 'ur\u0103', + 'i\u015Fi', + 'ir\u0103', + '\xE2\u015Fi', + '\xe2r\u0103', + 'ase', + 'ise', + '\xE2se', + 'use', + 'a\u0163i', + 'e\u0163i', + 'i\u0163i', + '\xe2\u0163i', + 'sei', + 'ez', + 'am', + 'ai', + 'au', + 'ea', + 'ia', + 'ui', + '\xE2i', + '\u0103m', + 'em', + 'im', + '\xE2m', + 'se', + ) + + def stem(self, word): + """ + Stem a Romanian word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. 
+ :rtype: unicode + + """ + word = word.lower() + + if word in self.stopwords: + return word + + step1_success = False + step2_success = False + + for i in range(1, len(word) - 1): + if word[i - 1] in self.__vowels and word[i + 1] in self.__vowels: + if word[i] == "u": + word = "".join((word[:i], "U", word[i + 1 :])) + + elif word[i] == "i": + word = "".join((word[:i], "I", word[i + 1 :])) + + r1, r2 = self._r1r2_standard(word, self.__vowels) + rv = self._rv_standard(word, self.__vowels) + + # STEP 0: Removal of plurals and other simplifications + for suffix in self.__step0_suffixes: + if word.endswith(suffix): + if suffix in r1: + if suffix in ("ul", "ului"): + word = word[: -len(suffix)] + + if suffix in rv: + rv = rv[: -len(suffix)] + else: + rv = "" + + elif ( + suffix == "aua" + or suffix == "atei" + or (suffix == "ile" and word[-5:-3] != "ab") + ): + word = word[:-2] + + elif suffix in ("ea", "ele", "elor"): + word = suffix_replace(word, suffix, "e") + + if suffix in rv: + rv = suffix_replace(rv, suffix, "e") + else: + rv = "" + + elif suffix in ("ii", "iua", "iei", "iile", "iilor", "ilor"): + word = suffix_replace(word, suffix, "i") + + if suffix in rv: + rv = suffix_replace(rv, suffix, "i") + else: + rv = "" + + elif suffix in ("a\u0163ie", "a\u0163ia"): + word = word[:-1] + break + + # STEP 1: Reduction of combining suffixes + while True: + + replacement_done = False + + for suffix in self.__step1_suffixes: + if word.endswith(suffix): + if suffix in r1: + step1_success = True + replacement_done = True + + if suffix in ( + "abilitate", + "abilitati", + "abilit\u0103i", + "abilit\u0103\u0163i", + ): + word = suffix_replace(word, suffix, "abil") + + elif suffix == "ibilitate": + word = word[:-5] + + elif suffix in ( + "ivitate", + "ivitati", + "ivit\u0103i", + "ivit\u0103\u0163i", + ): + word = suffix_replace(word, suffix, "iv") + + elif suffix in ( + "icitate", + "icitati", + "icit\u0103i", + "icit\u0103\u0163i", + "icator", + "icatori", + "iciv", + "iciva", + "icive", + "icivi", + "iciv\u0103", + "ical", + "icala", + "icale", + "icali", + "ical\u0103", + ): + word = suffix_replace(word, suffix, "ic") + + elif suffix in ( + "ativ", + "ativa", + "ative", + "ativi", + "ativ\u0103", + "a\u0163iune", + "atoare", + "ator", + "atori", + "\u0103toare", + "\u0103tor", + "\u0103tori", + ): + word = suffix_replace(word, suffix, "at") + + if suffix in r2: + r2 = suffix_replace(r2, suffix, "at") + + elif suffix in ( + "itiv", + "itiva", + "itive", + "itivi", + "itiv\u0103", + "i\u0163iune", + "itoare", + "itor", + "itori", + ): + word = suffix_replace(word, suffix, "it") + + if suffix in r2: + r2 = suffix_replace(r2, suffix, "it") + else: + step1_success = False + break + + if not replacement_done: + break + + # STEP 2: Removal of standard suffixes + for suffix in self.__step2_suffixes: + if word.endswith(suffix): + if suffix in r2: + step2_success = True + + if suffix in ("iune", "iuni"): + if word[-5] == "\u0163": + word = "".join((word[:-5], "t")) + + elif suffix in ( + "ism", + "isme", + "ist", + "ista", + "iste", + "isti", + "ist\u0103", + "i\u015Fti", + ): + word = suffix_replace(word, suffix, "ist") + + else: + word = word[: -len(suffix)] + break + + # STEP 3: Removal of verb suffixes + if not step1_success and not step2_success: + for suffix in self.__step3_suffixes: + if word.endswith(suffix): + if suffix in rv: + if suffix in ( + 'seser\u0103\u0163i', + 'seser\u0103m', + 'ser\u0103\u0163i', + 'sese\u015Fi', + 'seser\u0103', + 'ser\u0103m', + 'sesem', + 'se\u015Fi', + 'ser\u0103', + 
'sese', + 'a\u0163i', + 'e\u0163i', + 'i\u0163i', + '\xE2\u0163i', + 'sei', + '\u0103m', + 'em', + 'im', + '\xE2m', + 'se', + ): + word = word[: -len(suffix)] + rv = rv[: -len(suffix)] + else: + if ( + not rv.startswith(suffix) + and rv[rv.index(suffix) - 1] not in "aeio\u0103\xE2\xEE" + ): + word = word[: -len(suffix)] + break + + # STEP 4: Removal of final vowel + for suffix in ("ie", "a", "e", "i", "\u0103"): + if word.endswith(suffix): + if suffix in rv: + word = word[: -len(suffix)] + break + + word = word.replace("I", "i").replace("U", "u") + + return word + + +class RussianStemmer(_LanguageSpecificStemmer): + + """ + The Russian Snowball stemmer. + + :cvar __perfective_gerund_suffixes: Suffixes to be deleted. + :type __perfective_gerund_suffixes: tuple + :cvar __adjectival_suffixes: Suffixes to be deleted. + :type __adjectival_suffixes: tuple + :cvar __reflexive_suffixes: Suffixes to be deleted. + :type __reflexive_suffixes: tuple + :cvar __verb_suffixes: Suffixes to be deleted. + :type __verb_suffixes: tuple + :cvar __noun_suffixes: Suffixes to be deleted. + :type __noun_suffixes: tuple + :cvar __superlative_suffixes: Suffixes to be deleted. + :type __superlative_suffixes: tuple + :cvar __derivational_suffixes: Suffixes to be deleted. + :type __derivational_suffixes: tuple + :note: A detailed description of the Russian + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/russian/stemmer.html + + """ + + __perfective_gerund_suffixes = ( + "ivshis'", + "yvshis'", + "vshis'", + "ivshi", + "yvshi", + "vshi", + "iv", + "yv", + "v", + ) + __adjectival_suffixes = ( + 'ui^ushchi^ui^u', + 'ui^ushchi^ai^a', + 'ui^ushchimi', + 'ui^ushchymi', + 'ui^ushchego', + 'ui^ushchogo', + 'ui^ushchemu', + 'ui^ushchomu', + 'ui^ushchikh', + 'ui^ushchykh', + 'ui^ushchui^u', + 'ui^ushchaia', + 'ui^ushchoi^u', + 'ui^ushchei^u', + 'i^ushchi^ui^u', + 'i^ushchi^ai^a', + 'ui^ushchee', + 'ui^ushchie', + 'ui^ushchye', + 'ui^ushchoe', + 'ui^ushchei`', + 'ui^ushchii`', + 'ui^ushchyi`', + 'ui^ushchoi`', + 'ui^ushchem', + 'ui^ushchim', + 'ui^ushchym', + 'ui^ushchom', + 'i^ushchimi', + 'i^ushchymi', + 'i^ushchego', + 'i^ushchogo', + 'i^ushchemu', + 'i^ushchomu', + 'i^ushchikh', + 'i^ushchykh', + 'i^ushchui^u', + 'i^ushchai^a', + 'i^ushchoi^u', + 'i^ushchei^u', + 'i^ushchee', + 'i^ushchie', + 'i^ushchye', + 'i^ushchoe', + 'i^ushchei`', + 'i^ushchii`', + 'i^ushchyi`', + 'i^ushchoi`', + 'i^ushchem', + 'i^ushchim', + 'i^ushchym', + 'i^ushchom', + 'shchi^ui^u', + 'shchi^ai^a', + 'ivshi^ui^u', + 'ivshi^ai^a', + 'yvshi^ui^u', + 'yvshi^ai^a', + 'shchimi', + 'shchymi', + 'shchego', + 'shchogo', + 'shchemu', + 'shchomu', + 'shchikh', + 'shchykh', + 'shchui^u', + 'shchai^a', + 'shchoi^u', + 'shchei^u', + 'ivshimi', + 'ivshymi', + 'ivshego', + 'ivshogo', + 'ivshemu', + 'ivshomu', + 'ivshikh', + 'ivshykh', + 'ivshui^u', + 'ivshai^a', + 'ivshoi^u', + 'ivshei^u', + 'yvshimi', + 'yvshymi', + 'yvshego', + 'yvshogo', + 'yvshemu', + 'yvshomu', + 'yvshikh', + 'yvshykh', + 'yvshui^u', + 'yvshai^a', + 'yvshoi^u', + 'yvshei^u', + 'vshi^ui^u', + 'vshi^ai^a', + 'shchee', + 'shchie', + 'shchye', + 'shchoe', + 'shchei`', + 'shchii`', + 'shchyi`', + 'shchoi`', + 'shchem', + 'shchim', + 'shchym', + 'shchom', + 'ivshee', + 'ivshie', + 'ivshye', + 'ivshoe', + 'ivshei`', + 'ivshii`', + 'ivshyi`', + 'ivshoi`', + 'ivshem', + 'ivshim', + 'ivshym', + 'ivshom', + 'yvshee', + 'yvshie', + 'yvshye', + 'yvshoe', + 'yvshei`', + 'yvshii`', + 'yvshyi`', + 'yvshoi`', + 'yvshem', + 'yvshim', + 'yvshym', + 'yvshom', + 'vshimi', + 
'vshymi', + 'vshego', + 'vshogo', + 'vshemu', + 'vshomu', + 'vshikh', + 'vshykh', + 'vshui^u', + 'vshai^a', + 'vshoi^u', + 'vshei^u', + 'emi^ui^u', + 'emi^ai^a', + 'nni^ui^u', + 'nni^ai^a', + 'vshee', + 'vshie', + 'vshye', + 'vshoe', + 'vshei`', + 'vshii`', + 'vshyi`', + 'vshoi`', + 'vshem', + 'vshim', + 'vshym', + 'vshom', + 'emimi', + 'emymi', + 'emego', + 'emogo', + 'ememu', + 'emomu', + 'emikh', + 'emykh', + 'emui^u', + 'emai^a', + 'emoi^u', + 'emei^u', + 'nnimi', + 'nnymi', + 'nnego', + 'nnogo', + 'nnemu', + 'nnomu', + 'nnikh', + 'nnykh', + 'nnui^u', + 'nnai^a', + 'nnoi^u', + 'nnei^u', + 'emee', + 'emie', + 'emye', + 'emoe', + 'emei`', + 'emii`', + 'emyi`', + 'emoi`', + 'emem', + 'emim', + 'emym', + 'emom', + 'nnee', + 'nnie', + 'nnye', + 'nnoe', + 'nnei`', + 'nnii`', + 'nnyi`', + 'nnoi`', + 'nnem', + 'nnim', + 'nnym', + 'nnom', + 'i^ui^u', + 'i^ai^a', + 'imi', + 'ymi', + 'ego', + 'ogo', + 'emu', + 'omu', + 'ikh', + 'ykh', + 'ui^u', + 'ai^a', + 'oi^u', + 'ei^u', + 'ee', + 'ie', + 'ye', + 'oe', + 'ei`', + 'ii`', + 'yi`', + 'oi`', + 'em', + 'im', + 'ym', + 'om', + ) + __reflexive_suffixes = ("si^a", "s'") + __verb_suffixes = ( + "esh'", + 'ei`te', + 'ui`te', + 'ui^ut', + "ish'", + 'ete', + 'i`te', + 'i^ut', + 'nno', + 'ila', + 'yla', + 'ena', + 'ite', + 'ili', + 'yli', + 'ilo', + 'ylo', + 'eno', + 'i^at', + 'uet', + 'eny', + "it'", + "yt'", + 'ui^u', + 'la', + 'na', + 'li', + 'em', + 'lo', + 'no', + 'et', + 'ny', + "t'", + 'ei`', + 'ui`', + 'il', + 'yl', + 'im', + 'ym', + 'en', + 'it', + 'yt', + 'i^u', + 'i`', + 'l', + 'n', + ) + __noun_suffixes = ( + 'ii^ami', + 'ii^akh', + 'i^ami', + 'ii^am', + 'i^akh', + 'ami', + 'iei`', + 'i^am', + 'iem', + 'akh', + 'ii^u', + "'i^u", + 'ii^a', + "'i^a", + 'ev', + 'ov', + 'ie', + "'e", + 'ei', + 'ii', + 'ei`', + 'oi`', + 'ii`', + 'em', + 'am', + 'om', + 'i^u', + 'i^a', + 'a', + 'e', + 'i', + 'i`', + 'o', + 'u', + 'y', + "'", + ) + __superlative_suffixes = ("ei`she", "ei`sh") + __derivational_suffixes = ("ost'", "ost") + + def stem(self, word): + """ + Stem a Russian word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. 
+ :rtype: unicode + + """ + if word in self.stopwords: + return word + + chr_exceeded = False + for i in range(len(word)): + if ord(word[i]) > 255: + chr_exceeded = True + break + + if not chr_exceeded: + return word + + word = self.__cyrillic_to_roman(word) + + step1_success = False + adjectival_removed = False + verb_removed = False + undouble_success = False + superlative_removed = False + + rv, r2 = self.__regions_russian(word) + + # Step 1 + for suffix in self.__perfective_gerund_suffixes: + if rv.endswith(suffix): + if suffix in ("v", "vshi", "vshis'"): + if ( + rv[-len(suffix) - 3 : -len(suffix)] == "i^a" + or rv[-len(suffix) - 1 : -len(suffix)] == "a" + ): + word = word[: -len(suffix)] + r2 = r2[: -len(suffix)] + rv = rv[: -len(suffix)] + step1_success = True + break + else: + word = word[: -len(suffix)] + r2 = r2[: -len(suffix)] + rv = rv[: -len(suffix)] + step1_success = True + break + + if not step1_success: + for suffix in self.__reflexive_suffixes: + if rv.endswith(suffix): + word = word[: -len(suffix)] + r2 = r2[: -len(suffix)] + rv = rv[: -len(suffix)] + break + + for suffix in self.__adjectival_suffixes: + if rv.endswith(suffix): + if suffix in ( + 'i^ushchi^ui^u', + 'i^ushchi^ai^a', + 'i^ushchui^u', + 'i^ushchai^a', + 'i^ushchoi^u', + 'i^ushchei^u', + 'i^ushchimi', + 'i^ushchymi', + 'i^ushchego', + 'i^ushchogo', + 'i^ushchemu', + 'i^ushchomu', + 'i^ushchikh', + 'i^ushchykh', + 'shchi^ui^u', + 'shchi^ai^a', + 'i^ushchee', + 'i^ushchie', + 'i^ushchye', + 'i^ushchoe', + 'i^ushchei`', + 'i^ushchii`', + 'i^ushchyi`', + 'i^ushchoi`', + 'i^ushchem', + 'i^ushchim', + 'i^ushchym', + 'i^ushchom', + 'vshi^ui^u', + 'vshi^ai^a', + 'shchui^u', + 'shchai^a', + 'shchoi^u', + 'shchei^u', + 'emi^ui^u', + 'emi^ai^a', + 'nni^ui^u', + 'nni^ai^a', + 'shchimi', + 'shchymi', + 'shchego', + 'shchogo', + 'shchemu', + 'shchomu', + 'shchikh', + 'shchykh', + 'vshui^u', + 'vshai^a', + 'vshoi^u', + 'vshei^u', + 'shchee', + 'shchie', + 'shchye', + 'shchoe', + 'shchei`', + 'shchii`', + 'shchyi`', + 'shchoi`', + 'shchem', + 'shchim', + 'shchym', + 'shchom', + 'vshimi', + 'vshymi', + 'vshego', + 'vshogo', + 'vshemu', + 'vshomu', + 'vshikh', + 'vshykh', + 'emui^u', + 'emai^a', + 'emoi^u', + 'emei^u', + 'nnui^u', + 'nnai^a', + 'nnoi^u', + 'nnei^u', + 'vshee', + 'vshie', + 'vshye', + 'vshoe', + 'vshei`', + 'vshii`', + 'vshyi`', + 'vshoi`', + 'vshem', + 'vshim', + 'vshym', + 'vshom', + 'emimi', + 'emymi', + 'emego', + 'emogo', + 'ememu', + 'emomu', + 'emikh', + 'emykh', + 'nnimi', + 'nnymi', + 'nnego', + 'nnogo', + 'nnemu', + 'nnomu', + 'nnikh', + 'nnykh', + 'emee', + 'emie', + 'emye', + 'emoe', + 'emei`', + 'emii`', + 'emyi`', + 'emoi`', + 'emem', + 'emim', + 'emym', + 'emom', + 'nnee', + 'nnie', + 'nnye', + 'nnoe', + 'nnei`', + 'nnii`', + 'nnyi`', + 'nnoi`', + 'nnem', + 'nnim', + 'nnym', + 'nnom', + ): + if ( + rv[-len(suffix) - 3 : -len(suffix)] == "i^a" + or rv[-len(suffix) - 1 : -len(suffix)] == "a" + ): + word = word[: -len(suffix)] + r2 = r2[: -len(suffix)] + rv = rv[: -len(suffix)] + adjectival_removed = True + break + else: + word = word[: -len(suffix)] + r2 = r2[: -len(suffix)] + rv = rv[: -len(suffix)] + adjectival_removed = True + break + + if not adjectival_removed: + for suffix in self.__verb_suffixes: + if rv.endswith(suffix): + if suffix in ( + "la", + "na", + "ete", + "i`te", + "li", + "i`", + "l", + "em", + "n", + "lo", + "no", + "et", + "i^ut", + "ny", + "t'", + "esh'", + "nno", + ): + if ( + rv[-len(suffix) - 3 : -len(suffix)] == "i^a" + or rv[-len(suffix) - 1 : -len(suffix)] == "a" + ): + 
word = word[: -len(suffix)] + r2 = r2[: -len(suffix)] + rv = rv[: -len(suffix)] + verb_removed = True + break + else: + word = word[: -len(suffix)] + r2 = r2[: -len(suffix)] + rv = rv[: -len(suffix)] + verb_removed = True + break + + if not adjectival_removed and not verb_removed: + for suffix in self.__noun_suffixes: + if rv.endswith(suffix): + word = word[: -len(suffix)] + r2 = r2[: -len(suffix)] + rv = rv[: -len(suffix)] + break + + # Step 2 + if rv.endswith("i"): + word = word[:-1] + r2 = r2[:-1] + + # Step 3 + for suffix in self.__derivational_suffixes: + if r2.endswith(suffix): + word = word[: -len(suffix)] + break + + # Step 4 + if word.endswith("nn"): + word = word[:-1] + undouble_success = True + + if not undouble_success: + for suffix in self.__superlative_suffixes: + if word.endswith(suffix): + word = word[: -len(suffix)] + superlative_removed = True + break + if word.endswith("nn"): + word = word[:-1] + + if not undouble_success and not superlative_removed: + if word.endswith("'"): + word = word[:-1] + + word = self.__roman_to_cyrillic(word) + + return word + + def __regions_russian(self, word): + """ + Return the regions RV and R2 which are used by the Russian stemmer. + + In any word, RV is the region after the first vowel, + or the end of the word if it contains no vowel. + + R2 is the region after the first non-vowel following + a vowel in R1, or the end of the word if there is no such non-vowel. + + R1 is the region after the first non-vowel following a vowel, + or the end of the word if there is no such non-vowel. + + :param word: The Russian word whose regions RV and R2 are determined. + :type word: str or unicode + :return: the regions RV and R2 for the respective Russian word. + :rtype: tuple + :note: This helper method is invoked by the stem method of the subclass + RussianStemmer. It is not to be invoked directly! + + """ + r1 = "" + r2 = "" + rv = "" + + vowels = ("A", "U", "E", "a", "e", "i", "o", "u", "y") + word = word.replace("i^a", "A").replace("i^u", "U").replace("e`", "E") + + for i in range(1, len(word)): + if word[i] not in vowels and word[i - 1] in vowels: + r1 = word[i + 1 :] + break + + for i in range(1, len(r1)): + if r1[i] not in vowels and r1[i - 1] in vowels: + r2 = r1[i + 1 :] + break + + for i in range(len(word)): + if word[i] in vowels: + rv = word[i + 1 :] + break + + r2 = r2.replace("A", "i^a").replace("U", "i^u").replace("E", "e`") + rv = rv.replace("A", "i^a").replace("U", "i^u").replace("E", "e`") + + return (rv, r2) + + def __cyrillic_to_roman(self, word): + """ + Transliterate a Russian word into the Roman alphabet. + + A Russian word whose letters consist of the Cyrillic + alphabet are transliterated into the Roman alphabet + in order to ease the forthcoming stemming process. + + :param word: The word that is transliterated. + :type word: unicode + :return: the transliterated word. + :rtype: unicode + :note: This helper method is invoked by the stem method of the subclass + RussianStemmer. It is not to be invoked directly! 
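+
+        For example, the Cyrillic word "погода" ('weather') is transliterated
+        to "pogoda" before stemming; the stem method transliterates the result
+        back into the Cyrillic alphabet afterwards.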
+ + """ + word = ( + word.replace("\u0410", "a") + .replace("\u0430", "a") + .replace("\u0411", "b") + .replace("\u0431", "b") + .replace("\u0412", "v") + .replace("\u0432", "v") + .replace("\u0413", "g") + .replace("\u0433", "g") + .replace("\u0414", "d") + .replace("\u0434", "d") + .replace("\u0415", "e") + .replace("\u0435", "e") + .replace("\u0401", "e") + .replace("\u0451", "e") + .replace("\u0416", "zh") + .replace("\u0436", "zh") + .replace("\u0417", "z") + .replace("\u0437", "z") + .replace("\u0418", "i") + .replace("\u0438", "i") + .replace("\u0419", "i`") + .replace("\u0439", "i`") + .replace("\u041A", "k") + .replace("\u043A", "k") + .replace("\u041B", "l") + .replace("\u043B", "l") + .replace("\u041C", "m") + .replace("\u043C", "m") + .replace("\u041D", "n") + .replace("\u043D", "n") + .replace("\u041E", "o") + .replace("\u043E", "o") + .replace("\u041F", "p") + .replace("\u043F", "p") + .replace("\u0420", "r") + .replace("\u0440", "r") + .replace("\u0421", "s") + .replace("\u0441", "s") + .replace("\u0422", "t") + .replace("\u0442", "t") + .replace("\u0423", "u") + .replace("\u0443", "u") + .replace("\u0424", "f") + .replace("\u0444", "f") + .replace("\u0425", "kh") + .replace("\u0445", "kh") + .replace("\u0426", "t^s") + .replace("\u0446", "t^s") + .replace("\u0427", "ch") + .replace("\u0447", "ch") + .replace("\u0428", "sh") + .replace("\u0448", "sh") + .replace("\u0429", "shch") + .replace("\u0449", "shch") + .replace("\u042A", "''") + .replace("\u044A", "''") + .replace("\u042B", "y") + .replace("\u044B", "y") + .replace("\u042C", "'") + .replace("\u044C", "'") + .replace("\u042D", "e`") + .replace("\u044D", "e`") + .replace("\u042E", "i^u") + .replace("\u044E", "i^u") + .replace("\u042F", "i^a") + .replace("\u044F", "i^a") + ) + + return word + + def __roman_to_cyrillic(self, word): + """ + Transliterate a Russian word back into the Cyrillic alphabet. + + A Russian word formerly transliterated into the Roman alphabet + in order to ease the stemming process, is transliterated back + into the Cyrillic alphabet, its original form. + + :param word: The word that is transliterated. + :type word: str or unicode + :return: word, the transliterated word. + :rtype: unicode + :note: This helper method is invoked by the stem method of the subclass + RussianStemmer. It is not to be invoked directly! + + """ + word = ( + word.replace("i^u", "\u044E") + .replace("i^a", "\u044F") + .replace("shch", "\u0449") + .replace("kh", "\u0445") + .replace("t^s", "\u0446") + .replace("ch", "\u0447") + .replace("e`", "\u044D") + .replace("i`", "\u0439") + .replace("sh", "\u0448") + .replace("k", "\u043A") + .replace("e", "\u0435") + .replace("zh", "\u0436") + .replace("a", "\u0430") + .replace("b", "\u0431") + .replace("v", "\u0432") + .replace("g", "\u0433") + .replace("d", "\u0434") + .replace("e", "\u0435") + .replace("z", "\u0437") + .replace("i", "\u0438") + .replace("l", "\u043B") + .replace("m", "\u043C") + .replace("n", "\u043D") + .replace("o", "\u043E") + .replace("p", "\u043F") + .replace("r", "\u0440") + .replace("s", "\u0441") + .replace("t", "\u0442") + .replace("u", "\u0443") + .replace("f", "\u0444") + .replace("''", "\u044A") + .replace("y", "\u044B") + .replace("'", "\u044C") + ) + + return word + + +class SpanishStemmer(_StandardStemmer): + + """ + The Spanish Snowball stemmer. + + :cvar __vowels: The Spanish vowels. + :type __vowels: unicode + :cvar __step0_suffixes: Suffixes to be deleted in step 0 of the algorithm. 
+ :type __step0_suffixes: tuple + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2a_suffixes: Suffixes to be deleted in step 2a of the algorithm. + :type __step2a_suffixes: tuple + :cvar __step2b_suffixes: Suffixes to be deleted in step 2b of the algorithm. + :type __step2b_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. + :type __step3_suffixes: tuple + :note: A detailed description of the Spanish + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/spanish/stemmer.html + + """ + + __vowels = "aeiou\xE1\xE9\xED\xF3\xFA\xFC" + __step0_suffixes = ( + "selas", + "selos", + "sela", + "selo", + "las", + "les", + "los", + "nos", + "me", + "se", + "la", + "le", + "lo", + ) + __step1_suffixes = ( + 'amientos', + 'imientos', + 'amiento', + 'imiento', + 'aciones', + 'uciones', + 'adoras', + 'adores', + 'ancias', + 'log\xEDas', + 'encias', + 'amente', + 'idades', + 'anzas', + 'ismos', + 'ables', + 'ibles', + 'istas', + 'adora', + 'aci\xF3n', + 'antes', + 'ancia', + 'log\xEDa', + 'uci\xf3n', + 'encia', + 'mente', + 'anza', + 'icos', + 'icas', + 'ismo', + 'able', + 'ible', + 'ista', + 'osos', + 'osas', + 'ador', + 'ante', + 'idad', + 'ivas', + 'ivos', + 'ico', + 'ica', + 'oso', + 'osa', + 'iva', + 'ivo', + ) + __step2a_suffixes = ( + 'yeron', + 'yendo', + 'yamos', + 'yais', + 'yan', + 'yen', + 'yas', + 'yes', + 'ya', + 'ye', + 'yo', + 'y\xF3', + ) + __step2b_suffixes = ( + 'ar\xEDamos', + 'er\xEDamos', + 'ir\xEDamos', + 'i\xE9ramos', + 'i\xE9semos', + 'ar\xEDais', + 'aremos', + 'er\xEDais', + 'eremos', + 'ir\xEDais', + 'iremos', + 'ierais', + 'ieseis', + 'asteis', + 'isteis', + '\xE1bamos', + '\xE1ramos', + '\xE1semos', + 'ar\xEDan', + 'ar\xEDas', + 'ar\xE9is', + 'er\xEDan', + 'er\xEDas', + 'er\xE9is', + 'ir\xEDan', + 'ir\xEDas', + 'ir\xE9is', + 'ieran', + 'iesen', + 'ieron', + 'iendo', + 'ieras', + 'ieses', + 'abais', + 'arais', + 'aseis', + '\xE9amos', + 'ar\xE1n', + 'ar\xE1s', + 'ar\xEDa', + 'er\xE1n', + 'er\xE1s', + 'er\xEDa', + 'ir\xE1n', + 'ir\xE1s', + 'ir\xEDa', + 'iera', + 'iese', + 'aste', + 'iste', + 'aban', + 'aran', + 'asen', + 'aron', + 'ando', + 'abas', + 'adas', + 'idas', + 'aras', + 'ases', + '\xEDais', + 'ados', + 'idos', + 'amos', + 'imos', + 'emos', + 'ar\xE1', + 'ar\xE9', + 'er\xE1', + 'er\xE9', + 'ir\xE1', + 'ir\xE9', + 'aba', + 'ada', + 'ida', + 'ara', + 'ase', + '\xEDan', + 'ado', + 'ido', + '\xEDas', + '\xE1is', + '\xE9is', + '\xEDa', + 'ad', + 'ed', + 'id', + 'an', + 'i\xF3', + 'ar', + 'er', + 'ir', + 'as', + '\xEDs', + 'en', + 'es', + ) + __step3_suffixes = ("os", "a", "e", "o", "\xE1", "\xE9", "\xED", "\xF3") + + def stem(self, word): + """ + Stem a Spanish word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. 
+ :rtype: unicode + + """ + word = word.lower() + + if word in self.stopwords: + return word + + step1_success = False + + r1, r2 = self._r1r2_standard(word, self.__vowels) + rv = self._rv_standard(word, self.__vowels) + + # STEP 0: Attached pronoun + for suffix in self.__step0_suffixes: + if not (word.endswith(suffix) and rv.endswith(suffix)): + continue + + if ( + rv[: -len(suffix)].endswith( + ( + "ando", + "\xE1ndo", + "ar", + "\xE1r", + "er", + "\xE9r", + "iendo", + "i\xE9ndo", + "ir", + "\xEDr", + ) + ) + ) or ( + rv[: -len(suffix)].endswith("yendo") + and word[: -len(suffix)].endswith("uyendo") + ): + + word = self.__replace_accented(word[: -len(suffix)]) + r1 = self.__replace_accented(r1[: -len(suffix)]) + r2 = self.__replace_accented(r2[: -len(suffix)]) + rv = self.__replace_accented(rv[: -len(suffix)]) + break + + # STEP 1: Standard suffix removal + for suffix in self.__step1_suffixes: + if not word.endswith(suffix): + continue + + if suffix == "amente" and r1.endswith(suffix): + step1_success = True + word = word[:-6] + r2 = r2[:-6] + rv = rv[:-6] + + if r2.endswith("iv"): + word = word[:-2] + r2 = r2[:-2] + rv = rv[:-2] + + if r2.endswith("at"): + word = word[:-2] + rv = rv[:-2] + + elif r2.endswith(("os", "ic", "ad")): + word = word[:-2] + rv = rv[:-2] + + elif r2.endswith(suffix): + step1_success = True + if suffix in ( + "adora", + "ador", + "aci\xF3n", + "adoras", + "adores", + "aciones", + "ante", + "antes", + "ancia", + "ancias", + ): + word = word[: -len(suffix)] + r2 = r2[: -len(suffix)] + rv = rv[: -len(suffix)] + + if r2.endswith("ic"): + word = word[:-2] + rv = rv[:-2] + + elif suffix in ("log\xEDa", "log\xEDas"): + word = suffix_replace(word, suffix, "log") + rv = suffix_replace(rv, suffix, "log") + + elif suffix in ("uci\xF3n", "uciones"): + word = suffix_replace(word, suffix, "u") + rv = suffix_replace(rv, suffix, "u") + + elif suffix in ("encia", "encias"): + word = suffix_replace(word, suffix, "ente") + rv = suffix_replace(rv, suffix, "ente") + + elif suffix == "mente": + word = word[: -len(suffix)] + r2 = r2[: -len(suffix)] + rv = rv[: -len(suffix)] + + if r2.endswith(("ante", "able", "ible")): + word = word[:-4] + rv = rv[:-4] + + elif suffix in ("idad", "idades"): + word = word[: -len(suffix)] + r2 = r2[: -len(suffix)] + rv = rv[: -len(suffix)] + + for pre_suff in ("abil", "ic", "iv"): + if r2.endswith(pre_suff): + word = word[: -len(pre_suff)] + rv = rv[: -len(pre_suff)] + + elif suffix in ("ivo", "iva", "ivos", "ivas"): + word = word[: -len(suffix)] + r2 = r2[: -len(suffix)] + rv = rv[: -len(suffix)] + if r2.endswith("at"): + word = word[:-2] + rv = rv[:-2] + else: + word = word[: -len(suffix)] + rv = rv[: -len(suffix)] + break + + # STEP 2a: Verb suffixes beginning 'y' + if not step1_success: + for suffix in self.__step2a_suffixes: + if rv.endswith(suffix) and word[-len(suffix) - 1 : -len(suffix)] == "u": + word = word[: -len(suffix)] + rv = rv[: -len(suffix)] + break + + # STEP 2b: Other verb suffixes + for suffix in self.__step2b_suffixes: + if rv.endswith(suffix): + word = word[: -len(suffix)] + rv = rv[: -len(suffix)] + if suffix in ("en", "es", "\xE9is", "emos"): + if word.endswith("gu"): + word = word[:-1] + + if rv.endswith("gu"): + rv = rv[:-1] + break + + # STEP 3: Residual suffix + for suffix in self.__step3_suffixes: + if rv.endswith(suffix): + word = word[: -len(suffix)] + if suffix in ("e", "\xE9"): + rv = rv[: -len(suffix)] + + if word[-2:] == "gu" and rv.endswith("u"): + word = word[:-1] + break + + word = self.__replace_accented(word) + + 
return word + + def __replace_accented(self, word): + """ + Replaces all accented letters on a word with their non-accented + counterparts. + + :param word: A spanish word, with or without accents + :type word: str or unicode + :return: a word with the accented letters (á, é, í, ó, ú) replaced with + their non-accented counterparts (a, e, i, o, u) + :rtype: str or unicode + """ + return ( + word.replace("\xE1", "a") + .replace("\xE9", "e") + .replace("\xED", "i") + .replace("\xF3", "o") + .replace("\xFA", "u") + ) + + +class SwedishStemmer(_ScandinavianStemmer): + + """ + The Swedish Snowball stemmer. + + :cvar __vowels: The Swedish vowels. + :type __vowels: unicode + :cvar __s_ending: Letters that may directly appear before a word final 's'. + :type __s_ending: unicode + :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. + :type __step1_suffixes: tuple + :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. + :type __step2_suffixes: tuple + :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. + :type __step3_suffixes: tuple + :note: A detailed description of the Swedish + stemming algorithm can be found under + http://snowball.tartarus.org/algorithms/swedish/stemmer.html + + """ + + __vowels = "aeiouy\xE4\xE5\xF6" + __s_ending = "bcdfghjklmnoprtvy" + __step1_suffixes = ( + "heterna", + "hetens", + "heter", + "heten", + "anden", + "arnas", + "ernas", + "ornas", + "andes", + "andet", + "arens", + "arna", + "erna", + "orna", + "ande", + "arne", + "aste", + "aren", + "ades", + "erns", + "ade", + "are", + "ern", + "ens", + "het", + "ast", + "ad", + "en", + "ar", + "er", + "or", + "as", + "es", + "at", + "a", + "e", + "s", + ) + __step2_suffixes = ("dd", "gd", "nn", "dt", "gt", "kt", "tt") + __step3_suffixes = ("fullt", "l\xF6st", "els", "lig", "ig") + + def stem(self, word): + """ + Stem a Swedish word and return the stemmed form. + + :param word: The word that is stemmed. + :type word: str or unicode + :return: The stemmed form. + :rtype: unicode + + """ + word = word.lower() + + if word in self.stopwords: + return word + + r1 = self._r1_scandinavian(word, self.__vowels) + + # STEP 1 + for suffix in self.__step1_suffixes: + if r1.endswith(suffix): + if suffix == "s": + if word[-2] in self.__s_ending: + word = word[:-1] + r1 = r1[:-1] + else: + word = word[: -len(suffix)] + r1 = r1[: -len(suffix)] + break + + # STEP 2 + for suffix in self.__step2_suffixes: + if r1.endswith(suffix): + word = word[:-1] + r1 = r1[:-1] + break + + # STEP 3 + for suffix in self.__step3_suffixes: + if r1.endswith(suffix): + if suffix in ("els", "lig", "ig"): + word = word[: -len(suffix)] + elif suffix in ("fullt", "l\xF6st"): + word = word[:-1] + break + + return word + + +def demo(): + """ + This function provides a demonstration of the Snowball stemmers. + + After invoking this function and specifying a language, + it stems an excerpt of the Universal Declaration of Human Rights + (which is a part of the NLTK corpus collection) and then prints + out the original and the stemmed text. 
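+
+    The stemmers can also be used directly, without the demo; a minimal
+    illustrative doctest:
+
+        >>> from nltk.stem import SnowballStemmer
+        >>> print(SnowballStemmer("english").stem("running"))
+        run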
+ + """ + + from nltk.corpus import udhr + + udhr_corpus = { + "arabic": "Arabic_Alarabia-Arabic", + "danish": "Danish_Dansk-Latin1", + "dutch": "Dutch_Nederlands-Latin1", + "english": "English-Latin1", + "finnish": "Finnish_Suomi-Latin1", + "french": "French_Francais-Latin1", + "german": "German_Deutsch-Latin1", + "hungarian": "Hungarian_Magyar-UTF8", + "italian": "Italian_Italiano-Latin1", + "norwegian": "Norwegian-Latin1", + "porter": "English-Latin1", + "portuguese": "Portuguese_Portugues-Latin1", + "romanian": "Romanian_Romana-Latin2", + "russian": "Russian-UTF8", + "spanish": "Spanish-Latin1", + "swedish": "Swedish_Svenska-Latin1", + } + + print("\n") + print("******************************") + print("Demo for the Snowball stemmers") + print("******************************") + + while True: + + language = input( + "Please enter the name of the language " + + "to be demonstrated\n" + + "/".join(SnowballStemmer.languages) + + "\n" + + "(enter 'exit' in order to leave): " + ) + + if language == "exit": + break + + if language not in SnowballStemmer.languages: + print( + ( + "\nOops, there is no stemmer for this language. " + + "Please try again.\n" + ) + ) + continue + + stemmer = SnowballStemmer(language) + excerpt = udhr.words(udhr_corpus[language])[:300] + + stemmed = " ".join(stemmer.stem(word) for word in excerpt) + stemmed = re.sub(r"(.{,70})\s", r'\1\n', stemmed + ' ').rstrip() + excerpt = " ".join(excerpt) + excerpt = re.sub(r"(.{,70})\s", r'\1\n', excerpt + ' ').rstrip() + + print("\n") + print('-' * 70) + print('ORIGINAL'.center(70)) + print(excerpt) + print("\n\n") + print('STEMMED RESULTS'.center(70)) + print(stemmed) + print('-' * 70) + print("\n") diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/util.py b/venv.bak/lib/python3.7/site-packages/nltk/stem/util.py new file mode 100644 index 0000000..0daad9d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/stem/util.py @@ -0,0 +1,24 @@ +# Natural Language Toolkit: Stemmer Utilities +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Helder +# URL: +# For license information, see LICENSE.TXT + + +def suffix_replace(original, old, new): + """ + Replaces the old suffix of the original string by a new suffix + """ + return original[: -len(old)] + new + + +def prefix_replace(original, old, new): + """ + Replaces the old prefix of the original string by a new suffix + :param original: string + :param old: string + :param new: string + :return: string + """ + return new + original[len(old) :] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/stem/wordnet.py b/venv.bak/lib/python3.7/site-packages/nltk/stem/wordnet.py new file mode 100644 index 0000000..da521a3 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/stem/wordnet.py @@ -0,0 +1,52 @@ +# Natural Language Toolkit: WordNet stemmer interface +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT +from __future__ import unicode_literals + +from nltk.corpus.reader.wordnet import NOUN +from nltk.corpus import wordnet +from nltk.compat import python_2_unicode_compatible + + +@python_2_unicode_compatible +class WordNetLemmatizer(object): + """ + WordNet Lemmatizer + + Lemmatize using WordNet's built-in morphy function. + Returns the input word unchanged if it cannot be found in WordNet. 
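+
+    Lemmatization is sensitive to the part of speech passed as ``pos``
+    (nouns by default); for instance, adjectives are only reduced when
+    ``pos='a'`` is given. A minimal illustrative doctest:
+
+    >>> from nltk.stem import WordNetLemmatizer
+    >>> print(WordNetLemmatizer().lemmatize('better', pos='a'))
+    good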
+ + >>> from nltk.stem import WordNetLemmatizer + >>> wnl = WordNetLemmatizer() + >>> print(wnl.lemmatize('dogs')) + dog + >>> print(wnl.lemmatize('churches')) + church + >>> print(wnl.lemmatize('aardwolves')) + aardwolf + >>> print(wnl.lemmatize('abaci')) + abacus + >>> print(wnl.lemmatize('hardrock')) + hardrock + """ + + def __init__(self): + pass + + def lemmatize(self, word, pos=NOUN): + lemmas = wordnet._morphy(word, pos) + return min(lemmas, key=len) if lemmas else word + + def __repr__(self): + return '' + + +# unload wordnet +def teardown_module(module=None): + from nltk.corpus import wordnet + + wordnet._unload() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/tag/__init__.py new file mode 100644 index 0000000..c0cbe0a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tag/__init__.py @@ -0,0 +1,180 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Taggers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird (minor additions) +# URL: +# For license information, see LICENSE.TXT +""" +NLTK Taggers + +This package contains classes and interfaces for part-of-speech +tagging, or simply "tagging". + +A "tag" is a case-sensitive string that specifies some property of a token, +such as its part of speech. Tagged tokens are encoded as tuples +``(tag, token)``. For example, the following tagged token combines +the word ``'fly'`` with a noun part of speech tag (``'NN'``): + + >>> tagged_tok = ('fly', 'NN') + +An off-the-shelf tagger is available for English. It uses the Penn Treebank tagset: + + >>> from nltk import pos_tag, word_tokenize + >>> pos_tag(word_tokenize("John's big idea isn't all that bad.")) + [('John', 'NNP'), ("'s", 'POS'), ('big', 'JJ'), ('idea', 'NN'), ('is', 'VBZ'), + ("n't", 'RB'), ('all', 'PDT'), ('that', 'DT'), ('bad', 'JJ'), ('.', '.')] + +A Russian tagger is also available if you specify lang="rus". It uses +the Russian National Corpus tagset: + + >>> pos_tag(word_tokenize("Илья оторопел и дважды перечитал бумажку."), lang='rus') # doctest: +SKIP + [('Илья', 'S'), ('оторопел', 'V'), ('и', 'CONJ'), ('дважды', 'ADV'), ('перечитал', 'V'), + ('бумажку', 'S'), ('.', 'NONLEX')] + +This package defines several taggers, which take a list of tokens, +assign a tag to each one, and return the resulting list of tagged tokens. +Most of the taggers are built automatically based on a training corpus. +For example, the unigram tagger tags each word *w* by checking what +the most frequent tag for *w* was in a training corpus: + + >>> from nltk.corpus import brown + >>> from nltk.tag import UnigramTagger + >>> tagger = UnigramTagger(brown.tagged_sents(categories='news')[:500]) + >>> sent = ['Mitchell', 'decried', 'the', 'high', 'rate', 'of', 'unemployment'] + >>> for word, tag in tagger.tag(sent): + ... print(word, '->', tag) + Mitchell -> NP + decried -> None + the -> AT + high -> JJ + rate -> NN + of -> IN + unemployment -> None + +Note that words that the tagger has not seen during training receive a tag +of ``None``. + +We evaluate a tagger on data that was not seen during training: + + >>> tagger.evaluate(brown.tagged_sents(categories='news')[500:600]) + 0.73... + +For more information, please consult chapter 5 of the NLTK Book. 
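+
+Unseen words can also be handled by chaining taggers: the ``backoff``
+parameter makes a tagger defer to another tagger whenever it cannot assign
+a tag itself. A minimal sketch, reusing the training data from above:
+
+    >>> from nltk.tag import DefaultTagger
+    >>> backoff_tagger = UnigramTagger(brown.tagged_sents(categories='news')[:500],
+    ...                                backoff=DefaultTagger('NN'))
+    >>> backoff_tagger.tag(sent)[1]
+    ('decried', 'NN')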
+""" +from __future__ import print_function + +from nltk.tag.api import TaggerI +from nltk.tag.util import str2tuple, tuple2str, untag +from nltk.tag.sequential import ( + SequentialBackoffTagger, + ContextTagger, + DefaultTagger, + NgramTagger, + UnigramTagger, + BigramTagger, + TrigramTagger, + AffixTagger, + RegexpTagger, + ClassifierBasedTagger, + ClassifierBasedPOSTagger, +) +from nltk.tag.brill import BrillTagger +from nltk.tag.brill_trainer import BrillTaggerTrainer +from nltk.tag.tnt import TnT +from nltk.tag.hunpos import HunposTagger +from nltk.tag.stanford import StanfordTagger, StanfordPOSTagger, StanfordNERTagger +from nltk.tag.hmm import HiddenMarkovModelTagger, HiddenMarkovModelTrainer +from nltk.tag.senna import SennaTagger, SennaChunkTagger, SennaNERTagger +from nltk.tag.mapping import tagset_mapping, map_tag +from nltk.tag.crf import CRFTagger +from nltk.tag.perceptron import PerceptronTagger + +from nltk.data import load, find + +RUS_PICKLE = ( + 'taggers/averaged_perceptron_tagger_ru/averaged_perceptron_tagger_ru.pickle' +) + + +def _get_tagger(lang=None): + if lang == 'rus': + tagger = PerceptronTagger(False) + ap_russian_model_loc = 'file:' + str(find(RUS_PICKLE)) + tagger.load(ap_russian_model_loc) + else: + tagger = PerceptronTagger() + return tagger + + +def _pos_tag(tokens, tagset=None, tagger=None, lang=None): + # Currently only supoorts English and Russian. + if lang not in ['eng', 'rus']: + raise NotImplementedError( + "Currently, NLTK pos_tag only supports English and Russian " + "(i.e. lang='eng' or lang='rus')" + ) + else: + tagged_tokens = tagger.tag(tokens) + if tagset: # Maps to the specified tagset. + if lang == 'eng': + tagged_tokens = [ + (token, map_tag('en-ptb', tagset, tag)) + for (token, tag) in tagged_tokens + ] + elif lang == 'rus': + # Note that the new Russion pos tags from the model contains suffixes, + # see https://github.com/nltk/nltk/issues/2151#issuecomment-430709018 + tagged_tokens = [ + (token, map_tag('ru-rnc-new', tagset, tag.partition('=')[0])) + for (token, tag) in tagged_tokens + ] + return tagged_tokens + + +def pos_tag(tokens, tagset=None, lang='eng'): + """ + Use NLTK's currently recommended part of speech tagger to + tag the given list of tokens. + + >>> from nltk.tag import pos_tag + >>> from nltk.tokenize import word_tokenize + >>> pos_tag(word_tokenize("John's big idea isn't all that bad.")) + [('John', 'NNP'), ("'s", 'POS'), ('big', 'JJ'), ('idea', 'NN'), ('is', 'VBZ'), + ("n't", 'RB'), ('all', 'PDT'), ('that', 'DT'), ('bad', 'JJ'), ('.', '.')] + >>> pos_tag(word_tokenize("John's big idea isn't all that bad."), tagset='universal') + [('John', 'NOUN'), ("'s", 'PRT'), ('big', 'ADJ'), ('idea', 'NOUN'), ('is', 'VERB'), + ("n't", 'ADV'), ('all', 'DET'), ('that', 'DET'), ('bad', 'ADJ'), ('.', '.')] + + NB. Use `pos_tag_sents()` for efficient tagging of more than one sentence. + + :param tokens: Sequence of tokens to be tagged + :type tokens: list(str) + :param tagset: the tagset to be used, e.g. universal, wsj, brown + :type tagset: str + :param lang: the ISO 639 code of the language, e.g. 'eng' for English, 'rus' for Russian + :type lang: str + :return: The tagged tokens + :rtype: list(tuple(str, str)) + """ + tagger = _get_tagger(lang) + return _pos_tag(tokens, tagset, tagger, lang) + + +def pos_tag_sents(sentences, tagset=None, lang='eng'): + """ + Use NLTK's currently recommended part of speech tagger to tag the + given list of sentences, each consisting of a list of tokens. 
+ + :param tokens: List of sentences to be tagged + :type tokens: list(list(str)) + :param tagset: the tagset to be used, e.g. universal, wsj, brown + :type tagset: str + :param lang: the ISO 639 code of the language, e.g. 'eng' for English, 'rus' for Russian + :type lang: str + :return: The list of tagged sentences + :rtype: list(list(tuple(str, str))) + """ + tagger = _get_tagger(lang) + return [_pos_tag(sent, tagset, tagger, lang) for sent in sentences] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..e23a5bc Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/api.cpython-37.pyc new file mode 100644 index 0000000..851c3bd Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/api.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/brill.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/brill.cpython-37.pyc new file mode 100644 index 0000000..bf19390 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/brill.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/brill_trainer.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/brill_trainer.cpython-37.pyc new file mode 100644 index 0000000..9de596a Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/brill_trainer.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/crf.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/crf.cpython-37.pyc new file mode 100644 index 0000000..0ab8fe1 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/crf.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/hmm.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/hmm.cpython-37.pyc new file mode 100644 index 0000000..23be1bd Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/hmm.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/hunpos.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/hunpos.cpython-37.pyc new file mode 100644 index 0000000..32522ba Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/hunpos.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/mapping.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/mapping.cpython-37.pyc new file mode 100644 index 0000000..7d630d3 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/mapping.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/perceptron.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/perceptron.cpython-37.pyc new file mode 100644 index 0000000..33d7ec5 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/perceptron.cpython-37.pyc differ diff --git 
a/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/senna.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/senna.cpython-37.pyc new file mode 100644 index 0000000..b8b5367 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/senna.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/sequential.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/sequential.cpython-37.pyc new file mode 100644 index 0000000..9df41d3 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/sequential.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/stanford.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/stanford.cpython-37.pyc new file mode 100644 index 0000000..031ce80 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/stanford.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/tnt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/tnt.cpython-37.pyc new file mode 100644 index 0000000..c3dbc7f Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/tnt.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000..faeca76 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tag/__pycache__/util.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/api.py b/venv.bak/lib/python3.7/site-packages/nltk/tag/api.py new file mode 100644 index 0000000..0d4ffda --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tag/api.py @@ -0,0 +1,86 @@ +# Natural Language Toolkit: Tagger Interface +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird (minor additions) +# URL: +# For license information, see LICENSE.TXT + +""" +Interface for tagging each token in a sentence with supplementary +information, such as its part of speech. +""" +from abc import ABCMeta, abstractmethod +from itertools import chain + +from six import add_metaclass + +from nltk.internals import overridden +from nltk.metrics import accuracy +from nltk.tag.util import untag + + +@add_metaclass(ABCMeta) +class TaggerI(object): + """ + A processing interface for assigning a tag to each token in a list. + Tags are case sensitive strings that identify some property of each + token, such as its part of speech or its sense. + + Some taggers require specific types for their tokens. This is + generally indicated by the use of a sub-interface to ``TaggerI``. + For example, featureset taggers, which are subclassed from + ``FeaturesetTagger``, require that each token be a ``featureset``. + + Subclasses must define: + - either ``tag()`` or ``tag_sents()`` (or both) + """ + + @abstractmethod + def tag(self, tokens): + """ + Determine the most appropriate tag sequence for the given + token sequence, and return a corresponding list of tagged + tokens. A tagged token is encoded as a tuple ``(token, tag)``. + + :rtype: list(tuple(str, str)) + """ + if overridden(self.tag_sents): + return self.tag_sents([tokens])[0] + + def tag_sents(self, sentences): + """ + Apply ``self.tag()`` to each element of *sentences*. 
I.e.: + + return [self.tag(sent) for sent in sentences] + """ + return [self.tag(sent) for sent in sentences] + + def evaluate(self, gold): + """ + Score the accuracy of the tagger against the gold standard. + Strip the tags from the gold standard text, retag it using + the tagger, then compute the accuracy score. + + :type gold: list(list(tuple(str, str))) + :param gold: The list of tagged sentences to score the tagger on. + :rtype: float + """ + + tagged_sents = self.tag_sents(untag(sent) for sent in gold) + gold_tokens = list(chain(*gold)) + test_tokens = list(chain(*tagged_sents)) + return accuracy(gold_tokens, test_tokens) + + def _check_params(self, train, model): + if (train and model) or (not train and not model): + raise ValueError('Must specify either training data or trained model.') + + +class FeaturesetTaggerI(TaggerI): + """ + A tagger that requires tokens to be ``featuresets``. A featureset + is a dictionary that maps from feature names to feature + values. See ``nltk.classify`` for more information about features + and featuresets. + """ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/brill.py b/venv.bak/lib/python3.7/site-packages/nltk/tag/brill.py new file mode 100644 index 0000000..b44e335 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tag/brill.py @@ -0,0 +1,452 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Transformation-based learning +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Marcus Uneson +# based on previous (nltk2) version by +# Christopher Maloof, Edward Loper, Steven Bird +# URL: +# For license information, see LICENSE.TXT + +from __future__ import print_function, division + +from collections import defaultdict, Counter + +from nltk.tag import TaggerI +from nltk.tbl import Feature, Template +from nltk import jsontags + + +###################################################################### +# Brill Templates +###################################################################### + + +@jsontags.register_tag +class Word(Feature): + """ + Feature which examines the text (word) of nearby tokens. + """ + + json_tag = 'nltk.tag.brill.Word' + + @staticmethod + def extract_property(tokens, index): + """@return: The given token's text.""" + return tokens[index][0] + + +@jsontags.register_tag +class Pos(Feature): + """ + Feature which examines the tags of nearby tokens. 
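A minimal usage sketch of the TaggerI contract documented above (not part of the vendored NLTK source), assuming nltk is installed and the treebank corpus has been downloaded; the corpus choice and split sizes are illustrative. tag_sents() applies tag() per sentence, and evaluate() strips the gold tags, retags, and reports token-level accuracy:

    from nltk.tag import DefaultTagger, UnigramTagger
    from nltk.corpus import treebank

    train = treebank.tagged_sents()[:3000]
    test = treebank.tagged_sents()[3000:3100]

    backoff = DefaultTagger('NN')                    # a TaggerI that tags every token 'NN'
    tagger = UnigramTagger(train, backoff=backoff)   # also a TaggerI

    # tag_sents() applies tag() to each sentence; evaluate() retags the
    # untagged gold sentences and scores token accuracy against the gold tags.
    print(tagger.tag_sents([['The', 'dog', 'barked', '.']]))
    print(tagger.evaluate(test))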
+ """ + + json_tag = 'nltk.tag.brill.Pos' + + @staticmethod + def extract_property(tokens, index): + """@return: The given token's tag.""" + return tokens[index][1] + + +def nltkdemo18(): + """ + Return 18 templates, from the original nltk demo, in multi-feature syntax + """ + return [ + Template(Pos([-1])), + Template(Pos([1])), + Template(Pos([-2])), + Template(Pos([2])), + Template(Pos([-2, -1])), + Template(Pos([1, 2])), + Template(Pos([-3, -2, -1])), + Template(Pos([1, 2, 3])), + Template(Pos([-1]), Pos([1])), + Template(Word([-1])), + Template(Word([1])), + Template(Word([-2])), + Template(Word([2])), + Template(Word([-2, -1])), + Template(Word([1, 2])), + Template(Word([-3, -2, -1])), + Template(Word([1, 2, 3])), + Template(Word([-1]), Word([1])), + ] + + +def nltkdemo18plus(): + """ + Return 18 templates, from the original nltk demo, and additionally a few + multi-feature ones (the motivation is easy comparison with nltkdemo18) + """ + return nltkdemo18() + [ + Template(Word([-1]), Pos([1])), + Template(Pos([-1]), Word([1])), + Template(Word([-1]), Word([0]), Pos([1])), + Template(Pos([-1]), Word([0]), Word([1])), + Template(Pos([-1]), Word([0]), Pos([1])), + ] + + +def fntbl37(): + """ + Return 37 templates taken from the postagging task of the + fntbl distribution http://www.cs.jhu.edu/~rflorian/fntbl/ + (37 is after excluding a handful which do not condition on Pos[0]; + fntbl can do that but the current nltk implementation cannot.) + """ + return [ + Template(Word([0]), Word([1]), Word([2])), + Template(Word([-1]), Word([0]), Word([1])), + Template(Word([0]), Word([-1])), + Template(Word([0]), Word([1])), + Template(Word([0]), Word([2])), + Template(Word([0]), Word([-2])), + Template(Word([1, 2])), + Template(Word([-2, -1])), + Template(Word([1, 2, 3])), + Template(Word([-3, -2, -1])), + Template(Word([0]), Pos([2])), + Template(Word([0]), Pos([-2])), + Template(Word([0]), Pos([1])), + Template(Word([0]), Pos([-1])), + Template(Word([0])), + Template(Word([-2])), + Template(Word([2])), + Template(Word([1])), + Template(Word([-1])), + Template(Pos([-1]), Pos([1])), + Template(Pos([1]), Pos([2])), + Template(Pos([-1]), Pos([-2])), + Template(Pos([1])), + Template(Pos([-1])), + Template(Pos([-2])), + Template(Pos([2])), + Template(Pos([1, 2, 3])), + Template(Pos([1, 2])), + Template(Pos([-3, -2, -1])), + Template(Pos([-2, -1])), + Template(Pos([1]), Word([0]), Word([1])), + Template(Pos([1]), Word([0]), Word([-1])), + Template(Pos([-1]), Word([-1]), Word([0])), + Template(Pos([-1]), Word([0]), Word([1])), + Template(Pos([-2]), Pos([-1])), + Template(Pos([1]), Pos([2])), + Template(Pos([1]), Pos([2]), Word([1])), + ] + + +def brill24(): + """ + Return 24 templates of the seminal TBL paper, Brill (1995) + """ + return [ + Template(Pos([-1])), + Template(Pos([1])), + Template(Pos([-2])), + Template(Pos([2])), + Template(Pos([-2, -1])), + Template(Pos([1, 2])), + Template(Pos([-3, -2, -1])), + Template(Pos([1, 2, 3])), + Template(Pos([-1]), Pos([1])), + Template(Pos([-2]), Pos([-1])), + Template(Pos([1]), Pos([2])), + Template(Word([-1])), + Template(Word([1])), + Template(Word([-2])), + Template(Word([2])), + Template(Word([-2, -1])), + Template(Word([1, 2])), + Template(Word([-1, 0])), + Template(Word([0, 1])), + Template(Word([0])), + Template(Word([-1]), Pos([-1])), + Template(Word([1]), Pos([1])), + Template(Word([0]), Word([-1]), Pos([-1])), + Template(Word([0]), Word([1]), Pos([1])), + ] + + +def describe_template_sets(): + """ + Print the available template sets in this demo, 
with a short description" + """ + import inspect + import sys + + # a bit of magic to get all functions in this module + templatesets = inspect.getmembers(sys.modules[__name__], inspect.isfunction) + for (name, obj) in templatesets: + if name == "describe_template_sets": + continue + print(name, obj.__doc__, "\n") + + +###################################################################### +# The Brill Tagger +###################################################################### + + +@jsontags.register_tag +class BrillTagger(TaggerI): + """ + Brill's transformational rule-based tagger. Brill taggers use an + initial tagger (such as ``tag.DefaultTagger``) to assign an initial + tag sequence to a text; and then apply an ordered list of + transformational rules to correct the tags of individual tokens. + These transformation rules are specified by the ``TagRule`` + interface. + + Brill taggers can be created directly, from an initial tagger and + a list of transformational rules; but more often, Brill taggers + are created by learning rules from a training corpus, using one + of the TaggerTrainers available. + """ + + json_tag = 'nltk.tag.BrillTagger' + + def __init__(self, initial_tagger, rules, training_stats=None): + """ + :param initial_tagger: The initial tagger + :type initial_tagger: TaggerI + + :param rules: An ordered list of transformation rules that + should be used to correct the initial tagging. + :type rules: list(TagRule) + + :param training_stats: A dictionary of statistics collected + during training, for possible later use + :type training_stats: dict + + """ + self._initial_tagger = initial_tagger + self._rules = tuple(rules) + self._training_stats = training_stats + + def encode_json_obj(self): + return self._initial_tagger, self._rules, self._training_stats + + @classmethod + def decode_json_obj(cls, obj): + _initial_tagger, _rules, _training_stats = obj + return cls(_initial_tagger, _rules, _training_stats) + + def rules(self): + """ + Return the ordered list of transformation rules that this tagger has learnt + + :return: the ordered list of transformation rules that correct the initial tagging + :rtype: list of Rules + """ + return self._rules + + def train_stats(self, statistic=None): + """ + Return a named statistic collected during training, or a dictionary of all + available statistics if no name given + + :param statistic: name of statistic + :type statistic: str + :return: some statistic collected during training of this tagger + :rtype: any (but usually a number) + """ + if statistic is None: + return self._training_stats + else: + return self._training_stats.get(statistic) + + def tag(self, tokens): + # Inherit documentation from TaggerI + + # Run the initial tagger. + tagged_tokens = self._initial_tagger.tag(tokens) + + # Create a dictionary that maps each tag to a list of the + # indices of tokens that have that tag. + tag_to_positions = defaultdict(set) + for i, (token, tag) in enumerate(tagged_tokens): + tag_to_positions[tag].add(i) + + # Apply each rule, in order. Only try to apply rules at + # positions that have the desired original tag. + for rule in self._rules: + # Find the positions where it might apply + positions = tag_to_positions.get(rule.original_tag, []) + # Apply the rule at those positions. + changed = rule.apply(tagged_tokens, positions) + # Update tag_to_positions with the positions of tags that + # were modified. 
+ for i in changed: + tag_to_positions[rule.original_tag].remove(i) + tag_to_positions[rule.replacement_tag].add(i) + + return tagged_tokens + + def print_template_statistics(self, test_stats=None, printunused=True): + """ + Print a list of all templates, ranked according to efficiency. + + If test_stats is available, the templates are ranked according to their + relative contribution (summed for all rules created from a given template, + weighted by score) to the performance on the test set. If no test_stats, then + statistics collected during training are used instead. There is also + an unweighted measure (just counting the rules). This is less informative, + though, as many low-score rules will appear towards end of training. + + :param test_stats: dictionary of statistics collected during testing + :type test_stats: dict of str -> any (but usually numbers) + :param printunused: if True, print a list of all unused templates + :type printunused: bool + :return: None + :rtype: None + """ + tids = [r.templateid for r in self._rules] + train_stats = self.train_stats() + + trainscores = train_stats['rulescores'] + assert len(trainscores) == len(tids), ( + "corrupt statistics: " + "{0} train scores for {1} rules".format(trainscores, tids) + ) + template_counts = Counter(tids) + weighted_traincounts = Counter() + for (tid, score) in zip(tids, trainscores): + weighted_traincounts[tid] += score + tottrainscores = sum(trainscores) + + # det_tplsort() is for deterministic sorting; + # the otherwise convenient Counter.most_common() unfortunately + # does not break ties deterministically + # between python versions and will break cross-version tests + def det_tplsort(tpl_value): + return (tpl_value[1], repr(tpl_value[0])) + + def print_train_stats(): + print( + "TEMPLATE STATISTICS (TRAIN) {0} templates, {1} rules)".format( + len(template_counts), len(tids) + ) + ) + print( + "TRAIN ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} " + "final: {finalerrors:5d} {finalacc:.4f} ".format(**train_stats) + ) + head = "#ID | Score (train) | #Rules | Template" + print(head, "\n", "-" * len(head), sep="") + train_tplscores = sorted( + weighted_traincounts.items(), key=det_tplsort, reverse=True + ) + for (tid, trainscore) in train_tplscores: + s = "{0} | {1:5d} {2:5.3f} |{3:4d} {4:.3f} | {5}".format( + tid, + trainscore, + trainscore / tottrainscores, + template_counts[tid], + template_counts[tid] / len(tids), + Template.ALLTEMPLATES[int(tid)], + ) + print(s) + + def print_testtrain_stats(): + testscores = test_stats['rulescores'] + print( + "TEMPLATE STATISTICS (TEST AND TRAIN) ({0} templates, {1} rules)".format( + len(template_counts), len(tids) + ) + ) + print( + "TEST ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} " + "final: {finalerrors:5d} {finalacc:.4f} ".format(**test_stats) + ) + print( + "TRAIN ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} " + "final: {finalerrors:5d} {finalacc:.4f} ".format(**train_stats) + ) + weighted_testcounts = Counter() + for (tid, score) in zip(tids, testscores): + weighted_testcounts[tid] += score + tottestscores = sum(testscores) + head = "#ID | Score (test) | Score (train) | #Rules | Template" + print(head, "\n", "-" * len(head), sep="") + test_tplscores = sorted( + weighted_testcounts.items(), key=det_tplsort, reverse=True + ) + for (tid, testscore) in test_tplscores: + s = "{0:s} |{1:5d} {2:6.3f} | {3:4d} {4:.3f} |{5:4d} {6:.3f} | {7:s}".format( + tid, + testscore, + testscore / tottestscores, + 
weighted_traincounts[tid], + weighted_traincounts[tid] / tottrainscores, + template_counts[tid], + template_counts[tid] / len(tids), + Template.ALLTEMPLATES[int(tid)], + ) + print(s) + + def print_unused_templates(): + usedtpls = set(int(tid) for tid in tids) + unused = [ + (tid, tpl) + for (tid, tpl) in enumerate(Template.ALLTEMPLATES) + if tid not in usedtpls + ] + print("UNUSED TEMPLATES ({0})".format(len(unused))) + + for (tid, tpl) in unused: + print("{0:03d} {1:s}".format(tid, str(tpl))) + + if test_stats is None: + print_train_stats() + else: + print_testtrain_stats() + print() + if printunused: + print_unused_templates() + print() + + def batch_tag_incremental(self, sequences, gold): + """ + Tags by applying each rule to the entire corpus (rather than all rules to a + single sequence). The point is to collect statistics on the test set for + individual rules. + + NOTE: This is inefficient (does not build any index, so will traverse the entire + corpus N times for N rules) -- usually you would not care about statistics for + individual rules and thus use batch_tag() instead + + :param sequences: lists of token sequences (sentences, in some applications) to be tagged + :type sequences: list of list of strings + :param gold: the gold standard + :type gold: list of list of strings + :returns: tuple of (tagged_sequences, ordered list of rule scores (one for each rule)) + """ + + def counterrors(xs): + return sum(t[1] != g[1] for pair in zip(xs, gold) for (t, g) in zip(*pair)) + + testing_stats = {} + testing_stats['tokencount'] = sum(len(t) for t in sequences) + testing_stats['sequencecount'] = len(sequences) + tagged_tokenses = [self._initial_tagger.tag(tokens) for tokens in sequences] + testing_stats['initialerrors'] = counterrors(tagged_tokenses) + testing_stats['initialacc'] = ( + 1 - testing_stats['initialerrors'] / testing_stats['tokencount'] + ) + # Apply each rule to the entire corpus, in order + errors = [testing_stats['initialerrors']] + for rule in self._rules: + for tagged_tokens in tagged_tokenses: + rule.apply(tagged_tokens) + errors.append(counterrors(tagged_tokenses)) + testing_stats['rulescores'] = [ + err0 - err1 for (err0, err1) in zip(errors, errors[1:]) + ] + testing_stats['finalerrors'] = errors[-1] + testing_stats['finalacc'] = ( + 1 - testing_stats['finalerrors'] / testing_stats['tokencount'] + ) + return (tagged_tokenses, testing_stats) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/brill_trainer.py b/venv.bak/lib/python3.7/site-packages/nltk/tag/brill_trainer.py new file mode 100644 index 0000000..f518dcf --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tag/brill_trainer.py @@ -0,0 +1,631 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Transformation-based learning +# +# Copyright (C) 2001-2013 NLTK Project +# Author: Marcus Uneson +# based on previous (nltk2) version by +# Christopher Maloof, Edward Loper, Steven Bird +# URL: +# For license information, see LICENSE.TXT + +from __future__ import print_function, division + +import bisect +import textwrap +from collections import defaultdict + +from nltk.tag import untag, BrillTagger + +###################################################################### +# Brill Tagger Trainer +###################################################################### + + +class BrillTaggerTrainer(object): + """ + A trainer for tbl taggers. 
+ """ + + def __init__( + self, initial_tagger, templates, trace=0, deterministic=None, ruleformat="str" + ): + """ + Construct a Brill tagger from a baseline tagger and a + set of templates + + :param initial_tagger: the baseline tagger + :type initial_tagger: Tagger + :param templates: templates to be used in training + :type templates: list of Templates + :param trace: verbosity level + :type trace: int + :param deterministic: if True, adjudicate ties deterministically + :type deterministic: bool + :param ruleformat: format of reported Rules + :type ruleformat: str + :return: An untrained BrillTagger + :rtype: BrillTagger + """ + + if deterministic is None: + deterministic = trace > 0 + self._initial_tagger = initial_tagger + self._templates = templates + self._trace = trace + self._deterministic = deterministic + self._ruleformat = ruleformat + + self._tag_positions = None + """Mapping from tags to lists of positions that use that tag.""" + + self._rules_by_position = None + """Mapping from positions to the set of rules that are known + to occur at that position. Position is (sentnum, wordnum). + Initially, this will only contain positions where each rule + applies in a helpful way; but when we examine a rule, we'll + extend this list to also include positions where each rule + applies in a harmful or neutral way.""" + + self._positions_by_rule = None + """Mapping from rule to position to effect, specifying the + effect that each rule has on the overall score, at each + position. Position is (sentnum, wordnum); and effect is + -1, 0, or 1. As with _rules_by_position, this mapping starts + out only containing rules with positive effects; but when + we examine a rule, we'll extend this mapping to include + the positions where the rule is harmful or neutral.""" + + self._rules_by_score = None + """Mapping from scores to the set of rules whose effect on the + overall score is upper bounded by that score. Invariant: + rulesByScore[s] will contain r iff the sum of + _positions_by_rule[r] is s.""" + + self._rule_scores = None + """Mapping from rules to upper bounds on their effects on the + overall score. This is the inverse mapping to _rules_by_score. + Invariant: ruleScores[r] = sum(_positions_by_rule[r])""" + + self._first_unknown_position = None + """Mapping from rules to the first position where we're unsure + if the rule applies. This records the next position we + need to check to see if the rule messed anything up.""" + + # Training + + def train(self, train_sents, max_rules=200, min_score=2, min_acc=None): + """ + Trains the Brill tagger on the corpus *train_sents*, + producing at most *max_rules* transformations, each of which + reduces the net number of errors in the corpus by at least + *min_score*, and each of which has accuracy not lower than + *min_acc*. + + #imports + >>> from nltk.tbl.template import Template + >>> from nltk.tag.brill import Pos, Word + >>> from nltk.tag import untag, RegexpTagger, BrillTaggerTrainer + + #some data + >>> from nltk.corpus import treebank + >>> training_data = treebank.tagged_sents()[:100] + >>> baseline_data = treebank.tagged_sents()[100:200] + >>> gold_data = treebank.tagged_sents()[200:300] + >>> testing_data = [untag(s) for s in gold_data] + + >>> backoff = RegexpTagger([ + ... (r'^-?[0-9]+(.[0-9]+)?$', 'CD'), # cardinal numbers + ... (r'(The|the|A|a|An|an)$', 'AT'), # articles + ... (r'.*able$', 'JJ'), # adjectives + ... (r'.*ness$', 'NN'), # nouns formed from adjectives + ... (r'.*ly$', 'RB'), # adverbs + ... 
(r'.*s$', 'NNS'), # plural nouns + ... (r'.*ing$', 'VBG'), # gerunds + ... (r'.*ed$', 'VBD'), # past tense verbs + ... (r'.*', 'NN') # nouns (default) + ... ]) + + >>> baseline = backoff #see NOTE1 + + >>> baseline.evaluate(gold_data) #doctest: +ELLIPSIS + 0.2450142... + + #templates + >>> Template._cleartemplates() #clear any templates created in earlier tests + >>> templates = [Template(Pos([-1])), Template(Pos([-1]), Word([0]))] + + #construct a BrillTaggerTrainer + >>> tt = BrillTaggerTrainer(baseline, templates, trace=3) + + >>> tagger1 = tt.train(training_data, max_rules=10) + TBL train (fast) (seqs: 100; tokens: 2417; tpls: 2; min score: 2; min acc: None) + Finding initial useful rules... + Found 845 useful rules. + + B | + S F r O | Score = Fixed - Broken + c i o t | R Fixed = num tags changed incorrect -> correct + o x k h | u Broken = num tags changed correct -> incorrect + r e e e | l Other = num tags changed incorrect -> incorrect + e d n r | e + ------------------+------------------------------------------------------- + 132 132 0 0 | AT->DT if Pos:NN@[-1] + 85 85 0 0 | NN->, if Pos:NN@[-1] & Word:,@[0] + 69 69 0 0 | NN->. if Pos:NN@[-1] & Word:.@[0] + 51 51 0 0 | NN->IN if Pos:NN@[-1] & Word:of@[0] + 47 63 16 161 | NN->IN if Pos:NNS@[-1] + 33 33 0 0 | NN->TO if Pos:NN@[-1] & Word:to@[0] + 26 26 0 0 | IN->. if Pos:NNS@[-1] & Word:.@[0] + 24 24 0 0 | IN->, if Pos:NNS@[-1] & Word:,@[0] + 22 27 5 24 | NN->-NONE- if Pos:VBD@[-1] + 17 17 0 0 | NN->CC if Pos:NN@[-1] & Word:and@[0] + + >>> tagger1.rules()[1:3] + (Rule('001', 'NN', ',', [(Pos([-1]),'NN'), (Word([0]),',')]), Rule('001', 'NN', '.', [(Pos([-1]),'NN'), (Word([0]),'.')])) + + >>> train_stats = tagger1.train_stats() + >>> [train_stats[stat] for stat in ['initialerrors', 'finalerrors', 'rulescores']] + [1775, 1269, [132, 85, 69, 51, 47, 33, 26, 24, 22, 17]] + + >>> tagger1.print_template_statistics(printunused=False) + TEMPLATE STATISTICS (TRAIN) 2 templates, 10 rules) + TRAIN ( 2417 tokens) initial 1775 0.2656 final: 1269 0.4750 + #ID | Score (train) | #Rules | Template + -------------------------------------------- + 001 | 305 0.603 | 7 0.700 | Template(Pos([-1]),Word([0])) + 000 | 201 0.397 | 3 0.300 | Template(Pos([-1])) + + + + >>> tagger1.evaluate(gold_data) # doctest: +ELLIPSIS + 0.43996... + + >>> tagged, test_stats = tagger1.batch_tag_incremental(testing_data, gold_data) + + >>> tagged[33][12:] == [('foreign', 'IN'), ('debt', 'NN'), ('of', 'IN'), ('$', 'NN'), ('64', 'CD'), + ... ('billion', 'NN'), ('*U*', 'NN'), ('--', 'NN'), ('the', 'DT'), ('third-highest', 'NN'), ('in', 'NN'), + ... ('the', 'DT'), ('developing', 'VBG'), ('world', 'NN'), ('.', '.')] + True + + >>> [test_stats[stat] for stat in ['initialerrors', 'finalerrors', 'rulescores']] + [1855, 1376, [100, 85, 67, 58, 27, 36, 27, 16, 31, 32]] + + # a high-accuracy tagger + >>> tagger2 = tt.train(training_data, max_rules=10, min_acc=0.99) + TBL train (fast) (seqs: 100; tokens: 2417; tpls: 2; min score: 2; min acc: 0.99) + Finding initial useful rules... + Found 845 useful rules. + + B | + S F r O | Score = Fixed - Broken + c i o t | R Fixed = num tags changed incorrect -> correct + o x k h | u Broken = num tags changed correct -> incorrect + r e e e | l Other = num tags changed incorrect -> incorrect + e d n r | e + ------------------+------------------------------------------------------- + 132 132 0 0 | AT->DT if Pos:NN@[-1] + 85 85 0 0 | NN->, if Pos:NN@[-1] & Word:,@[0] + 69 69 0 0 | NN->. 
if Pos:NN@[-1] & Word:.@[0] + 51 51 0 0 | NN->IN if Pos:NN@[-1] & Word:of@[0] + 36 36 0 0 | NN->TO if Pos:NN@[-1] & Word:to@[0] + 26 26 0 0 | NN->. if Pos:NNS@[-1] & Word:.@[0] + 24 24 0 0 | NN->, if Pos:NNS@[-1] & Word:,@[0] + 19 19 0 6 | NN->VB if Pos:TO@[-1] + 18 18 0 0 | CD->-NONE- if Pos:NN@[-1] & Word:0@[0] + 18 18 0 0 | NN->CC if Pos:NN@[-1] & Word:and@[0] + + >>> tagger2.evaluate(gold_data) # doctest: +ELLIPSIS + 0.44159544... + >>> tagger2.rules()[2:4] + (Rule('001', 'NN', '.', [(Pos([-1]),'NN'), (Word([0]),'.')]), Rule('001', 'NN', 'IN', [(Pos([-1]),'NN'), (Word([0]),'of')])) + + # NOTE1: (!!FIXME) A far better baseline uses nltk.tag.UnigramTagger, + # with a RegexpTagger only as backoff. For instance, + # >>> baseline = UnigramTagger(baseline_data, backoff=backoff) + # However, as of Nov 2013, nltk.tag.UnigramTagger does not yield consistent results + # between python versions. The simplistic backoff above is a workaround to make doctests + # get consistent input. + + :param train_sents: training data + :type train_sents: list(list(tuple)) + :param max_rules: output at most max_rules rules + :type max_rules: int + :param min_score: stop training when no rules better than min_score can be found + :type min_score: int + :param min_acc: discard any rule with lower accuracy than min_acc + :type min_acc: float or None + :return: the learned tagger + :rtype: BrillTagger + + """ + # FIXME: several tests are a bit too dependent on tracing format + # FIXME: tests in trainer.fast and trainer.brillorig are exact duplicates + + # Basic idea: Keep track of the rules that apply at each position. + # And keep track of the positions to which each rule applies. + + # Create a new copy of the training corpus, and run the + # initial tagger on it. We will progressively update this + # test corpus to look more like the training corpus. + test_sents = [ + list(self._initial_tagger.tag(untag(sent))) for sent in train_sents + ] + + # Collect some statistics on the training process + trainstats = {} + trainstats['min_acc'] = min_acc + trainstats['min_score'] = min_score + trainstats['tokencount'] = sum(len(t) for t in test_sents) + trainstats['sequencecount'] = len(test_sents) + trainstats['templatecount'] = len(self._templates) + trainstats['rulescores'] = [] + trainstats['initialerrors'] = sum( + tag[1] != truth[1] + for paired in zip(test_sents, train_sents) + for (tag, truth) in zip(*paired) + ) + trainstats['initialacc'] = ( + 1 - trainstats['initialerrors'] / trainstats['tokencount'] + ) + if self._trace > 0: + print( + "TBL train (fast) (seqs: {sequencecount}; tokens: {tokencount}; " + "tpls: {templatecount}; min score: {min_score}; min acc: {min_acc})".format( + **trainstats + ) + ) + + # Initialize our mappings. This will find any errors made + # by the initial tagger, and use those to generate repair + # rules, which are added to the rule mappings. + if self._trace: + print("Finding initial useful rules...") + self._init_mappings(test_sents, train_sents) + if self._trace: + print((" Found %d useful rules." % len(self._rule_scores))) + + # Let the user know what we're up to. + if self._trace > 2: + self._trace_header() + elif self._trace == 1: + print("Selecting rules...") + + # Repeatedly select the best rule, and add it to `rules`. + rules = [] + try: + while len(rules) < max_rules: + # Find the best rule, and add it to our rule list. 
+ rule = self._best_rule(train_sents, test_sents, min_score, min_acc) + if rule: + rules.append(rule) + score = self._rule_scores[rule] + trainstats['rulescores'].append(score) + else: + break # No more good rules left! + + # Report the rule that we found. + if self._trace > 1: + self._trace_rule(rule) + + # Apply the new rule at the relevant sites + self._apply_rule(rule, test_sents) + + # Update _tag_positions[rule.original_tag] and + # _tag_positions[rule.replacement_tag] for the affected + # positions (i.e., self._positions_by_rule[rule]). + self._update_tag_positions(rule) + + # Update rules that were affected by the change. + self._update_rules(rule, train_sents, test_sents) + + # The user can cancel training manually: + except KeyboardInterrupt: + print("Training stopped manually -- %d rules found" % len(rules)) + + # Discard our tag position mapping & rule mappings. + self._clean() + trainstats['finalerrors'] = trainstats['initialerrors'] - sum( + trainstats['rulescores'] + ) + trainstats['finalacc'] = ( + 1 - trainstats['finalerrors'] / trainstats['tokencount'] + ) + # Create and return a tagger from the rules we found. + return BrillTagger(self._initial_tagger, rules, trainstats) + + def _init_mappings(self, test_sents, train_sents): + """ + Initialize the tag position mapping & the rule related + mappings. For each error in test_sents, find new rules that + would correct them, and add them to the rule mappings. + """ + self._tag_positions = defaultdict(list) + self._rules_by_position = defaultdict(set) + self._positions_by_rule = defaultdict(dict) + self._rules_by_score = defaultdict(set) + self._rule_scores = defaultdict(int) + self._first_unknown_position = defaultdict(int) + # Scan through the corpus, initializing the tag_positions + # mapping and all the rule-related mappings. + for sentnum, sent in enumerate(test_sents): + for wordnum, (word, tag) in enumerate(sent): + + # Initialize tag_positions + self._tag_positions[tag].append((sentnum, wordnum)) + + # If it's an error token, update the rule-related mappings. + correct_tag = train_sents[sentnum][wordnum][1] + if tag != correct_tag: + for rule in self._find_rules(sent, wordnum, correct_tag): + self._update_rule_applies(rule, sentnum, wordnum, train_sents) + + def _clean(self): + self._tag_positions = None + self._rules_by_position = None + self._positions_by_rule = None + self._rules_by_score = None + self._rule_scores = None + self._first_unknown_position = None + + def _find_rules(self, sent, wordnum, new_tag): + """ + Use the templates to find rules that apply at index *wordnum* + in the sentence *sent* and generate the tag *new_tag*. + """ + for template in self._templates: + for rule in template.applicable_rules(sent, wordnum, new_tag): + yield rule + + def _update_rule_applies(self, rule, sentnum, wordnum, train_sents): + """ + Update the rule data tables to reflect the fact that + *rule* applies at the position *(sentnum, wordnum)*. + """ + pos = sentnum, wordnum + + # If the rule is already known to apply here, ignore. + # (This only happens if the position's tag hasn't changed.) + if pos in self._positions_by_rule[rule]: + return + + # Update self._positions_by_rule. 
+ correct_tag = train_sents[sentnum][wordnum][1] + if rule.replacement_tag == correct_tag: + self._positions_by_rule[rule][pos] = 1 + elif rule.original_tag == correct_tag: + self._positions_by_rule[rule][pos] = -1 + else: # was wrong, remains wrong + self._positions_by_rule[rule][pos] = 0 + + # Update _rules_by_position + self._rules_by_position[pos].add(rule) + + # Update _rule_scores. + old_score = self._rule_scores[rule] + self._rule_scores[rule] += self._positions_by_rule[rule][pos] + + # Update _rules_by_score. + self._rules_by_score[old_score].discard(rule) + self._rules_by_score[self._rule_scores[rule]].add(rule) + + def _update_rule_not_applies(self, rule, sentnum, wordnum): + """ + Update the rule data tables to reflect the fact that *rule* + does not apply at the position *(sentnum, wordnum)*. + """ + pos = sentnum, wordnum + + # Update _rule_scores. + old_score = self._rule_scores[rule] + self._rule_scores[rule] -= self._positions_by_rule[rule][pos] + + # Update _rules_by_score. + self._rules_by_score[old_score].discard(rule) + self._rules_by_score[self._rule_scores[rule]].add(rule) + + # Update _positions_by_rule + del self._positions_by_rule[rule][pos] + self._rules_by_position[pos].remove(rule) + + # Optional addition: if the rule now applies nowhere, delete + # all its dictionary entries. + + def _best_rule(self, train_sents, test_sents, min_score, min_acc): + """ + Find the next best rule. This is done by repeatedly taking a + rule with the highest score and stepping through the corpus to + see where it applies. When it makes an error (decreasing its + score) it's bumped down, and we try a new rule with the + highest score. When we find a rule which has the highest + score *and* which has been tested against the entire corpus, we + can conclude that it's the next best rule. + """ + for max_score in sorted(self._rules_by_score.keys(), reverse=True): + if len(self._rules_by_score) == 0: + return None + if max_score < min_score or max_score <= 0: + return None + best_rules = list(self._rules_by_score[max_score]) + if self._deterministic: + best_rules.sort(key=repr) + for rule in best_rules: + positions = self._tag_positions[rule.original_tag] + + unk = self._first_unknown_position.get(rule, (0, -1)) + start = bisect.bisect_left(positions, unk) + + for i in range(start, len(positions)): + sentnum, wordnum = positions[i] + if rule.applies(test_sents[sentnum], wordnum): + self._update_rule_applies(rule, sentnum, wordnum, train_sents) + if self._rule_scores[rule] < max_score: + self._first_unknown_position[rule] = (sentnum, wordnum + 1) + break # The update demoted the rule. + + if self._rule_scores[rule] == max_score: + self._first_unknown_position[rule] = (len(train_sents) + 1, 0) + # optimization: if no min_acc threshold given, don't bother computing accuracy + if min_acc is None: + return rule + else: + changes = self._positions_by_rule[rule].values() + num_fixed = len([c for c in changes if c == 1]) + num_broken = len([c for c in changes if c == -1]) + # acc here is fixed/(fixed+broken); could also be + # fixed/(fixed+broken+other) == num_fixed/len(changes) + acc = num_fixed / (num_fixed + num_broken) + if acc >= min_acc: + return rule + # else: rule too inaccurate, discard and try next + + # We demoted (or skipped due to < min_acc, if that was given) + # all the rules with score==max_score. 
+ + assert min_acc is not None or not self._rules_by_score[max_score] + if not self._rules_by_score[max_score]: + del self._rules_by_score[max_score] + + def _apply_rule(self, rule, test_sents): + """ + Update *test_sents* by applying *rule* everywhere where its + conditions are met. + """ + update_positions = set(self._positions_by_rule[rule]) + new_tag = rule.replacement_tag + + if self._trace > 3: + self._trace_apply(len(update_positions)) + + # Update test_sents. + for (sentnum, wordnum) in update_positions: + text = test_sents[sentnum][wordnum][0] + test_sents[sentnum][wordnum] = (text, new_tag) + + def _update_tag_positions(self, rule): + """ + Update _tag_positions to reflect the changes to tags that are + made by *rule*. + """ + # Update the tag index. + for pos in self._positions_by_rule[rule]: + # Delete the old tag. + old_tag_positions = self._tag_positions[rule.original_tag] + old_index = bisect.bisect_left(old_tag_positions, pos) + del old_tag_positions[old_index] + # Insert the new tag. + new_tag_positions = self._tag_positions[rule.replacement_tag] + bisect.insort_left(new_tag_positions, pos) + + def _update_rules(self, rule, train_sents, test_sents): + """ + Check if we should add or remove any rules from consideration, + given the changes made by *rule*. + """ + # Collect a list of all positions that might be affected. + neighbors = set() + for sentnum, wordnum in self._positions_by_rule[rule]: + for template in self._templates: + n = template.get_neighborhood(test_sents[sentnum], wordnum) + neighbors.update([(sentnum, i) for i in n]) + + # Update the rules at each position. + num_obsolete = num_new = num_unseen = 0 + for sentnum, wordnum in neighbors: + test_sent = test_sents[sentnum] + correct_tag = train_sents[sentnum][wordnum][1] + + # Check if the change causes any rule at this position to + # stop matching; if so, then update our rule mappings + # accordingly. + old_rules = set(self._rules_by_position[sentnum, wordnum]) + for old_rule in old_rules: + if not old_rule.applies(test_sent, wordnum): + num_obsolete += 1 + self._update_rule_not_applies(old_rule, sentnum, wordnum) + + # Check if the change causes our templates to propose any + # new rules for this position. + for template in self._templates: + for new_rule in template.applicable_rules( + test_sent, wordnum, correct_tag + ): + if new_rule not in old_rules: + num_new += 1 + if new_rule not in self._rule_scores: + num_unseen += 1 + old_rules.add(new_rule) + self._update_rule_applies( + new_rule, sentnum, wordnum, train_sents + ) + + # We may have caused other rules to match here, that are + # not proposed by our templates -- in particular, rules + # that are harmful or neutral. We therefore need to + # update any rule whose first_unknown_position is past + # this rule. 
+ for new_rule, pos in self._first_unknown_position.items(): + if pos > (sentnum, wordnum): + if new_rule not in old_rules: + num_new += 1 + if new_rule.applies(test_sent, wordnum): + self._update_rule_applies( + new_rule, sentnum, wordnum, train_sents + ) + + if self._trace > 3: + self._trace_update_rules(num_obsolete, num_new, num_unseen) + + # Tracing + + def _trace_header(self): + print( + """ + B | + S F r O | Score = Fixed - Broken + c i o t | R Fixed = num tags changed incorrect -> correct + o x k h | u Broken = num tags changed correct -> incorrect + r e e e | l Other = num tags changed incorrect -> incorrect + e d n r | e +------------------+------------------------------------------------------- + """.rstrip() + ) + + def _trace_rule(self, rule): + assert self._rule_scores[rule] == sum(self._positions_by_rule[rule].values()) + + changes = self._positions_by_rule[rule].values() + num_fixed = len([c for c in changes if c == 1]) + num_broken = len([c for c in changes if c == -1]) + num_other = len([c for c in changes if c == 0]) + score = self._rule_scores[rule] + + rulestr = rule.format(self._ruleformat) + if self._trace > 2: + print( + '%4d%4d%4d%4d |' % (score, num_fixed, num_broken, num_other), end=' ' + ) + print( + textwrap.fill( + rulestr, + initial_indent=' ' * 20, + width=79, + subsequent_indent=' ' * 18 + '| ', + ).strip() + ) + else: + print(rulestr) + + def _trace_apply(self, num_updates): + prefix = ' ' * 18 + '|' + print(prefix) + print(prefix, 'Applying rule to %d positions.' % num_updates) + + def _trace_update_rules(self, num_obsolete, num_new, num_unseen): + prefix = ' ' * 18 + '|' + print(prefix, 'Updated rule tables:') + print(prefix, (' - %d rule applications removed' % num_obsolete)) + print( + prefix, + (' - %d rule applications added (%d novel)' % (num_new, num_unseen)), + ) + print(prefix) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/crf.py b/venv.bak/lib/python3.7/site-packages/nltk/tag/crf.py new file mode 100644 index 0000000..828125f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tag/crf.py @@ -0,0 +1,205 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Interface to the CRFSuite Tagger +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Long Duong +# URL: +# For license information, see LICENSE.TXT + +""" +A module for POS tagging using CRFSuite +""" +from __future__ import absolute_import +from __future__ import unicode_literals +import unicodedata +import re +from nltk.tag.api import TaggerI + +try: + import pycrfsuite +except ImportError: + pass + + +class CRFTagger(TaggerI): + """ + A module for POS tagging using CRFSuite https://pypi.python.org/pypi/python-crfsuite + + >>> from nltk.tag import CRFTagger + >>> ct = CRFTagger() + + >>> train_data = [[('University','Noun'), ('is','Verb'), ('a','Det'), ('good','Adj'), ('place','Noun')], + ... 
[('dog','Noun'),('eat','Verb'),('meat','Noun')]] + + >>> ct.train(train_data,'model.crf.tagger') + >>> ct.tag_sents([['dog','is','good'], ['Cat','eat','meat']]) + [[('dog', 'Noun'), ('is', 'Verb'), ('good', 'Adj')], [('Cat', 'Noun'), ('eat', 'Verb'), ('meat', 'Noun')]] + + >>> gold_sentences = [[('dog','Noun'),('is','Verb'),('good','Adj')] , [('Cat','Noun'),('eat','Verb'), ('meat','Noun')]] + >>> ct.evaluate(gold_sentences) + 1.0 + + Setting learned model file + >>> ct = CRFTagger() + >>> ct.set_model_file('model.crf.tagger') + >>> ct.evaluate(gold_sentences) + 1.0 + + """ + + def __init__(self, feature_func=None, verbose=False, training_opt={}): + """ + Initialize the CRFSuite tagger + :param feature_func: The function that extracts features for each token of a sentence. This function should take + 2 parameters: tokens and index which extract features at index position from tokens list. See the build in + _get_features function for more detail. + :param verbose: output the debugging messages during training. + :type verbose: boolean + :param training_opt: python-crfsuite training options + :type training_opt : dictionary + + Set of possible training options (using LBFGS training algorithm). + 'feature.minfreq' : The minimum frequency of features. + 'feature.possible_states' : Force to generate possible state features. + 'feature.possible_transitions' : Force to generate possible transition features. + 'c1' : Coefficient for L1 regularization. + 'c2' : Coefficient for L2 regularization. + 'max_iterations' : The maximum number of iterations for L-BFGS optimization. + 'num_memories' : The number of limited memories for approximating the inverse hessian matrix. + 'epsilon' : Epsilon for testing the convergence of the objective. + 'period' : The duration of iterations to test the stopping criterion. + 'delta' : The threshold for the stopping criterion; an L-BFGS iteration stops when the + improvement of the log likelihood over the last ${period} iterations is no greater than this threshold. + 'linesearch' : The line search algorithm used in L-BFGS updates: + { 'MoreThuente': More and Thuente's method, + 'Backtracking': Backtracking method with regular Wolfe condition, + 'StrongBacktracking': Backtracking method with strong Wolfe condition + } + 'max_linesearch' : The maximum number of trials for the line search algorithm. + + """ + + self._model_file = '' + self._tagger = pycrfsuite.Tagger() + + if feature_func is None: + self._feature_func = self._get_features + else: + self._feature_func = feature_func + + self._verbose = verbose + self._training_options = training_opt + self._pattern = re.compile(r'\d') + + def set_model_file(self, model_file): + self._model_file = model_file + self._tagger.open(self._model_file) + + def _get_features(self, tokens, idx): + """ + Extract basic features about this word including + - Current Word + - Is Capitalized ? + - Has Punctuation ? + - Has Number ? + - Suffixes up to length 3 + Note that : we might include feature over previous word, next word ect. 
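The feature_func hook documented in the constructor above can replace the built-in _get_features entirely; a hedged sketch, assuming python-crfsuite is installed (the feature names, model filename, and toy training data are illustrative, the data being borrowed from the doctest above):

    from nltk.tag import CRFTagger

    def simple_features(tokens, idx):
        # Same contract as _get_features: return a list of string features
        # describing tokens[idx] (optionally looking at neighbouring tokens).
        token = tokens[idx]
        feats = ['WORD_' + token, 'SUF_' + token[-2:]]
        if token[0].isupper():
            feats.append('CAPITALIZED')
        if idx > 0:
            feats.append('PREV_' + tokens[idx - 1])
        return feats

    train_data = [[('University', 'Noun'), ('is', 'Verb'), ('a', 'Det'),
                   ('good', 'Adj'), ('place', 'Noun')],
                  [('dog', 'Noun'), ('eat', 'Verb'), ('meat', 'Noun')]]

    ct = CRFTagger(feature_func=simple_features)
    ct.train(train_data, 'model.custom.tagger')
    print(ct.tag(['dog', 'is', 'good']))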
+ + :return : a list which contains the features + :rtype : list(str) + + """ + token = tokens[idx] + + feature_list = [] + + if not token: + return feature_list + + # Capitalization + if token[0].isupper(): + feature_list.append('CAPITALIZATION') + + # Number + if re.search(self._pattern, token) is not None: + feature_list.append('HAS_NUM') + + # Punctuation + punc_cat = set(["Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po"]) + if all(unicodedata.category(x) in punc_cat for x in token): + feature_list.append('PUNCTUATION') + + # Suffix up to length 3 + if len(token) > 1: + feature_list.append('SUF_' + token[-1:]) + if len(token) > 2: + feature_list.append('SUF_' + token[-2:]) + if len(token) > 3: + feature_list.append('SUF_' + token[-3:]) + + feature_list.append('WORD_' + token) + + return feature_list + + def tag_sents(self, sents): + ''' + Tag a list of sentences. NB before using this function, user should specify the mode_file either by + - Train a new model using ``train'' function + - Use the pre-trained model which is set via ``set_model_file'' function + :params sentences : list of sentences needed to tag. + :type sentences : list(list(str)) + :return : list of tagged sentences. + :rtype : list (list (tuple(str,str))) + ''' + if self._model_file == '': + raise Exception( + ' No model file is found !! Please use train or set_model_file function' + ) + + # We need the list of sentences instead of the list generator for matching the input and output + result = [] + for tokens in sents: + features = [self._feature_func(tokens, i) for i in range(len(tokens))] + labels = self._tagger.tag(features) + + if len(labels) != len(tokens): + raise Exception(' Predicted Length Not Matched, Expect Errors !') + + tagged_sent = list(zip(tokens, labels)) + result.append(tagged_sent) + + return result + + def train(self, train_data, model_file): + ''' + Train the CRF tagger using CRFSuite + :params train_data : is the list of annotated sentences. + :type train_data : list (list(tuple(str,str))) + :params model_file : the model will be saved to this file. + + ''' + trainer = pycrfsuite.Trainer(verbose=self._verbose) + trainer.set_params(self._training_options) + + for sent in train_data: + tokens, labels = zip(*sent) + features = [self._feature_func(tokens, i) for i in range(len(tokens))] + trainer.append(features, labels) + + # Now train the model, the output should be model_file + trainer.train(model_file) + # Save the model file + self.set_model_file(model_file) + + def tag(self, tokens): + ''' + Tag a sentence using Python CRFSuite Tagger. NB before using this function, user should specify the mode_file either by + - Train a new model using ``train'' function + - Use the pre-trained model which is set via ``set_model_file'' function + :params tokens : list of tokens needed to tag. + :type tokens : list(str) + :return : list of tagged tokens. 
+ :rtype : list (tuple(str,str)) + ''' + + return self.tag_sents([tokens])[0] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/hmm.py b/venv.bak/lib/python3.7/site-packages/nltk/tag/hmm.py new file mode 100644 index 0000000..5e834dc --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tag/hmm.py @@ -0,0 +1,1344 @@ +# Natural Language Toolkit: Hidden Markov Model +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Trevor Cohn +# Philip Blunsom +# Tiago Tresoldi (fixes) +# Steven Bird (fixes) +# Joseph Frazee (fixes) +# Steven Xu (fixes) +# URL: +# For license information, see LICENSE.TXT + +""" +Hidden Markov Models (HMMs) largely used to assign the correct label sequence +to sequential data or assess the probability of a given label and data +sequence. These models are finite state machines characterised by a number of +states, transitions between these states, and output symbols emitted while in +each state. The HMM is an extension to the Markov chain, where each state +corresponds deterministically to a given event. In the HMM the observation is +a probabilistic function of the state. HMMs share the Markov chain's +assumption, being that the probability of transition from one state to another +only depends on the current state - i.e. the series of states that led to the +current state are not used. They are also time invariant. + +The HMM is a directed graph, with probability weighted edges (representing the +probability of a transition between the source and sink states) where each +vertex emits an output symbol when entered. The symbol (or observation) is +non-deterministically generated. For this reason, knowing that a sequence of +output observations was generated by a given HMM does not mean that the +corresponding sequence of states (and what the current state is) is known. +This is the 'hidden' in the hidden markov model. + +Formally, a HMM can be characterised by: + +- the output observation alphabet. This is the set of symbols which may be + observed as output of the system. +- the set of states. +- the transition probabilities *a_{ij} = P(s_t = j | s_{t-1} = i)*. These + represent the probability of transition to each state from a given state. +- the output probability matrix *b_i(k) = P(X_t = o_k | s_t = i)*. These + represent the probability of observing each symbol in a given state. +- the initial state distribution. This gives the probability of starting + in each state. + +To ground this discussion, take a common NLP application, part-of-speech (POS) +tagging. An HMM is desirable for this task as the highest probability tag +sequence can be calculated for a given sequence of word forms. This differs +from other tagging techniques which often tag each word individually, seeking +to optimise each individual tagging greedily without regard to the optimal +combination of tags for a larger unit, such as a sentence. The HMM does this +with the Viterbi algorithm, which efficiently computes the optimal path +through the graph given the sequence of words forms. + +In POS tagging the states usually have a 1:1 correspondence with the tag +alphabet - i.e. each state represents a single tag. The output observation +alphabet is the set of word forms (the lexicon), and the remaining three +parameters are derived by a training regime. With this information the +probability of a given sentence can be easily derived, by simply summing the +probability of each distinct path through the model. 
Similarly, the highest +probability tagging sequence can be derived with the Viterbi algorithm, +yielding a state sequence which can be mapped into a tag sequence. + +This discussion assumes that the HMM has been trained. This is probably the +most difficult task with the model, and requires either MLE estimates of the +parameters or unsupervised learning using the Baum-Welch algorithm, a variant +of EM. + +For more information, please consult the source code for this module, +which includes extensive demonstration code. +""" +from __future__ import print_function, unicode_literals, division + +import re +import itertools + +from six.moves import map, zip + +try: + import numpy as np +except ImportError: + pass + +from nltk.probability import ( + FreqDist, + ConditionalFreqDist, + ConditionalProbDist, + DictionaryProbDist, + DictionaryConditionalProbDist, + LidstoneProbDist, + MutableProbDist, + MLEProbDist, + RandomProbDist, +) +from nltk.metrics import accuracy +from nltk.util import LazyMap, unique_list +from nltk.compat import python_2_unicode_compatible +from nltk.tag.api import TaggerI + + +_TEXT = 0 # index of text in a tuple +_TAG = 1 # index of tag in a tuple + + +def _identity(labeled_symbols): + return labeled_symbols + + +@python_2_unicode_compatible +class HiddenMarkovModelTagger(TaggerI): + """ + Hidden Markov model class, a generative model for labelling sequence data. + These models define the joint probability of a sequence of symbols and + their labels (state transitions) as the product of the starting state + probability, the probability of each state transition, and the probability + of each observation being generated from each state. This is described in + more detail in the module documentation. + + This implementation is based on the HMM description in Chapter 8, Huang, + Acero and Hon, Spoken Language Processing and includes an extension for + training shallow HMM parsers or specialized HMMs as in Molina et. + al, 2002. A specialized HMM modifies training data by applying a + specialization function to create a new training set that is more + appropriate for sequential tagging with an HMM. A typical use case is + chunking. + + :param symbols: the set of output symbols (alphabet) + :type symbols: seq of any + :param states: a set of states representing state space + :type states: seq of any + :param transitions: transition probabilities; Pr(s_i | s_j) is the + probability of transition from state i given the model is in + state_j + :type transitions: ConditionalProbDistI + :param outputs: output probabilities; Pr(o_k | s_i) is the probability + of emitting symbol k when entering state i + :type outputs: ConditionalProbDistI + :param priors: initial state distribution; Pr(s_i) is the probability + of starting in state i + :type priors: ProbDistI + :param transform: an optional function for transforming training + instances, defaults to the identity function. 
+ :type transform: callable + """ + + def __init__( + self, symbols, states, transitions, outputs, priors, transform=_identity + ): + self._symbols = unique_list(symbols) + self._states = unique_list(states) + self._transitions = transitions + self._outputs = outputs + self._priors = priors + self._cache = None + self._transform = transform + + @classmethod + def _train( + cls, + labeled_sequence, + test_sequence=None, + unlabeled_sequence=None, + transform=_identity, + estimator=None, + **kwargs + ): + + if estimator is None: + + def estimator(fd, bins): + return LidstoneProbDist(fd, 0.1, bins) + + labeled_sequence = LazyMap(transform, labeled_sequence) + symbols = unique_list(word for sent in labeled_sequence for word, tag in sent) + tag_set = unique_list(tag for sent in labeled_sequence for word, tag in sent) + + trainer = HiddenMarkovModelTrainer(tag_set, symbols) + hmm = trainer.train_supervised(labeled_sequence, estimator=estimator) + hmm = cls( + hmm._symbols, + hmm._states, + hmm._transitions, + hmm._outputs, + hmm._priors, + transform=transform, + ) + + if test_sequence: + hmm.test(test_sequence, verbose=kwargs.get('verbose', False)) + + if unlabeled_sequence: + max_iterations = kwargs.get('max_iterations', 5) + hmm = trainer.train_unsupervised( + unlabeled_sequence, model=hmm, max_iterations=max_iterations + ) + if test_sequence: + hmm.test(test_sequence, verbose=kwargs.get('verbose', False)) + + return hmm + + @classmethod + def train( + cls, labeled_sequence, test_sequence=None, unlabeled_sequence=None, **kwargs + ): + """ + Train a new HiddenMarkovModelTagger using the given labeled and + unlabeled training instances. Testing will be performed if test + instances are provided. + + :return: a hidden markov model tagger + :rtype: HiddenMarkovModelTagger + :param labeled_sequence: a sequence of labeled training instances, + i.e. a list of sentences represented as tuples + :type labeled_sequence: list(list) + :param test_sequence: a sequence of labeled test instances + :type test_sequence: list(list) + :param unlabeled_sequence: a sequence of unlabeled training instances, + i.e. a list of sentences represented as words + :type unlabeled_sequence: list(list) + :param transform: an optional function for transforming training + instances, defaults to the identity function, see ``transform()`` + :type transform: function + :param estimator: an optional function or class that maps a + condition's frequency distribution to its probability + distribution, defaults to a Lidstone distribution with gamma = 0.1 + :type estimator: class or function + :param verbose: boolean flag indicating whether training should be + verbose or include printed output + :type verbose: bool + :param max_iterations: number of Baum-Welch interations to perform + :type max_iterations: int + """ + return cls._train(labeled_sequence, test_sequence, unlabeled_sequence, **kwargs) + + def probability(self, sequence): + """ + Returns the probability of the given symbol sequence. If the sequence + is labelled, then returns the joint probability of the symbol, state + sequence. Otherwise, uses the forward algorithm to find the + probability over all label sequences. 
+ + :return: the probability of the sequence + :rtype: float + :param sequence: the sequence of symbols which must contain the TEXT + property, and optionally the TAG property + :type sequence: Token + """ + return 2 ** (self.log_probability(self._transform(sequence))) + + def log_probability(self, sequence): + """ + Returns the log-probability of the given symbol sequence. If the + sequence is labelled, then returns the joint log-probability of the + symbol, state sequence. Otherwise, uses the forward algorithm to find + the log-probability over all label sequences. + + :return: the log-probability of the sequence + :rtype: float + :param sequence: the sequence of symbols which must contain the TEXT + property, and optionally the TAG property + :type sequence: Token + """ + sequence = self._transform(sequence) + + T = len(sequence) + + if T > 0 and sequence[0][_TAG]: + last_state = sequence[0][_TAG] + p = self._priors.logprob(last_state) + self._output_logprob( + last_state, sequence[0][_TEXT] + ) + for t in range(1, T): + state = sequence[t][_TAG] + p += self._transitions[last_state].logprob( + state + ) + self._output_logprob(state, sequence[t][_TEXT]) + last_state = state + return p + else: + alpha = self._forward_probability(sequence) + p = logsumexp2(alpha[T - 1]) + return p + + def tag(self, unlabeled_sequence): + """ + Tags the sequence with the highest probability state sequence. This + uses the best_path method to find the Viterbi path. + + :return: a labelled sequence of symbols + :rtype: list + :param unlabeled_sequence: the sequence of unlabeled symbols + :type unlabeled_sequence: list + """ + unlabeled_sequence = self._transform(unlabeled_sequence) + return self._tag(unlabeled_sequence) + + def _tag(self, unlabeled_sequence): + path = self._best_path(unlabeled_sequence) + return list(zip(unlabeled_sequence, path)) + + def _output_logprob(self, state, symbol): + """ + :return: the log probability of the symbol being observed in the given + state + :rtype: float + """ + return self._outputs[state].logprob(symbol) + + def _create_cache(self): + """ + The cache is a tuple (P, O, X, S) where: + + - S maps symbols to integers. 
I.e., it is the inverse + mapping from self._symbols; for each symbol s in + self._symbols, the following is true:: + + self._symbols[S[s]] == s + + - O is the log output probabilities:: + + O[i,k] = log( P(token[t]=sym[k]|tag[t]=state[i]) ) + + - X is the log transition probabilities:: + + X[i,j] = log( P(tag[t]=state[j]|tag[t-1]=state[i]) ) + + - P is the log prior probabilities:: + + P[i] = log( P(tag[0]=state[i]) ) + """ + if not self._cache: + N = len(self._states) + M = len(self._symbols) + P = np.zeros(N, np.float32) + X = np.zeros((N, N), np.float32) + O = np.zeros((N, M), np.float32) + for i in range(N): + si = self._states[i] + P[i] = self._priors.logprob(si) + for j in range(N): + X[i, j] = self._transitions[si].logprob(self._states[j]) + for k in range(M): + O[i, k] = self._output_logprob(si, self._symbols[k]) + S = {} + for k in range(M): + S[self._symbols[k]] = k + self._cache = (P, O, X, S) + + def _update_cache(self, symbols): + # add new symbols to the symbol table and repopulate the output + # probabilities and symbol table mapping + if symbols: + self._create_cache() + P, O, X, S = self._cache + for symbol in symbols: + if symbol not in self._symbols: + self._cache = None + self._symbols.append(symbol) + # don't bother with the work if there aren't any new symbols + if not self._cache: + N = len(self._states) + M = len(self._symbols) + Q = O.shape[1] + # add new columns to the output probability table without + # destroying the old probabilities + O = np.hstack([O, np.zeros((N, M - Q), np.float32)]) + for i in range(N): + si = self._states[i] + # only calculate probabilities for new symbols + for k in range(Q, M): + O[i, k] = self._output_logprob(si, self._symbols[k]) + # only create symbol mappings for new symbols + for k in range(Q, M): + S[self._symbols[k]] = k + self._cache = (P, O, X, S) + + def reset_cache(self): + self._cache = None + + def best_path(self, unlabeled_sequence): + """ + Returns the state sequence of the optimal (most probable) path through + the HMM. Uses the Viterbi algorithm to calculate this part by dynamic + programming. + + :return: the state sequence + :rtype: sequence of any + :param unlabeled_sequence: the sequence of unlabeled symbols + :type unlabeled_sequence: list + """ + unlabeled_sequence = self._transform(unlabeled_sequence) + return self._best_path(unlabeled_sequence) + + def _best_path(self, unlabeled_sequence): + T = len(unlabeled_sequence) + N = len(self._states) + self._create_cache() + self._update_cache(unlabeled_sequence) + P, O, X, S = self._cache + + V = np.zeros((T, N), np.float32) + B = -np.ones((T, N), np.int) + + V[0] = P + O[:, S[unlabeled_sequence[0]]] + for t in range(1, T): + for j in range(N): + vs = V[t - 1, :] + X[:, j] + best = np.argmax(vs) + V[t, j] = vs[best] + O[j, S[unlabeled_sequence[t]]] + B[t, j] = best + + current = np.argmax(V[T - 1, :]) + sequence = [current] + for t in range(T - 1, 0, -1): + last = B[t, current] + sequence.append(last) + current = last + + sequence.reverse() + return list(map(self._states.__getitem__, sequence)) + + def best_path_simple(self, unlabeled_sequence): + """ + Returns the state sequence of the optimal (most probable) path through + the HMM. Uses the Viterbi algorithm to calculate this part by dynamic + programming. This uses a simple, direct method, and is included for + teaching purposes. 
+ + :return: the state sequence + :rtype: sequence of any + :param unlabeled_sequence: the sequence of unlabeled symbols + :type unlabeled_sequence: list + """ + unlabeled_sequence = self._transform(unlabeled_sequence) + return self._best_path_simple(unlabeled_sequence) + + def _best_path_simple(self, unlabeled_sequence): + T = len(unlabeled_sequence) + N = len(self._states) + V = np.zeros((T, N), np.float64) + B = {} + + # find the starting log probabilities for each state + symbol = unlabeled_sequence[0] + for i, state in enumerate(self._states): + V[0, i] = self._priors.logprob(state) + self._output_logprob(state, symbol) + B[0, state] = None + + # find the maximum log probabilities for reaching each state at time t + for t in range(1, T): + symbol = unlabeled_sequence[t] + for j in range(N): + sj = self._states[j] + best = None + for i in range(N): + si = self._states[i] + va = V[t - 1, i] + self._transitions[si].logprob(sj) + if not best or va > best[0]: + best = (va, si) + V[t, j] = best[0] + self._output_logprob(sj, symbol) + B[t, sj] = best[1] + + # find the highest probability final state + best = None + for i in range(N): + val = V[T - 1, i] + if not best or val > best[0]: + best = (val, self._states[i]) + + # traverse the back-pointers B to find the state sequence + current = best[1] + sequence = [current] + for t in range(T - 1, 0, -1): + last = B[t, current] + sequence.append(last) + current = last + + sequence.reverse() + return sequence + + def random_sample(self, rng, length): + """ + Randomly sample the HMM to generate a sentence of a given length. This + samples the prior distribution then the observation distribution and + transition distribution for each subsequent observation and state. + This will mostly generate unintelligible garbage, but can provide some + amusement. + + :return: the randomly created state/observation sequence, + generated according to the HMM's probability + distributions. The SUBTOKENS have TEXT and TAG + properties containing the observation and state + respectively. + :rtype: list + :param rng: random number generator + :type rng: Random (or any object with a random() method) + :param length: desired output length + :type length: int + """ + + # sample the starting state and symbol prob dists + tokens = [] + state = self._sample_probdist(self._priors, rng.random(), self._states) + symbol = self._sample_probdist( + self._outputs[state], rng.random(), self._symbols + ) + tokens.append((symbol, state)) + + for i in range(1, length): + # sample the state transition and symbol prob dists + state = self._sample_probdist( + self._transitions[state], rng.random(), self._states + ) + symbol = self._sample_probdist( + self._outputs[state], rng.random(), self._symbols + ) + tokens.append((symbol, state)) + + return tokens + + def _sample_probdist(self, probdist, p, samples): + cum_p = 0 + for sample in samples: + add_p = probdist.prob(sample) + if cum_p <= p <= cum_p + add_p: + return sample + cum_p += add_p + raise Exception('Invalid probability distribution - ' 'does not sum to one') + + def entropy(self, unlabeled_sequence): + """ + Returns the entropy over labellings of the given sequence. This is + given by:: + + H(O) = - sum_S Pr(S | O) log Pr(S | O) + + where the summation ranges over all state sequences, S. Let + *Z = Pr(O) = sum_S Pr(S, O)}* where the summation ranges over all state + sequences and O is the observation sequence. 
As such the entropy can + be re-expressed as:: + + H = - sum_S Pr(S | O) log [ Pr(S, O) / Z ] + = log Z - sum_S Pr(S | O) log Pr(S, 0) + = log Z - sum_S Pr(S | O) [ log Pr(S_0) + sum_t Pr(S_t | S_{t-1}) + sum_t Pr(O_t | S_t) ] + + The order of summation for the log terms can be flipped, allowing + dynamic programming to be used to calculate the entropy. Specifically, + we use the forward and backward probabilities (alpha, beta) giving:: + + H = log Z - sum_s0 alpha_0(s0) beta_0(s0) / Z * log Pr(s0) + + sum_t,si,sj alpha_t(si) Pr(sj | si) Pr(O_t+1 | sj) beta_t(sj) / Z * log Pr(sj | si) + + sum_t,st alpha_t(st) beta_t(st) / Z * log Pr(O_t | st) + + This simply uses alpha and beta to find the probabilities of partial + sequences, constrained to include the given state(s) at some point in + time. + """ + unlabeled_sequence = self._transform(unlabeled_sequence) + + T = len(unlabeled_sequence) + N = len(self._states) + + alpha = self._forward_probability(unlabeled_sequence) + beta = self._backward_probability(unlabeled_sequence) + normalisation = logsumexp2(alpha[T - 1]) + + entropy = normalisation + + # starting state, t = 0 + for i, state in enumerate(self._states): + p = 2 ** (alpha[0, i] + beta[0, i] - normalisation) + entropy -= p * self._priors.logprob(state) + # print 'p(s_0 = %s) =' % state, p + + # state transitions + for t0 in range(T - 1): + t1 = t0 + 1 + for i0, s0 in enumerate(self._states): + for i1, s1 in enumerate(self._states): + p = 2 ** ( + alpha[t0, i0] + + self._transitions[s0].logprob(s1) + + self._outputs[s1].logprob(unlabeled_sequence[t1][_TEXT]) + + beta[t1, i1] + - normalisation + ) + entropy -= p * self._transitions[s0].logprob(s1) + # print 'p(s_%d = %s, s_%d = %s) =' % (t0, s0, t1, s1), p + + # symbol emissions + for t in range(T): + for i, state in enumerate(self._states): + p = 2 ** (alpha[t, i] + beta[t, i] - normalisation) + entropy -= p * self._outputs[state].logprob( + unlabeled_sequence[t][_TEXT] + ) + # print 'p(s_%d = %s) =' % (t, state), p + + return entropy + + def point_entropy(self, unlabeled_sequence): + """ + Returns the pointwise entropy over the possible states at each + position in the chain, given the observation sequence. 
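For orientation only (this mirrors the demo() function defined later in this file), both entropy measures can be computed on the toy market model; as elsewhere in this module, observation tokens are (symbol, None) pairs:

from nltk.tag import hmm

model, states, symbols = hmm._market_hmm_example()
sequence = [('up', None), ('up', None), ('down', None)]
print(model.entropy(sequence))        # entropy over complete labellings, in bits
print(model.point_entropy(sequence))  # length-3 array of per-position entropies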
+ """ + unlabeled_sequence = self._transform(unlabeled_sequence) + + T = len(unlabeled_sequence) + N = len(self._states) + + alpha = self._forward_probability(unlabeled_sequence) + beta = self._backward_probability(unlabeled_sequence) + normalisation = logsumexp2(alpha[T - 1]) + + entropies = np.zeros(T, np.float64) + probs = np.zeros(N, np.float64) + for t in range(T): + for s in range(N): + probs[s] = alpha[t, s] + beta[t, s] - normalisation + + for s in range(N): + entropies[t] -= 2 ** (probs[s]) * probs[s] + + return entropies + + def _exhaustive_entropy(self, unlabeled_sequence): + unlabeled_sequence = self._transform(unlabeled_sequence) + + T = len(unlabeled_sequence) + N = len(self._states) + + labellings = [[state] for state in self._states] + for t in range(T - 1): + current = labellings + labellings = [] + for labelling in current: + for state in self._states: + labellings.append(labelling + [state]) + + log_probs = [] + for labelling in labellings: + labeled_sequence = unlabeled_sequence[:] + for t, label in enumerate(labelling): + labeled_sequence[t] = (labeled_sequence[t][_TEXT], label) + lp = self.log_probability(labeled_sequence) + log_probs.append(lp) + normalisation = _log_add(*log_probs) + + # ps = zeros((T, N), float64) + # for labelling, lp in zip(labellings, log_probs): + # for t in range(T): + # ps[t, self._states.index(labelling[t])] += \ + # 2**(lp - normalisation) + + # for t in range(T): + # print 'prob[%d] =' % t, ps[t] + + entropy = 0 + for lp in log_probs: + lp -= normalisation + entropy -= 2 ** (lp) * lp + + return entropy + + def _exhaustive_point_entropy(self, unlabeled_sequence): + unlabeled_sequence = self._transform(unlabeled_sequence) + + T = len(unlabeled_sequence) + N = len(self._states) + + labellings = [[state] for state in self._states] + for t in range(T - 1): + current = labellings + labellings = [] + for labelling in current: + for state in self._states: + labellings.append(labelling + [state]) + + log_probs = [] + for labelling in labellings: + labelled_sequence = unlabeled_sequence[:] + for t, label in enumerate(labelling): + labelled_sequence[t] = (labelled_sequence[t][_TEXT], label) + lp = self.log_probability(labelled_sequence) + log_probs.append(lp) + + normalisation = _log_add(*log_probs) + + probabilities = _ninf_array((T, N)) + + for labelling, lp in zip(labellings, log_probs): + lp -= normalisation + for t, label in enumerate(labelling): + index = self._states.index(label) + probabilities[t, index] = _log_add(probabilities[t, index], lp) + + entropies = np.zeros(T, np.float64) + for t in range(T): + for s in range(N): + entropies[t] -= 2 ** (probabilities[t, s]) * probabilities[t, s] + + return entropies + + def _transitions_matrix(self): + """ Return a matrix of transition log probabilities. """ + trans_iter = ( + self._transitions[sj].logprob(si) + for sj in self._states + for si in self._states + ) + + transitions_logprob = np.fromiter(trans_iter, dtype=np.float64) + N = len(self._states) + return transitions_logprob.reshape((N, N)).T + + def _outputs_vector(self, symbol): + """ + Return a vector with log probabilities of emitting a symbol + when entering states. + """ + out_iter = (self._output_logprob(sj, symbol) for sj in self._states) + return np.fromiter(out_iter, dtype=np.float64) + + def _forward_probability(self, unlabeled_sequence): + """ + Return the forward probability matrix, a T by N array of + log-probabilities, where T is the length of the sequence and N is the + number of states. 
Each entry (t, s) gives the probability of being in + state s at time t after observing the partial symbol sequence up to + and including t. + + :param unlabeled_sequence: the sequence of unlabeled symbols + :type unlabeled_sequence: list + :return: the forward log probability matrix + :rtype: array + """ + T = len(unlabeled_sequence) + N = len(self._states) + alpha = _ninf_array((T, N)) + + transitions_logprob = self._transitions_matrix() + + # Initialization + symbol = unlabeled_sequence[0][_TEXT] + for i, state in enumerate(self._states): + alpha[0, i] = self._priors.logprob(state) + self._output_logprob( + state, symbol + ) + + # Induction + for t in range(1, T): + symbol = unlabeled_sequence[t][_TEXT] + output_logprob = self._outputs_vector(symbol) + + for i in range(N): + summand = alpha[t - 1] + transitions_logprob[i] + alpha[t, i] = logsumexp2(summand) + output_logprob[i] + + return alpha + + def _backward_probability(self, unlabeled_sequence): + """ + Return the backward probability matrix, a T by N array of + log-probabilities, where T is the length of the sequence and N is the + number of states. Each entry (t, s) gives the probability of being in + state s at time t after observing the partial symbol sequence from t + .. T. + + :return: the backward log probability matrix + :rtype: array + :param unlabeled_sequence: the sequence of unlabeled symbols + :type unlabeled_sequence: list + """ + T = len(unlabeled_sequence) + N = len(self._states) + beta = _ninf_array((T, N)) + + transitions_logprob = self._transitions_matrix().T + + # initialise the backward values; + # "1" is an arbitrarily chosen value from Rabiner tutorial + beta[T - 1, :] = np.log2(1) + + # inductively calculate remaining backward values + for t in range(T - 2, -1, -1): + symbol = unlabeled_sequence[t + 1][_TEXT] + outputs = self._outputs_vector(symbol) + + for i in range(N): + summand = transitions_logprob[i] + beta[t + 1] + outputs + beta[t, i] = logsumexp2(summand) + + return beta + + def test(self, test_sequence, verbose=False, **kwargs): + """ + Tests the HiddenMarkovModelTagger instance. 
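A minimal usage sketch, following the demo_pos() helper defined later in this file; it assumes the Brown corpus has been downloaded (nltk.download('brown')), and the slice sizes are arbitrary:

from nltk.probability import LidstoneProbDist
from nltk.tag.hmm import HiddenMarkovModelTrainer, load_pos

sents, tag_set, symbols = load_pos(500)              # load_pos() is defined later in this module
trainer = HiddenMarkovModelTrainer(tag_set, symbols)
hmm_tagger = trainer.train_supervised(
    sents[50:], estimator=lambda fd, bins: LidstoneProbDist(fd, 0.1, bins)
)
hmm_tagger.test(sents[:50], verbose=False)           # prints "accuracy over N tokens: ..."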
+ + :param test_sequence: a sequence of labeled test instances + :type test_sequence: list(list) + :param verbose: boolean flag indicating whether training should be + verbose or include printed output + :type verbose: bool + """ + + def words(sent): + return [word for (word, tag) in sent] + + def tags(sent): + return [tag for (word, tag) in sent] + + def flatten(seq): + return list(itertools.chain(*seq)) + + test_sequence = self._transform(test_sequence) + predicted_sequence = list(map(self._tag, map(words, test_sequence))) + + if verbose: + for test_sent, predicted_sent in zip(test_sequence, predicted_sequence): + print( + 'Test:', + ' '.join('%s/%s' % (token, tag) for (token, tag) in test_sent), + ) + print() + print('Untagged:', ' '.join("%s" % token for (token, tag) in test_sent)) + print() + print( + 'HMM-tagged:', + ' '.join('%s/%s' % (token, tag) for (token, tag) in predicted_sent), + ) + print() + print( + 'Entropy:', + self.entropy([(token, None) for (token, tag) in predicted_sent]), + ) + print() + print('-' * 60) + + test_tags = flatten(map(tags, test_sequence)) + predicted_tags = flatten(map(tags, predicted_sequence)) + + acc = accuracy(test_tags, predicted_tags) + count = sum(len(sent) for sent in test_sequence) + print('accuracy over %d tokens: %.2f' % (count, acc * 100)) + + def __repr__(self): + return '' % ( + len(self._states), + len(self._symbols), + ) + + +class HiddenMarkovModelTrainer(object): + """ + Algorithms for learning HMM parameters from training data. These include + both supervised learning (MLE) and unsupervised learning (Baum-Welch). + + Creates an HMM trainer to induce an HMM with the given states and + output symbol alphabet. A supervised and unsupervised training + method may be used. If either of the states or symbols are not given, + these may be derived from supervised training. + + :param states: the set of state labels + :type states: sequence of any + :param symbols: the set of observation symbols + :type symbols: sequence of any + """ + + def __init__(self, states=None, symbols=None): + self._states = states if states else [] + self._symbols = symbols if symbols else [] + + def train(self, labeled_sequences=None, unlabeled_sequences=None, **kwargs): + """ + Trains the HMM using both (or either of) supervised and unsupervised + techniques. 
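A minimal supervised sketch; the two-sentence corpus below is invented purely for illustration:

from nltk.tag.hmm import HiddenMarkovModelTrainer

corpus = [
    [('the', 'DET'), ('dog', 'NOUN'), ('barks', 'VERB')],
    [('the', 'DET'), ('cat', 'NOUN'), ('sleeps', 'VERB')],
]
trainer = HiddenMarkovModelTrainer()
tagger = trainer.train(labeled_sequences=corpus)   # supervised branch, MLE estimates
print(tagger.tag('the dog sleeps'.split()))        # expect DET / NOUN / VERB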
+ + :return: the trained model + :rtype: HiddenMarkovModelTagger + :param labelled_sequences: the supervised training data, a set of + labelled sequences of observations + ex: [ (word_1, tag_1),...,(word_n,tag_n) ] + :type labelled_sequences: list + :param unlabeled_sequences: the unsupervised training data, a set of + sequences of observations + ex: [ word_1, ..., word_n ] + :type unlabeled_sequences: list + :param kwargs: additional arguments to pass to the training methods + """ + assert labeled_sequences or unlabeled_sequences + model = None + if labeled_sequences: + model = self.train_supervised(labeled_sequences, **kwargs) + if unlabeled_sequences: + if model: + kwargs['model'] = model + model = self.train_unsupervised(unlabeled_sequences, **kwargs) + return model + + def _baum_welch_step(self, sequence, model, symbol_to_number): + + N = len(model._states) + M = len(model._symbols) + T = len(sequence) + + # compute forward and backward probabilities + alpha = model._forward_probability(sequence) + beta = model._backward_probability(sequence) + + # find the log probability of the sequence + lpk = logsumexp2(alpha[T - 1]) + + A_numer = _ninf_array((N, N)) + B_numer = _ninf_array((N, M)) + A_denom = _ninf_array(N) + B_denom = _ninf_array(N) + + transitions_logprob = model._transitions_matrix().T + + for t in range(T): + symbol = sequence[t][_TEXT] # not found? FIXME + next_symbol = None + if t < T - 1: + next_symbol = sequence[t + 1][_TEXT] # not found? FIXME + xi = symbol_to_number[symbol] + + next_outputs_logprob = model._outputs_vector(next_symbol) + alpha_plus_beta = alpha[t] + beta[t] + + if t < T - 1: + numer_add = ( + transitions_logprob + + next_outputs_logprob + + beta[t + 1] + + alpha[t].reshape(N, 1) + ) + A_numer = np.logaddexp2(A_numer, numer_add) + A_denom = np.logaddexp2(A_denom, alpha_plus_beta) + else: + B_denom = np.logaddexp2(A_denom, alpha_plus_beta) + + B_numer[:, xi] = np.logaddexp2(B_numer[:, xi], alpha_plus_beta) + + return lpk, A_numer, A_denom, B_numer, B_denom + + def train_unsupervised(self, unlabeled_sequences, update_outputs=True, **kwargs): + """ + Trains the HMM using the Baum-Welch algorithm to maximise the + probability of the data sequence. This is a variant of the EM + algorithm, and is unsupervised in that it doesn't need the state + sequences for the symbols. The code is based on 'A Tutorial on Hidden + Markov Models and Selected Applications in Speech Recognition', + Lawrence Rabiner, IEEE, 1989. 
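A minimal Baum-Welch sketch; the states, symbols, and observation sequences below are made up for illustration, and each token is a (symbol, None) pair as expected by the training loop:

from nltk.tag.hmm import HiddenMarkovModelTrainer

states = ['RAINY', 'SUNNY']
symbols = ['walk', 'shop', 'clean']
sequences = [
    [('walk', None), ('shop', None), ('clean', None)],
    [('clean', None), ('clean', None), ('walk', None)],
]
trainer = HiddenMarkovModelTrainer(states, symbols)
model = trainer.train_unsupervised(sequences, max_iterations=5)  # starts from a random model
print(model.tag('walk shop'.split()))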
+ + :return: the trained model + :rtype: HiddenMarkovModelTagger + :param unlabeled_sequences: the training data, a set of + sequences of observations + :type unlabeled_sequences: list + + kwargs may include following parameters: + + :param model: a HiddenMarkovModelTagger instance used to begin + the Baum-Welch algorithm + :param max_iterations: the maximum number of EM iterations + :param convergence_logprob: the maximum change in log probability to + allow convergence + """ + + # create a uniform HMM, which will be iteratively refined, unless + # given an existing model + model = kwargs.get('model') + if not model: + priors = RandomProbDist(self._states) + transitions = DictionaryConditionalProbDist( + dict((state, RandomProbDist(self._states)) for state in self._states) + ) + outputs = DictionaryConditionalProbDist( + dict((state, RandomProbDist(self._symbols)) for state in self._states) + ) + model = HiddenMarkovModelTagger( + self._symbols, self._states, transitions, outputs, priors + ) + + self._states = model._states + self._symbols = model._symbols + + N = len(self._states) + M = len(self._symbols) + symbol_numbers = dict((sym, i) for i, sym in enumerate(self._symbols)) + + # update model prob dists so that they can be modified + # model._priors = MutableProbDist(model._priors, self._states) + + model._transitions = DictionaryConditionalProbDist( + dict( + (s, MutableProbDist(model._transitions[s], self._states)) + for s in self._states + ) + ) + + if update_outputs: + model._outputs = DictionaryConditionalProbDist( + dict( + (s, MutableProbDist(model._outputs[s], self._symbols)) + for s in self._states + ) + ) + + model.reset_cache() + + # iterate until convergence + converged = False + last_logprob = None + iteration = 0 + max_iterations = kwargs.get('max_iterations', 1000) + epsilon = kwargs.get('convergence_logprob', 1e-6) + + while not converged and iteration < max_iterations: + A_numer = _ninf_array((N, N)) + B_numer = _ninf_array((N, M)) + A_denom = _ninf_array(N) + B_denom = _ninf_array(N) + + logprob = 0 + for sequence in unlabeled_sequences: + sequence = list(sequence) + if not sequence: + continue + + ( + lpk, + seq_A_numer, + seq_A_denom, + seq_B_numer, + seq_B_denom, + ) = self._baum_welch_step(sequence, model, symbol_numbers) + + # add these sums to the global A and B values + for i in range(N): + A_numer[i] = np.logaddexp2(A_numer[i], seq_A_numer[i] - lpk) + B_numer[i] = np.logaddexp2(B_numer[i], seq_B_numer[i] - lpk) + + A_denom = np.logaddexp2(A_denom, seq_A_denom - lpk) + B_denom = np.logaddexp2(B_denom, seq_B_denom - lpk) + + logprob += lpk + + # use the calculated values to update the transition and output + # probability values + for i in range(N): + logprob_Ai = A_numer[i] - A_denom[i] + logprob_Bi = B_numer[i] - B_denom[i] + + # We should normalize all probabilities (see p.391 Huang et al) + # Let sum(P) be K. + # We can divide each Pi by K to make sum(P) == 1. + # Pi' = Pi/K + # log2(Pi') = log2(Pi) - log2(K) + logprob_Ai -= logsumexp2(logprob_Ai) + logprob_Bi -= logsumexp2(logprob_Bi) + + # update output and transition probabilities + si = self._states[i] + + for j in range(N): + sj = self._states[j] + model._transitions[si].update(sj, logprob_Ai[j]) + + if update_outputs: + for k in range(M): + ok = self._symbols[k] + model._outputs[si].update(ok, logprob_Bi[k]) + + # Rabiner says the priors don't need to be updated. I don't + # believe him. 
FIXME + + # test for convergence + if iteration > 0 and abs(logprob - last_logprob) < epsilon: + converged = True + + print('iteration', iteration, 'logprob', logprob) + iteration += 1 + last_logprob = logprob + + return model + + def train_supervised(self, labelled_sequences, estimator=None): + """ + Supervised training maximising the joint probability of the symbol and + state sequences. This is done via collecting frequencies of + transitions between states, symbol observations while within each + state and which states start a sentence. These frequency distributions + are then normalised into probability estimates, which can be + smoothed if desired. + + :return: the trained model + :rtype: HiddenMarkovModelTagger + :param labelled_sequences: the training data, a set of + labelled sequences of observations + :type labelled_sequences: list + :param estimator: a function taking + a FreqDist and a number of bins and returning a CProbDistI; + otherwise a MLE estimate is used + """ + + # default to the MLE estimate + if estimator is None: + estimator = lambda fdist, bins: MLEProbDist(fdist) + + # count occurrences of starting states, transitions out of each state + # and output symbols observed in each state + known_symbols = set(self._symbols) + known_states = set(self._states) + + starting = FreqDist() + transitions = ConditionalFreqDist() + outputs = ConditionalFreqDist() + for sequence in labelled_sequences: + lasts = None + for token in sequence: + state = token[_TAG] + symbol = token[_TEXT] + if lasts is None: + starting[state] += 1 + else: + transitions[lasts][state] += 1 + outputs[state][symbol] += 1 + lasts = state + + # update the state and symbol lists + if state not in known_states: + self._states.append(state) + known_states.add(state) + + if symbol not in known_symbols: + self._symbols.append(symbol) + known_symbols.add(symbol) + + # create probability distributions (with smoothing) + N = len(self._states) + pi = estimator(starting, N) + A = ConditionalProbDist(transitions, estimator, N) + B = ConditionalProbDist(outputs, estimator, len(self._symbols)) + + return HiddenMarkovModelTagger(self._symbols, self._states, A, B, pi) + + +def _ninf_array(shape): + res = np.empty(shape, np.float64) + res.fill(-np.inf) + return res + + +def logsumexp2(arr): + max_ = arr.max() + return np.log2(np.sum(2 ** (arr - max_))) + max_ + + +def _log_add(*values): + """ + Adds the logged values, returning the logarithm of the addition. 
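A quick worked example: the inputs are base-2 log probabilities, so adding two probabilities of 0.25 (log2 = -2) should give 0.5 (log2 = -1):

from nltk.tag.hmm import _log_add

print(_log_add(-2.0, -2.0))   # 0.25 + 0.25 = 0.5, so the result is -1.0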
+ """ + x = max(values) + if x > -np.inf: + sum_diffs = 0 + for value in values: + sum_diffs += 2 ** (value - x) + return x + np.log2(sum_diffs) + else: + return x + + +def _create_hmm_tagger(states, symbols, A, B, pi): + def pd(values, samples): + d = dict(zip(samples, values)) + return DictionaryProbDist(d) + + def cpd(array, conditions, samples): + d = {} + for values, condition in zip(array, conditions): + d[condition] = pd(values, samples) + return DictionaryConditionalProbDist(d) + + A = cpd(A, states, states) + B = cpd(B, states, symbols) + pi = pd(pi, states) + return HiddenMarkovModelTagger( + symbols=symbols, states=states, transitions=A, outputs=B, priors=pi + ) + + +def _market_hmm_example(): + """ + Return an example HMM (described at page 381, Huang et al) + """ + states = ['bull', 'bear', 'static'] + symbols = ['up', 'down', 'unchanged'] + A = np.array([[0.6, 0.2, 0.2], [0.5, 0.3, 0.2], [0.4, 0.1, 0.5]], np.float64) + B = np.array([[0.7, 0.1, 0.2], [0.1, 0.6, 0.3], [0.3, 0.3, 0.4]], np.float64) + pi = np.array([0.5, 0.2, 0.3], np.float64) + + model = _create_hmm_tagger(states, symbols, A, B, pi) + return model, states, symbols + + +def demo(): + # demonstrates HMM probability calculation + + print() + print("HMM probability calculation demo") + print() + + model, states, symbols = _market_hmm_example() + + print('Testing', model) + + for test in [ + ['up', 'up'], + ['up', 'down', 'up'], + ['down'] * 5, + ['unchanged'] * 5 + ['up'], + ]: + + sequence = [(t, None) for t in test] + + print('Testing with state sequence', test) + print('probability =', model.probability(sequence)) + print('tagging = ', model.tag([word for (word, tag) in sequence])) + print('p(tagged) = ', model.probability(sequence)) + print('H = ', model.entropy(sequence)) + print('H_exh = ', model._exhaustive_entropy(sequence)) + print('H(point) = ', model.point_entropy(sequence)) + print('H_exh(point)=', model._exhaustive_point_entropy(sequence)) + print() + + +def load_pos(num_sents): + from nltk.corpus import brown + + sentences = brown.tagged_sents(categories='news')[:num_sents] + + tag_re = re.compile(r'[*]|--|[^+*-]+') + tag_set = set() + symbols = set() + + cleaned_sentences = [] + for sentence in sentences: + for i in range(len(sentence)): + word, tag = sentence[i] + word = word.lower() # normalize + symbols.add(word) # log this word + # Clean up the tag. + tag = tag_re.match(tag).group() + tag_set.add(tag) + sentence[i] = (word, tag) # store cleaned-up tagged token + cleaned_sentences += [sentence] + + return cleaned_sentences, list(tag_set), list(symbols) + + +def demo_pos(): + # demonstrates POS tagging using supervised training + + print() + print("HMM POS tagging demo") + print() + + print('Training HMM...') + labelled_sequences, tag_set, symbols = load_pos(20000) + trainer = HiddenMarkovModelTrainer(tag_set, symbols) + hmm = trainer.train_supervised( + labelled_sequences[10:], + estimator=lambda fd, bins: LidstoneProbDist(fd, 0.1, bins), + ) + + print('Testing...') + hmm.test(labelled_sequences[:10], verbose=True) + + +def _untag(sentences): + unlabeled = [] + for sentence in sentences: + unlabeled.append([(token[_TEXT], None) for token in sentence]) + return unlabeled + + +def demo_pos_bw( + test=10, supervised=20, unsupervised=10, verbose=True, max_iterations=5 +): + # demonstrates the Baum-Welch algorithm in POS tagging + + print() + print("Baum-Welch demo for POS tagging") + print() + + print('Training HMM (supervised, %d sentences)...' 
% supervised) + + sentences, tag_set, symbols = load_pos(test + supervised + unsupervised) + + symbols = set() + for sentence in sentences: + for token in sentence: + symbols.add(token[_TEXT]) + + trainer = HiddenMarkovModelTrainer(tag_set, list(symbols)) + hmm = trainer.train_supervised( + sentences[test : test + supervised], + estimator=lambda fd, bins: LidstoneProbDist(fd, 0.1, bins), + ) + + hmm.test(sentences[:test], verbose=verbose) + + print('Training (unsupervised, %d sentences)...' % unsupervised) + # it's rather slow - so only use 10 samples by default + unlabeled = _untag(sentences[test + supervised :]) + hmm = trainer.train_unsupervised( + unlabeled, model=hmm, max_iterations=max_iterations + ) + hmm.test(sentences[:test], verbose=verbose) + + +def demo_bw(): + # demo Baum Welch by generating some sequences and then performing + # unsupervised training on them + + print() + print("Baum-Welch demo for market example") + print() + + model, states, symbols = _market_hmm_example() + + # generate some random sequences + training = [] + import random + + rng = random.Random() + rng.seed(0) + for i in range(10): + item = model.random_sample(rng, 5) + training.append([(i[0], None) for i in item]) + + # train on those examples, starting with the model that generated them + trainer = HiddenMarkovModelTrainer(states, symbols) + hmm = trainer.train_unsupervised(training, model=model, max_iterations=1000) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/hunpos.py b/venv.bak/lib/python3.7/site-packages/nltk/tag/hunpos.py new file mode 100644 index 0000000..9513338 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tag/hunpos.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Interface to the HunPos POS-tagger +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Peter Ljunglöf +# Dávid Márk Nemeskey (modifications) +# Attila Zséder (modifications) +# URL: +# For license information, see LICENSE.TXT + +""" +A module for interfacing with the HunPos open-source POS-tagger. +""" + +import os +from subprocess import Popen, PIPE + +from six import text_type + +from nltk.internals import find_binary, find_file +from nltk.tag.api import TaggerI + +_hunpos_url = 'http://code.google.com/p/hunpos/' + +_hunpos_charset = 'ISO-8859-1' +"""The default encoding used by hunpos: ISO-8859-1.""" + + +class HunposTagger(TaggerI): + """ + A class for pos tagging with HunPos. The input is the paths to: + - a model trained on training data + - (optionally) the path to the hunpos-tag binary + - (optionally) the encoding of the training data (default: ISO-8859-1) + + Example: + + >>> from nltk.tag import HunposTagger + >>> ht = HunposTagger('en_wsj.model') + >>> ht.tag('What is the airspeed of an unladen swallow ?'.split()) + [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), ('airspeed', 'NN'), ('of', 'IN'), ('an', 'DT'), ('unladen', 'NN'), ('swallow', 'VB'), ('?', '.')] + >>> ht.close() + + This class communicates with the hunpos-tag binary via pipes. When the + tagger object is no longer needed, the close() method should be called to + free system resources. The class supports the context manager interface; if + used in a with statement, the close() method is invoked automatically: + + >>> with HunposTagger('en_wsj.model') as ht: + ... ht.tag('What is the airspeed of an unladen swallow ?'.split()) + ... 
+ [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), ('airspeed', 'NN'), ('of', 'IN'), ('an', 'DT'), ('unladen', 'NN'), ('swallow', 'VB'), ('?', '.')] + """ + + def __init__( + self, path_to_model, path_to_bin=None, encoding=_hunpos_charset, verbose=False + ): + """ + Starts the hunpos-tag executable and establishes a connection with it. + + :param path_to_model: The model file. + :param path_to_bin: The hunpos-tag binary. + :param encoding: The encoding used by the model. Unicode tokens + passed to the tag() and tag_sents() methods are converted to + this charset when they are sent to hunpos-tag. + The default is ISO-8859-1 (Latin-1). + + This parameter is ignored for str tokens, which are sent as-is. + The caller must ensure that tokens are encoded in the right charset. + """ + self._closed = True + hunpos_paths = [ + '.', + '/usr/bin', + '/usr/local/bin', + '/opt/local/bin', + '/Applications/bin', + '~/bin', + '~/Applications/bin', + ] + hunpos_paths = list(map(os.path.expanduser, hunpos_paths)) + + self._hunpos_bin = find_binary( + 'hunpos-tag', + path_to_bin, + env_vars=('HUNPOS_TAGGER',), + searchpath=hunpos_paths, + url=_hunpos_url, + verbose=verbose, + ) + + self._hunpos_model = find_file( + path_to_model, env_vars=('HUNPOS_TAGGER',), verbose=verbose + ) + self._encoding = encoding + self._hunpos = Popen( + [self._hunpos_bin, self._hunpos_model], + shell=False, + stdin=PIPE, + stdout=PIPE, + stderr=PIPE, + ) + self._closed = False + + def __del__(self): + self.close() + + def close(self): + """Closes the pipe to the hunpos executable.""" + if not self._closed: + self._hunpos.communicate() + self._closed = True + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + def tag(self, tokens): + """Tags a single sentence: a list of words. + The tokens should not contain any newline characters. + """ + for token in tokens: + assert "\n" not in token, "Tokens should not contain newlines" + if isinstance(token, text_type): + token = token.encode(self._encoding) + self._hunpos.stdin.write(token + b"\n") + # We write a final empty line to tell hunpos that the sentence is finished: + self._hunpos.stdin.write(b"\n") + self._hunpos.stdin.flush() + + tagged_tokens = [] + for token in tokens: + tagged = self._hunpos.stdout.readline().strip().split(b"\t") + tag = tagged[1] if len(tagged) > 1 else None + tagged_tokens.append((token, tag)) + # We have to read (and dismiss) the final empty line: + self._hunpos.stdout.readline() + + return tagged_tokens + + +# skip doctests if Hunpos tagger is not installed +def setup_module(module): + from nose import SkipTest + + try: + HunposTagger('en_wsj.model') + except LookupError: + raise SkipTest("HunposTagger is not available") diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/mapping.py b/venv.bak/lib/python3.7/site-packages/nltk/tag/mapping.py new file mode 100644 index 0000000..2e38365 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tag/mapping.py @@ -0,0 +1,137 @@ +# Natural Language Toolkit: Tagset Mapping +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Nathan Schneider +# Steven Bird +# URL: +# For license information, see LICENSE.TXT + +""" +Interface for converting POS tags from various treebanks +to the universal tagset of Petrov, Das, & McDonald. 
+ +The tagset consists of the following 12 coarse tags: + +VERB - verbs (all tenses and modes) +NOUN - nouns (common and proper) +PRON - pronouns +ADJ - adjectives +ADV - adverbs +ADP - adpositions (prepositions and postpositions) +CONJ - conjunctions +DET - determiners +NUM - cardinal numbers +PRT - particles or other function words +X - other: foreign words, typos, abbreviations +. - punctuation + +@see: http://arxiv.org/abs/1104.2086 and http://code.google.com/p/universal-pos-tags/ + +""" + +from __future__ import print_function, unicode_literals, division +from collections import defaultdict +from os.path import join + +from nltk.data import load + +_UNIVERSAL_DATA = "taggers/universal_tagset" +_UNIVERSAL_TAGS = ( + 'VERB', + 'NOUN', + 'PRON', + 'ADJ', + 'ADV', + 'ADP', + 'CONJ', + 'DET', + 'NUM', + 'PRT', + 'X', + '.', +) + +# _MAPPINGS = defaultdict(lambda: defaultdict(dict)) +# the mapping between tagset T1 and T2 returns UNK if appied to an unrecognized tag +_MAPPINGS = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 'UNK'))) + + +def _load_universal_map(fileid): + contents = load(join(_UNIVERSAL_DATA, fileid + '.map'), format="text") + + # When mapping to the Universal Tagset, + # map unknown inputs to 'X' not 'UNK' + _MAPPINGS[fileid]['universal'].default_factory = lambda: 'X' + + for line in contents.splitlines(): + line = line.strip() + if line == '': + continue + fine, coarse = line.split('\t') + + assert coarse in _UNIVERSAL_TAGS, 'Unexpected coarse tag: {}'.format(coarse) + assert ( + fine not in _MAPPINGS[fileid]['universal'] + ), 'Multiple entries for original tag: {}'.format(fine) + + _MAPPINGS[fileid]['universal'][fine] = coarse + + +def tagset_mapping(source, target): + """ + Retrieve the mapping dictionary between tagsets. + + >>> tagset_mapping('ru-rnc', 'universal') == {'!': '.', 'A': 'ADJ', 'C': 'CONJ', 'AD': 'ADV',\ + 'NN': 'NOUN', 'VG': 'VERB', 'COMP': 'CONJ', 'NC': 'NUM', 'VP': 'VERB', 'P': 'ADP',\ + 'IJ': 'X', 'V': 'VERB', 'Z': 'X', 'VI': 'VERB', 'YES_NO_SENT': 'X', 'PTCL': 'PRT'} + True + """ + + if source not in _MAPPINGS or target not in _MAPPINGS[source]: + if target == 'universal': + _load_universal_map(source) + # Added the new Russian National Corpus mappings because the + # Russian model for nltk.pos_tag() uses it. + _MAPPINGS['ru-rnc-new']['universal'] = { + 'A': 'ADJ', + 'A-PRO': 'PRON', + 'ADV': 'ADV', + 'ADV-PRO': 'PRON', + 'ANUM': 'ADJ', + 'CONJ': 'CONJ', + 'INTJ': 'X', + 'NONLEX': '.', + 'NUM': 'NUM', + 'PARENTH': 'PRT', + 'PART': 'PRT', + 'PR': 'ADP', + 'PRAEDIC': 'PRT', + 'PRAEDIC-PRO': 'PRON', + 'S': 'NOUN', + 'S-PRO': 'PRON', + 'V': 'VERB', + } + + return _MAPPINGS[source][target] + + +def map_tag(source, target, source_tag): + """ + Maps the tag from the source tagset to the target tagset. + + >>> map_tag('en-ptb', 'universal', 'VBZ') + 'VERB' + >>> map_tag('en-ptb', 'universal', 'VBP') + 'VERB' + >>> map_tag('en-ptb', 'universal', '``') + '.' 
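As a small usage sketch (the sentence is made up, and the universal_tagset resource must be installed via nltk.download('universal_tagset')), a whole Penn-Treebank-tagged sentence can be projected onto the coarse tagset:

from nltk.tag.mapping import map_tag

ptb_tagged = [('The', 'DT'), ('cat', 'NN'), ('sleeps', 'VBZ'), ('.', '.')]
universal = [(word, map_tag('en-ptb', 'universal', tag)) for word, tag in ptb_tagged]
print(universal)   # DT -> DET, NN -> NOUN, VBZ -> VERB, '.' -> '.'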
+ """ + + # we need a systematic approach to naming + if target == 'universal': + if source == 'wsj': + source = 'en-ptb' + if source == 'brown': + source = 'en-brown' + + return tagset_mapping(source, target)[source_tag] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/perceptron.py b/venv.bak/lib/python3.7/site-packages/nltk/tag/perceptron.py new file mode 100644 index 0000000..ebe9b43 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tag/perceptron.py @@ -0,0 +1,354 @@ + # -*- coding: utf-8 -*- +# This module is a port of the Textblob Averaged Perceptron Tagger +# Author: Matthew Honnibal , +# Long Duong (NLTK port) +# URL: +# +# Copyright 2013 Matthew Honnibal +# NLTK modifications Copyright 2015 The NLTK Project +# +# This module is provided under the terms of the MIT License. + +from __future__ import absolute_import +from __future__ import print_function, division + +import random +from collections import defaultdict +import pickle +import logging + +from nltk.tag.api import TaggerI +from nltk.data import find, load +from nltk.compat import python_2_unicode_compatible + +try: + import numpy as np +except ImportError: + pass + +PICKLE = "averaged_perceptron_tagger.pickle" + + +class AveragedPerceptron(object): + + '''An averaged perceptron, as implemented by Matthew Honnibal. + + See more implementation details here: + https://explosion.ai/blog/part-of-speech-pos-tagger-in-python + ''' + + def __init__(self): + # Each feature gets its own weight vector, so weights is a dict-of-dicts + self.weights = {} + self.classes = set() + # The accumulated values, for the averaging. These will be keyed by + # feature/clas tuples + self._totals = defaultdict(int) + # The last time the feature was changed, for the averaging. Also + # keyed by feature/clas tuples + # (tstamps is short for timestamps) + self._tstamps = defaultdict(int) + # Number of instances seen + self.i = 0 + + + def _softmax(self, scores): + s = np.fromiter(scores.values(), dtype=float) + exps = np.exp(s) + return exps / np.sum(exps) + + + def predict(self, features, return_conf=False): + '''Dot-product the features and current weights and return the best label.''' + scores = defaultdict(float) + for feat, value in features.items(): + if feat not in self.weights or value == 0: + continue + weights = self.weights[feat] + for label, weight in weights.items(): + scores[label] += value * weight + + # Do a secondary alphabetic sort, for stability + best_label = max(self.classes, key=lambda label: (scores[label], label)) + # compute the confidence + conf = max(self._softmax(scores)) if return_conf == True else None + + return best_label, conf + + + def update(self, truth, guess, features): + '''Update the feature weights.''' + + def upd_feat(c, f, w, v): + param = (f, c) + self._totals[param] += (self.i - self._tstamps[param]) * w + self._tstamps[param] = self.i + self.weights[f][c] = w + v + + self.i += 1 + if truth == guess: + return None + for f in features: + weights = self.weights.setdefault(f, {}) + upd_feat(truth, f, weights.get(truth, 0.0), 1.0) + upd_feat(guess, f, weights.get(guess, 0.0), -1.0) + + def average_weights(self): + '''Average weights from all iterations.''' + for feat, weights in self.weights.items(): + new_feat_weights = {} + for clas, weight in weights.items(): + param = (feat, clas) + total = self._totals[param] + total += (self.i - self._tstamps[param]) * weight + averaged = round(total / self.i, 3) + if averaged: + new_feat_weights[clas] = averaged + self.weights[feat] = 
new_feat_weights + + def save(self, path): + '''Save the pickled model weights.''' + with open(path, 'wb') as fout: + return pickle.dump(dict(self.weights), fout) + + def load(self, path): + '''Load the pickled model weights.''' + self.weights = load(path) + + +@python_2_unicode_compatible +class PerceptronTagger(TaggerI): + + ''' + Greedy Averaged Perceptron tagger, as implemented by Matthew Honnibal. + See more implementation details here: + https://explosion.ai/blog/part-of-speech-pos-tagger-in-python + + >>> from nltk.tag.perceptron import PerceptronTagger + + Train the model + + >>> tagger = PerceptronTagger(load=False) + + >>> tagger.train([[('today','NN'),('is','VBZ'),('good','JJ'),('day','NN')], + ... [('yes','NNS'),('it','PRP'),('beautiful','JJ')]]) + + >>> tagger.tag(['today','is','a','beautiful','day']) + [('today', 'NN'), ('is', 'PRP'), ('a', 'PRP'), ('beautiful', 'JJ'), ('day', 'NN')] + + Use the pretrain model (the default constructor) + + >>> pretrain = PerceptronTagger() + + >>> pretrain.tag('The quick brown fox jumps over the lazy dog'.split()) + [('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN')] + + >>> pretrain.tag("The red cat".split()) + [('The', 'DT'), ('red', 'JJ'), ('cat', 'NN')] + ''' + + START = ['-START-', '-START2-'] + END = ['-END-', '-END2-'] + + def __init__(self, load=True): + ''' + :param load: Load the pickled model upon instantiation. + ''' + self.model = AveragedPerceptron() + self.tagdict = {} + self.classes = set() + if load: + AP_MODEL_LOC = 'file:' + str( + find('taggers/averaged_perceptron_tagger/' + PICKLE) + ) + self.load(AP_MODEL_LOC) + + def tag(self, tokens, return_conf=False, use_tagdict=True): + ''' + Tag tokenized sentences. + :params tokens: list of word + :type tokens: list(str) + ''' + prev, prev2 = self.START + output = [] + + context = self.START + [self.normalize(w) for w in tokens] + self.END + for i, word in enumerate(tokens): + tag, conf = (self.tagdict.get(word), 1.0) if use_tagdict == True else (None, None) + if not tag: + features = self._get_features(i, word, context, prev, prev2) + tag, conf = self.model.predict(features, return_conf) + output.append((word, tag, conf) if return_conf == True else (word, tag)) + + prev2 = prev + prev = tag + + return output + + def train(self, sentences, save_loc=None, nr_iter=5): + '''Train a model from sentences, and save it at ``save_loc``. ``nr_iter`` + controls the number of Perceptron training iterations. + + :param sentences: A list or iterator of sentences, where each sentence + is a list of (words, tags) tuples. + :param save_loc: If not ``None``, saves a pickled model in this location. + :param nr_iter: Number of training iterations. + ''' + # We'd like to allow ``sentences`` to be either a list or an iterator, + # the latter being especially important for a large training dataset. + # Because ``self._make_tagdict(sentences)`` runs regardless, we make + # it populate ``self._sentences`` (a list) with all the sentences. + # This saves the overheard of just iterating through ``sentences`` to + # get the list by ``sentences = list(sentences)``. + + self._sentences = list() # to be populated by self._make_tagdict... 
+ self._make_tagdict(sentences) + self.model.classes = self.classes + for iter_ in range(nr_iter): + c = 0 + n = 0 + for sentence in self._sentences: + words, tags = zip(*sentence) + + prev, prev2 = self.START + context = self.START + [self.normalize(w) for w in words] + self.END + for i, word in enumerate(words): + guess = self.tagdict.get(word) + if not guess: + feats = self._get_features(i, word, context, prev, prev2) + guess,_ = self.model.predict(feats) + self.model.update(tags[i], guess, feats) + prev2 = prev + prev = guess + c += guess == tags[i] + n += 1 + random.shuffle(self._sentences) + logging.info("Iter {0}: {1}/{2}={3}".format(iter_, c, n, _pc(c, n))) + + # We don't need the training sentences anymore, and we don't want to + # waste space on them when we pickle the trained tagger. + self._sentences = None + + self.model.average_weights() + # Pickle as a binary file + if save_loc is not None: + with open(save_loc, 'wb') as fout: + # changed protocol from -1 to 2 to make pickling Python 2 compatible + pickle.dump((self.model.weights, self.tagdict, self.classes), fout, 2) + + def load(self, loc): + ''' + :param loc: Load a pickled model at location. + :type loc: str + ''' + + self.model.weights, self.tagdict, self.classes = load(loc) + self.model.classes = self.classes + + def normalize(self, word): + ''' + Normalization used in pre-processing. + - All words are lower cased + - Groups of digits of length 4 are represented as !YEAR; + - Other digits are represented as !DIGITS + + :rtype: str + ''' + if '-' in word and word[0] != '-': + return '!HYPHEN' + elif word.isdigit() and len(word) == 4: + return '!YEAR' + elif word[0].isdigit(): + return '!DIGITS' + else: + return word.lower() + + def _get_features(self, i, word, context, prev, prev2): + '''Map tokens into a feature representation, implemented as a + {hashable: int} dict. If the features change, a new model must be + trained. + ''' + + def add(name, *args): + features[' '.join((name,) + tuple(args))] += 1 + + i += len(self.START) + features = defaultdict(int) + # It's useful to have a constant feature, which acts sort of like a prior + add('bias') + add('i suffix', word[-3:]) + add('i pref1', word[0]) + add('i-1 tag', prev) + add('i-2 tag', prev2) + add('i tag+i-2 tag', prev, prev2) + add('i word', context[i]) + add('i-1 tag+i word', prev, context[i]) + add('i-1 word', context[i - 1]) + add('i-1 suffix', context[i - 1][-3:]) + add('i-2 word', context[i - 2]) + add('i+1 word', context[i + 1]) + add('i+1 suffix', context[i + 1][-3:]) + add('i+2 word', context[i + 2]) + return features + + def _make_tagdict(self, sentences): + ''' + Make a tag dictionary for single-tag words. + :param sentences: A list of list of (word, tag) tuples. 
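Restated outside the class for clarity, the thresholding rule applied in the method body below amounts to the following (the counts here are invented):

counts = {'the': {'DT': 1000}, 'back': {'RB': 60, 'NN': 45, 'VB': 5}}
freq_thresh, ambiguity_thresh = 20, 0.97
tagdict = {}
for word, tag_freqs in counts.items():
    tag, mode = max(tag_freqs.items(), key=lambda item: item[1])
    n = sum(tag_freqs.values())
    if n >= freq_thresh and (mode / n) >= ambiguity_thresh:
        tagdict[word] = tag
print(tagdict)   # only 'the' is frequent and unambiguous enough: {'the': 'DT'}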
+ ''' + counts = defaultdict(lambda: defaultdict(int)) + for sentence in sentences: + self._sentences.append(sentence) + for word, tag in sentence: + counts[word][tag] += 1 + self.classes.add(tag) + freq_thresh = 20 + ambiguity_thresh = 0.97 + for word, tag_freqs in counts.items(): + tag, mode = max(tag_freqs.items(), key=lambda item: item[1]) + n = sum(tag_freqs.values()) + # Don't add rare words to the tag dictionary + # Only add quite unambiguous words + if n >= freq_thresh and (mode / n) >= ambiguity_thresh: + self.tagdict[word] = tag + + +def _pc(n, d): + return (n / d) * 100 + + +def _load_data_conll_format(filename): + print('Read from file: ', filename) + with open(filename, 'rb') as fin: + sentences = [] + sentence = [] + for line in fin.readlines(): + line = line.strip() + # print line + if len(line) == 0: + sentences.append(sentence) + sentence = [] + continue + tokens = line.split('\t') + word = tokens[1] + tag = tokens[4] + sentence.append((word, tag)) + return sentences + + +def _get_pretrain_model(): + # Train and test on English part of ConLL data (WSJ part of Penn Treebank) + # Train: section 2-11 + # Test : section 23 + tagger = PerceptronTagger() + training = _load_data_conll_format('english_ptb_train.conll') + testing = _load_data_conll_format('english_ptb_test.conll') + print('Size of training and testing (sentence)', len(training), len(testing)) + # Train and save the model + tagger.train(training, PICKLE) + print('Accuracy : ', tagger.evaluate(testing)) + + +if __name__ == '__main__': + # _get_pretrain_model() + pass diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/senna.py b/venv.bak/lib/python3.7/site-packages/nltk/tag/senna.py new file mode 100644 index 0000000..8404656 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tag/senna.py @@ -0,0 +1,148 @@ +# encoding: utf-8 +# Natural Language Toolkit: Senna POS Tagger +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Rami Al-Rfou' +# URL: +# For license information, see LICENSE.TXT + +""" +Senna POS tagger, NER Tagger, Chunk Tagger + +The input is: +- path to the directory that contains SENNA executables. 
If the path is incorrect, + SennaTagger will automatically search for executable file specified in SENNA environment variable +- (optionally) the encoding of the input data (default:utf-8) + +Note: Unit tests for this module can be found in test/unit/test_senna.py + + >>> from nltk.tag import SennaTagger + >>> tagger = SennaTagger('/usr/share/senna-v3.0') + >>> tagger.tag('What is the airspeed of an unladen swallow ?'.split()) # doctest: +SKIP + [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), ('airspeed', 'NN'), + ('of', 'IN'), ('an', 'DT'), ('unladen', 'NN'), ('swallow', 'NN'), ('?', '.')] + + >>> from nltk.tag import SennaChunkTagger + >>> chktagger = SennaChunkTagger('/usr/share/senna-v3.0') + >>> chktagger.tag('What is the airspeed of an unladen swallow ?'.split()) # doctest: +SKIP + [('What', 'B-NP'), ('is', 'B-VP'), ('the', 'B-NP'), ('airspeed', 'I-NP'), + ('of', 'B-PP'), ('an', 'B-NP'), ('unladen', 'I-NP'), ('swallow', 'I-NP'), + ('?', 'O')] + + >>> from nltk.tag import SennaNERTagger + >>> nertagger = SennaNERTagger('/usr/share/senna-v3.0') + >>> nertagger.tag('Shakespeare theatre was in London .'.split()) # doctest: +SKIP + [('Shakespeare', 'B-PER'), ('theatre', 'O'), ('was', 'O'), ('in', 'O'), + ('London', 'B-LOC'), ('.', 'O')] + >>> nertagger.tag('UN headquarters are in NY , USA .'.split()) # doctest: +SKIP + [('UN', 'B-ORG'), ('headquarters', 'O'), ('are', 'O'), ('in', 'O'), + ('NY', 'B-LOC'), (',', 'O'), ('USA', 'B-LOC'), ('.', 'O')] +""" + +from nltk.compat import python_2_unicode_compatible +from nltk.classify import Senna + + +@python_2_unicode_compatible +class SennaTagger(Senna): + def __init__(self, path, encoding='utf-8'): + super(SennaTagger, self).__init__(path, ['pos'], encoding) + + def tag_sents(self, sentences): + """ + Applies the tag method over a list of sentences. This method will return + for each sentence a list of tuples of (word, tag). + """ + tagged_sents = super(SennaTagger, self).tag_sents(sentences) + for i in range(len(tagged_sents)): + for j in range(len(tagged_sents[i])): + annotations = tagged_sents[i][j] + tagged_sents[i][j] = (annotations['word'], annotations['pos']) + return tagged_sents + + +@python_2_unicode_compatible +class SennaChunkTagger(Senna): + def __init__(self, path, encoding='utf-8'): + super(SennaChunkTagger, self).__init__(path, ['chk'], encoding) + + def tag_sents(self, sentences): + """ + Applies the tag method over a list of sentences. This method will return + for each sentence a list of tuples of (word, tag). + """ + tagged_sents = super(SennaChunkTagger, self).tag_sents(sentences) + for i in range(len(tagged_sents)): + for j in range(len(tagged_sents[i])): + annotations = tagged_sents[i][j] + tagged_sents[i][j] = (annotations['word'], annotations['chk']) + return tagged_sents + + def bio_to_chunks(self, tagged_sent, chunk_type): + """ + Extracts the chunks in a BIO chunk-tagged sentence. 
+ + >>> from nltk.tag import SennaChunkTagger + >>> chktagger = SennaChunkTagger('/usr/share/senna-v3.0') + >>> sent = 'What is the airspeed of an unladen swallow ?'.split() + >>> tagged_sent = chktagger.tag(sent) # doctest: +SKIP + >>> tagged_sent # doctest: +SKIP + [('What', 'B-NP'), ('is', 'B-VP'), ('the', 'B-NP'), ('airspeed', 'I-NP'), + ('of', 'B-PP'), ('an', 'B-NP'), ('unladen', 'I-NP'), ('swallow', 'I-NP'), + ('?', 'O')] + >>> list(chktagger.bio_to_chunks(tagged_sent, chunk_type='NP')) # doctest: +SKIP + [('What', '0'), ('the airspeed', '2-3'), ('an unladen swallow', '5-6-7')] + + :param tagged_sent: A list of tuples of word and BIO chunk tag. + :type tagged_sent: list(tuple) + :param tagged_sent: The chunk tag that users want to extract, e.g. 'NP' or 'VP' + :type tagged_sent: str + + :return: An iterable of tuples of chunks that users want to extract + and their corresponding indices. + :rtype: iter(tuple(str)) + """ + current_chunk = [] + current_chunk_position = [] + for idx, word_pos in enumerate(tagged_sent): + word, pos = word_pos + if '-' + chunk_type in pos: # Append the word to the current_chunk. + current_chunk.append((word)) + current_chunk_position.append((idx)) + else: + if current_chunk: # Flush the full chunk when out of an NP. + _chunk_str = ' '.join(current_chunk) + _chunk_pos_str = '-'.join(map(str, current_chunk_position)) + yield _chunk_str, _chunk_pos_str + current_chunk = [] + current_chunk_position = [] + if current_chunk: # Flush the last chunk. + yield ' '.join(current_chunk), '-'.join(map(str, current_chunk_position)) + + +@python_2_unicode_compatible +class SennaNERTagger(Senna): + def __init__(self, path, encoding='utf-8'): + super(SennaNERTagger, self).__init__(path, ['ner'], encoding) + + def tag_sents(self, sentences): + """ + Applies the tag method over a list of sentences. This method will return + for each sentence a list of tuples of (word, tag). + """ + tagged_sents = super(SennaNERTagger, self).tag_sents(sentences) + for i in range(len(tagged_sents)): + for j in range(len(tagged_sents[i])): + annotations = tagged_sents[i][j] + tagged_sents[i][j] = (annotations['word'], annotations['ner']) + return tagged_sents + + +# skip doctests if Senna is not installed +def setup_module(module): + from nose import SkipTest + + try: + tagger = Senna('/usr/share/senna-v3.0', ['pos', 'chk', 'ner']) + except OSError: + raise SkipTest("Senna executable not found") diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/sequential.py b/venv.bak/lib/python3.7/site-packages/nltk/tag/sequential.py new file mode 100644 index 0000000..3d3a767 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tag/sequential.py @@ -0,0 +1,760 @@ +# Natural Language Toolkit: Sequential Backoff Taggers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird (minor additions) +# Tiago Tresoldi (original affix tagger) +# URL: +# For license information, see LICENSE.TXT + +""" +Classes for tagging sentences sequentially, left to right. The +abstract base class SequentialBackoffTagger serves as the base +class for all the taggers in this module. Tagging of individual words +is performed by the method ``choose_tag()``, which is defined by +subclasses of SequentialBackoffTagger. If a tagger is unable to +determine a tag for the specified token, then its backoff tagger is +consulted instead. Any SequentialBackoffTagger may serve as a +backoff tagger for any other SequentialBackoffTagger. 
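A minimal sketch of such a backoff chain, using a made-up two-sentence training set; each tagger consults the next one down whenever it has no answer:

from nltk.tag import BigramTagger, DefaultTagger, UnigramTagger

train = [
    [('the', 'DT'), ('cat', 'NN'), ('sat', 'VBD')],
    [('the', 'DT'), ('dog', 'NN'), ('sat', 'VBD')],
]
t0 = DefaultTagger('NN')               # last resort: tag everything NN
t1 = UnigramTagger(train, backoff=t0)  # per-word most frequent tag
t2 = BigramTagger(train, backoff=t1)   # previous tag + current word
print(t2.tag('the cat ran'.split()))   # 'ran' is unseen, so it falls through to the default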
+""" +from __future__ import print_function, unicode_literals +from abc import abstractmethod + +import re + +from nltk.probability import ConditionalFreqDist +from nltk.classify import NaiveBayesClassifier +from nltk.compat import python_2_unicode_compatible + +from nltk.tag.api import TaggerI, FeaturesetTaggerI + +from nltk import jsontags + + +###################################################################### +# Abstract Base Classes +###################################################################### +class SequentialBackoffTagger(TaggerI): + """ + An abstract base class for taggers that tags words sequentially, + left to right. Tagging of individual words is performed by the + ``choose_tag()`` method, which should be defined by subclasses. If + a tagger is unable to determine a tag for the specified token, + then its backoff tagger is consulted. + + :ivar _taggers: A list of all the taggers that should be tried to + tag a token (i.e., self and its backoff taggers). + """ + + def __init__(self, backoff=None): + if backoff is None: + self._taggers = [self] + else: + self._taggers = [self] + backoff._taggers + + @property + def backoff(self): + """The backoff tagger for this tagger.""" + return self._taggers[1] if len(self._taggers) > 1 else None + + def tag(self, tokens): + # docs inherited from TaggerI + tags = [] + for i in range(len(tokens)): + tags.append(self.tag_one(tokens, i, tags)) + return list(zip(tokens, tags)) + + def tag_one(self, tokens, index, history): + """ + Determine an appropriate tag for the specified token, and + return that tag. If this tagger is unable to determine a tag + for the specified token, then its backoff tagger is consulted. + + :rtype: str + :type tokens: list + :param tokens: The list of words that are being tagged. + :type index: int + :param index: The index of the word whose tag should be + returned. + :type history: list(str) + :param history: A list of the tags for all words before *index*. + """ + tag = None + for tagger in self._taggers: + tag = tagger.choose_tag(tokens, index, history) + if tag is not None: + break + return tag + + @abstractmethod + def choose_tag(self, tokens, index, history): + """ + Decide which tag should be used for the specified token, and + return that tag. If this tagger is unable to determine a tag + for the specified token, return None -- do not consult + the backoff tagger. This method should be overridden by + subclasses of SequentialBackoffTagger. + + :rtype: str + :type tokens: list + :param tokens: The list of words that are being tagged. + :type index: int + :param index: The index of the word whose tag should be + returned. + :type history: list(str) + :param history: A list of the tags for all words before *index*. + """ + + +@python_2_unicode_compatible +class ContextTagger(SequentialBackoffTagger): + """ + An abstract base class for sequential backoff taggers that choose + a tag for a token based on the value of its "context". Different + subclasses are used to define different contexts. + + A ContextTagger chooses the tag for a token by calculating the + token's context, and looking up the corresponding tag in a table. + This table can be constructed manually; or it can be automatically + constructed based on a training corpus, using the ``_train()`` + factory method. + + :ivar _context_to_tag: Dictionary mapping contexts to tags. + """ + + def __init__(self, context_to_tag, backoff=None): + """ + :param context_to_tag: A dictionary mapping contexts to tags. 
+ :param backoff: The backoff tagger that should be used for this tagger. + """ + SequentialBackoffTagger.__init__(self, backoff) + self._context_to_tag = context_to_tag if context_to_tag else {} + + @abstractmethod + def context(self, tokens, index, history): + """ + :return: the context that should be used to look up the tag + for the specified token; or None if the specified token + should not be handled by this tagger. + :rtype: (hashable) + """ + + def choose_tag(self, tokens, index, history): + context = self.context(tokens, index, history) + return self._context_to_tag.get(context) + + def size(self): + """ + :return: The number of entries in the table used by this + tagger to map from contexts to tags. + """ + return len(self._context_to_tag) + + def __repr__(self): + return '<%s: size=%d>' % (self.__class__.__name__, self.size()) + + def _train(self, tagged_corpus, cutoff=0, verbose=False): + """ + Initialize this ContextTagger's ``_context_to_tag`` table + based on the given training data. In particular, for each + context ``c`` in the training data, set + ``_context_to_tag[c]`` to the most frequent tag for that + context. However, exclude any contexts that are already + tagged perfectly by the backoff tagger(s). + + The old value of ``self._context_to_tag`` (if any) is discarded. + + :param tagged_corpus: A tagged corpus. Each item should be + a list of (word, tag tuples. + :param cutoff: If the most likely tag for a context occurs + fewer than cutoff times, then exclude it from the + context-to-tag table for the new tagger. + """ + + token_count = hit_count = 0 + + # A context is considered 'useful' if it's not already tagged + # perfectly by the backoff tagger. + useful_contexts = set() + + # Count how many times each tag occurs in each context. + fd = ConditionalFreqDist() + for sentence in tagged_corpus: + tokens, tags = zip(*sentence) + for index, (token, tag) in enumerate(sentence): + # Record the event. + token_count += 1 + context = self.context(tokens, index, tags[:index]) + if context is None: + continue + fd[context][tag] += 1 + # If the backoff got it wrong, this context is useful: + if self.backoff is None or tag != self.backoff.tag_one( + tokens, index, tags[:index] + ): + useful_contexts.add(context) + + # Build the context_to_tag table -- for each context, figure + # out what the most likely tag is. Only include contexts that + # we've seen at least `cutoff` times. + for context in useful_contexts: + best_tag = fd[context].max() + hits = fd[context][best_tag] + if hits > cutoff: + self._context_to_tag[context] = best_tag + hit_count += hits + + # Display some stats, if requested. + if verbose: + size = len(self._context_to_tag) + backoff = 100 - (hit_count * 100.0) / token_count + pruning = 100 - (size * 100.0) / len(fd.conditions()) + print("[Trained Unigram tagger:", end=' ') + print("size=%d, backoff=%.2f%%, pruning=%.2f%%]" % (size, backoff, pruning)) + + +###################################################################### +# Tagger Classes +###################################################################### +@python_2_unicode_compatible +@jsontags.register_tag +class DefaultTagger(SequentialBackoffTagger): + """ + A tagger that assigns the same tag to every token. 
+ + >>> from nltk.tag import DefaultTagger + >>> default_tagger = DefaultTagger('NN') + >>> list(default_tagger.tag('This is a test'.split())) + [('This', 'NN'), ('is', 'NN'), ('a', 'NN'), ('test', 'NN')] + + This tagger is recommended as a backoff tagger, in cases where + a more powerful tagger is unable to assign a tag to the word + (e.g. because the word was not seen during training). + + :param tag: The tag to assign to each token + :type tag: str + """ + + json_tag = 'nltk.tag.sequential.DefaultTagger' + + def __init__(self, tag): + self._tag = tag + SequentialBackoffTagger.__init__(self, None) + + def encode_json_obj(self): + return self._tag + + @classmethod + def decode_json_obj(cls, obj): + tag = obj + return cls(tag) + + def choose_tag(self, tokens, index, history): + return self._tag # ignore token and history + + def __repr__(self): + return '' % self._tag + + +@jsontags.register_tag +class NgramTagger(ContextTagger): + """ + A tagger that chooses a token's tag based on its word string and + on the preceding n word's tags. In particular, a tuple + (tags[i-n:i-1], words[i]) is looked up in a table, and the + corresponding tag is returned. N-gram taggers are typically + trained on a tagged corpus. + + Train a new NgramTagger using the given training data or + the supplied model. In particular, construct a new tagger + whose table maps from each context (tag[i-n:i-1], word[i]) + to the most frequent tag for that context. But exclude any + contexts that are already tagged perfectly by the backoff + tagger. + + :param train: A tagged corpus consisting of a list of tagged + sentences, where each sentence is a list of (word, tag) tuples. + :param backoff: A backoff tagger, to be used by the new + tagger if it encounters an unknown context. + :param cutoff: If the most likely tag for a context occurs + fewer than *cutoff* times, then exclude it from the + context-to-tag table for the new tagger. + """ + + json_tag = 'nltk.tag.sequential.NgramTagger' + + def __init__( + self, n, train=None, model=None, backoff=None, cutoff=0, verbose=False + ): + self._n = n + self._check_params(train, model) + + ContextTagger.__init__(self, model, backoff) + + if train: + self._train(train, cutoff, verbose) + + def encode_json_obj(self): + return self._n, self._context_to_tag, self.backoff + + @classmethod + def decode_json_obj(cls, obj): + _n, _context_to_tag, backoff = obj + return cls(_n, model=_context_to_tag, backoff=backoff) + + def context(self, tokens, index, history): + tag_context = tuple(history[max(0, index - self._n + 1) : index]) + return tag_context, tokens[index] + + +@jsontags.register_tag +class UnigramTagger(NgramTagger): + """ + Unigram Tagger + + The UnigramTagger finds the most likely tag for each word in a training + corpus, and then uses that information to assign tags to new tokens. + + >>> from nltk.corpus import brown + >>> from nltk.tag import UnigramTagger + >>> test_sent = brown.sents(categories='news')[0] + >>> unigram_tagger = UnigramTagger(brown.tagged_sents(categories='news')[:500]) + >>> for tok, tag in unigram_tagger.tag(test_sent): + ... 
print("(%s, %s), " % (tok, tag)) + (The, AT), (Fulton, NP-TL), (County, NN-TL), (Grand, JJ-TL), + (Jury, NN-TL), (said, VBD), (Friday, NR), (an, AT), + (investigation, NN), (of, IN), (Atlanta's, NP$), (recent, JJ), + (primary, NN), (election, NN), (produced, VBD), (``, ``), + (no, AT), (evidence, NN), ('', ''), (that, CS), (any, DTI), + (irregularities, NNS), (took, VBD), (place, NN), (., .), + + :param train: The corpus of training data, a list of tagged sentences + :type train: list(list(tuple(str, str))) + :param model: The tagger model + :type model: dict + :param backoff: Another tagger which this tagger will consult when it is + unable to tag a word + :type backoff: TaggerI + :param cutoff: The number of instances of training data the tagger must see + in order not to use the backoff tagger + :type cutoff: int + """ + + json_tag = 'nltk.tag.sequential.UnigramTagger' + + def __init__(self, train=None, model=None, backoff=None, cutoff=0, verbose=False): + NgramTagger.__init__(self, 1, train, model, backoff, cutoff, verbose) + + def encode_json_obj(self): + return self._context_to_tag, self.backoff + + @classmethod + def decode_json_obj(cls, obj): + _context_to_tag, backoff = obj + return cls(model=_context_to_tag, backoff=backoff) + + def context(self, tokens, index, history): + return tokens[index] + + +@jsontags.register_tag +class BigramTagger(NgramTagger): + """ + A tagger that chooses a token's tag based its word string and on + the preceding words' tag. In particular, a tuple consisting + of the previous tag and the word is looked up in a table, and + the corresponding tag is returned. + + :param train: The corpus of training data, a list of tagged sentences + :type train: list(list(tuple(str, str))) + :param model: The tagger model + :type model: dict + :param backoff: Another tagger which this tagger will consult when it is + unable to tag a word + :type backoff: TaggerI + :param cutoff: The number of instances of training data the tagger must see + in order not to use the backoff tagger + :type cutoff: int + """ + + json_tag = 'nltk.tag.sequential.BigramTagger' + + def __init__(self, train=None, model=None, backoff=None, cutoff=0, verbose=False): + NgramTagger.__init__(self, 2, train, model, backoff, cutoff, verbose) + + def encode_json_obj(self): + return self._context_to_tag, self.backoff + + @classmethod + def decode_json_obj(cls, obj): + _context_to_tag, backoff = obj + return cls(model=_context_to_tag, backoff=backoff) + + +@jsontags.register_tag +class TrigramTagger(NgramTagger): + """ + A tagger that chooses a token's tag based its word string and on + the preceding two words' tags. In particular, a tuple consisting + of the previous two tags and the word is looked up in a table, and + the corresponding tag is returned. 
+ + :param train: The corpus of training data, a list of tagged sentences + :type train: list(list(tuple(str, str))) + :param model: The tagger model + :type model: dict + :param backoff: Another tagger which this tagger will consult when it is + unable to tag a word + :type backoff: TaggerI + :param cutoff: The number of instances of training data the tagger must see + in order not to use the backoff tagger + :type cutoff: int + """ + + json_tag = 'nltk.tag.sequential.TrigramTagger' + + def __init__(self, train=None, model=None, backoff=None, cutoff=0, verbose=False): + NgramTagger.__init__(self, 3, train, model, backoff, cutoff, verbose) + + def encode_json_obj(self): + return self._context_to_tag, self.backoff + + @classmethod + def decode_json_obj(cls, obj): + _context_to_tag, backoff = obj + return cls(model=_context_to_tag, backoff=backoff) + + +@jsontags.register_tag +class AffixTagger(ContextTagger): + """ + A tagger that chooses a token's tag based on a leading or trailing + substring of its word string. (It is important to note that these + substrings are not necessarily "true" morphological affixes). In + particular, a fixed-length substring of the word is looked up in a + table, and the corresponding tag is returned. Affix taggers are + typically constructed by training them on a tagged corpus. + + Construct a new affix tagger. + + :param affix_length: The length of the affixes that should be + considered during training and tagging. Use negative + numbers for suffixes. + :param min_stem_length: Any words whose length is less than + min_stem_length+abs(affix_length) will be assigned a + tag of None by this tagger. + """ + + json_tag = 'nltk.tag.sequential.AffixTagger' + + def __init__( + self, + train=None, + model=None, + affix_length=-3, + min_stem_length=2, + backoff=None, + cutoff=0, + verbose=False, + ): + + self._check_params(train, model) + + ContextTagger.__init__(self, model, backoff) + + self._affix_length = affix_length + self._min_word_length = min_stem_length + abs(affix_length) + + if train: + self._train(train, cutoff, verbose) + + def encode_json_obj(self): + return ( + self._affix_length, + self._min_word_length, + self._context_to_tag, + self.backoff, + ) + + @classmethod + def decode_json_obj(cls, obj): + _affix_length, _min_word_length, _context_to_tag, backoff = obj + return cls( + affix_length=_affix_length, + min_stem_length=_min_word_length - abs(_affix_length), + model=_context_to_tag, + backoff=backoff, + ) + + def context(self, tokens, index, history): + token = tokens[index] + if len(token) < self._min_word_length: + return None + elif self._affix_length > 0: + return token[: self._affix_length] + else: + return token[self._affix_length :] + + +@python_2_unicode_compatible +@jsontags.register_tag +class RegexpTagger(SequentialBackoffTagger): + """ + Regular Expression Tagger + + The RegexpTagger assigns tags to tokens by comparing their + word strings to a series of regular expressions. The following tagger + uses word suffixes to make guesses about the correct Brown Corpus part + of speech tag: + + >>> from nltk.corpus import brown + >>> from nltk.tag import RegexpTagger + >>> test_sent = brown.sents(categories='news')[0] + >>> regexp_tagger = RegexpTagger( + ... [(r'^-?[0-9]+(.[0-9]+)?$', 'CD'), # cardinal numbers + ... (r'(The|the|A|a|An|an)$', 'AT'), # articles + ... (r'.*able$', 'JJ'), # adjectives + ... (r'.*ness$', 'NN'), # nouns formed from adjectives + ... (r'.*ly$', 'RB'), # adverbs + ... (r'.*s$', 'NNS'), # plural nouns + ... 
(r'.*ing$', 'VBG'), # gerunds + ... (r'.*ed$', 'VBD'), # past tense verbs + ... (r'.*', 'NN') # nouns (default) + ... ]) + >>> regexp_tagger + + >>> regexp_tagger.tag(test_sent) + [('The', 'AT'), ('Fulton', 'NN'), ('County', 'NN'), ('Grand', 'NN'), ('Jury', 'NN'), + ('said', 'NN'), ('Friday', 'NN'), ('an', 'AT'), ('investigation', 'NN'), ('of', 'NN'), + ("Atlanta's", 'NNS'), ('recent', 'NN'), ('primary', 'NN'), ('election', 'NN'), + ('produced', 'VBD'), ('``', 'NN'), ('no', 'NN'), ('evidence', 'NN'), ("''", 'NN'), + ('that', 'NN'), ('any', 'NN'), ('irregularities', 'NNS'), ('took', 'NN'), + ('place', 'NN'), ('.', 'NN')] + + :type regexps: list(tuple(str, str)) + :param regexps: A list of ``(regexp, tag)`` pairs, each of + which indicates that a word matching ``regexp`` should + be tagged with ``tag``. The pairs will be evalutated in + order. If none of the regexps match a word, then the + optional backoff tagger is invoked, else it is + assigned the tag None. + """ + + json_tag = 'nltk.tag.sequential.RegexpTagger' + + def __init__(self, regexps, backoff=None): + """ + """ + SequentialBackoffTagger.__init__(self, backoff) + self._regexs = [(re.compile(regexp), tag) for regexp, tag in regexps] + + def encode_json_obj(self): + return [(regexp.patten, tag) for regexp, tag in self._regexs], self.backoff + + @classmethod + def decode_json_obj(cls, obj): + regexps, backoff = obj + self = cls(()) + self._regexs = [(re.compile(regexp), tag) for regexp, tag in regexps] + SequentialBackoffTagger.__init__(self, backoff) + return self + + def choose_tag(self, tokens, index, history): + for regexp, tag in self._regexs: + if re.match(regexp, tokens[index]): + return tag + return None + + def __repr__(self): + return '' % len(self._regexs) + + +@python_2_unicode_compatible +class ClassifierBasedTagger(SequentialBackoffTagger, FeaturesetTaggerI): + """ + A sequential tagger that uses a classifier to choose the tag for + each token in a sentence. The featureset input for the classifier + is generated by a feature detector function:: + + feature_detector(tokens, index, history) -> featureset + + Where tokens is the list of unlabeled tokens in the sentence; + index is the index of the token for which feature detection + should be performed; and history is list of the tags for all + tokens before index. + + Construct a new classifier-based sequential tagger. + + :param feature_detector: A function used to generate the + featureset input for the classifier:: + feature_detector(tokens, index, history) -> featureset + + :param train: A tagged corpus consisting of a list of tagged + sentences, where each sentence is a list of (word, tag) tuples. + + :param backoff: A backoff tagger, to be used by the new tagger + if it encounters an unknown context. + + :param classifier_builder: A function used to train a new + classifier based on the data in *train*. It should take + one argument, a list of labeled featuresets (i.e., + (featureset, label) tuples). + + :param classifier: The classifier that should be used by the + tagger. This is only useful if you want to manually + construct the classifier; normally, you would use *train* + instead. + + :param backoff: A backoff tagger, used if this tagger is + unable to determine a tag for a given token. + + :param cutoff_prob: If specified, then this tagger will fall + back on its backoff tagger if the probability of the most + likely tag is less than *cutoff_prob*. 
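A sketch of training a ClassifierBasedTagger with a hand-written feature detector of the signature described above; the suffix_features detector below is invented for illustration, and the Brown corpus is assumed to be available:

    from nltk.corpus import brown
    from nltk.tag.sequential import ClassifierBasedTagger

    def suffix_features(tokens, index, history):
        # Featureset for the classifier: word suffixes plus the previous tag.
        word = tokens[index]
        return {
            'suffix2': word[-2:].lower(),
            'suffix3': word[-3:].lower(),
            'prevtag': history[index - 1] if index else '<START>',
        }

    train = brown.tagged_sents(categories='news')[:300]
    tagger = ClassifierBasedTagger(feature_detector=suffix_features, train=train)
    print(tagger.tag('The cat sat on the mat'.split()))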
+ """ + + def __init__( + self, + feature_detector=None, + train=None, + classifier_builder=NaiveBayesClassifier.train, + classifier=None, + backoff=None, + cutoff_prob=None, + verbose=False, + ): + self._check_params(train, classifier) + + SequentialBackoffTagger.__init__(self, backoff) + + if (train and classifier) or (not train and not classifier): + raise ValueError( + 'Must specify either training data or ' 'trained classifier.' + ) + + if feature_detector is not None: + self._feature_detector = feature_detector + # The feature detector function, used to generate a featureset + # or each token: feature_detector(tokens, index, history) -> featureset + + self._cutoff_prob = cutoff_prob + """Cutoff probability for tagging -- if the probability of the + most likely tag is less than this, then use backoff.""" + + self._classifier = classifier + """The classifier used to choose a tag for each token.""" + + if train: + self._train(train, classifier_builder, verbose) + + def choose_tag(self, tokens, index, history): + # Use our feature detector to get the featureset. + featureset = self.feature_detector(tokens, index, history) + + # Use the classifier to pick a tag. If a cutoff probability + # was specified, then check that the tag's probability is + # higher than that cutoff first; otherwise, return None. + if self._cutoff_prob is None: + return self._classifier.classify(featureset) + + pdist = self._classifier.prob_classify(featureset) + tag = pdist.max() + return tag if pdist.prob(tag) >= self._cutoff_prob else None + + def _train(self, tagged_corpus, classifier_builder, verbose): + """ + Build a new classifier, based on the given training data + *tagged_corpus*. + """ + + classifier_corpus = [] + if verbose: + print('Constructing training corpus for classifier.') + + for sentence in tagged_corpus: + history = [] + untagged_sentence, tags = zip(*sentence) + for index in range(len(sentence)): + featureset = self.feature_detector(untagged_sentence, index, history) + classifier_corpus.append((featureset, tags[index])) + history.append(tags[index]) + + if verbose: + print('Training classifier (%d instances)' % len(classifier_corpus)) + self._classifier = classifier_builder(classifier_corpus) + + def __repr__(self): + return '' % self._classifier + + def feature_detector(self, tokens, index, history): + """ + Return the feature detector that this tagger uses to generate + featuresets for its classifier. The feature detector is a + function with the signature:: + + feature_detector(tokens, index, history) -> featureset + + See ``classifier()`` + """ + return self._feature_detector(tokens, index, history) + + def classifier(self): + """ + Return the classifier that this tagger uses to choose a tag + for each word in a sentence. The input for this classifier is + generated using this tagger's feature detector. + See ``feature_detector()`` + """ + return self._classifier + + +class ClassifierBasedPOSTagger(ClassifierBasedTagger): + """ + A classifier based part of speech tagger. 
+ """ + + def feature_detector(self, tokens, index, history): + word = tokens[index] + if index == 0: + prevword = prevprevword = None + prevtag = prevprevtag = None + elif index == 1: + prevword = tokens[index - 1].lower() + prevprevword = None + prevtag = history[index - 1] + prevprevtag = None + else: + prevword = tokens[index - 1].lower() + prevprevword = tokens[index - 2].lower() + prevtag = history[index - 1] + prevprevtag = history[index - 2] + + if re.match('[0-9]+(\.[0-9]*)?|[0-9]*\.[0-9]+$', word): + shape = 'number' + elif re.match('\W+$', word): + shape = 'punct' + elif re.match('[A-Z][a-z]+$', word): + shape = 'upcase' + elif re.match('[a-z]+$', word): + shape = 'downcase' + elif re.match('\w+$', word): + shape = 'mixedcase' + else: + shape = 'other' + + features = { + 'prevtag': prevtag, + 'prevprevtag': prevprevtag, + 'word': word, + 'word.lower': word.lower(), + 'suffix3': word.lower()[-3:], + 'suffix2': word.lower()[-2:], + 'suffix1': word.lower()[-1:], + 'prevprevword': prevprevword, + 'prevword': prevword, + 'prevtag+word': '%s+%s' % (prevtag, word.lower()), + 'prevprevtag+word': '%s+%s' % (prevprevtag, word.lower()), + 'prevword+word': '%s+%s' % (prevword, word.lower()), + 'shape': shape, + } + return features diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/stanford.py b/venv.bak/lib/python3.7/site-packages/nltk/tag/stanford.py new file mode 100644 index 0000000..9916386 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tag/stanford.py @@ -0,0 +1,249 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Interface to the Stanford Part-of-speech and Named-Entity Taggers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Nitin Madnani +# Rami Al-Rfou' +# URL: +# For license information, see LICENSE.TXT + +""" +A module for interfacing with the Stanford taggers. + +Tagger models need to be downloaded from https://nlp.stanford.edu/software +and the STANFORD_MODELS environment variable set (a colon-separated +list of paths). + +For more details see the documentation for StanfordPOSTagger and StanfordNERTagger. +""" + +from abc import abstractmethod +import os +import tempfile +from subprocess import PIPE +import warnings + +from six import text_type + +from nltk.internals import find_file, find_jar, config_java, java, _java_options +from nltk.tag.api import TaggerI + +_stanford_url = 'https://nlp.stanford.edu/software' + + +class StanfordTagger(TaggerI): + """ + An interface to Stanford taggers. Subclasses must define: + + - ``_cmd`` property: A property that returns the command that will be + executed. + - ``_SEPARATOR``: Class constant that represents that character that + is used to separate the tokens from their tags. + - ``_JAR`` file: Class constant that represents the jar file name. + """ + + _SEPARATOR = '' + _JAR = '' + + def __init__( + self, + model_filename, + path_to_jar=None, + encoding='utf8', + verbose=False, + java_options='-mx1000m', + ): + # Raise deprecation warning. + warnings.warn( + str( + "\nThe StanfordTokenizer will " + "be deprecated in version 3.2.6.\n" + "Please use \033[91mnltk.parse.corenlp.CoreNLPParser\033[0m instead." + ), + DeprecationWarning, + stacklevel=2, + ) + + if not self._JAR: + warnings.warn( + 'The StanfordTagger class is not meant to be ' + 'instantiated directly. Did you mean ' + 'StanfordPOSTagger or StanfordNERTagger?' 
+ ) + self._stanford_jar = find_jar( + self._JAR, path_to_jar, searchpath=(), url=_stanford_url, verbose=verbose + ) + + self._stanford_model = find_file( + model_filename, env_vars=('STANFORD_MODELS',), verbose=verbose + ) + + self._encoding = encoding + self.java_options = java_options + + @property + @abstractmethod + def _cmd(self): + """ + A property that returns the command that will be executed. + """ + + def tag(self, tokens): + # This function should return list of tuple rather than list of list + return sum(self.tag_sents([tokens]), []) + + def tag_sents(self, sentences): + encoding = self._encoding + default_options = ' '.join(_java_options) + config_java(options=self.java_options, verbose=False) + + # Create a temporary input file + _input_fh, self._input_file_path = tempfile.mkstemp(text=True) + + cmd = list(self._cmd) + cmd.extend(['-encoding', encoding]) + + # Write the actual sentences to the temporary input file + _input_fh = os.fdopen(_input_fh, 'wb') + _input = '\n'.join((' '.join(x) for x in sentences)) + if isinstance(_input, text_type) and encoding: + _input = _input.encode(encoding) + _input_fh.write(_input) + _input_fh.close() + + # Run the tagger and get the output + stanpos_output, _stderr = java( + cmd, classpath=self._stanford_jar, stdout=PIPE, stderr=PIPE + ) + stanpos_output = stanpos_output.decode(encoding) + + # Delete the temporary file + os.unlink(self._input_file_path) + + # Return java configurations to their default values + config_java(options=default_options, verbose=False) + + return self.parse_output(stanpos_output, sentences) + + def parse_output(self, text, sentences=None): + # Output the tagged sentences + tagged_sentences = [] + for tagged_sentence in text.strip().split("\n"): + sentence = [] + for tagged_word in tagged_sentence.strip().split(): + word_tags = tagged_word.strip().split(self._SEPARATOR) + sentence.append((''.join(word_tags[:-1]), word_tags[-1])) + tagged_sentences.append(sentence) + return tagged_sentences + + +class StanfordPOSTagger(StanfordTagger): + """ + A class for pos tagging with Stanford Tagger. The input is the paths to: + - a model trained on training data + - (optionally) the path to the stanford tagger jar file. If not specified here, + then this jar file must be specified in the CLASSPATH envinroment variable. + - (optionally) the encoding of the training data (default: UTF-8) + + Example: + + >>> from nltk.tag import StanfordPOSTagger + >>> st = StanfordPOSTagger('english-bidirectional-distsim.tagger') + >>> st.tag('What is the airspeed of an unladen swallow ?'.split()) + [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), ('airspeed', 'NN'), ('of', 'IN'), ('an', 'DT'), ('unladen', 'JJ'), ('swallow', 'VB'), ('?', '.')] + """ + + _SEPARATOR = '_' + _JAR = 'stanford-postagger.jar' + + def __init__(self, *args, **kwargs): + super(StanfordPOSTagger, self).__init__(*args, **kwargs) + + @property + def _cmd(self): + return [ + 'edu.stanford.nlp.tagger.maxent.MaxentTagger', + '-model', + self._stanford_model, + '-textFile', + self._input_file_path, + '-tokenize', + 'false', + '-outputFormatOptions', + 'keepEmptySentences', + ] + + +class StanfordNERTagger(StanfordTagger): + """ + A class for Named-Entity Tagging with Stanford Tagger. The input is the paths to: + + - a model trained on training data + - (optionally) the path to the stanford tagger jar file. If not specified here, + then this jar file must be specified in the CLASSPATH envinroment variable. 
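In practice the model and jar locations mentioned above are supplied either as constructor arguments or through the STANFORD_MODELS and CLASSPATH environment variables; a sketch with placeholder paths (point them at your own Stanford download, and note that the class itself warns that nltk.parse.corenlp is the preferred replacement):

    import os
    from nltk.tag import StanfordPOSTagger

    # Placeholder paths -- substitute wherever the Stanford tagger was unpacked.
    os.environ['STANFORD_MODELS'] = '/opt/stanford-postagger/models'
    os.environ['CLASSPATH'] = '/opt/stanford-postagger'

    st = StanfordPOSTagger('english-bidirectional-distsim.tagger',
                           java_options='-mx2g')
    print(st.tag('What is the airspeed of an unladen swallow ?'.split()))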
+ - (optionally) the encoding of the training data (default: UTF-8) + + Example: + + >>> from nltk.tag import StanfordNERTagger + >>> st = StanfordNERTagger('english.all.3class.distsim.crf.ser.gz') # doctest: +SKIP + >>> st.tag('Rami Eid is studying at Stony Brook University in NY'.split()) # doctest: +SKIP + [('Rami', 'PERSON'), ('Eid', 'PERSON'), ('is', 'O'), ('studying', 'O'), + ('at', 'O'), ('Stony', 'ORGANIZATION'), ('Brook', 'ORGANIZATION'), + ('University', 'ORGANIZATION'), ('in', 'O'), ('NY', 'LOCATION')] + """ + + _SEPARATOR = '/' + _JAR = 'stanford-ner.jar' + _FORMAT = 'slashTags' + + def __init__(self, *args, **kwargs): + super(StanfordNERTagger, self).__init__(*args, **kwargs) + + @property + def _cmd(self): + # Adding -tokenizerFactory edu.stanford.nlp.process.WhitespaceTokenizer -tokenizerOptions tokenizeNLs=false for not using stanford Tokenizer + return [ + 'edu.stanford.nlp.ie.crf.CRFClassifier', + '-loadClassifier', + self._stanford_model, + '-textFile', + self._input_file_path, + '-outputFormat', + self._FORMAT, + '-tokenizerFactory', + 'edu.stanford.nlp.process.WhitespaceTokenizer', + '-tokenizerOptions', + '\"tokenizeNLs=false\"', + ] + + def parse_output(self, text, sentences): + if self._FORMAT == 'slashTags': + # Joint together to a big list + tagged_sentences = [] + for tagged_sentence in text.strip().split("\n"): + for tagged_word in tagged_sentence.strip().split(): + word_tags = tagged_word.strip().split(self._SEPARATOR) + tagged_sentences.append((''.join(word_tags[:-1]), word_tags[-1])) + + # Separate it according to the input + result = [] + start = 0 + for sent in sentences: + result.append(tagged_sentences[start : start + len(sent)]) + start += len(sent) + return result + + raise NotImplementedError + + +def setup_module(module): + from nose import SkipTest + + try: + StanfordPOSTagger('english-bidirectional-distsim.tagger') + except LookupError: + raise SkipTest( + 'Doctests from nltk.tag.stanford are skipped because one \ + of the stanford jars cannot be found.' + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/tnt.py b/venv.bak/lib/python3.7/site-packages/nltk/tag/tnt.py new file mode 100644 index 0000000..4837e11 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tag/tnt.py @@ -0,0 +1,588 @@ +# Natural Language Toolkit: TnT Tagger +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Sam Huston +# +# URL: +# For license information, see LICENSE.TXT + +''' +Implementation of 'TnT - A Statisical Part of Speech Tagger' +by Thorsten Brants + +http://acl.ldc.upenn.edu/A/A00/A00-1031.pdf +''' +from __future__ import print_function, division +from math import log + +from operator import itemgetter + +from nltk.probability import FreqDist, ConditionalFreqDist +from nltk.tag.api import TaggerI + + +class TnT(TaggerI): + ''' + TnT - Statistical POS tagger + + IMPORTANT NOTES: + + * DOES NOT AUTOMATICALLY DEAL WITH UNSEEN WORDS + + - It is possible to provide an untrained POS tagger to + create tags for unknown words, see __init__ function + + * SHOULD BE USED WITH SENTENCE-DELIMITED INPUT + + - Due to the nature of this tagger, it works best when + trained over sentence delimited input. + - However it still produces good results if the training + data and testing data are separated on all punctuation eg: [,.?!] 
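Because TnT does not handle unseen words by itself, an unknown-word tagger can be supplied through the unk parameter (with Trained=True, since DefaultTagger needs no training); a sketch assuming the treebank sample is available:

    from nltk.corpus import treebank
    from nltk.tag import DefaultTagger
    from nltk.tag.tnt import TnT

    train = treebank.tagged_sents()[:2000]
    test = treebank.tagged_sents()[2000:2100]

    # Unseen words are handed to the `unk` tagger instead of being tagged 'Unk'.
    tagger = TnT(unk=DefaultTagger('NN'), Trained=True)
    tagger.train(train)
    print(tagger.evaluate(test))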
+ - Input for training is expected to be a list of sentences + where each sentence is a list of (word, tag) tuples + - Input for tag function is a single sentence + Input for tagdata function is a list of sentences + Output is of a similar form + + * Function provided to process text that is unsegmented + + - Please see basic_sent_chop() + + + TnT uses a second order Markov model to produce tags for + a sequence of input, specifically: + + argmax [Proj(P(t_i|t_i-1,t_i-2)P(w_i|t_i))] P(t_T+1 | t_T) + + IE: the maximum projection of a set of probabilities + + The set of possible tags for a given word is derived + from the training data. It is the set of all tags + that exact word has been assigned. + + To speed up and get more precision, we can use log addition + to instead multiplication, specifically: + + argmax [Sigma(log(P(t_i|t_i-1,t_i-2))+log(P(w_i|t_i)))] + + log(P(t_T+1|t_T)) + + The probability of a tag for a given word is the linear + interpolation of 3 markov models; a zero-order, first-order, + and a second order model. + + P(t_i| t_i-1, t_i-2) = l1*P(t_i) + l2*P(t_i| t_i-1) + + l3*P(t_i| t_i-1, t_i-2) + + A beam search is used to limit the memory usage of the algorithm. + The degree of the beam can be changed using N in the initialization. + N represents the maximum number of possible solutions to maintain + while tagging. + + It is possible to differentiate the tags which are assigned to + capitalized words. However this does not result in a significant + gain in the accuracy of the results. + ''' + + def __init__(self, unk=None, Trained=False, N=1000, C=False): + ''' + Construct a TnT statistical tagger. Tagger must be trained + before being used to tag input. + + :param unk: instance of a POS tagger, conforms to TaggerI + :type unk:(TaggerI) + :param Trained: Indication that the POS tagger is trained or not + :type Trained: boolean + :param N: Beam search degree (see above) + :type N:(int) + :param C: Capitalization flag + :type C: boolean + + Initializer, creates frequency distributions to be used + for tagging + + _lx values represent the portion of the tri/bi/uni taggers + to be used to calculate the probability + + N value is the number of possible solutions to maintain + while tagging. A good value for this is 1000 + + C is a boolean value which specifies to use or + not use the Capitalization of the word as additional + information for tagging. + NOTE: using capitalization may not increase the accuracy + of the tagger + ''' + + self._uni = FreqDist() + self._bi = ConditionalFreqDist() + self._tri = ConditionalFreqDist() + self._wd = ConditionalFreqDist() + self._eos = ConditionalFreqDist() + self._l1 = 0.0 + self._l2 = 0.0 + self._l3 = 0.0 + self._N = N + self._C = C + self._T = Trained + + self._unk = unk + + # statistical tools (ignore or delete me) + self.unknown = 0 + self.known = 0 + + def train(self, data): + ''' + Uses a set of tagged data to train the tagger. + If an unknown word tagger is specified, + it is trained on the same data. 
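The linear interpolation above can be made concrete with toy numbers; the counts and lambda values below are invented purely for illustration:

    # P(t_i | t_i-1, t_i-2) = l1*P(t_i) + l2*P(t_i | t_i-1) + l3*P(t_i | t_i-1, t_i-2)
    l1, l2, l3 = 0.2, 0.3, 0.5      # interpolation weights, summing to 1
    p_uni = 40 / 1000               # zero-order estimate P(t_i)
    p_bi = 12 / 80                  # first-order estimate P(t_i | t_i-1)
    p_tri = 5 / 20                  # second-order estimate P(t_i | t_i-1, t_i-2)

    p = l1 * p_uni + l2 * p_bi + l3 * p_tri
    print(p)                        # 0.178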
+ + :param data: List of lists of (word, tag) tuples + :type data: tuple(str) + ''' + + # Ensure that local C flag is initialized before use + C = False + + if self._unk is not None and self._T == False: + self._unk.train(data) + + for sent in data: + history = [('BOS', False), ('BOS', False)] + for w, t in sent: + + # if capitalization is requested, + # and the word begins with a capital + # set local flag C to True + if self._C and w[0].isupper(): + C = True + + self._wd[w][t] += 1 + self._uni[(t, C)] += 1 + self._bi[history[1]][(t, C)] += 1 + self._tri[tuple(history)][(t, C)] += 1 + + history.append((t, C)) + history.pop(0) + + # set local flag C to false for the next word + C = False + + self._eos[t]['EOS'] += 1 + + # compute lambda values from the trained frequency distributions + self._compute_lambda() + + # (debugging -- ignore or delete me) + # print "lambdas" + # print i, self._l1, i, self._l2, i, self._l3 + + def _compute_lambda(self): + ''' + creates lambda values based upon training data + + NOTE: no need to explicitly reference C, + it is contained within the tag variable :: tag == (tag,C) + + for each tag trigram (t1, t2, t3) + depending on the maximum value of + - f(t1,t2,t3)-1 / f(t1,t2)-1 + - f(t2,t3)-1 / f(t2)-1 + - f(t3)-1 / N-1 + + increment l3,l2, or l1 by f(t1,t2,t3) + + ISSUES -- Resolutions: + if 2 values are equal, increment both lambda values + by (f(t1,t2,t3) / 2) + ''' + + # temporary lambda variables + tl1 = 0.0 + tl2 = 0.0 + tl3 = 0.0 + + # for each t1,t2 in system + for history in self._tri.conditions(): + (h1, h2) = history + + # for each t3 given t1,t2 in system + # (NOTE: tag actually represents (tag,C)) + # However no effect within this function + for tag in self._tri[history].keys(): + + # if there has only been 1 occurrence of this tag in the data + # then ignore this trigram. 
+ if self._uni[tag] == 1: + continue + + # safe_div provides a safe floating point division + # it returns -1 if the denominator is 0 + c3 = self._safe_div( + (self._tri[history][tag] - 1), (self._tri[history].N() - 1) + ) + c2 = self._safe_div((self._bi[h2][tag] - 1), (self._bi[h2].N() - 1)) + c1 = self._safe_div((self._uni[tag] - 1), (self._uni.N() - 1)) + + # if c1 is the maximum value: + if (c1 > c3) and (c1 > c2): + tl1 += self._tri[history][tag] + + # if c2 is the maximum value + elif (c2 > c3) and (c2 > c1): + tl2 += self._tri[history][tag] + + # if c3 is the maximum value + elif (c3 > c2) and (c3 > c1): + tl3 += self._tri[history][tag] + + # if c3, and c2 are equal and larger than c1 + elif (c3 == c2) and (c3 > c1): + tl2 += self._tri[history][tag] / 2.0 + tl3 += self._tri[history][tag] / 2.0 + + # if c1, and c2 are equal and larger than c3 + # this might be a dumb thing to do....(not sure yet) + elif (c2 == c1) and (c1 > c3): + tl1 += self._tri[history][tag] / 2.0 + tl2 += self._tri[history][tag] / 2.0 + + # otherwise there might be a problem + # eg: all values = 0 + else: + # print "Problem", c1, c2 ,c3 + pass + + # Lambda normalisation: + # ensures that l1+l2+l3 = 1 + self._l1 = tl1 / (tl1 + tl2 + tl3) + self._l2 = tl2 / (tl1 + tl2 + tl3) + self._l3 = tl3 / (tl1 + tl2 + tl3) + + def _safe_div(self, v1, v2): + ''' + Safe floating point division function, does not allow division by 0 + returns -1 if the denominator is 0 + ''' + if v2 == 0: + return -1 + else: + return v1 / v2 + + def tagdata(self, data): + ''' + Tags each sentence in a list of sentences + + :param data:list of list of words + :type data: [[string,],] + :return: list of list of (word, tag) tuples + + Invokes tag(sent) function for each sentence + compiles the results into a list of tagged sentences + each tagged sentence is a list of (word, tag) tuples + ''' + res = [] + for sent in data: + res1 = self.tag(sent) + res.append(res1) + return res + + def tag(self, data): + ''' + Tags a single sentence + + :param data: list of words + :type data: [string,] + + :return: [(word, tag),] + + Calls recursive function '_tagword' + to produce a list of tags + + Associates the sequence of returned tags + with the correct words in the input sequence + + returns a list of (word, tag) tuples + ''' + + current_state = [(['BOS', 'BOS'], 0.0)] + + sent = list(data) + + tags = self._tagword(sent, current_state) + + res = [] + for i in range(len(sent)): + # unpack and discard the C flags + (t, C) = tags[i + 2] + res.append((sent[i], t)) + + return res + + def _tagword(self, sent, current_states): + ''' + :param sent : List of words remaining in the sentence + :type sent : [word,] + :param current_states : List of possible tag combinations for + the sentence so far, and the log probability + associated with each tag combination + :type current_states : [([tag, ], logprob), ] + + Tags the first word in the sentence and + recursively tags the reminder of sentence + + Uses formula specified above to calculate the probability + of a particular tag + ''' + + # if this word marks the end of the sentance, + # return the most probable tag + if sent == []: + (h, logp) = current_states[0] + return h + + # otherwise there are more words to be tagged + word = sent[0] + sent = sent[1:] + new_states = [] + + # if the Capitalisation is requested, + # initalise the flag for this word + C = False + if self._C and word[0].isupper(): + C = True + + # if word is known + # compute the set of possible tags + # and their associated log probabilities + if 
word in self._wd: + self.known += 1 + + for (history, curr_sent_logprob) in current_states: + logprobs = [] + + for t in self._wd[word].keys(): + tC = (t, C) + p_uni = self._uni.freq(tC) + p_bi = self._bi[history[-1]].freq(tC) + p_tri = self._tri[tuple(history[-2:])].freq(tC) + p_wd = self._wd[word][t] / self._uni[tC] + p = self._l1 * p_uni + self._l2 * p_bi + self._l3 * p_tri + p2 = log(p, 2) + log(p_wd, 2) + + # compute the result of appending each tag to this history + new_states.append((history + [tC], curr_sent_logprob + p2)) + + # otherwise a new word, set of possible tags is unknown + else: + self.unknown += 1 + + # since a set of possible tags, + # and the probability of each specific tag + # can not be returned from most classifiers: + # specify that any unknown words are tagged with certainty + p = 1 + + # if no unknown word tagger has been specified + # then use the tag 'Unk' + if self._unk is None: + tag = ('Unk', C) + + # otherwise apply the unknown word tagger + else: + [(_w, t)] = list(self._unk.tag([word])) + tag = (t, C) + + for (history, logprob) in current_states: + history.append(tag) + + new_states = current_states + + # now have computed a set of possible new_states + + # sort states by log prob + # set is now ordered greatest to least log probability + new_states.sort(reverse=True, key=itemgetter(1)) + + # del everything after N (threshold) + # this is the beam search cut + if len(new_states) > self._N: + new_states = new_states[: self._N] + + # compute the tags for the rest of the sentence + # return the best list of tags for the sentence + return self._tagword(sent, new_states) + + +######################################## +# helper function -- basic sentence tokenizer +######################################## + + +def basic_sent_chop(data, raw=True): + ''' + Basic method for tokenizing input into sentences + for this tagger: + + :param data: list of tokens (words or (word, tag) tuples) + :type data: str or tuple(str, str) + :param raw: boolean flag marking the input data + as a list of words or a list of tagged words + :type raw: bool + :return: list of sentences + sentences are a list of tokens + tokens are the same as the input + + Function takes a list of tokens and separates the tokens into lists + where each list represents a sentence fragment + This function can separate both tagged and raw sequences into + basic sentences. + + Sentence markers are the set of [,.!?] + + This is a simple method which enhances the performance of the TnT + tagger. Better sentence tokenization will further enhance the results. 
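A short sketch of basic_sent_chop() on raw (untagged) input:

    from nltk.tag.tnt import basic_sent_chop

    words = ['Hello', ',', 'how', 'are', 'you', '?', 'Fine', ',', 'thanks', '.']
    print(basic_sent_chop(words, raw=True))
    # [['Hello', ','], ['how', 'are', 'you', '?'], ['Fine', ','], ['thanks', '.']]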
+ ''' + + new_data = [] + curr_sent = [] + sent_mark = [',', '.', '?', '!'] + + if raw: + for word in data: + if word in sent_mark: + curr_sent.append(word) + new_data.append(curr_sent) + curr_sent = [] + else: + curr_sent.append(word) + + else: + for (word, tag) in data: + if word in sent_mark: + curr_sent.append((word, tag)) + new_data.append(curr_sent) + curr_sent = [] + else: + curr_sent.append((word, tag)) + return new_data + + +def demo(): + from nltk.corpus import brown + + sents = list(brown.tagged_sents()) + test = list(brown.sents()) + + # create and train the tagger + tagger = TnT() + tagger.train(sents[200:1000]) + + # tag some data + tagged_data = tagger.tagdata(test[100:120]) + + # print results + for j in range(len(tagged_data)): + s = tagged_data[j] + t = sents[j + 100] + for i in range(len(s)): + print(s[i], '--', t[i]) + print() + + +def demo2(): + from nltk.corpus import treebank + + d = list(treebank.tagged_sents()) + + t = TnT(N=1000, C=False) + s = TnT(N=1000, C=True) + t.train(d[(11) * 100 :]) + s.train(d[(11) * 100 :]) + + for i in range(10): + tacc = t.evaluate(d[i * 100 : ((i + 1) * 100)]) + tp_un = t.unknown / (t.known + t.unknown) + tp_kn = t.known / (t.known + t.unknown) + t.unknown = 0 + t.known = 0 + + print('Capitalization off:') + print('Accuracy:', tacc) + print('Percentage known:', tp_kn) + print('Percentage unknown:', tp_un) + print('Accuracy over known words:', (tacc / tp_kn)) + + sacc = s.evaluate(d[i * 100 : ((i + 1) * 100)]) + sp_un = s.unknown / (s.known + s.unknown) + sp_kn = s.known / (s.known + s.unknown) + s.unknown = 0 + s.known = 0 + + print('Capitalization on:') + print('Accuracy:', sacc) + print('Percentage known:', sp_kn) + print('Percentage unknown:', sp_un) + print('Accuracy over known words:', (sacc / sp_kn)) + + +def demo3(): + from nltk.corpus import treebank, brown + + d = list(treebank.tagged_sents()) + e = list(brown.tagged_sents()) + + d = d[:1000] + e = e[:1000] + + d10 = int(len(d) * 0.1) + e10 = int(len(e) * 0.1) + + tknacc = 0 + sknacc = 0 + tallacc = 0 + sallacc = 0 + tknown = 0 + sknown = 0 + + for i in range(10): + + t = TnT(N=1000, C=False) + s = TnT(N=1000, C=False) + + dtest = d[(i * d10) : ((i + 1) * d10)] + etest = e[(i * e10) : ((i + 1) * e10)] + + dtrain = d[: (i * d10)] + d[((i + 1) * d10) :] + etrain = e[: (i * e10)] + e[((i + 1) * e10) :] + + t.train(dtrain) + s.train(etrain) + + tacc = t.evaluate(dtest) + tp_un = t.unknown / (t.known + t.unknown) + tp_kn = t.known / (t.known + t.unknown) + tknown += tp_kn + t.unknown = 0 + t.known = 0 + + sacc = s.evaluate(etest) + sp_un = s.unknown / (s.known + s.unknown) + sp_kn = s.known / (s.known + s.unknown) + sknown += sp_kn + s.unknown = 0 + s.known = 0 + + tknacc += tacc / tp_kn + sknacc += sacc / tp_kn + tallacc += tacc + sallacc += sacc + + # print i+1, (tacc / tp_kn), i+1, (sacc / tp_kn), i+1, tacc, i+1, sacc + + print("brown: acc over words known:", 10 * tknacc) + print(" : overall accuracy:", 10 * tallacc) + print(" : words known:", 10 * tknown) + print("treebank: acc over words known:", 10 * sknacc) + print(" : overall accuracy:", 10 * sallacc) + print(" : words known:", 10 * sknown) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tag/util.py b/venv.bak/lib/python3.7/site-packages/nltk/tag/util.py new file mode 100644 index 0000000..2a397d0 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tag/util.py @@ -0,0 +1,72 @@ +# Natural Language Toolkit: Tagger Utilities +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird +# 
URL: +# For license information, see LICENSE.TXT + + +def str2tuple(s, sep='/'): + """ + Given the string representation of a tagged token, return the + corresponding tuple representation. The rightmost occurrence of + *sep* in *s* will be used to divide *s* into a word string and + a tag string. If *sep* does not occur in *s*, return (s, None). + + >>> from nltk.tag.util import str2tuple + >>> str2tuple('fly/NN') + ('fly', 'NN') + + :type s: str + :param s: The string representation of a tagged token. + :type sep: str + :param sep: The separator string used to separate word strings + from tags. + """ + loc = s.rfind(sep) + if loc >= 0: + return (s[:loc], s[loc + len(sep) :].upper()) + else: + return (s, None) + + +def tuple2str(tagged_token, sep='/'): + """ + Given the tuple representation of a tagged token, return the + corresponding string representation. This representation is + formed by concatenating the token's word string, followed by the + separator, followed by the token's tag. (If the tag is None, + then just return the bare word string.) + + >>> from nltk.tag.util import tuple2str + >>> tagged_token = ('fly', 'NN') + >>> tuple2str(tagged_token) + 'fly/NN' + + :type tagged_token: tuple(str, str) + :param tagged_token: The tuple representation of a tagged token. + :type sep: str + :param sep: The separator string used to separate word strings + from tags. + """ + word, tag = tagged_token + if tag is None: + return word + else: + assert sep not in tag, 'tag may not contain sep!' + return '%s%s%s' % (word, sep, tag) + + +def untag(tagged_sentence): + """ + Given a tagged sentence, return an untagged version of that + sentence. I.e., return a list containing the first element + of each tuple in *tagged_sentence*. + + >>> from nltk.tag.util import untag + >>> untag([('John', 'NNP'), ('saw', 'VBD'), ('Mary', 'NNP')]) + ['John', 'saw', 'Mary'] + + """ + return [w for (w, t) in tagged_sentence] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tbl/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/tbl/__init__.py new file mode 100644 index 0000000..5298a5a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tbl/__init__.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Transformation-based learning +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Marcus Uneson +# based on previous (nltk2) version by +# Christopher Maloof, Edward Loper, Steven Bird +# URL: +# For license information, see LICENSE.TXT + +""" +Transformation Based Learning + +A general purpose package for Transformation Based Learning, +currently used by nltk.tag.BrillTagger. +""" + +from nltk.tbl.template import Template + +# API: Template(...), Template.expand(...) + +from nltk.tbl.feature import Feature + +# API: Feature(...), Feature.expand(...) 
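The three helpers in nltk.tag.util above compose naturally; a short sketch:

    from nltk.tag.util import str2tuple, tuple2str, untag

    tagged = [str2tuple(tok) for tok in 'The/AT dog/NN barked/VBD ./.'.split()]
    print(tagged)                                  # [('The', 'AT'), ('dog', 'NN'), ...]
    print(untag(tagged))                           # ['The', 'dog', 'barked', '.']
    print(' '.join(tuple2str(t) for t in tagged))  # round-trips the original string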
+ +from nltk.tbl.rule import Rule + +# API: Rule.format(...), Rule.templatetid + +from nltk.tbl.erroranalysis import error_list diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..ba22266 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/api.cpython-37.pyc new file mode 100644 index 0000000..133fc7d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/api.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/demo.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/demo.cpython-37.pyc new file mode 100644 index 0000000..b677d12 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/demo.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/erroranalysis.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/erroranalysis.cpython-37.pyc new file mode 100644 index 0000000..248ad4e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/erroranalysis.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/feature.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/feature.cpython-37.pyc new file mode 100644 index 0000000..b8e9fbd Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/feature.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/rule.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/rule.cpython-37.pyc new file mode 100644 index 0000000..827907a Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/rule.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/template.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/template.cpython-37.pyc new file mode 100644 index 0000000..0ae5bd5 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tbl/__pycache__/template.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/numpy/compat/tests/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/tbl/api.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/compat/tests/__init__.py rename to venv.bak/lib/python3.7/site-packages/nltk/tbl/api.py diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tbl/demo.py b/venv.bak/lib/python3.7/site-packages/nltk/tbl/demo.py new file mode 100644 index 0000000..28642ae --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tbl/demo.py @@ -0,0 +1,424 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Transformation-based learning +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Marcus Uneson +# based on previous (nltk2) version by +# Christopher Maloof, Edward Loper, Steven Bird +# URL: +# For license information, see LICENSE.TXT + +from __future__ import print_function, absolute_import, division +import os +import pickle + +import random +import time + +from nltk.corpus import treebank + +from nltk.tbl import error_list, Template +from 
nltk.tag.brill import Word, Pos +from nltk.tag import BrillTaggerTrainer, RegexpTagger, UnigramTagger + + +def demo(): + """ + Run a demo with defaults. See source comments for details, + or docstrings of any of the more specific demo_* functions. + """ + postag() + + +def demo_repr_rule_format(): + """ + Exemplify repr(Rule) (see also str(Rule) and Rule.format("verbose")) + """ + postag(ruleformat="repr") + + +def demo_str_rule_format(): + """ + Exemplify repr(Rule) (see also str(Rule) and Rule.format("verbose")) + """ + postag(ruleformat="str") + + +def demo_verbose_rule_format(): + """ + Exemplify Rule.format("verbose") + """ + postag(ruleformat="verbose") + + +def demo_multiposition_feature(): + """ + The feature/s of a template takes a list of positions + relative to the current word where the feature should be + looked for, conceptually joined by logical OR. For instance, + Pos([-1, 1]), given a value V, will hold whenever V is found + one step to the left and/or one step to the right. + + For contiguous ranges, a 2-arg form giving inclusive end + points can also be used: Pos(-3, -1) is the same as the arg + below. + """ + postag(templates=[Template(Pos([-3, -2, -1]))]) + + +def demo_multifeature_template(): + """ + Templates can have more than a single feature. + """ + postag(templates=[Template(Word([0]), Pos([-2, -1]))]) + + +def demo_template_statistics(): + """ + Show aggregate statistics per template. Little used templates are + candidates for deletion, much used templates may possibly be refined. + + Deleting unused templates is mostly about saving time and/or space: + training is basically O(T) in the number of templates T + (also in terms of memory usage, which often will be the limiting factor). + """ + postag(incremental_stats=True, template_stats=True) + + +def demo_generated_templates(): + """ + Template.expand and Feature.expand are class methods facilitating + generating large amounts of templates. See their documentation for + details. + + Note: training with 500 templates can easily fill all available + even on relatively small corpora + """ + wordtpls = Word.expand([-1, 0, 1], [1, 2], excludezero=False) + tagtpls = Pos.expand([-2, -1, 0, 1], [1, 2], excludezero=True) + templates = list(Template.expand([wordtpls, tagtpls], combinations=(1, 3))) + print( + "Generated {0} templates for transformation-based learning".format( + len(templates) + ) + ) + postag(templates=templates, incremental_stats=True, template_stats=True) + + +def demo_learning_curve(): + """ + Plot a learning curve -- the contribution on tagging accuracy of + the individual rules. + Note: requires matplotlib + """ + postag( + incremental_stats=True, + separate_baseline_data=True, + learning_curve_output="learningcurve.png", + ) + + +def demo_error_analysis(): + """ + Writes a file with context for each erroneous word after tagging testing data + """ + postag(error_output="errors.txt") + + +def demo_serialize_tagger(): + """ + Serializes the learned tagger to a file in pickle format; reloads it + and validates the process. + """ + postag(serialize_output="tagger.pcl") + + +def demo_high_accuracy_rules(): + """ + Discard rules with low accuracy. This may hurt performance a bit, + but will often produce rules which are more interesting read to a human. 
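A sketch of building templates directly with the Word and Pos features used by these demos; the expansion call is the same one demo_generated_templates() uses:

    from nltk.tag.brill import Word, Pos
    from nltk.tbl import Template

    # One template: look at the tags one and two positions to the left,
    # together with the current word.
    tpl = Template(Pos([-2, -1]), Word([0]))

    # Expanding feature sets generates many templates in a single call.
    wordtpls = Word.expand([-1, 0, 1], [1, 2], excludezero=False)
    tagtpls = Pos.expand([-2, -1, 0, 1], [1, 2], excludezero=True)
    templates = list(Template.expand([wordtpls, tagtpls], combinations=(1, 3)))
    print(len(templates))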
+ """ + postag(num_sents=3000, min_acc=0.96, min_score=10) + + +def postag( + templates=None, + tagged_data=None, + num_sents=1000, + max_rules=300, + min_score=3, + min_acc=None, + train=0.8, + trace=3, + randomize=False, + ruleformat="str", + incremental_stats=False, + template_stats=False, + error_output=None, + serialize_output=None, + learning_curve_output=None, + learning_curve_take=300, + baseline_backoff_tagger=None, + separate_baseline_data=False, + cache_baseline_tagger=None, +): + """ + Brill Tagger Demonstration + :param templates: how many sentences of training and testing data to use + :type templates: list of Template + + :param tagged_data: maximum number of rule instances to create + :type tagged_data: C{int} + + :param num_sents: how many sentences of training and testing data to use + :type num_sents: C{int} + + :param max_rules: maximum number of rule instances to create + :type max_rules: C{int} + + :param min_score: the minimum score for a rule in order for it to be considered + :type min_score: C{int} + + :param min_acc: the minimum score for a rule in order for it to be considered + :type min_acc: C{float} + + :param train: the fraction of the the corpus to be used for training (1=all) + :type train: C{float} + + :param trace: the level of diagnostic tracing output to produce (0-4) + :type trace: C{int} + + :param randomize: whether the training data should be a random subset of the corpus + :type randomize: C{bool} + + :param ruleformat: rule output format, one of "str", "repr", "verbose" + :type ruleformat: C{str} + + :param incremental_stats: if true, will tag incrementally and collect stats for each rule (rather slow) + :type incremental_stats: C{bool} + + :param template_stats: if true, will print per-template statistics collected in training and (optionally) testing + :type template_stats: C{bool} + + :param error_output: the file where errors will be saved + :type error_output: C{string} + + :param serialize_output: the file where the learned tbl tagger will be saved + :type serialize_output: C{string} + + :param learning_curve_output: filename of plot of learning curve(s) (train and also test, if available) + :type learning_curve_output: C{string} + + :param learning_curve_take: how many rules plotted + :type learning_curve_take: C{int} + + :param baseline_backoff_tagger: the file where rules will be saved + :type baseline_backoff_tagger: tagger + + :param separate_baseline_data: use a fraction of the training data exclusively for training baseline + :type separate_baseline_data: C{bool} + + :param cache_baseline_tagger: cache baseline tagger to this file (only interesting as a temporary workaround to get + deterministic output from the baseline unigram tagger between python versions) + :type cache_baseline_tagger: C{string} + + + Note on separate_baseline_data: if True, reuse training data both for baseline and rule learner. This + is fast and fine for a demo, but is likely to generalize worse on unseen data. + Also cannot be sensibly used for learning curves on training data (the baseline will be artificially high). + """ + + # defaults + baseline_backoff_tagger = baseline_backoff_tagger or REGEXP_TAGGER + if templates is None: + from nltk.tag.brill import describe_template_sets, brill24 + + # some pre-built template sets taken from typical systems or publications are + # available. 
+
+    # defaults
+    baseline_backoff_tagger = baseline_backoff_tagger or REGEXP_TAGGER
+    if templates is None:
+        from nltk.tag.brill import describe_template_sets, brill24
+
+        # some pre-built template sets taken from typical systems or publications are
+        # available. Print a list with describe_template_sets()
+        # for instance:
+        templates = brill24()
+    (training_data, baseline_data, gold_data, testing_data) = _demo_prepare_data(
+        tagged_data, train, num_sents, randomize, separate_baseline_data
+    )
+
+    # creating (or reloading from cache) a baseline tagger (unigram tagger)
+    # this is just a mechanism for getting deterministic output from the baseline between
+    # python versions
+    if cache_baseline_tagger:
+        if not os.path.exists(cache_baseline_tagger):
+            baseline_tagger = UnigramTagger(
+                baseline_data, backoff=baseline_backoff_tagger
+            )
+            # pickles must be written and read in binary mode on Python 3
+            with open(cache_baseline_tagger, 'wb') as print_rules:
+                pickle.dump(baseline_tagger, print_rules)
+            print(
+                "Trained baseline tagger, pickled it to {0}".format(
+                    cache_baseline_tagger
+                )
+            )
+        with open(cache_baseline_tagger, "rb") as print_rules:
+            baseline_tagger = pickle.load(print_rules)
+        print("Reloaded pickled tagger from {0}".format(cache_baseline_tagger))
+    else:
+        baseline_tagger = UnigramTagger(baseline_data, backoff=baseline_backoff_tagger)
+        print("Trained baseline tagger")
+    if gold_data:
+        print(
+            "    Accuracy on test set: {0:0.4f}".format(
+                baseline_tagger.evaluate(gold_data)
+            )
+        )
+
+    # creating a Brill tagger
+    tbrill = time.time()
+    trainer = BrillTaggerTrainer(
+        baseline_tagger, templates, trace, ruleformat=ruleformat
+    )
+    print("Training tbl tagger...")
+    brill_tagger = trainer.train(training_data, max_rules, min_score, min_acc)
+    print("Trained tbl tagger in {0:0.2f} seconds".format(time.time() - tbrill))
+    if gold_data:
+        print("    Accuracy on test set: %.4f" % brill_tagger.evaluate(gold_data))
+
+    # printing the learned rules, if learned silently
+    if trace == 1:
+        print("\nLearned rules: ")
+        for (ruleno, rule) in enumerate(brill_tagger.rules(), 1):
+            print("{0:4d} {1:s}".format(ruleno, rule.format(ruleformat)))
+
+    # printing template statistics (optionally including comparison with the training data)
+    # note: if not separate_baseline_data, then baseline accuracy will be artificially high
+    if incremental_stats:
+        print(
+            "Incrementally tagging the test data, collecting individual rule statistics"
+        )
+        (taggedtest, teststats) = brill_tagger.batch_tag_incremental(
+            testing_data, gold_data
+        )
+        print("    Rule statistics collected")
+        if not separate_baseline_data:
+            print(
+                "WARNING: separate_baseline_data=False; the baseline accuracy "
+                "on the training data will be artificially high"
+            )
+        trainstats = brill_tagger.train_stats()
+        if template_stats:
+            brill_tagger.print_template_statistics(teststats)
+        if learning_curve_output:
+            _demo_plot(
+                learning_curve_output, teststats, trainstats, take=learning_curve_take
+            )
+            print("Wrote plot of learning curve to {0}".format(learning_curve_output))
+    else:
+        print("Tagging the test data")
+        taggedtest = brill_tagger.tag_sents(testing_data)
+        if template_stats:
+            brill_tagger.print_template_statistics()
+
+    # writing error analysis to file
+    if error_output is not None:
+        with open(error_output, 'w', encoding='utf-8') as f:
+            f.write('Errors for Brill Tagger %r\n\n' % serialize_output)
+            # error_list() returns strings; write them as text rather than utf-8 bytes
+            f.write('\n'.join(error_list(gold_data, taggedtest)) + '\n')
+        print("Wrote tagger errors including context to {0}".format(error_output))
+
+    # serializing the tagger to a pickle file and reloading (just to see it works)
+    if serialize_output is not None:
+        taggedtest = brill_tagger.tag_sents(testing_data)
+        with open(serialize_output, 'wb') as print_rules:
+            pickle.dump(brill_tagger, print_rules)
+        print("Wrote pickled tagger to {0}".format(serialize_output))
+        with open(serialize_output, "rb") as print_rules:
+            brill_tagger_reloaded = pickle.load(print_rules)
+        print("Reloaded pickled tagger from {0}".format(serialize_output))
+        # tag with the reloaded tagger so the comparison below is meaningful
+        taggedtest_reloaded = brill_tagger_reloaded.tag_sents(testing_data)
+        if taggedtest == taggedtest_reloaded:
+            print("Reloaded tagger tried on test set, results identical")
+        else:
+            print("PROBLEM: Reloaded tagger gave different results on test set")
+
+
+def _demo_prepare_data(
+    tagged_data, train, num_sents, randomize, separate_baseline_data
+):
+    # train is the proportion of data used in training; the rest is reserved
+    # for testing.
+    if tagged_data is None:
+        print("Loading tagged data from treebank... ")
+        tagged_data = treebank.tagged_sents()
+    if num_sents is None or len(tagged_data) <= num_sents:
+        num_sents = len(tagged_data)
+    if randomize:
+        random.seed(len(tagged_data))
+        # corpus views are read-only, so copy into a list before shuffling
+        tagged_data = list(tagged_data)
+        random.shuffle(tagged_data)
+    cutoff = int(num_sents * train)
+    training_data = tagged_data[:cutoff]
+    gold_data = tagged_data[cutoff:num_sents]
+    testing_data = [[t[0] for t in sent] for sent in gold_data]
+    if not separate_baseline_data:
+        baseline_data = training_data
+    else:
+        bl_cutoff = len(training_data) // 3
+        (baseline_data, training_data) = (
+            training_data[:bl_cutoff],
+            training_data[bl_cutoff:],
+        )
+    (trainseqs, traintokens) = corpus_size(training_data)
+    (testseqs, testtokens) = corpus_size(testing_data)
+    (bltrainseqs, bltraintokens) = corpus_size(baseline_data)
+    print("Read testing data ({0:d} sents/{1:d} wds)".format(testseqs, testtokens))
+    print("Read training data ({0:d} sents/{1:d} wds)".format(trainseqs, traintokens))
+    print(
+        "Read baseline data ({0:d} sents/{1:d} wds) {2:s}".format(
+            bltrainseqs,
+            bltraintokens,
+            "" if separate_baseline_data else "[reused the training set]",
+        )
+    )
+    return (training_data, baseline_data, gold_data, testing_data)
+
+
+def _demo_plot(learning_curve_output, teststats, trainstats=None, take=None):
+    testcurve = [teststats['initialerrors']]
+    for rulescore in teststats['rulescores']:
+        testcurve.append(testcurve[-1] - rulescore)
+    testcurve = [1 - x / teststats['tokencount'] for x in testcurve[:take]]
+
+    traincurve = [trainstats['initialerrors']]
+    for rulescore in trainstats['rulescores']:
+        traincurve.append(traincurve[-1] - rulescore)
+    traincurve = [1 - x / trainstats['tokencount'] for x in traincurve[:take]]
+
+    import matplotlib.pyplot as plt
+
+    r = list(range(len(testcurve)))
+    plt.plot(r, testcurve, r, traincurve)
+    plt.axis([None, None, None, 1.0])
+    plt.savefig(learning_curve_output)
+
+
+NN_CD_TAGGER = RegexpTagger([(r'^-?[0-9]+(\.[0-9]+)?$', 'CD'), (r'.*', 'NN')])
+
+REGEXP_TAGGER = RegexpTagger(
+    [
+        (r'^-?[0-9]+(\.[0-9]+)?$', 'CD'),  # cardinal numbers
+        (r'(The|the|A|a|An|an)$', 'AT'),  # articles
+        (r'.*able$', 'JJ'),  # adjectives
+        (r'.*ness$', 'NN'),  # nouns formed from adjectives
+        (r'.*ly$', 'RB'),  # adverbs
+        (r'.*s$', 'NNS'),  # plural nouns
+        (r'.*ing$', 'VBG'),  # gerunds
+        (r'.*ed$', 'VBD'),  # past tense verbs
+        (r'.*', 'NN'),  # nouns (default)
+    ]
+)
+
+
+def corpus_size(seqs):
+    return (len(seqs), sum(len(x) for x in seqs))
+
+
+if __name__ == '__main__':
+    demo_learning_curve()
diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tbl/erroranalysis.py b/venv.bak/lib/python3.7/site-packages/nltk/tbl/erroranalysis.py
new file mode 100644
index 0000000..c25d33d
--- /dev/null
+++ b/venv.bak/lib/python3.7/site-packages/nltk/tbl/erroranalysis.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: Transformation-based
learning +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Marcus Uneson +# based on previous (nltk2) version by +# Christopher Maloof, Edward Loper, Steven Bird +# URL: +# For license information, see LICENSE.TXT + +from __future__ import print_function + + +# returns a list of errors in string format + + +def error_list(train_sents, test_sents): + """ + Returns a list of human-readable strings indicating the errors in the + given tagging of the corpus. + + :param train_sents: The correct tagging of the corpus + :type train_sents: list(tuple) + :param test_sents: The tagged corpus + :type test_sents: list(tuple) + """ + hdr = ('%25s | %s | %s\n' + '-' * 26 + '+' + '-' * 24 + '+' + '-' * 26) % ( + 'left context', + 'word/test->gold'.center(22), + 'right context', + ) + errors = [hdr] + for (train_sent, test_sent) in zip(train_sents, test_sents): + for wordnum, (word, train_pos) in enumerate(train_sent): + test_pos = test_sent[wordnum][1] + if train_pos != test_pos: + left = ' '.join('%s/%s' % w for w in train_sent[:wordnum]) + right = ' '.join('%s/%s' % w for w in train_sent[wordnum + 1 :]) + mid = '%s/%s->%s' % (word, test_pos, train_pos) + errors.append( + '%25s | %s | %s' % (left[-25:], mid.center(22), right[:25]) + ) + + return errors diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tbl/feature.py b/venv.bak/lib/python3.7/site-packages/nltk/tbl/feature.py new file mode 100644 index 0000000..d9c6715 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tbl/feature.py @@ -0,0 +1,270 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Transformation-based learning +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Marcus Uneson +# based on previous (nltk2) version by +# Christopher Maloof, Edward Loper, Steven Bird +# URL: +# For license information, see LICENSE.TXT + +from __future__ import division, print_function, unicode_literals +from abc import ABCMeta, abstractmethod +from six import add_metaclass + + +@add_metaclass(ABCMeta) +class Feature(object): + """ + An abstract base class for Features. A Feature is a combination of + a specific property-computing method and a list of relative positions + to apply that method to. + + The property-computing method, M{extract_property(tokens, index)}, + must be implemented by every subclass. It extracts or computes a specific + property for the token at the current index. Typical extract_property() + methods return features such as the token text or tag; but more involved + methods may consider the entire sequence M{tokens} and + for instance compute the length of the sentence the token belongs to. + + In addition, the subclass may have a PROPERTY_NAME, which is how + it will be printed (in Rules and Templates, etc). If not given, defaults + to the classname. + + """ + + json_tag = 'nltk.tbl.Feature' + PROPERTY_NAME = None + + def __init__(self, positions, end=None): + """ + Construct a Feature which may apply at C{positions}. 
+ + #For instance, importing some concrete subclasses (Feature is abstract) + >>> from nltk.tag.brill import Word, Pos + + #Feature Word, applying at one of [-2, -1] + >>> Word([-2,-1]) + Word([-2, -1]) + + #Positions need not be contiguous + >>> Word([-2,-1, 1]) + Word([-2, -1, 1]) + + #Contiguous ranges can alternatively be specified giving the + #two endpoints (inclusive) + >>> Pos(-3, -1) + Pos([-3, -2, -1]) + + #In two-arg form, start <= end is enforced + >>> Pos(2, 1) + Traceback (most recent call last): + File "", line 1, in + File "nltk/tbl/template.py", line 306, in __init__ + raise TypeError + ValueError: illegal interval specification: (start=2, end=1) + + :type positions: list of int + :param positions: the positions at which this features should apply + :raises ValueError: illegal position specifications + + An alternative calling convention, for contiguous positions only, + is Feature(start, end): + + :type start: int + :param start: start of range where this feature should apply + :type end: int + :param end: end of range (NOTE: inclusive!) where this feature should apply + + """ + self.positions = None # to avoid warnings + if end is None: + self.positions = tuple(sorted(set(int(i) for i in positions))) + else: # positions was actually not a list, but only the start index + try: + if positions > end: + raise TypeError + self.positions = tuple(range(positions, end + 1)) + except TypeError: + # let any kind of erroneous spec raise ValueError + raise ValueError( + "illegal interval specification: (start={0}, end={1})".format( + positions, end + ) + ) + + # set property name given in subclass, or otherwise name of subclass + self.PROPERTY_NAME = self.__class__.PROPERTY_NAME or self.__class__.__name__ + + def encode_json_obj(self): + return self.positions + + @classmethod + def decode_json_obj(cls, obj): + positions = obj + return cls(positions) + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, list(self.positions)) + + @classmethod + def expand(cls, starts, winlens, excludezero=False): + """ + Return a list of features, one for each start point in starts + and for each window length in winlen. 
If excludezero is True, + no Features containing 0 in its positions will be generated + (many tbl trainers have a special representation for the + target feature at [0]) + + For instance, importing a concrete subclass (Feature is abstract) + >>> from nltk.tag.brill import Word + + First argument gives the possible start positions, second the + possible window lengths + >>> Word.expand([-3,-2,-1], [1]) + [Word([-3]), Word([-2]), Word([-1])] + + >>> Word.expand([-2,-1], [1]) + [Word([-2]), Word([-1])] + + >>> Word.expand([-3,-2,-1], [1,2]) + [Word([-3]), Word([-2]), Word([-1]), Word([-3, -2]), Word([-2, -1])] + + >>> Word.expand([-2,-1], [1]) + [Word([-2]), Word([-1])] + + a third optional argument excludes all Features whose positions contain zero + >>> Word.expand([-2,-1,0], [1,2], excludezero=False) + [Word([-2]), Word([-1]), Word([0]), Word([-2, -1]), Word([-1, 0])] + + >>> Word.expand([-2,-1,0], [1,2], excludezero=True) + [Word([-2]), Word([-1]), Word([-2, -1])] + + All window lengths must be positive + >>> Word.expand([-2,-1], [0]) + Traceback (most recent call last): + File "", line 1, in + File "nltk/tag/tbl/template.py", line 371, in expand + :param starts: where to start looking for Feature + ValueError: non-positive window length in [0] + + :param starts: where to start looking for Feature + :type starts: list of ints + :param winlens: window lengths where to look for Feature + :type starts: list of ints + :param excludezero: do not output any Feature with 0 in any of its positions. + :type excludezero: bool + :returns: list of Features + :raises ValueError: for non-positive window lengths + """ + if not all(x > 0 for x in winlens): + raise ValueError("non-positive window length in {0}".format(winlens)) + xs = (starts[i : i + w] for w in winlens for i in range(len(starts) - w + 1)) + return [cls(x) for x in xs if not (excludezero and 0 in x)] + + def issuperset(self, other): + """ + Return True if this Feature always returns True when other does + + More precisely, return True if this feature refers to the same property as other; + and this Feature looks at all positions that other does (and possibly + other positions in addition). + + #For instance, importing a concrete subclass (Feature is abstract) + >>> from nltk.tag.brill import Word, Pos + + >>> Word([-3,-2,-1]).issuperset(Word([-3,-2])) + True + + >>> Word([-3,-2,-1]).issuperset(Word([-3,-2, 0])) + False + + #Feature subclasses must agree + >>> Word([-3,-2,-1]).issuperset(Pos([-3,-2])) + False + + :param other: feature with which to compare + :type other: (subclass of) Feature + :return: True if this feature is superset, otherwise False + :rtype: bool + + + """ + return self.__class__ is other.__class__ and set(self.positions) >= set( + other.positions + ) + + def intersects(self, other): + """ + Return True if the positions of this Feature intersects with those of other + + More precisely, return True if this feature refers to the same property as other; + and there is some overlap in the positions they look at. 
+ + #For instance, importing a concrete subclass (Feature is abstract) + >>> from nltk.tag.brill import Word, Pos + + >>> Word([-3,-2,-1]).intersects(Word([-3,-2])) + True + + >>> Word([-3,-2,-1]).intersects(Word([-3,-2, 0])) + True + + >>> Word([-3,-2,-1]).intersects(Word([0])) + False + + #Feature subclasses must agree + >>> Word([-3,-2,-1]).intersects(Pos([-3,-2])) + False + + :param other: feature with which to compare + :type other: (subclass of) Feature + :return: True if feature classes agree and there is some overlap in the positions they look at + :rtype: bool + """ + + return bool( + ( + self.__class__ is other.__class__ + and set(self.positions) & set(other.positions) + ) + ) + + # Rich comparisons for Features. With @functools.total_ordering (Python 2.7+), + # it will be enough to define __lt__ and __eq__ + def __eq__(self, other): + return self.__class__ is other.__class__ and self.positions == other.positions + + def __lt__(self, other): + return ( + self.__class__.__name__ < other.__class__.__name__ + or + # self.positions is a sorted tuple of ints + self.positions < other.positions + ) + + def __ne__(self, other): + return not (self == other) + + def __gt__(self, other): + return other < self + + def __ge__(self, other): + return not self < other + + def __le__(self, other): + return self < other or self == other + + @staticmethod + @abstractmethod + def extract_property(tokens, index): + """ + Any subclass of Feature must define static method extract_property(tokens, index) + + :param tokens: the sequence of tokens + :type tokens: list of tokens + :param index: the current index + :type index: int + :return: feature value + :rtype: any (but usually scalar) + """ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tbl/rule.py b/venv.bak/lib/python3.7/site-packages/nltk/tbl/rule.py new file mode 100644 index 0000000..6d70954 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tbl/rule.py @@ -0,0 +1,328 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Transformation-based learning +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Marcus Uneson +# based on previous (nltk2) version by +# Christopher Maloof, Edward Loper, Steven Bird +# URL: +# For license information, see LICENSE.TXT + +from __future__ import print_function +from abc import ABCMeta, abstractmethod +from six import add_metaclass + +from nltk.compat import python_2_unicode_compatible, unicode_repr +from nltk import jsontags + + +###################################################################### +# Tag Rules +###################################################################### +@add_metaclass(ABCMeta) +class TagRule(object): + """ + An interface for tag transformations on a tagged corpus, as + performed by tbl taggers. Each transformation finds all tokens + in the corpus that are tagged with a specific original tag and + satisfy a specific condition, and replaces their tags with a + replacement tag. For any given transformation, the original + tag, replacement tag, and condition are fixed. Conditions may + depend on the token under consideration, as well as any other + tokens in the corpus. + + Tag rules must be comparable and hashable. 
+ """ + + def __init__(self, original_tag, replacement_tag): + + self.original_tag = original_tag + """The tag which this TagRule may cause to be replaced.""" + + self.replacement_tag = replacement_tag + """The tag with which this TagRule may replace another tag.""" + + def apply(self, tokens, positions=None): + """ + Apply this rule at every position in positions where it + applies to the given sentence. I.e., for each position p + in *positions*, if *tokens[p]* is tagged with this rule's + original tag, and satisfies this rule's condition, then set + its tag to be this rule's replacement tag. + + :param tokens: The tagged sentence + :type tokens: list(tuple(str, str)) + :type positions: list(int) + :param positions: The positions where the transformation is to + be tried. If not specified, try it at all positions. + :return: The indices of tokens whose tags were changed by this + rule. + :rtype: int + """ + if positions is None: + positions = list(range(len(tokens))) + + # Determine the indices at which this rule applies. + change = [i for i in positions if self.applies(tokens, i)] + + # Make the changes. Note: this must be done in a separate + # step from finding applicable locations, since we don't want + # the rule to interact with itself. + for i in change: + tokens[i] = (tokens[i][0], self.replacement_tag) + + return change + + @abstractmethod + def applies(self, tokens, index): + """ + :return: True if the rule would change the tag of + ``tokens[index]``, False otherwise + :rtype: bool + :param tokens: A tagged sentence + :type tokens: list(str) + :param index: The index to check + :type index: int + """ + + # Rules must be comparable and hashable for the algorithm to work + def __eq__(self, other): + raise TypeError("Rules must implement __eq__()") + + def __ne__(self, other): + raise TypeError("Rules must implement __ne__()") + + def __hash__(self): + raise TypeError("Rules must implement __hash__()") + + +@python_2_unicode_compatible +@jsontags.register_tag +class Rule(TagRule): + """ + A Rule checks the current corpus position for a certain set of conditions; + if they are all fulfilled, the Rule is triggered, meaning that it + will change tag A to tag B. For other tags than A, nothing happens. + + The conditions are parameters to the Rule instance. Each condition is a feature-value pair, + with a set of positions to check for the value of the corresponding feature. + Conceptually, the positions are joined by logical OR, and the feature set by logical AND. + + More formally, the Rule is then applicable to the M{n}th token iff: + + - The M{n}th token is tagged with the Rule's original tag; and + - For each (Feature(positions), M{value}) tuple: + - The value of Feature of at least one token in {n+p for p in positions} + is M{value}. + + """ + + json_tag = 'nltk.tbl.Rule' + + def __init__(self, templateid, original_tag, replacement_tag, conditions): + """ + Construct a new Rule that changes a token's tag from + C{original_tag} to C{replacement_tag} if all of the properties + specified in C{conditions} hold. + + @type templateid: string + @param templateid: the template id (a zero-padded string, '001' etc, + so it will sort nicely) + + @type conditions: C{iterable} of C{Feature} + @param conditions: A list of Feature(positions), + each of which specifies that the property (computed by + Feature.extract_property()) of at least one + token in M{n} + p in positions is C{value}. 
+ + """ + TagRule.__init__(self, original_tag, replacement_tag) + self._conditions = conditions + self.templateid = templateid + + def encode_json_obj(self): + return { + 'templateid': self.templateid, + 'original': self.original_tag, + 'replacement': self.replacement_tag, + 'conditions': self._conditions, + } + + @classmethod + def decode_json_obj(cls, obj): + return cls( + obj['templateid'], obj['original'], obj['replacement'], obj['conditions'] + ) + + def applies(self, tokens, index): + # Inherit docs from TagRule + + # Does the given token have this Rule's "original tag"? + if tokens[index][1] != self.original_tag: + return False + + # Check to make sure that every condition holds. + for (feature, val) in self._conditions: + + # Look for *any* token that satisfies the condition. + for pos in feature.positions: + if not (0 <= index + pos < len(tokens)): + continue + if feature.extract_property(tokens, index + pos) == val: + break + else: + # No token satisfied the condition; return false. + return False + + # Every condition checked out, so the Rule is applicable. + return True + + def __eq__(self, other): + return self is other or ( + other is not None + and other.__class__ == self.__class__ + and self.original_tag == other.original_tag + and self.replacement_tag == other.replacement_tag + and self._conditions == other._conditions + ) + + def __ne__(self, other): + return not (self == other) + + def __hash__(self): + + # Cache our hash value (justified by profiling.) + try: + return self.__hash + except AttributeError: + self.__hash = hash(repr(self)) + return self.__hash + + def __repr__(self): + # Cache the repr (justified by profiling -- this is used as + # a sort key when deterministic=True.) + try: + return self.__repr + except AttributeError: + self.__repr = "{0}('{1}', {2}, {3}, [{4}])".format( + self.__class__.__name__, + self.templateid, + unicode_repr(self.original_tag), + unicode_repr(self.replacement_tag), + # list(self._conditions) would be simpler but will not generate + # the same Rule.__repr__ in python 2 and 3 and thus break some tests + ', '.join( + "({0},{1})".format(f, unicode_repr(v)) + for (f, v) in self._conditions + ), + ) + + return self.__repr + + def __str__(self): + def _condition_to_logic(feature, value): + """ + Return a compact, predicate-logic styled string representation + of the given condition. + """ + return '{0}:{1}@[{2}]'.format( + feature.PROPERTY_NAME, + value, + ",".join(str(w) for w in feature.positions), + ) + + conditions = ' & '.join( + [_condition_to_logic(f, v) for (f, v) in self._conditions] + ) + s = '{0}->{1} if {2}'.format( + self.original_tag, self.replacement_tag, conditions + ) + + return s + + def format(self, fmt): + """ + Return a string representation of this rule. 
+ + >>> from nltk.tbl.rule import Rule + >>> from nltk.tag.brill import Pos + + >>> r = Rule("23", "VB", "NN", [(Pos([-2,-1]), 'DT')]) + + r.format("str") == str(r) + True + >>> r.format("str") + 'VB->NN if Pos:DT@[-2,-1]' + + r.format("repr") == repr(r) + True + >>> r.format("repr") + "Rule('23', 'VB', 'NN', [(Pos([-2, -1]),'DT')])" + + >>> r.format("verbose") + 'VB -> NN if the Pos of words i-2...i-1 is "DT"' + + >>> r.format("not_found") + Traceback (most recent call last): + File "", line 1, in + File "nltk/tbl/rule.py", line 256, in format + raise ValueError("unknown rule format spec: {0}".format(fmt)) + ValueError: unknown rule format spec: not_found + >>> + + :param fmt: format specification + :type fmt: str + :return: string representation + :rtype: str + """ + if fmt == "str": + return self.__str__() + elif fmt == "repr": + return self.__repr__() + elif fmt == "verbose": + return self._verbose_format() + else: + raise ValueError("unknown rule format spec: {0}".format(fmt)) + + def _verbose_format(self): + """ + Return a wordy, human-readable string representation + of the given rule. + + Not sure how useful this is. + """ + + def condition_to_str(feature, value): + return 'the %s of %s is "%s"' % ( + feature.PROPERTY_NAME, + range_to_str(feature.positions), + value, + ) + + def range_to_str(positions): + if len(positions) == 1: + p = positions[0] + if p == 0: + return 'this word' + if p == -1: + return 'the preceding word' + elif p == 1: + return 'the following word' + elif p < 0: + return 'word i-%d' % -p + elif p > 0: + return 'word i+%d' % p + else: + # for complete compatibility with the wordy format of nltk2 + mx = max(positions) + mn = min(positions) + if mx - mn == len(positions) - 1: + return 'words i%+d...i%+d' % (mn, mx) + else: + return 'words {%s}' % (",".join("i%+d" % d for d in positions),) + + replacement = '%s -> %s' % (self.original_tag, self.replacement_tag) + conditions = (' if ' if self._conditions else "") + ', and '.join( + condition_to_str(f, v) for (f, v) in self._conditions + ) + return replacement + conditions diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tbl/template.py b/venv.bak/lib/python3.7/site-packages/nltk/tbl/template.py new file mode 100644 index 0000000..b0556ed --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tbl/template.py @@ -0,0 +1,322 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Transformation-based learning +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Marcus Uneson +# based on previous (nltk2) version by +# Christopher Maloof, Edward Loper, Steven Bird +# URL: +# For license information, see LICENSE.TXT + +from __future__ import print_function +from abc import ABCMeta, abstractmethod +from six import add_metaclass +import itertools as it +from nltk.tbl.feature import Feature +from nltk.tbl.rule import Rule + + +@add_metaclass(ABCMeta) +class BrillTemplateI(object): + """ + An interface for generating lists of transformational rules that + apply at given sentence positions. ``BrillTemplateI`` is used by + ``Brill`` training algorithms to generate candidate rules. + """ + + @abstractmethod + def applicable_rules(self, tokens, i, correctTag): + """ + Return a list of the transformational rules that would correct + the *i*th subtoken's tag in the given token. In particular, + return a list of zero or more rules that would change + *tokens*[i][1] to *correctTag*, if applied to *token*[i]. 
+ + If the *i*th token already has the correct tag (i.e., if + tagged_tokens[i][1] == correctTag), then + ``applicable_rules()`` should return the empty list. + + :param tokens: The tagged tokens being tagged. + :type tokens: list(tuple) + :param i: The index of the token whose tag should be corrected. + :type i: int + :param correctTag: The correct tag for the *i*th token. + :type correctTag: any + :rtype: list(BrillRule) + """ + + @abstractmethod + def get_neighborhood(self, token, index): + """ + Returns the set of indices *i* such that + ``applicable_rules(token, i, ...)`` depends on the value of + the *index*th token of *token*. + + This method is used by the "fast" Brill tagger trainer. + + :param token: The tokens being tagged. + :type token: list(tuple) + :param index: The index whose neighborhood should be returned. + :type index: int + :rtype: set + """ + + +class Template(BrillTemplateI): + """ + A tbl Template that generates a list of L{Rule}s that apply at a given sentence + position. In particular, each C{Template} is parameterized by a list of + independent features (a combination of a specific + property to extract and a list C{L} of relative positions at which to extract + it) and generates all Rules that: + + - use the given features, each at its own independent position; and + - are applicable to the given token. + """ + + ALLTEMPLATES = [] + # record a unique id of form "001", for each template created + # _ids = it.count(0) + + def __init__(self, *features): + + """ + Construct a Template for generating Rules. + + Takes a list of Features. A C{Feature} is a combination + of a specific property and its relative positions and should be + a subclass of L{nltk.tbl.feature.Feature}. + + An alternative calling convention (kept for backwards compatibility, + but less expressive as it only permits one feature type) is + Template(Feature, (start1, end1), (start2, end2), ...) + In new code, that would be better written + Template(Feature(start1, end1), Feature(start2, end2), ...) + + #For instance, importing some features + >>> from nltk.tbl.template import Template + >>> from nltk.tag.brill import Word, Pos + + #create some features + + >>> wfeat1, wfeat2, pfeat = (Word([-1]), Word([1,2]), Pos([-2,-1])) + + #Create a single-feature template + >>> Template(wfeat1) + Template(Word([-1])) + + #or a two-feature one + >>> Template(wfeat1, wfeat2) + Template(Word([-1]),Word([1, 2])) + + #or a three-feature one with two different feature types + >>> Template(wfeat1, wfeat2, pfeat) + Template(Word([-1]),Word([1, 2]),Pos([-2, -1])) + + #deprecated api: Feature subclass, followed by list of (start,end) pairs + #(permits only a single Feature) + >>> Template(Word, (-2,-1), (0,0)) + Template(Word([-2, -1]),Word([0])) + + #incorrect specification raises TypeError + >>> Template(Word, (-2,-1), Pos, (0,0)) + Traceback (most recent call last): + File "", line 1, in + File "nltk/tag/tbl/template.py", line 143, in __init__ + raise TypeError( + TypeError: expected either Feature1(args), Feature2(args), ... or Feature, (start1, end1), (start2, end2), ... + + :type features: list of Features + :param features: the features to build this Template on + """ + # determine the calling form: either + # Template(Feature, args1, [args2, ...)] + # Template(Feature1(args), Feature2(args), ...) 
+ if all(isinstance(f, Feature) for f in features): + self._features = features + elif issubclass(features[0], Feature) and all( + isinstance(a, tuple) for a in features[1:] + ): + self._features = [features[0](*tp) for tp in features[1:]] + else: + raise TypeError( + "expected either Feature1(args), Feature2(args), ... or Feature, (start1, end1), (start2, end2), ..." + ) + self.id = "{0:03d}".format(len(self.ALLTEMPLATES)) + self.ALLTEMPLATES.append(self) + + def __repr__(self): + return "%s(%s)" % ( + self.__class__.__name__, + ",".join([str(f) for f in self._features]), + ) + + def applicable_rules(self, tokens, index, correct_tag): + if tokens[index][1] == correct_tag: + return [] + + # For each of this Template's features, find the conditions + # that are applicable for the given token. + # Then, generate one Rule for each combination of features + # (the crossproduct of the conditions). + + applicable_conditions = self._applicable_conditions(tokens, index) + xs = list(it.product(*applicable_conditions)) + return [Rule(self.id, tokens[index][1], correct_tag, tuple(x)) for x in xs] + + def _applicable_conditions(self, tokens, index): + """ + :returns: A set of all conditions for rules + that are applicable to C{tokens[index]}. + """ + conditions = [] + + for feature in self._features: + conditions.append([]) + for pos in feature.positions: + if not (0 <= index + pos < len(tokens)): + continue + value = feature.extract_property(tokens, index + pos) + conditions[-1].append((feature, value)) + return conditions + + def get_neighborhood(self, tokens, index): + # inherit docs from BrillTemplateI + + # applicable_rules(tokens, index, ...) depends on index. + neighborhood = set([index]) # set literal for python 2.7+ + + # applicable_rules(tokens, i, ...) depends on index if + # i+start < index <= i+end. + + allpositions = [0] + [p for feat in self._features for p in feat.positions] + start, end = min(allpositions), max(allpositions) + s = max(0, index + (-end)) + e = min(index + (-start) + 1, len(tokens)) + for i in range(s, e): + neighborhood.add(i) + return neighborhood + + @classmethod + def expand(cls, featurelists, combinations=None, skipintersecting=True): + + """ + Factory method to mass generate Templates from a list L of lists of Features. + + #With combinations=(k1, k2), the function will in all possible ways choose k1 ... k2 + #of the sublists in L; it will output all Templates formed by the Cartesian product + #of this selection, with duplicates and other semantically equivalent + #forms removed. Default for combinations is (1, len(L)). + + The feature lists may have been specified + manually, or generated from Feature.expand(). 
For instance, + + >>> from nltk.tbl.template import Template + >>> from nltk.tag.brill import Word, Pos + + #creating some features + >>> (wd_0, wd_01) = (Word([0]), Word([0,1])) + + >>> (pos_m2, pos_m33) = (Pos([-2]), Pos([3-2,-1,0,1,2,3])) + + >>> list(Template.expand([[wd_0], [pos_m2]])) + [Template(Word([0])), Template(Pos([-2])), Template(Pos([-2]),Word([0]))] + + >>> list(Template.expand([[wd_0, wd_01], [pos_m2]])) + [Template(Word([0])), Template(Word([0, 1])), Template(Pos([-2])), Template(Pos([-2]),Word([0])), Template(Pos([-2]),Word([0, 1]))] + + #note: with Feature.expand(), it is very easy to generate more templates + #than your system can handle -- for instance, + >>> wordtpls = Word.expand([-2,-1,0,1], [1,2], excludezero=False) + >>> len(wordtpls) + 7 + + >>> postpls = Pos.expand([-3,-2,-1,0,1,2], [1,2,3], excludezero=True) + >>> len(postpls) + 9 + + #and now the Cartesian product of all non-empty combinations of two wordtpls and + #two postpls, with semantic equivalents removed + >>> templates = list(Template.expand([wordtpls, wordtpls, postpls, postpls])) + >>> len(templates) + 713 + + + will return a list of eight templates + Template(Word([0])), + Template(Word([0, 1])), + Template(Pos([-2])), + Template(Pos([-1])), + Template(Pos([-2]),Word([0])), + Template(Pos([-1]),Word([0])), + Template(Pos([-2]),Word([0, 1])), + Template(Pos([-1]),Word([0, 1]))] + + + #Templates where one feature is a subset of another, such as + #Template(Word([0,1]), Word([1]), will not appear in the output. + #By default, this non-subset constraint is tightened to disjointness: + #Templates of type Template(Word([0,1]), Word([1,2]) will also be filtered out. + #With skipintersecting=False, then such Templates are allowed + + WARNING: this method makes it very easy to fill all your memory when training + generated templates on any real-world corpus + + :param featurelists: lists of Features, whose Cartesian product will return a set of Templates + :type featurelists: list of (list of Features) + :param combinations: given n featurelists: if combinations=k, all generated Templates will have + k features; if combinations=(k1,k2) they will have k1..k2 features; if None, defaults to 1..n + :type combinations: None, int, or (int, int) + :param skipintersecting: if True, do not output intersecting Templates (non-disjoint positions for some feature) + :type skipintersecting: bool + :returns: generator of Templates + + """ + + def nonempty_powerset(xs): # xs is a list + # itertools docnonempty_powerset([1,2,3]) --> (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3) + + # find the correct tuple given combinations, one of {None, k, (k1,k2)} + k = combinations # for brevity + combrange = ( + (1, len(xs) + 1) + if k is None + else (k, k + 1) # n over 1 .. n over n (all non-empty combinations) + if isinstance(k, int) + else (k[0], k[1] + 1) # n over k (only + ) # n over k1, n over k1+1... 
n over k2 + return it.chain.from_iterable( + it.combinations(xs, r) for r in range(*combrange) + ) + + seentemplates = set() + for picks in nonempty_powerset(featurelists): + for pick in it.product(*picks): + if any( + i != j and x.issuperset(y) + for (i, x) in enumerate(pick) + for (j, y) in enumerate(pick) + ): + continue + if skipintersecting and any( + i != j and x.intersects(y) + for (i, x) in enumerate(pick) + for (j, y) in enumerate(pick) + ): + continue + thistemplate = cls(*sorted(pick)) + strpick = str(thistemplate) + #!!FIXME --this is hackish + if strpick in seentemplates: # already added + cls._poptemplate() + continue + seentemplates.add(strpick) + yield thistemplate + + @classmethod + def _cleartemplates(cls): + cls.ALLTEMPLATES = [] + + @classmethod + def _poptemplate(cls): + return cls.ALLTEMPLATES.pop() if cls.ALLTEMPLATES else None diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/test/__init__.py new file mode 100644 index 0000000..107774e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/__init__.py @@ -0,0 +1,18 @@ +# Natural Language Toolkit: Unit Tests +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +Unit tests for the NLTK modules. These tests are intended to ensure +that source code changes don't accidentally introduce bugs. +For instructions, please see: + +../../web/dev/local_testing.rst + +https://github.com/nltk/nltk/blob/develop/web/dev/local_testing.rst + + +""" diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..102d9e1 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/all.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/all.cpython-37.pyc new file mode 100644 index 0000000..563b683 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/all.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/childes_fixt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/childes_fixt.cpython-37.pyc new file mode 100644 index 0000000..46841aa Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/childes_fixt.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/classify_fixt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/classify_fixt.cpython-37.pyc new file mode 100644 index 0000000..149391f Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/classify_fixt.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/nltk/test/__pycache__/compat_fixt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/compat_fixt.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/nltk/test/__pycache__/compat_fixt.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/compat_fixt.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/corpus_fixt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/corpus_fixt.cpython-37.pyc new 
file mode 100644 index 0000000..36141ae Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/corpus_fixt.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/discourse_fixt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/discourse_fixt.cpython-37.pyc new file mode 100644 index 0000000..88a3aab Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/discourse_fixt.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/nltk/test/__pycache__/doctest_nose_plugin.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/doctest_nose_plugin.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/nltk/test/__pycache__/doctest_nose_plugin.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/doctest_nose_plugin.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/gensim_fixt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/gensim_fixt.cpython-37.pyc new file mode 100644 index 0000000..a6c4dc9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/gensim_fixt.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/gluesemantics_malt_fixt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/gluesemantics_malt_fixt.cpython-37.pyc new file mode 100644 index 0000000..b60b65d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/gluesemantics_malt_fixt.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/inference_fixt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/inference_fixt.cpython-37.pyc new file mode 100644 index 0000000..da756ca Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/inference_fixt.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/nonmonotonic_fixt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/nonmonotonic_fixt.cpython-37.pyc new file mode 100644 index 0000000..16e5daf Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/nonmonotonic_fixt.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/portuguese_en_fixt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/portuguese_en_fixt.cpython-37.pyc new file mode 100644 index 0000000..11f6917 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/portuguese_en_fixt.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/probability_fixt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/probability_fixt.cpython-37.pyc new file mode 100644 index 0000000..0aac1e4 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/probability_fixt.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/runtests.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/runtests.cpython-37.pyc new file mode 100644 index 0000000..76a0773 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/runtests.cpython-37.pyc differ diff --git 
a/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/segmentation_fixt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/segmentation_fixt.cpython-37.pyc new file mode 100644 index 0000000..c3521ca Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/segmentation_fixt.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/semantics_fixt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/semantics_fixt.cpython-37.pyc new file mode 100644 index 0000000..4e15e5b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/semantics_fixt.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/translate_fixt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/translate_fixt.cpython-37.pyc new file mode 100644 index 0000000..a1cda72 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/translate_fixt.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/wordnet_fixt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/wordnet_fixt.cpython-37.pyc new file mode 100644 index 0000000..9c4e1a5 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/__pycache__/wordnet_fixt.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/all.py b/venv.bak/lib/python3.7/site-packages/nltk/test/all.py new file mode 100644 index 0000000..c48e52a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/all.py @@ -0,0 +1,24 @@ +"""Test suite that runs all NLTK tests. + +This module, `nltk.test.all`, is named as the NLTK ``test_suite`` in the +project's ``setup-eggs.py`` file. Here, we create a test suite that +runs all of our doctests, and return it for processing by the setuptools +test harness. + +""" +import doctest, unittest +from glob import glob +import os.path + + +def additional_tests(): + # print "here-000000000000000" + # print "-----", glob(os.path.join(os.path.dirname(__file__), '*.doctest')) + dir = os.path.dirname(__file__) + paths = glob(os.path.join(dir, '*.doctest')) + files = [os.path.basename(path) for path in paths] + return unittest.TestSuite([doctest.DocFileSuite(file) for file in files]) + + +# if os.path.split(path)[-1] != 'index.rst' +# skips time-dependent doctest in index.rst diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/bleu.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/bleu.doctest new file mode 100644 index 0000000..e5ed074 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/bleu.doctest @@ -0,0 +1,14 @@ +========== +BLEU tests +========== + +>>> from nltk.translate import bleu + +If the candidate has no alignment to any of the references, the BLEU score is 0. + +>>> bleu( +... ['The candidate has no alignment to any of the references'.split()], +... 'John loves Mary'.split(), +... [1], +... ) +0 diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/bnc.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/bnc.doctest new file mode 100644 index 0000000..e16f8a1 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/bnc.doctest @@ -0,0 +1,60 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. 
For license information, see LICENSE.TXT + + >>> import os.path + + >>> from nltk.corpus.reader import BNCCorpusReader + >>> import nltk.test + + >>> root = os.path.dirname(nltk.test.__file__) + >>> bnc = BNCCorpusReader(root=root, fileids='FX8.xml') + +Checking the word access. +------------------------- + + >>> len(bnc.words()) + 151 + + >>> bnc.words()[:6] + ['Ah', 'there', 'we', 'are', ',', '.'] + >>> bnc.words(stem=True)[:6] + ['ah', 'there', 'we', 'be', ',', '.'] + + >>> bnc.tagged_words()[:6] + [('Ah', 'INTERJ'), ('there', 'ADV'), ('we', 'PRON'), ('are', 'VERB'), (',', 'PUN'), ('.', 'PUN')] + + >>> bnc.tagged_words(c5=True)[:6] + [('Ah', 'ITJ'), ('there', 'AV0'), ('we', 'PNP'), ('are', 'VBB'), (',', 'PUN'), ('.', 'PUN')] + +Testing access to the sentences. +-------------------------------- + + >>> len(bnc.sents()) + 15 + + >>> bnc.sents()[0] + ['Ah', 'there', 'we', 'are', ',', '.'] + >>> bnc.sents(stem=True)[0] + ['ah', 'there', 'we', 'be', ',', '.'] + + >>> bnc.tagged_sents()[0] + [('Ah', 'INTERJ'), ('there', 'ADV'), ('we', 'PRON'), ('are', 'VERB'), (',', 'PUN'), ('.', 'PUN')] + >>> bnc.tagged_sents(c5=True)[0] + [('Ah', 'ITJ'), ('there', 'AV0'), ('we', 'PNP'), ('are', 'VBB'), (',', 'PUN'), ('.', 'PUN')] + +A not lazy loader. +------------------ + + >>> eager = BNCCorpusReader(root=root, fileids=r'FX8.xml', lazy=False) + + >>> len(eager.words()) + 151 + >>> eager.words(stem=True)[6:17] + ['right', 'abdominal', 'wound', ',', 'she', 'be', 'a', 'wee', 'bit', 'confuse', '.'] + + >>> eager.tagged_words()[6:11] + [('Right', 'ADV'), ('abdominal', 'ADJ'), ('wound', 'SUBST'), (',', 'PUN'), ('she', 'PRON')] + >>> eager.tagged_words(c5=True)[6:17] + [('Right', 'AV0'), ('abdominal', 'AJ0'), ('wound', 'NN1'), (',', 'PUN'), ('she', 'PNP'), ("'s", 'VBZ'), ('a', 'AT0'), ('wee', 'AJ0-NN1'), ('bit', 'NN1'), ('confused', 'VVN-AJ0'), ('.', 'PUN')] + >>> len(eager.sents()) + 15 diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/ccg.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/ccg.doctest new file mode 100644 index 0000000..cc0ad49 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/ccg.doctest @@ -0,0 +1,376 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +============================== +Combinatory Categorial Grammar +============================== + +Relative Clauses +---------------- + + >>> from nltk.ccg import chart, lexicon + +Construct a lexicon: + + >>> lex = lexicon.parseLexicon(''' + ... :- S, NP, N, VP + ... + ... Det :: NP/N + ... Pro :: NP + ... Modal :: S\\NP/VP + ... + ... TV :: VP/NP + ... DTV :: TV/NP + ... + ... the => Det + ... + ... that => Det + ... that => NP + ... + ... I => Pro + ... you => Pro + ... we => Pro + ... + ... chef => N + ... cake => N + ... children => N + ... dough => N + ... + ... will => Modal + ... should => Modal + ... might => Modal + ... must => Modal + ... + ... and => var\\.,var/.,var + ... + ... to => VP[to]/VP + ... + ... without => (VP\\VP)/VP[ing] + ... + ... be => TV + ... cook => TV + ... eat => TV + ... + ... cooking => VP[ing]/NP + ... + ... give => DTV + ... + ... is => (S\\NP)/NP + ... prefer => (S\\NP)/NP + ... + ... which => (N\\N)/(S/NP) + ... + ... persuade => (VP/VP[to])/NP + ... ''') + + >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet) + >>> for parse in parser.parse("you prefer that cake".split()): + ... chart.printCCGDerivation(parse) + ... break + ... 
+ you prefer that cake + NP ((S\NP)/NP) (NP/N) N + --------------> + NP + ---------------------------> + (S\NP) + --------------------------------< + S + + >>> for parse in parser.parse("that is the cake which you prefer".split()): + ... chart.printCCGDerivation(parse) + ... break + ... + that is the cake which you prefer + NP ((S\NP)/NP) (NP/N) N ((N\N)/(S/NP)) NP ((S\NP)/NP) + ----->T + (S/(S\NP)) + ------------------>B + (S/NP) + ----------------------------------> + (N\N) + ----------------------------------------< + N + ------------------------------------------------> + NP + -------------------------------------------------------------> + (S\NP) + -------------------------------------------------------------------< + S + + +Some other sentences to try: +"that is the cake which we will persuade the chef to cook" +"that is the cake which we will persuade the chef to give the children" + + >>> sent = "that is the dough which you will eat without cooking".split() + >>> nosub_parser = chart.CCGChartParser(lex, chart.ApplicationRuleSet + + ... chart.CompositionRuleSet + chart.TypeRaiseRuleSet) + +Without Substitution (no output) + + >>> for parse in nosub_parser.parse(sent): + ... chart.printCCGDerivation(parse) + +With Substitution: + + >>> for parse in parser.parse(sent): + ... chart.printCCGDerivation(parse) + ... break + ... + that is the dough which you will eat without cooking + NP ((S\NP)/NP) (NP/N) N ((N\N)/(S/NP)) NP ((S\NP)/VP) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP) + ----->T + (S/(S\NP)) + ------------------------------------->B + ((VP\VP)/NP) + ----------------------------------------------B + ((S\NP)/NP) + ---------------------------------------------------------------->B + (S/NP) + --------------------------------------------------------------------------------> + (N\N) + ---------------------------------------------------------------------------------------< + N + -----------------------------------------------------------------------------------------------> + NP + ------------------------------------------------------------------------------------------------------------> + (S\NP) + ------------------------------------------------------------------------------------------------------------------< + S + + +Conjunction +----------- + + >>> from nltk.ccg.chart import CCGChartParser, ApplicationRuleSet, CompositionRuleSet + >>> from nltk.ccg.chart import SubstitutionRuleSet, TypeRaiseRuleSet, printCCGDerivation + >>> from nltk.ccg import lexicon + +Lexicons for the tests: + + >>> test1_lex = ''' + ... :- S,N,NP,VP + ... I => NP + ... you => NP + ... will => S\\NP/VP + ... cook => VP/NP + ... which => (N\\N)/(S/NP) + ... and => var\\.,var/.,var + ... might => S\\NP/VP + ... eat => VP/NP + ... the => NP/N + ... mushrooms => N + ... parsnips => N''' + >>> test2_lex = ''' + ... :- N, S, NP, VP + ... articles => N + ... the => NP/N + ... and => var\\.,var/.,var + ... which => (N\\N)/(S/NP) + ... I => NP + ... anyone => NP + ... will => (S/VP)\\NP + ... file => VP/NP + ... without => (VP\\VP)/VP[ing] + ... forget => VP/NP + ... reading => VP[ing]/NP + ... ''' + +Tests handling of conjunctions. +Note that while the two derivations are different, they are semantically equivalent. + + >>> lex = lexicon.parseLexicon(test1_lex) + >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet) + >>> for parse in parser.parse("I will cook and might eat the mushrooms and parsnips".split()): + ... 
printCCGDerivation(parse) + I will cook and might eat the mushrooms and parsnips + NP ((S\NP)/VP) (VP/NP) ((_var0\.,_var0)/.,_var0) ((S\NP)/VP) (VP/NP) (NP/N) N ((_var0\.,_var0)/.,_var0) N + ---------------------->B + ((S\NP)/NP) + ---------------------->B + ((S\NP)/NP) + -------------------------------------------------> + (((S\NP)/NP)\.,((S\NP)/NP)) + -----------------------------------------------------------------------< + ((S\NP)/NP) + -------------------------------------> + (N\.,N) + ------------------------------------------------< + N + --------------------------------------------------------> + NP + -------------------------------------------------------------------------------------------------------------------------------> + (S\NP) + -----------------------------------------------------------------------------------------------------------------------------------< + S + I will cook and might eat the mushrooms and parsnips + NP ((S\NP)/VP) (VP/NP) ((_var0\.,_var0)/.,_var0) ((S\NP)/VP) (VP/NP) (NP/N) N ((_var0\.,_var0)/.,_var0) N + ---------------------->B + ((S\NP)/NP) + ---------------------->B + ((S\NP)/NP) + -------------------------------------------------> + (((S\NP)/NP)\.,((S\NP)/NP)) + -----------------------------------------------------------------------< + ((S\NP)/NP) + ------------------------------------------------------------------------------->B + ((S\NP)/N) + -------------------------------------> + (N\.,N) + ------------------------------------------------< + N + -------------------------------------------------------------------------------------------------------------------------------> + (S\NP) + -----------------------------------------------------------------------------------------------------------------------------------< + S + + +Tests handling subject extraction. +Interesting to point that the two parses are clearly semantically different. + + >>> lex = lexicon.parseLexicon(test2_lex) + >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet) + >>> for parse in parser.parse("articles which I will file and forget without reading".split()): + ... 
printCCGDerivation(parse) + articles which I will file and forget without reading + N ((N\N)/(S/NP)) NP ((S/VP)\NP) (VP/NP) ((_var0\.,_var0)/.,_var0) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP) + -----------------< + (S/VP) + ------------------------------------->B + ((VP\VP)/NP) + ---------------------------------------------- + ((VP/NP)\.,(VP/NP)) + ----------------------------------------------------------------------------------< + (VP/NP) + --------------------------------------------------------------------------------------------------->B + (S/NP) + -------------------------------------------------------------------------------------------------------------------> + (N\N) + -----------------------------------------------------------------------------------------------------------------------------< + N + articles which I will file and forget without reading + N ((N\N)/(S/NP)) NP ((S/VP)\NP) (VP/NP) ((_var0\.,_var0)/.,_var0) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP) + -----------------< + (S/VP) + ------------------------------------> + ((VP/NP)\.,(VP/NP)) + ---------------------------------------------< + (VP/NP) + ------------------------------------->B + ((VP\VP)/NP) + ----------------------------------------------------------------------------------B + (S/NP) + -------------------------------------------------------------------------------------------------------------------> + (N\N) + -----------------------------------------------------------------------------------------------------------------------------< + N + + +Unicode support +--------------- + +Unicode words are supported. + + >>> from nltk.ccg import chart, lexicon + +Lexicons for the tests: + + >>> lex = lexicon.parseLexicon(u''' + ... :- S, N, NP, PP + ... + ... AdjI :: N\\N + ... AdjD :: N/N + ... AdvD :: S/S + ... AdvI :: S\\S + ... Det :: NP/N + ... PrepNPCompl :: PP/NP + ... PrepNAdjN :: S\\S/N + ... PrepNAdjNP :: S\\S/NP + ... VPNP :: S\\NP/NP + ... VPPP :: S\\NP/PP + ... VPser :: S\\NP/AdjI + ... + ... auto => N + ... bebidas => N + ... cine => N + ... ley => N + ... libro => N + ... ministro => N + ... panadería => N + ... presidente => N + ... super => N + ... + ... el => Det + ... la => Det + ... las => Det + ... un => Det + ... + ... Ana => NP + ... Pablo => NP + ... + ... y => var\\.,var/.,var + ... + ... pero => (S/NP)\\(S/NP)/(S/NP) + ... + ... anunció => VPNP + ... compró => VPNP + ... cree => S\\NP/S[dep] + ... desmintió => VPNP + ... lee => VPNP + ... fueron => VPPP + ... + ... es => VPser + ... + ... interesante => AdjD + ... interesante => AdjI + ... nueva => AdjD + ... nueva => AdjI + ... + ... a => PrepNPCompl + ... en => PrepNAdjN + ... en => PrepNAdjNP + ... + ... ayer => AdvI + ... + ... que => (NP\\NP)/(S/NP) + ... que => S[dep]/S + ... ''') + + >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet) + >>> for parse in parser.parse(u"el ministro anunció pero el presidente desmintió la nueva ley".split()): + ... printCCGDerivation(parse) # doctest: +SKIP + ... # it fails on python2.7 because of the unicode problem explained in https://github.com/nltk/nltk/pull/1354 + ... 
break + el ministro anunció pero el presidente desmintió la nueva ley + (NP/N) N ((S\NP)/NP) (((S/NP)\(S/NP))/(S/NP)) (NP/N) N ((S\NP)/NP) (NP/N) (N/N) N + ------------------> + NP + ------------------>T + (S/(S\NP)) + --------------------> + NP + -------------------->T + (S/(S\NP)) + --------------------------------->B + (S/NP) + -----------------------------------------------------------> + ((S/NP)\(S/NP)) + ------------> + N + --------------------> + NP + -------------------- + S diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/ccg_semantics.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/ccg_semantics.doctest new file mode 100644 index 0000000..ce62733 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/ccg_semantics.doctest @@ -0,0 +1,553 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +============================================== +Combinatory Categorial Grammar with semantics +============================================== + +----- +Chart +----- + + + >>> from nltk.ccg import chart, lexicon + >>> from nltk.ccg.chart import printCCGDerivation + +No semantics +------------------- + + >>> lex = lexicon.fromstring(''' + ... :- S, NP, N + ... She => NP + ... has => (S\\NP)/NP + ... books => NP + ... ''', + ... False) + + >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet) + >>> parses = list(parser.parse("She has books".split())) + >>> print(str(len(parses)) + " parses") + 3 parses + + >>> printCCGDerivation(parses[0]) + She has books + NP ((S\NP)/NP) NP + --------------------> + (S\NP) + -------------------------< + S + + >>> printCCGDerivation(parses[1]) + She has books + NP ((S\NP)/NP) NP + ----->T + (S/(S\NP)) + --------------------> + (S\NP) + -------------------------> + S + + + >>> printCCGDerivation(parses[2]) + She has books + NP ((S\NP)/NP) NP + ----->T + (S/(S\NP)) + ------------------>B + (S/NP) + -------------------------> + S + +Simple semantics +------------------- + + >>> lex = lexicon.fromstring(''' + ... :- S, NP, N + ... She => NP {she} + ... has => (S\\NP)/NP {\\x y.have(y, x)} + ... a => NP/N {\\P.exists z.P(z)} + ... book => N {book} + ... ''', + ... 
True) + + >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet) + >>> parses = list(parser.parse("She has a book".split())) + >>> print(str(len(parses)) + " parses") + 7 parses + + >>> printCCGDerivation(parses[0]) + She has a book + NP {she} ((S\NP)/NP) {\x y.have(y,x)} (NP/N) {\P.exists z.P(z)} N {book} + -------------------------------------> + NP {exists z.book(z)} + -------------------------------------------------------------------> + (S\NP) {\y.have(y,exists z.book(z))} + -----------------------------------------------------------------------------< + S {have(she,exists z.book(z))} + + >>> printCCGDerivation(parses[1]) + She has a book + NP {she} ((S\NP)/NP) {\x y.have(y,x)} (NP/N) {\P.exists z.P(z)} N {book} + --------------------------------------------------------->B + ((S\NP)/N) {\P y.have(y,exists z.P(z))} + -------------------------------------------------------------------> + (S\NP) {\y.have(y,exists z.book(z))} + -----------------------------------------------------------------------------< + S {have(she,exists z.book(z))} + + >>> printCCGDerivation(parses[2]) + She has a book + NP {she} ((S\NP)/NP) {\x y.have(y,x)} (NP/N) {\P.exists z.P(z)} N {book} + ---------->T + (S/(S\NP)) {\F.F(she)} + -------------------------------------> + NP {exists z.book(z)} + -------------------------------------------------------------------> + (S\NP) {\y.have(y,exists z.book(z))} + -----------------------------------------------------------------------------> + S {have(she,exists z.book(z))} + + >>> printCCGDerivation(parses[3]) + She has a book + NP {she} ((S\NP)/NP) {\x y.have(y,x)} (NP/N) {\P.exists z.P(z)} N {book} + ---------->T + (S/(S\NP)) {\F.F(she)} + --------------------------------------------------------->B + ((S\NP)/N) {\P y.have(y,exists z.P(z))} + -------------------------------------------------------------------> + (S\NP) {\y.have(y,exists z.book(z))} + -----------------------------------------------------------------------------> + S {have(she,exists z.book(z))} + + >>> printCCGDerivation(parses[4]) + She has a book + NP {she} ((S\NP)/NP) {\x y.have(y,x)} (NP/N) {\P.exists z.P(z)} N {book} + ---------->T + (S/(S\NP)) {\F.F(she)} + ---------------------------------------->B + (S/NP) {\x.have(she,x)} + -------------------------------------> + NP {exists z.book(z)} + -----------------------------------------------------------------------------> + S {have(she,exists z.book(z))} + + >>> printCCGDerivation(parses[5]) + She has a book + NP {she} ((S\NP)/NP) {\x y.have(y,x)} (NP/N) {\P.exists z.P(z)} N {book} + ---------->T + (S/(S\NP)) {\F.F(she)} + --------------------------------------------------------->B + ((S\NP)/N) {\P y.have(y,exists z.P(z))} + ------------------------------------------------------------------->B + (S/N) {\P.have(she,exists z.P(z))} + -----------------------------------------------------------------------------> + S {have(she,exists z.book(z))} + + >>> printCCGDerivation(parses[6]) + She has a book + NP {she} ((S\NP)/NP) {\x y.have(y,x)} (NP/N) {\P.exists z.P(z)} N {book} + ---------->T + (S/(S\NP)) {\F.F(she)} + ---------------------------------------->B + (S/NP) {\x.have(she,x)} + ------------------------------------------------------------------->B + (S/N) {\P.have(she,exists z.P(z))} + -----------------------------------------------------------------------------> + S {have(she,exists z.book(z))} + +Complex semantics +------------------- + + >>> lex = lexicon.fromstring(''' + ... :- S, NP, N + ... She => NP {she} + ... 
has => (S\\NP)/NP {\\x y.have(y, x)} + ... a => ((S\\NP)\\((S\\NP)/NP))/N {\\P R x.(exists z.P(z) & R(z,x))} + ... book => N {book} + ... ''', + ... True) + + >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet) + >>> parses = list(parser.parse("She has a book".split())) + >>> print(str(len(parses)) + " parses") + 2 parses + + >>> printCCGDerivation(parses[0]) + She has a book + NP {she} ((S\NP)/NP) {\x y.have(y,x)} (((S\NP)\((S\NP)/NP))/N) {\P R x.(exists z.P(z) & R(z,x))} N {book} + ----------------------------------------------------------------------> + ((S\NP)\((S\NP)/NP)) {\R x.(exists z.book(z) & R(z,x))} + ----------------------------------------------------------------------------------------------------< + (S\NP) {\x.(exists z.book(z) & have(x,z))} + --------------------------------------------------------------------------------------------------------------< + S {(exists z.book(z) & have(she,z))} + + >>> printCCGDerivation(parses[1]) + She has a book + NP {she} ((S\NP)/NP) {\x y.have(y,x)} (((S\NP)\((S\NP)/NP))/N) {\P R x.(exists z.P(z) & R(z,x))} N {book} + ---------->T + (S/(S\NP)) {\F.F(she)} + ----------------------------------------------------------------------> + ((S\NP)\((S\NP)/NP)) {\R x.(exists z.book(z) & R(z,x))} + ----------------------------------------------------------------------------------------------------< + (S\NP) {\x.(exists z.book(z) & have(x,z))} + --------------------------------------------------------------------------------------------------------------> + S {(exists z.book(z) & have(she,z))} + +Using conjunctions +--------------------- + + # TODO: The semantics of "and" should have been more flexible + >>> lex = lexicon.fromstring(''' + ... :- S, NP, N + ... I => NP {I} + ... cook => (S\\NP)/NP {\\x y.cook(x,y)} + ... and => var\\.,var/.,var {\\P Q x y.(P(x,y) & Q(x,y))} + ... eat => (S\\NP)/NP {\\x y.eat(x,y)} + ... the => NP/N {\\x.the(x)} + ... bacon => N {bacon} + ... ''', + ... 
True) + + >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet) + >>> parses = list(parser.parse("I cook and eat the bacon".split())) + >>> print(str(len(parses)) + " parses") + 7 parses + + >>> printCCGDerivation(parses[0]) + I cook and eat the bacon + NP {I} ((S\NP)/NP) {\x y.cook(x,y)} ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))} ((S\NP)/NP) {\x y.eat(x,y)} (NP/N) {\x.the(x)} N {bacon} + -------------------------------------------------------------------------------------> + (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))} + -------------------------------------------------------------------------------------------------------------------< + ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))} + -------------------------------> + NP {the(bacon)} + --------------------------------------------------------------------------------------------------------------------------------------------------> + (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))} + ----------------------------------------------------------------------------------------------------------------------------------------------------------< + S {(eat(the(bacon),I) & cook(the(bacon),I))} + + >>> printCCGDerivation(parses[1]) + I cook and eat the bacon + NP {I} ((S\NP)/NP) {\x y.cook(x,y)} ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))} ((S\NP)/NP) {\x y.eat(x,y)} (NP/N) {\x.the(x)} N {bacon} + -------------------------------------------------------------------------------------> + (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))} + -------------------------------------------------------------------------------------------------------------------< + ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))} + --------------------------------------------------------------------------------------------------------------------------------------->B + ((S\NP)/N) {\x y.(eat(the(x),y) & cook(the(x),y))} + --------------------------------------------------------------------------------------------------------------------------------------------------> + (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))} + ----------------------------------------------------------------------------------------------------------------------------------------------------------< + S {(eat(the(bacon),I) & cook(the(bacon),I))} + + >>> printCCGDerivation(parses[2]) + I cook and eat the bacon + NP {I} ((S\NP)/NP) {\x y.cook(x,y)} ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))} ((S\NP)/NP) {\x y.eat(x,y)} (NP/N) {\x.the(x)} N {bacon} + -------->T + (S/(S\NP)) {\F.F(I)} + -------------------------------------------------------------------------------------> + (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))} + -------------------------------------------------------------------------------------------------------------------< + ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))} + -------------------------------> + NP {the(bacon)} + --------------------------------------------------------------------------------------------------------------------------------------------------> + (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))} + ----------------------------------------------------------------------------------------------------------------------------------------------------------> + S {(eat(the(bacon),I) & cook(the(bacon),I))} + + >>> printCCGDerivation(parses[3]) + I cook and eat the bacon + NP {I} ((S\NP)/NP) {\x y.cook(x,y)} ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))} ((S\NP)/NP) {\x y.eat(x,y)} (NP/N) {\x.the(x)} N {bacon} + 
-------->T + (S/(S\NP)) {\F.F(I)} + -------------------------------------------------------------------------------------> + (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))} + -------------------------------------------------------------------------------------------------------------------< + ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))} + --------------------------------------------------------------------------------------------------------------------------------------->B + ((S\NP)/N) {\x y.(eat(the(x),y) & cook(the(x),y))} + --------------------------------------------------------------------------------------------------------------------------------------------------> + (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))} + ----------------------------------------------------------------------------------------------------------------------------------------------------------> + S {(eat(the(bacon),I) & cook(the(bacon),I))} + + >>> printCCGDerivation(parses[4]) + I cook and eat the bacon + NP {I} ((S\NP)/NP) {\x y.cook(x,y)} ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))} ((S\NP)/NP) {\x y.eat(x,y)} (NP/N) {\x.the(x)} N {bacon} + -------->T + (S/(S\NP)) {\F.F(I)} + -------------------------------------------------------------------------------------> + (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))} + -------------------------------------------------------------------------------------------------------------------< + ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))} + --------------------------------------------------------------------------------------------------------------------------->B + (S/NP) {\x.(eat(x,I) & cook(x,I))} + -------------------------------> + NP {the(bacon)} + ----------------------------------------------------------------------------------------------------------------------------------------------------------> + S {(eat(the(bacon),I) & cook(the(bacon),I))} + + >>> printCCGDerivation(parses[5]) + I cook and eat the bacon + NP {I} ((S\NP)/NP) {\x y.cook(x,y)} ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))} ((S\NP)/NP) {\x y.eat(x,y)} (NP/N) {\x.the(x)} N {bacon} + -------->T + (S/(S\NP)) {\F.F(I)} + -------------------------------------------------------------------------------------> + (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))} + -------------------------------------------------------------------------------------------------------------------< + ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))} + --------------------------------------------------------------------------------------------------------------------------------------->B + ((S\NP)/N) {\x y.(eat(the(x),y) & cook(the(x),y))} + ----------------------------------------------------------------------------------------------------------------------------------------------->B + (S/N) {\x.(eat(the(x),I) & cook(the(x),I))} + ----------------------------------------------------------------------------------------------------------------------------------------------------------> + S {(eat(the(bacon),I) & cook(the(bacon),I))} + + >>> printCCGDerivation(parses[6]) + I cook and eat the bacon + NP {I} ((S\NP)/NP) {\x y.cook(x,y)} ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))} ((S\NP)/NP) {\x y.eat(x,y)} (NP/N) {\x.the(x)} N {bacon} + -------->T + (S/(S\NP)) {\F.F(I)} + -------------------------------------------------------------------------------------> + (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))} + 
-------------------------------------------------------------------------------------------------------------------< + ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))} + --------------------------------------------------------------------------------------------------------------------------->B + (S/NP) {\x.(eat(x,I) & cook(x,I))} + ----------------------------------------------------------------------------------------------------------------------------------------------->B + (S/N) {\x.(eat(the(x),I) & cook(the(x),I))} + ----------------------------------------------------------------------------------------------------------------------------------------------------------> + S {(eat(the(bacon),I) & cook(the(bacon),I))} + +Tests from published papers +------------------------------ + +An example from "CCGbank: A Corpus of CCG Derivations and Dependency Structures Extracted from the Penn Treebank", Hockenmaier and Steedman, 2007, Page 359, https://www.aclweb.org/anthology/J/J07/J07-3004.pdf + + >>> lex = lexicon.fromstring(''' + ... :- S, NP + ... I => NP {I} + ... give => ((S\\NP)/NP)/NP {\\x y z.give(y,x,z)} + ... them => NP {them} + ... money => NP {money} + ... ''', + ... True) + + >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet) + >>> parses = list(parser.parse("I give them money".split())) + >>> print(str(len(parses)) + " parses") + 3 parses + + >>> printCCGDerivation(parses[0]) + I give them money + NP {I} (((S\NP)/NP)/NP) {\x y z.give(y,x,z)} NP {them} NP {money} + --------------------------------------------------> + ((S\NP)/NP) {\y z.give(y,them,z)} + --------------------------------------------------------------> + (S\NP) {\z.give(money,them,z)} + ----------------------------------------------------------------------< + S {give(money,them,I)} + + >>> printCCGDerivation(parses[1]) + I give them money + NP {I} (((S\NP)/NP)/NP) {\x y z.give(y,x,z)} NP {them} NP {money} + -------->T + (S/(S\NP)) {\F.F(I)} + --------------------------------------------------> + ((S\NP)/NP) {\y z.give(y,them,z)} + --------------------------------------------------------------> + (S\NP) {\z.give(money,them,z)} + ----------------------------------------------------------------------> + S {give(money,them,I)} + + + >>> printCCGDerivation(parses[2]) + I give them money + NP {I} (((S\NP)/NP)/NP) {\x y z.give(y,x,z)} NP {them} NP {money} + -------->T + (S/(S\NP)) {\F.F(I)} + --------------------------------------------------> + ((S\NP)/NP) {\y z.give(y,them,z)} + ---------------------------------------------------------->B + (S/NP) {\y.give(y,them,I)} + ----------------------------------------------------------------------> + S {give(money,them,I)} + + +An example from "CCGbank: A Corpus of CCG Derivations and Dependency Structures Extracted from the Penn Treebank", Hockenmaier and Steedman, 2007, Page 359, https://www.aclweb.org/anthology/J/J07/J07-3004.pdf + + >>> lex = lexicon.fromstring(''' + ... :- N, NP, S + ... money => N {money} + ... that => (N\\N)/(S/NP) {\\P Q x.(P(x) & Q(x))} + ... I => NP {I} + ... give => ((S\\NP)/NP)/NP {\\x y z.give(y,x,z)} + ... them => NP {them} + ... ''', + ... 
True) + + >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet) + >>> parses = list(parser.parse("money that I give them".split())) + >>> print(str(len(parses)) + " parses") + 3 parses + + >>> printCCGDerivation(parses[0]) + money that I give them + N {money} ((N\N)/(S/NP)) {\P Q x.(P(x) & Q(x))} NP {I} (((S\NP)/NP)/NP) {\x y z.give(y,x,z)} NP {them} + -------->T + (S/(S\NP)) {\F.F(I)} + --------------------------------------------------> + ((S\NP)/NP) {\y z.give(y,them,z)} + ---------------------------------------------------------->B + (S/NP) {\y.give(y,them,I)} + -------------------------------------------------------------------------------------------------> + (N\N) {\Q x.(give(x,them,I) & Q(x))} + ------------------------------------------------------------------------------------------------------------< + N {\x.(give(x,them,I) & money(x))} + + >>> printCCGDerivation(parses[1]) + money that I give them + N {money} ((N\N)/(S/NP)) {\P Q x.(P(x) & Q(x))} NP {I} (((S\NP)/NP)/NP) {\x y z.give(y,x,z)} NP {them} + ----------->T + (N/(N\N)) {\F.F(money)} + -------->T + (S/(S\NP)) {\F.F(I)} + --------------------------------------------------> + ((S\NP)/NP) {\y z.give(y,them,z)} + ---------------------------------------------------------->B + (S/NP) {\y.give(y,them,I)} + -------------------------------------------------------------------------------------------------> + (N\N) {\Q x.(give(x,them,I) & Q(x))} + ------------------------------------------------------------------------------------------------------------> + N {\x.(give(x,them,I) & money(x))} + + >>> printCCGDerivation(parses[2]) + money that I give them + N {money} ((N\N)/(S/NP)) {\P Q x.(P(x) & Q(x))} NP {I} (((S\NP)/NP)/NP) {\x y z.give(y,x,z)} NP {them} + ----------->T + (N/(N\N)) {\F.F(money)} + -------------------------------------------------->B + (N/(S/NP)) {\P x.(P(x) & money(x))} + -------->T + (S/(S\NP)) {\F.F(I)} + --------------------------------------------------> + ((S\NP)/NP) {\y z.give(y,them,z)} + ---------------------------------------------------------->B + (S/NP) {\y.give(y,them,I)} + ------------------------------------------------------------------------------------------------------------> + N {\x.(give(x,them,I) & money(x))} + + +------- +Lexicon +------- + + >>> from nltk.ccg import lexicon + +Parse lexicon with semantics + + >>> print(str(lexicon.fromstring( + ... ''' + ... :- S,NP + ... + ... IntransVsg :: S\\NP[sg] + ... + ... sleeps => IntransVsg {\\x.sleep(x)} + ... eats => S\\NP[sg]/NP {\\x y.eat(x,y)} + ... + ... and => var\\var/var {\\x y.x & y} + ... ''', + ... True + ... ))) + and => ((_var0\_var0)/_var0) {(\x y.x & y)} + eats => ((S\NP['sg'])/NP) {\x y.eat(x,y)} + sleeps => (S\NP['sg']) {\x.sleep(x)} + +Parse lexicon without semantics + + >>> print(str(lexicon.fromstring( + ... ''' + ... :- S,NP + ... + ... IntransVsg :: S\\NP[sg] + ... + ... sleeps => IntransVsg + ... eats => S\\NP[sg]/NP {sem=\\x y.eat(x,y)} + ... + ... and => var\\var/var + ... ''', + ... False + ... ))) + and => ((_var0\_var0)/_var0) + eats => ((S\NP['sg'])/NP) + sleeps => (S\NP['sg']) + +Semantics are missing + + >>> print(str(lexicon.fromstring( + ... ''' + ... :- S,NP + ... + ... eats => S\\NP[sg]/NP + ... ''', + ... True + ... ))) + Traceback (most recent call last): + ... 
+ AssertionError: eats => S\NP[sg]/NP must contain semantics because include_semantics is set to True + + +------------------------------------ +CCG combinator semantics computation +------------------------------------ + + >>> from nltk.sem.logic import * + >>> from nltk.ccg.logic import * + + >>> read_expr = Expression.fromstring + +Compute semantics from function application + + >>> print(str(compute_function_semantics(read_expr(r'\x.P(x)'), read_expr(r'book')))) + P(book) + + >>> print(str(compute_function_semantics(read_expr(r'\P.P(book)'), read_expr(r'read')))) + read(book) + + >>> print(str(compute_function_semantics(read_expr(r'\P.P(book)'), read_expr(r'\x.read(x)')))) + read(book) + +Compute semantics from composition + + >>> print(str(compute_composition_semantics(read_expr(r'\x.P(x)'), read_expr(r'\x.Q(x)')))) + \x.P(Q(x)) + + >>> print(str(compute_composition_semantics(read_expr(r'\x.P(x)'), read_expr(r'read')))) + Traceback (most recent call last): + ... + AssertionError: `read` must be a lambda expression + +Compute semantics from substitution + + >>> print(str(compute_substitution_semantics(read_expr(r'\x y.P(x,y)'), read_expr(r'\x.Q(x)')))) + \x.P(x,Q(x)) + + >>> print(str(compute_substitution_semantics(read_expr(r'\x.P(x)'), read_expr(r'read')))) + Traceback (most recent call last): + ... + AssertionError: `\x.P(x)` must be a lambda expression with 2 arguments + +Compute type-raise semantics + + >>> print(str(compute_type_raised_semantics(read_expr(r'\x.P(x)')))) + \F x.F(P(x)) + + >>> print(str(compute_type_raised_semantics(read_expr(r'\x.F(x)')))) + \F1 x.F1(F(x)) + + >>> print(str(compute_type_raised_semantics(read_expr(r'\x y z.P(x,y,z)')))) + \F x y z.F(P(x,y,z)) + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/chat80.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/chat80.doctest new file mode 100644 index 0000000..9efe693 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/chat80.doctest @@ -0,0 +1,234 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +======= +Chat-80 +======= + +Chat-80 was a natural language system which allowed the user to +interrogate a Prolog knowledge base in the domain of world +geography. It was developed in the early '80s by Warren and Pereira; see +``_ for a description and +``_ for the source +files. + +The ``chat80`` module contains functions to extract data from the Chat-80 +relation files ('the world database'), and convert them into a format +that can be incorporated in the FOL models of +``nltk.sem.evaluate``. The code assumes that the Prolog +input files are available in the NLTK corpora directory. + +The Chat-80 World Database consists of the following files:: + + world0.pl + rivers.pl + cities.pl + countries.pl + contain.pl + borders.pl + +This module uses a slightly modified version of ``world0.pl``, in which +a set of Prolog rules has been omitted. The modified file is named +``world1.pl``. Currently, the file ``rivers.pl`` is not read in, since +it uses a list rather than a string in the second field. + +Reading Chat-80 Files +===================== + +Chat-80 relations are like tables in a relational database. The +relation acts as the name of the table; the first argument acts as the +'primary key'; and subsequent arguments are further fields in the +table. In general, the name of the table provides a label for a unary +predicate whose extension is all the primary keys. 
For example, +relations in ``cities.pl`` are of the following form:: + + 'city(athens,greece,1368).' + +Here, ``'athens'`` is the key, and will be mapped to a member of the +unary predicate *city*. + +By analogy with NLTK corpora, ``chat80`` defines a number of 'items' +which correspond to the relations. + + >>> from nltk.sem import chat80 + >>> print(chat80.items) # doctest: +ELLIPSIS + ('borders', 'circle_of_lat', 'circle_of_long', 'city', ...) + +The fields in the table are mapped to binary predicates. The first +argument of the predicate is the primary key, while the second +argument is the data in the relevant field. Thus, in the above +example, the third field is mapped to the binary predicate +*population_of*, whose extension is a set of pairs such as +``'(athens, 1368)'``. + +An exception to this general framework is required by the relations in +the files ``borders.pl`` and ``contains.pl``. These contain facts of the +following form:: + + 'borders(albania,greece).' + + 'contains0(africa,central_africa).' + +We do not want to form a unary concept out of the element in +the first field of these records, and we want the label of the binary +relation just to be ``'border'``/``'contain'`` respectively. + +In order to drive the extraction process, we use 'relation metadata bundles' +which are Python dictionaries such as the following:: + + city = {'label': 'city', + 'closures': [], + 'schema': ['city', 'country', 'population'], + 'filename': 'cities.pl'} + +According to this, the file ``city['filename']`` contains a list of +relational tuples (or more accurately, the corresponding strings in +Prolog form) whose predicate symbol is ``city['label']`` and whose +relational schema is ``city['schema']``. The notion of a ``closure`` is +discussed in the next section. + +Concepts +======== +In order to encapsulate the results of the extraction, a class of +``Concept``\ s is introduced. A ``Concept`` object has a number of +attributes, in particular a ``prefLabel``, an arity and ``extension``. + + >>> c1 = chat80.Concept('dog', arity=1, extension=set(['d1', 'd2'])) + >>> print(c1) + Label = 'dog' + Arity = 1 + Extension = ['d1', 'd2'] + + + +The ``extension`` attribute makes it easier to inspect the output of +the extraction. + + >>> schema = ['city', 'country', 'population'] + >>> concepts = chat80.clause2concepts('cities.pl', 'city', schema) + >>> concepts + [Concept('city'), Concept('country_of'), Concept('population_of')] + >>> for c in concepts: # doctest: +NORMALIZE_WHITESPACE + ... print("%s:\n\t%s" % (c.prefLabel, c.extension[:4])) + city: + ['athens', 'bangkok', 'barcelona', 'berlin'] + country_of: + [('athens', 'greece'), ('bangkok', 'thailand'), ('barcelona', 'spain'), ('berlin', 'east_germany')] + population_of: + [('athens', '1368'), ('bangkok', '1178'), ('barcelona', '1280'), ('berlin', '3481')] + +In addition, the ``extension`` can be further +processed: in the case of the ``'border'`` relation, we check that the +relation is **symmetric**, and in the case of the ``'contain'`` +relation, we carry out the **transitive closure**. The closure +properties associated with a concept are specified in the relation +metadata, as indicated earlier. 
+ + >>> borders = set([('a1', 'a2'), ('a2', 'a3')]) + >>> c2 = chat80.Concept('borders', arity=2, extension=borders) + >>> print(c2) + Label = 'borders' + Arity = 2 + Extension = [('a1', 'a2'), ('a2', 'a3')] + >>> c3 = chat80.Concept('borders', arity=2, closures=['symmetric'], extension=borders) + >>> c3.close() + >>> print(c3) + Label = 'borders' + Arity = 2 + Extension = [('a1', 'a2'), ('a2', 'a1'), ('a2', 'a3'), ('a3', 'a2')] + +The ``extension`` of a ``Concept`` object is then incorporated into a +``Valuation`` object. + +Persistence +=========== +The functions ``val_dump`` and ``val_load`` are provided to allow a +valuation to be stored in a persistent database and re-loaded, rather +than having to be re-computed each time. + +Individuals and Lexical Items +============================= +As well as deriving relations from the Chat-80 data, we also create a +set of individual constants, one for each entity in the domain. The +individual constants are string-identical to the entities. For +example, given a data item such as ``'zloty'``, we add to the valuation +a pair ``('zloty', 'zloty')``. In order to parse English sentences that +refer to these entities, we also create a lexical item such as the +following for each individual constant:: + + PropN[num=sg, sem=<\P.(P zloty)>] -> 'Zloty' + +The set of rules is written to the file ``chat_pnames.fcfg`` in the +current directory. + +SQL Query +========= + +The ``city`` relation is also available in RDB form and can be queried +using SQL statements. + + >>> import nltk + >>> q = "SELECT City, Population FROM city_table WHERE Country = 'china' and Population > 1000" + >>> for answer in chat80.sql_query('corpora/city_database/city.db', q): + ... print("%-10s %4s" % answer) + canton 1496 + chungking 1100 + mukden 1551 + peking 2031 + shanghai 5407 + tientsin 1795 + +The (deliberately naive) grammar ``sql.fcfg`` translates from English +to SQL: + + >>> nltk.data.show_cfg('grammars/book_grammars/sql0.fcfg') + % start S + S[SEM=(?np + WHERE + ?vp)] -> NP[SEM=?np] VP[SEM=?vp] + VP[SEM=(?v + ?pp)] -> IV[SEM=?v] PP[SEM=?pp] + VP[SEM=(?v + ?ap)] -> IV[SEM=?v] AP[SEM=?ap] + NP[SEM=(?det + ?n)] -> Det[SEM=?det] N[SEM=?n] + PP[SEM=(?p + ?np)] -> P[SEM=?p] NP[SEM=?np] + AP[SEM=?pp] -> A[SEM=?a] PP[SEM=?pp] + NP[SEM='Country="greece"'] -> 'Greece' + NP[SEM='Country="china"'] -> 'China' + Det[SEM='SELECT'] -> 'Which' | 'What' + N[SEM='City FROM city_table'] -> 'cities' + IV[SEM=''] -> 'are' + A[SEM=''] -> 'located' + P[SEM=''] -> 'in' + +Given this grammar, we can express, and then execute, queries in English. + + >>> cp = nltk.parse.load_parser('grammars/book_grammars/sql0.fcfg') + >>> query = 'What cities are in China' + >>> for tree in cp.parse(query.split()): + ... answer = tree.label()['SEM'] + ... q = " ".join(answer) + ... print(q) + ... + SELECT City FROM city_table WHERE Country="china" + + >>> rows = chat80.sql_query('corpora/city_database/city.db', q) + >>> for r in rows: print("%s" % r, end=' ') + canton chungking dairen harbin kowloon mukden peking shanghai sian tientsin + + +Using Valuations +----------------- + +In order to convert such an extension into a valuation, we use the +``make_valuation()`` method; setting ``read=True`` creates and returns +a new ``Valuation`` object which contains the results. 
+ + >>> val = chat80.make_valuation(concepts, read=True) + >>> 'calcutta' in val['city'] + True + >>> [town for (town, country) in val['country_of'] if country == 'india'] + ['bombay', 'calcutta', 'delhi', 'hyderabad', 'madras'] + >>> dom = val.domain + >>> g = nltk.sem.Assignment(dom) + >>> m = nltk.sem.Model(dom, val) + >>> m.evaluate(r'population_of(jakarta, 533)', g) + True + + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/childes.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/childes.doctest new file mode 100644 index 0000000..7900c54 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/childes.doctest @@ -0,0 +1,184 @@ +======================= + CHILDES Corpus Readers +======================= + +Read the XML version of the CHILDES corpus. + +How to use CHILDESCorpusReader +============================== + +Read the CHILDESCorpusReader class and read the CHILDES corpus saved in +the nltk_data directory. + + >>> import nltk + >>> from nltk.corpus.reader import CHILDESCorpusReader + >>> corpus_root = nltk.data.find('corpora/childes/data-xml/Eng-USA-MOR/') + +Reading files in the Valian corpus (Valian, 1991). + + >>> valian = CHILDESCorpusReader(corpus_root, 'Valian/.*.xml') + >>> valian.fileids() + ['Valian/01a.xml', 'Valian/01b.xml', 'Valian/02a.xml', 'Valian/02b.xml',... + +Count the number of files + + >>> len(valian.fileids()) + 43 + +Printing properties of the corpus files. + + >>> corpus_data = valian.corpus(valian.fileids()) + >>> print(corpus_data[0]['Lang']) + eng + >>> for key in sorted(corpus_data[0].keys()): + ... print(key, ": ", corpus_data[0][key]) + Corpus : valian + Date : 1986-03-04 + Id : 01a + Lang : eng + Version : 2.0.1 + {http://www.w3.org/2001/XMLSchema-instance}schemaLocation : http://www.talkbank.org/ns/talkbank http://talkbank.org/software/talkbank.xsd + +Printing information of participants of the corpus. The most common codes for +the participants are 'CHI' (target child), 'MOT' (mother), and 'INV' (investigator). + + >>> corpus_participants = valian.participants(valian.fileids()) + >>> for this_corpus_participants in corpus_participants[:2]: + ... for key in sorted(this_corpus_participants.keys()): + ... dct = this_corpus_participants[key] + ... print(key, ": ", [(k, dct[k]) for k in sorted(dct.keys())]) + CHI : [('age', 'P2Y1M3D'), ('group', 'normal'), ('id', 'CHI'), ('language', 'eng'), ('role', 'Target_Child'), ('sex', 'female')] + INV : [('id', 'INV'), ('language', 'eng'), ('role', 'Investigator')] + MOT : [('id', 'MOT'), ('language', 'eng'), ('role', 'Mother')] + CHI : [('age', 'P2Y1M12D'), ('group', 'normal'), ('id', 'CHI'), ('language', 'eng'), ('role', 'Target_Child'), ('sex', 'female')] + INV : [('id', 'INV'), ('language', 'eng'), ('role', 'Investigator')] + MOT : [('id', 'MOT'), ('language', 'eng'), ('role', 'Mother')] + +printing words. + + >>> valian.words('Valian/01a.xml') + ['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname', ... + +printing sentences. + + >>> valian.sents('Valian/01a.xml') + [['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname', + 'and', 'it', 'is', 'March', 'fourth', 'I', 'believe', 'and', 'when', + 'was', "Parent's", 'birthday'], ["Child's"], ['oh', "I'm", 'sorry'], + ["that's", 'okay'], ... + +You can specify the participants with the argument *speaker*. + + >>> valian.words('Valian/01a.xml',speaker=['INV']) + ['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname', ... 
+ >>> valian.words('Valian/01a.xml',speaker=['MOT']) + ["Child's", "that's", 'okay', 'February', 'first', 'nineteen', ... + >>> valian.words('Valian/01a.xml',speaker=['CHI']) + ['tape', 'it', 'up', 'and', 'two', 'tape', 'players', 'have',... + + +tagged_words() and tagged_sents() return the usual (word,pos) tuple lists. +POS tags in the CHILDES are automatically assigned by MOR and POST programs +(MacWhinney, 2000). + + >>> valian.tagged_words('Valian/01a.xml')[:30] + [('at', 'prep'), ('Parent', 'n:prop'), ("Lastname's", 'n:prop'), ('house', 'n'), + ('with', 'prep'), ('Child', 'n:prop'), ('Lastname', 'n:prop'), ('and', 'coord'), + ('it', 'pro'), ('is', 'v:cop'), ('March', 'n:prop'), ('fourth', 'adj'), + ('I', 'pro:sub'), ('believe', 'v'), ('and', 'coord'), ('when', 'adv:wh'), + ('was', 'v:cop'), ("Parent's", 'n:prop'), ('birthday', 'n'), ("Child's", 'n:prop'), + ('oh', 'co'), ("I'm", 'pro:sub'), ('sorry', 'adj'), ("that's", 'pro:dem'), + ('okay', 'adj'), ('February', 'n:prop'), ('first', 'adj'), + ('nineteen', 'det:num'), ('eighty', 'det:num'), ('four', 'det:num')] + + >>> valian.tagged_sents('Valian/01a.xml')[:10] + [[('at', 'prep'), ('Parent', 'n:prop'), ("Lastname's", 'n:prop'), ('house', 'n'), + ('with', 'prep'), ('Child', 'n:prop'), ('Lastname', 'n:prop'), ('and', 'coord'), + ('it', 'pro'), ('is', 'v:cop'), ('March', 'n:prop'), ('fourth', 'adj'), + ('I', 'pro:sub'), ('believe', 'v'), ('and', 'coord'), ('when', 'adv:wh'), + ('was', 'v:cop'), ("Parent's", 'n:prop'), ('birthday', 'n')], + [("Child's", 'n:prop')], [('oh', 'co'), ("I'm", 'pro:sub'), ('sorry', 'adj')], + [("that's", 'pro:dem'), ('okay', 'adj')], + [('February', 'n:prop'), ('first', 'adj'), ('nineteen', 'det:num'), + ('eighty', 'det:num'), ('four', 'det:num')], + [('great', 'adj')], + [('and', 'coord'), ("she's", 'pro:sub'), ('two', 'det:num'), ('years', 'n'), ('old', 'adj')], + [('correct', 'adj')], + [('okay', 'co')], [('she', 'pro:sub'), ('just', 'adv:int'), ('turned', 'part'), ('two', 'det:num'), + ('a', 'det'), ('month', 'n'), ('ago', 'adv')]] + +When the argument *stem* is true, the word stems (e.g., 'is' -> 'be-3S') are +used instead of the original words. + + >>> valian.words('Valian/01a.xml')[:30] + ['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname', 'and', 'it', 'is', ... + >>> valian.words('Valian/01a.xml',stem=True)[:30] + ['at', 'Parent', 'Lastname', 's', 'house', 'with', 'Child', 'Lastname', 'and', 'it', 'be-3S', ... + +When the argument *replace* is true, the replaced words are used instead of +the original words. + + >>> valian.words('Valian/01a.xml',speaker='CHI')[247] + 'tikteat' + >>> valian.words('Valian/01a.xml',speaker='CHI',replace=True)[247] + 'trick' + +When the argument *relation* is true, the relational structure of the +sentence is returned. See Sagae et al. (2010) for details of the relational +structure adopted in the CHILDES. 
+ + >>> valian.words('Valian/01a.xml',relation=True)[:10] + [[('at', 'prep', '1|0|ROOT'), ('Parent', 'n', '2|5|VOC'), ('Lastname', 'n', '3|5|MOD'), ('s', 'poss', '4|5|MOD'), ('house', 'n', '5|1|POBJ'), ('with', 'prep', '6|1|JCT'), ('Child', 'n', '7|8|NAME'), ('Lastname', 'n', '8|6|POBJ'), ('and', 'coord', '9|8|COORD'), ('it', 'pro', '10|11|SUBJ'), ('be-3S', 'v', '11|9|COMP'), ('March', 'n', '12|11|PRED'), ('fourth', 'adj', '13|12|MOD'), ('I', 'pro', '15|16|SUBJ'), ('believe', 'v', '16|14|ROOT'), ('and', 'coord', '18|17|ROOT'), ('when', 'adv', '19|20|PRED'), ('be-PAST', 'v', '20|18|COMP'), ('Parent', 'n', '21|23|MOD'), ('s', 'poss', '22|23|MOD'), ('birth', 'n', '23|20|SUBJ')], [('Child', 'n', '1|2|MOD'), ('s', 'poss', '2|0|ROOT')], [('oh', 'co', '1|4|COM'), ('I', 'pro', '3|4|SUBJ'), ('be', 'v', '4|0|ROOT'), ('sorry', 'adj', '5|4|PRED')], [('that', 'pro', '1|2|SUBJ'), ('be', 'v', '2|0|ROOT'), ('okay', 'adj', '3|2|PRED')], [('February', 'n', '1|6|VOC'), ('first', 'adj', '2|6|ENUM'), ('nineteen', 'det', '4|6|ENUM'), ('eighty', 'det', '5|6|ENUM'), ('four', 'det', '6|0|ROOT')], [('great', 'adj', '1|0|ROOT')], [('and', 'coord', '1|0|ROOT'), ('she', 'pro', '2|1|ROOT'), ('be', 'aux', '3|5|AUX'), ('two', 'det', '4|5|QUANT'), ('year-PL', 'n', '5|2|ROOT'), ('old', 'adj', '6|5|MOD')], [('correct', 'adj', '1|0|ROOT')], [('okay', 'co', '1|0|ROOT')], [('she', 'pro', '1|0|ROOT'), ('just', 'adv', '2|3|JCT'), ('turn-PERF', 'part', '3|1|XCOMP'), ('two', 'det', '4|6|QUANT'), ('a', 'det', '5|6|DET'), ('month', 'n', '6|3|OBJ'), ('ago', 'adv', '7|3|JCT')]] + +Printing age. When the argument *month* is true, the age information in +the CHILDES format is converted into the number of months. + + >>> valian.age() + ['P2Y1M3D', 'P2Y1M12D', 'P1Y9M21D', 'P1Y9M28D', 'P2Y1M23D', ... + >>> valian.age('Valian/01a.xml') + ['P2Y1M3D'] + >>> valian.age('Valian/01a.xml',month=True) + [25] + +Printing MLU. The criteria for the MLU computation is broadly based on +Brown (1973). + + >>> valian.MLU() + [2.3574660633484..., 2.292682926829..., 3.492857142857..., 2.961783439490..., + 2.0842696629213..., 3.169811320754..., 3.137404580152..., 3.0578034682080..., + 4.090163934426..., 3.488372093023..., 2.8773584905660..., 3.4792899408284..., + 4.0111940298507..., 3.456790123456..., 4.487603305785..., 4.007936507936..., + 5.25, 5.154696132596..., ...] + + >>> valian.MLU('Valian/01a.xml') + [2.35746606334...] + + +Basic stuff +============================== + +Count the number of words and sentences of each file. + + >>> valian = CHILDESCorpusReader(corpus_root, 'Valian/.*.xml') + >>> for this_file in valian.fileids()[:6]: + ... print(valian.corpus(this_file)[0]['Corpus'], valian.corpus(this_file)[0]['Id']) + ... print("num of words: %i" % len(valian.words(this_file))) + ... 
print("num of sents: %i" % len(valian.sents(this_file))) + valian 01a + num of words: 3606 + num of sents: 1027 + valian 01b + num of words: 4376 + num of sents: 1274 + valian 02a + num of words: 2673 + num of sents: 801 + valian 02b + num of words: 5020 + num of sents: 1583 + valian 03a + num of words: 2743 + num of sents: 988 + valian 03b + num of words: 4409 + num of sents: 1397 diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/childes_fixt.py b/venv.bak/lib/python3.7/site-packages/nltk/test/childes_fixt.py new file mode 100644 index 0000000..04701fb --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/childes_fixt.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + + +def setup_module(module): + from nose import SkipTest + import nltk.data + + try: + nltk.data.find('corpora/childes/data-xml/Eng-USA-MOR/') + except LookupError as e: + print(e) + raise SkipTest( + "The CHILDES corpus is not found. " + "It should be manually downloaded and saved/unpacked " + "to [NLTK_Data_Dir]/corpora/childes/" + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/chunk.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/chunk.doctest new file mode 100644 index 0000000..6fd2ad7 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/chunk.doctest @@ -0,0 +1,373 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +========== + Chunking +========== + + >>> from nltk.chunk import * + >>> from nltk.chunk.util import * + >>> from nltk.chunk.regexp import * + >>> from nltk import Tree + + >>> tagged_text = "[ The/DT cat/NN ] sat/VBD on/IN [ the/DT mat/NN ] [ the/DT dog/NN ] chewed/VBD ./." + >>> gold_chunked_text = tagstr2tree(tagged_text) + >>> unchunked_text = gold_chunked_text.flatten() + +Chunking uses a special regexp syntax for rules that delimit the chunks. These +rules must be converted to 'regular' regular expressions before a sentence can +be chunked. + + >>> tag_pattern = "
    ?*" + >>> regexp_pattern = tag_pattern2re_pattern(tag_pattern) + >>> regexp_pattern + '(<(DT)>)?(<(JJ)>)*(<(NN[^\\{\\}<>]*)>)' + +Construct some new chunking rules. + + >>> chunk_rule = ChunkRule("<.*>+", "Chunk everything") + >>> chink_rule = ChinkRule("", "Chink on verbs/prepositions") + >>> split_rule = SplitRule("
    ", "
    ", + ... "Split successive determiner/noun pairs") + + +Create and score a series of chunk parsers, successively more complex. + + >>> chunk_parser = RegexpChunkParser([chunk_rule], chunk_label='NP') + >>> chunked_text = chunk_parser.parse(unchunked_text) + >>> print(chunked_text) + (S + (NP + The/DT + cat/NN + sat/VBD + on/IN + the/DT + mat/NN + the/DT + dog/NN + chewed/VBD + ./.)) + + >>> chunkscore = ChunkScore() + >>> chunkscore.score(gold_chunked_text, chunked_text) + >>> print(chunkscore.precision()) + 0.0 + + >>> print(chunkscore.recall()) + 0.0 + + >>> print(chunkscore.f_measure()) + 0 + + >>> for chunk in sorted(chunkscore.missed()): print(chunk) + (NP The/DT cat/NN) + (NP the/DT dog/NN) + (NP the/DT mat/NN) + + >>> for chunk in chunkscore.incorrect(): print(chunk) + (NP + The/DT + cat/NN + sat/VBD + on/IN + the/DT + mat/NN + the/DT + dog/NN + chewed/VBD + ./.) + + >>> chunk_parser = RegexpChunkParser([chunk_rule, chink_rule], + ... chunk_label='NP') + >>> chunked_text = chunk_parser.parse(unchunked_text) + >>> print(chunked_text) + (S + (NP The/DT cat/NN) + sat/VBD + on/IN + (NP the/DT mat/NN the/DT dog/NN) + chewed/VBD + ./.) + >>> assert chunked_text == chunk_parser.parse(list(unchunked_text)) + + >>> chunkscore = ChunkScore() + >>> chunkscore.score(gold_chunked_text, chunked_text) + >>> chunkscore.precision() + 0.5 + + >>> print(chunkscore.recall()) + 0.33333333... + + >>> print(chunkscore.f_measure()) + 0.4 + + >>> for chunk in sorted(chunkscore.missed()): print(chunk) + (NP the/DT dog/NN) + (NP the/DT mat/NN) + + >>> for chunk in chunkscore.incorrect(): print(chunk) + (NP the/DT mat/NN the/DT dog/NN) + + >>> chunk_parser = RegexpChunkParser([chunk_rule, chink_rule, split_rule], + ... chunk_label='NP') + >>> chunked_text = chunk_parser.parse(unchunked_text, trace=True) + # Input: +
    <.> + # Chunk everything: + {
    <.>} + # Chink on verbs/prepositions: + {
    } {
    } <.> + # Split successive determiner/noun pairs: + {
    } {
    }{
    } <.> + >>> print(chunked_text) + (S + (NP The/DT cat/NN) + sat/VBD + on/IN + (NP the/DT mat/NN) + (NP the/DT dog/NN) + chewed/VBD + ./.) + + >>> chunkscore = ChunkScore() + >>> chunkscore.score(gold_chunked_text, chunked_text) + >>> chunkscore.precision() + 1.0 + + >>> chunkscore.recall() + 1.0 + + >>> chunkscore.f_measure() + 1.0 + + >>> chunkscore.missed() + [] + + >>> chunkscore.incorrect() + [] + + >>> chunk_parser.rules() # doctest: +NORMALIZE_WHITESPACE + [+'>, '>, + ', '
    '>] + +Printing parsers: + + >>> print(repr(chunk_parser)) + + >>> print(chunk_parser) + RegexpChunkParser with 3 rules: + Chunk everything + +'> + Chink on verbs/prepositions + '> + Split successive determiner/noun pairs + ', '
    '> + +Regression Tests +~~~~~~~~~~~~~~~~ +ChunkParserI +------------ +`ChunkParserI` is an abstract interface -- it is not meant to be +instantiated directly. + + >>> ChunkParserI().parse([]) + Traceback (most recent call last): + . . . + NotImplementedError + + +ChunkString +----------- +ChunkString can be built from a tree of tagged tuples, a tree of +trees, or a mixed list of both: + + >>> t1 = Tree('S', [('w%d' % i, 't%d' % i) for i in range(10)]) + >>> t2 = Tree('S', [Tree('t0', []), Tree('t1', ['c1'])]) + >>> t3 = Tree('S', [('w0', 't0'), Tree('t1', ['c1'])]) + >>> ChunkString(t1) + '> + >>> ChunkString(t2) + '> + >>> ChunkString(t3) + '> + +Other values generate an error: + + >>> ChunkString(Tree('S', ['x'])) + Traceback (most recent call last): + . . . + ValueError: chunk structures must contain tagged tokens or trees + +The `str()` for a chunk string adds spaces to it, which makes it line +up with `str()` output for other chunk strings over the same +underlying input. + + >>> cs = ChunkString(t1) + >>> print(cs) + + >>> cs.xform('', '{}') + >>> print(cs) + {} + +The `_verify()` method makes sure that our transforms don't corrupt +the chunk string. By setting debug_level=2, `_verify()` will be +called at the end of every call to `xform`. + + >>> cs = ChunkString(t1, debug_level=3) + + >>> # tag not marked with <...>: + >>> cs.xform('', 't3') + Traceback (most recent call last): + . . . + ValueError: Transformation generated invalid chunkstring: + t3 + + >>> # brackets not balanced: + >>> cs.xform('', '{') + Traceback (most recent call last): + . . . + ValueError: Transformation generated invalid chunkstring: + { + + >>> # nested brackets: + >>> cs.xform('', '{{}}') + Traceback (most recent call last): + . . . + ValueError: Transformation generated invalid chunkstring: + {{}} + + >>> # modified tags: + >>> cs.xform('', '') + Traceback (most recent call last): + . . . + ValueError: Transformation generated invalid chunkstring: tag changed + + >>> # added tags: + >>> cs.xform('', '') + Traceback (most recent call last): + . . . + ValueError: Transformation generated invalid chunkstring: tag changed + +Chunking Rules +-------------- + +Test the different rule constructors & __repr__ methods: + + >>> r1 = RegexpChunkRule(''+ChunkString.IN_CHINK_PATTERN, + ... '{}', 'chunk and ') + >>> r2 = RegexpChunkRule(re.compile(''+ChunkString.IN_CHINK_PATTERN), + ... '{}', 'chunk and ') + >>> r3 = ChunkRule('', 'chunk and ') + >>> r4 = ChinkRule('', 'chink and ') + >>> r5 = UnChunkRule('', 'unchunk and ') + >>> r6 = MergeRule('', '', 'merge w/ ') + >>> r7 = SplitRule('', '', 'split from ') + >>> r8 = ExpandLeftRule('', '', 'expand left ') + >>> r9 = ExpandRightRule('', '', 'expand right ') + >>> for rule in r1, r2, r3, r4, r5, r6, r7, r8, r9: + ... print(rule) + (?=[^\\}]*(\\{|$))'->'{}'> + (?=[^\\}]*(\\{|$))'->'{}'> + '> + '> + '> + ', ''> + ', ''> + ', ''> + ', ''> + +`tag_pattern2re_pattern()` complains if the tag pattern looks problematic: + + >>> tag_pattern2re_pattern('{}') + Traceback (most recent call last): + . . . + ValueError: Bad tag pattern: '{}' + +RegexpChunkParser +----------------- + +A warning is printed when parsing an empty sentence: + + >>> parser = RegexpChunkParser([ChunkRule('', '')]) + >>> parser.parse(Tree('S', [])) + Warning: parsing empty text + Tree('S', []) + +RegexpParser +------------ + + >>> parser = RegexpParser(''' + ... NP: {
    ? * *} # NP + ... P: {} # Preposition + ... V: {} # Verb + ... PP: {

    } # PP -> P NP + ... VP: { *} # VP -> V (NP|PP)* + ... ''') + >>> print(repr(parser)) + + >>> print(parser) + chunk.RegexpParser with 5 stages: + RegexpChunkParser with 1 rules: + NP ? * *'> + RegexpChunkParser with 1 rules: + Preposition '> + RegexpChunkParser with 1 rules: + Verb '> + RegexpChunkParser with 1 rules: + PP -> P NP '> + RegexpChunkParser with 1 rules: + VP -> V (NP|PP)* *'> + >>> print(parser.parse(unchunked_text, trace=True)) + # Input: +

    <.> + # NP: + {
    } {
    }{
    } <.> + # Input: + <.> + # Preposition: + {} <.> + # Input: +

    <.> + # Verb: + {}

    {} <.> + # Input: +

    <.> + # PP -> P NP: + {

    } <.> + # Input: + <.> + # VP -> V (NP|PP)*: + { }{} <.> + (S + (NP The/DT cat/NN) + (VP + (V sat/VBD) + (PP (P on/IN) (NP the/DT mat/NN)) + (NP the/DT dog/NN)) + (VP (V chewed/VBD)) + ./.) + +Test parsing of other rule types: + + >>> print(RegexpParser(''' + ... X: + ... }{ # chink rule + ... }{ # split rule + ... {} # merge rule + ... {} # chunk rule w/ context + ... ''')) + chunk.RegexpParser with 1 stages: + RegexpChunkParser with 4 rules: + chink rule '> + split rule ', ''> + merge rule ', ''> + chunk rule w/ context ', '', ''> + +Illegal patterns give an error message: + + >>> print(RegexpParser('X: {} {}')) + Traceback (most recent call last): + . . . + ValueError: Illegal chunk pattern: {} {} + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/classify.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/classify.doctest new file mode 100644 index 0000000..d208084 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/classify.doctest @@ -0,0 +1,183 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +============= + Classifiers +============= + +Classifiers label tokens with category labels (or *class labels*). +Typically, labels are represented with strings (such as ``"health"`` +or ``"sports"``. In NLTK, classifiers are defined using classes that +implement the `ClassifyI` interface: + + >>> import nltk + >>> nltk.usage(nltk.classify.ClassifierI) + ClassifierI supports the following operations: + - self.classify(featureset) + - self.classify_many(featuresets) + - self.labels() + - self.prob_classify(featureset) + - self.prob_classify_many(featuresets) + +NLTK defines several classifier classes: + +- `ConditionalExponentialClassifier` +- `DecisionTreeClassifier` +- `MaxentClassifier` +- `NaiveBayesClassifier` +- `WekaClassifier` + +Classifiers are typically created by training them on a training +corpus. + + +Regression Tests +~~~~~~~~~~~~~~~~ + +We define a very simple training corpus with 3 binary features: ['a', +'b', 'c'], and are two labels: ['x', 'y']. We use a simple feature set so +that the correct answers can be calculated analytically (although we +haven't done this yet for all tests). + + >>> train = [ + ... (dict(a=1,b=1,c=1), 'y'), + ... (dict(a=1,b=1,c=1), 'x'), + ... (dict(a=1,b=1,c=0), 'y'), + ... (dict(a=0,b=1,c=1), 'x'), + ... (dict(a=0,b=1,c=1), 'y'), + ... (dict(a=0,b=0,c=1), 'y'), + ... (dict(a=0,b=1,c=0), 'x'), + ... (dict(a=0,b=0,c=0), 'x'), + ... (dict(a=0,b=1,c=1), 'y'), + ... ] + >>> test = [ + ... (dict(a=1,b=0,c=1)), # unseen + ... (dict(a=1,b=0,c=0)), # unseen + ... (dict(a=0,b=1,c=1)), # seen 3 times, labels=y,y,x + ... (dict(a=0,b=1,c=0)), # seen 1 time, label=x + ... ] + +Test the Naive Bayes classifier: + + >>> classifier = nltk.classify.NaiveBayesClassifier.train(train) + >>> sorted(classifier.labels()) + ['x', 'y'] + >>> classifier.classify_many(test) + ['y', 'x', 'y', 'x'] + >>> for pdist in classifier.prob_classify_many(test): + ... print('%.4f %.4f' % (pdist.prob('x'), pdist.prob('y'))) + 0.3203 0.6797 + 0.5857 0.4143 + 0.3792 0.6208 + 0.6470 0.3530 + >>> classifier.show_most_informative_features() + Most Informative Features + c = 0 x : y = 2.0 : 1.0 + c = 1 y : x = 1.5 : 1.0 + a = 1 y : x = 1.4 : 1.0 + b = 0 x : y = 1.2 : 1.0 + a = 0 x : y = 1.2 : 1.0 + b = 1 y : x = 1.1 : 1.0 + +Test the Decision Tree classifier: + + >>> classifier = nltk.classify.DecisionTreeClassifier.train( + ... train, entropy_cutoff=0, + ... 
support_cutoff=0) + >>> sorted(classifier.labels()) + ['x', 'y'] + >>> print(classifier) + c=0? .................................................. x + a=0? ................................................ x + a=1? ................................................ y + c=1? .................................................. y + + >>> classifier.classify_many(test) + ['y', 'y', 'y', 'x'] + >>> for pdist in classifier.prob_classify_many(test): + ... print('%.4f %.4f' % (pdist.prob('x'), pdist.prob('y'))) + Traceback (most recent call last): + . . . + NotImplementedError + +Test SklearnClassifier, which requires the scikit-learn package. + + >>> from nltk.classify import SklearnClassifier + >>> from sklearn.naive_bayes import BernoulliNB + >>> from sklearn.svm import SVC + >>> train_data = [({"a": 4, "b": 1, "c": 0}, "ham"), + ... ({"a": 5, "b": 2, "c": 1}, "ham"), + ... ({"a": 0, "b": 3, "c": 4}, "spam"), + ... ({"a": 5, "b": 1, "c": 1}, "ham"), + ... ({"a": 1, "b": 4, "c": 3}, "spam")] + >>> classif = SklearnClassifier(BernoulliNB()).train(train_data) + >>> test_data = [{"a": 3, "b": 2, "c": 1}, + ... {"a": 0, "b": 3, "c": 7}] + >>> classif.classify_many(test_data) + ['ham', 'spam'] + >>> classif = SklearnClassifier(SVC(), sparse=False).train(train_data) + >>> classif.classify_many(test_data) + ['ham', 'spam'] + +Test the Maximum Entropy classifier training algorithms; they should all +generate the same results. + + >>> def print_maxent_test_header(): + ... print(' '*11+''.join([' test[%s] ' % i + ... for i in range(len(test))])) + ... print(' '*11+' p(x) p(y)'*len(test)) + ... print('-'*(11+15*len(test))) + + >>> def test_maxent(algorithm): + ... print('%11s' % algorithm, end=' ') + ... try: + ... classifier = nltk.classify.MaxentClassifier.train( + ... train, algorithm, trace=0, max_iter=1000) + ... except Exception as e: + ... print('Error: %r' % e) + ... return + ... + ... for featureset in test: + ... pdist = classifier.prob_classify(featureset) + ... print('%8.2f%6.2f' % (pdist.prob('x'), pdist.prob('y')), end=' ') + ... print() + + >>> print_maxent_test_header(); test_maxent('GIS'); test_maxent('IIS') + test[0] test[1] test[2] test[3] + p(x) p(y) p(x) p(y) p(x) p(y) p(x) p(y) + ----------------------------------------------------------------------- + GIS 0.16 0.84 0.46 0.54 0.41 0.59 0.76 0.24 + IIS 0.16 0.84 0.46 0.54 0.41 0.59 0.76 0.24 + + >>> test_maxent('MEGAM'); test_maxent('TADM') # doctest: +SKIP + MEGAM 0.16 0.84 0.46 0.54 0.41 0.59 0.76 0.24 + TADM 0.16 0.84 0.46 0.54 0.41 0.59 0.76 0.24 + + + +Regression tests for TypedMaxentFeatureEncoding +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + >>> from nltk.classify import maxent + >>> train = [ + ... ({'a': 1, 'b': 1, 'c': 1}, 'y'), + ... ({'a': 5, 'b': 5, 'c': 5}, 'x'), + ... ({'a': 0.9, 'b': 0.9, 'c': 0.9}, 'y'), + ... ({'a': 5.5, 'b': 5.4, 'c': 5.3}, 'x'), + ... ({'a': 0.8, 'b': 1.2, 'c': 1}, 'y'), + ... ({'a': 5.1, 'b': 4.9, 'c': 5.2}, 'x') + ... ] + + >>> test = [ + ... {'a': 1, 'b': 0.8, 'c': 1.2}, + ... {'a': 5.2, 'b': 5.1, 'c': 5} + ... ] + + >>> encoding = maxent.TypedMaxentFeatureEncoding.train( + ... train, count_cutoff=3, alwayson_features=True) + + >>> classifier = maxent.MaxentClassifier.train( + ... 
train, bernoulli=False, encoding=encoding, trace=0) + + >>> classifier.classify_many(test) + ['y', 'x'] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/classify_fixt.py b/venv.bak/lib/python3.7/site-packages/nltk/test/classify_fixt.py new file mode 100644 index 0000000..dce0704 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/classify_fixt.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + + +# most of classify.doctest requires numpy +def setup_module(module): + from nose import SkipTest + + try: + import numpy + except ImportError: + raise SkipTest("classify.doctest requires numpy") diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/collections.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/collections.doctest new file mode 100644 index 0000000..6a67511 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/collections.doctest @@ -0,0 +1,20 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +=========== +Collections +=========== + + >>> import nltk + >>> from nltk.collections import * + +Trie +---- + +Trie can be pickled: + + >>> import pickle + >>> trie = nltk.collections.Trie(['a']) + >>> s = pickle.dumps(trie) + >>> pickle.loads(s) + {'a': {True: None}} \ No newline at end of file diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/collocations.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/collocations.doctest new file mode 100644 index 0000000..033d813 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/collocations.doctest @@ -0,0 +1,283 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +============== + Collocations +============== + +Overview +~~~~~~~~ + +Collocations are expressions of multiple words which commonly co-occur. For +example, the top ten bigram collocations in Genesis are listed below, as +measured using Pointwise Mutual Information. + + >>> import nltk + >>> from nltk.collocations import * + >>> bigram_measures = nltk.collocations.BigramAssocMeasures() + >>> trigram_measures = nltk.collocations.TrigramAssocMeasures() + >>> fourgram_measures = nltk.collocations.QuadgramAssocMeasures() + >>> finder = BigramCollocationFinder.from_words( + ... nltk.corpus.genesis.words('english-web.txt')) + >>> finder.nbest(bigram_measures.pmi, 10) # doctest: +NORMALIZE_WHITESPACE + [(u'Allon', u'Bacuth'), (u'Ashteroth', u'Karnaim'), (u'Ben', u'Ammi'), + (u'En', u'Mishpat'), (u'Jegar', u'Sahadutha'), (u'Salt', u'Sea'), + (u'Whoever', u'sheds'), (u'appoint', u'overseers'), (u'aromatic', u'resin'), + (u'cutting', u'instrument')] + +While these words are highly collocated, the expressions are also very +infrequent. Therefore it is useful to apply filters, such as ignoring all +bigrams which occur less than three times in the corpus: + + >>> finder.apply_freq_filter(3) + >>> finder.nbest(bigram_measures.pmi, 10) # doctest: +NORMALIZE_WHITESPACE + [(u'Beer', u'Lahai'), (u'Lahai', u'Roi'), (u'gray', u'hairs'), + (u'Most', u'High'), (u'ewe', u'lambs'), (u'many', u'colors'), + (u'burnt', u'offering'), (u'Paddan', u'Aram'), (u'east', u'wind'), + (u'living', u'creature')] + +We may similarly find collocations among tagged words: + + >>> finder = BigramCollocationFinder.from_words( + ... 
nltk.corpus.brown.tagged_words('ca01', tagset='universal')) + >>> finder.nbest(bigram_measures.pmi, 5) # doctest: +NORMALIZE_WHITESPACE + [(('1,119', 'NUM'), ('votes', 'NOUN')), + (('1962', 'NUM'), ("governor's", 'NOUN')), + (('637', 'NUM'), ('E.', 'NOUN')), + (('Alpharetta', 'NOUN'), ('prison', 'NOUN')), + (('Bar', 'NOUN'), ('Association', 'NOUN'))] + +Or tags alone: + + >>> finder = BigramCollocationFinder.from_words(t for w, t in + ... nltk.corpus.brown.tagged_words('ca01', tagset='universal')) + >>> finder.nbest(bigram_measures.pmi, 10) # doctest: +NORMALIZE_WHITESPACE + [('PRT', 'VERB'), ('PRON', 'VERB'), ('ADP', 'DET'), ('.', 'PRON'), ('DET', 'ADJ'), + ('CONJ', 'PRON'), ('ADP', 'NUM'), ('NUM', '.'), ('ADV', 'ADV'), ('VERB', 'ADV')] + +Or spanning intervening words: + + >>> finder = BigramCollocationFinder.from_words( + ... nltk.corpus.genesis.words('english-web.txt'), + ... window_size = 20) + >>> finder.apply_freq_filter(2) + >>> ignored_words = nltk.corpus.stopwords.words('english') + >>> finder.apply_word_filter(lambda w: len(w) < 3 or w.lower() in ignored_words) + >>> finder.nbest(bigram_measures.likelihood_ratio, 10) # doctest: +NORMALIZE_WHITESPACE + [(u'chief', u'chief'), (u'became', u'father'), (u'years', u'became'), + (u'hundred', u'years'), (u'lived', u'became'), (u'king', u'king'), + (u'lived', u'years'), (u'became', u'became'), (u'chief', u'chiefs'), + (u'hundred', u'became')] + +Finders +~~~~~~~ + +The collocations package provides collocation finders which by default +consider all ngrams in a text as candidate collocations: + + >>> text = "I do not like green eggs and ham, I do not like them Sam I am!" + >>> tokens = nltk.wordpunct_tokenize(text) + >>> finder = BigramCollocationFinder.from_words(tokens) + >>> scored = finder.score_ngrams(bigram_measures.raw_freq) + >>> sorted(bigram for bigram, score in scored) # doctest: +NORMALIZE_WHITESPACE + [(',', 'I'), ('I', 'am'), ('I', 'do'), ('Sam', 'I'), ('am', '!'), + ('and', 'ham'), ('do', 'not'), ('eggs', 'and'), ('green', 'eggs'), + ('ham', ','), ('like', 'green'), ('like', 'them'), ('not', 'like'), + ('them', 'Sam')] + +We could otherwise construct the collocation finder from manually-derived +FreqDists: + + >>> word_fd = nltk.FreqDist(tokens) + >>> bigram_fd = nltk.FreqDist(nltk.bigrams(tokens)) + >>> finder = BigramCollocationFinder(word_fd, bigram_fd) + >>> scored == finder.score_ngrams(bigram_measures.raw_freq) + True + +A similar interface is provided for trigrams: + + >>> finder = TrigramCollocationFinder.from_words(tokens) + >>> scored = finder.score_ngrams(trigram_measures.raw_freq) + >>> set(trigram for trigram, score in scored) == set(nltk.trigrams(tokens)) + True + +We may want to select only the top n results: + + >>> sorted(finder.nbest(trigram_measures.raw_freq, 2)) + [('I', 'do', 'not'), ('do', 'not', 'like')] + +Alternatively, we can select those above a minimum score value: + + >>> sorted(finder.above_score(trigram_measures.raw_freq, + ... 
1.0 / len(tuple(nltk.trigrams(tokens))))) + [('I', 'do', 'not'), ('do', 'not', 'like')] + +Now spanning intervening words: + + >>> finder = TrigramCollocationFinder.from_words(tokens) + >>> finder = TrigramCollocationFinder.from_words(tokens, window_size=4) + >>> sorted(finder.nbest(trigram_measures.raw_freq, 4)) + [('I', 'do', 'like'), ('I', 'do', 'not'), ('I', 'not', 'like'), ('do', 'not', 'like')] + +A closer look at the finder's ngram frequencies: + + >>> sorted(finder.ngram_fd.items(), key=lambda t: (-t[1], t[0]))[:10] # doctest: +NORMALIZE_WHITESPACE + [(('I', 'do', 'like'), 2), (('I', 'do', 'not'), 2), (('I', 'not', 'like'), 2), + (('do', 'not', 'like'), 2), ((',', 'I', 'do'), 1), ((',', 'I', 'not'), 1), + ((',', 'do', 'not'), 1), (('I', 'am', '!'), 1), (('Sam', 'I', '!'), 1), + (('Sam', 'I', 'am'), 1)] + +A similar interface is provided for fourgrams: + + >>> finder_4grams = QuadgramCollocationFinder.from_words(tokens) + >>> scored_4grams = finder_4grams.score_ngrams(fourgram_measures.raw_freq) + >>> set(fourgram for fourgram, score in scored_4grams) == set(nltk.ngrams(tokens, n=4)) + True + +Filtering candidates +~~~~~~~~~~~~~~~~~~~~ + +All the ngrams in a text are often too many to be useful when finding +collocations. It is generally useful to remove some words or punctuation, +and to require a minimum frequency for candidate collocations. + +Given our sample text above, if we remove all trigrams containing personal +pronouns from candidature, score_ngrams should return 6 less results, and +'do not like' will be the only candidate which occurs more than once: + + >>> finder = TrigramCollocationFinder.from_words(tokens) + >>> len(finder.score_ngrams(trigram_measures.raw_freq)) + 14 + >>> finder.apply_word_filter(lambda w: w in ('I', 'me')) + >>> len(finder.score_ngrams(trigram_measures.raw_freq)) + 8 + >>> sorted(finder.above_score(trigram_measures.raw_freq, + ... 1.0 / len(tuple(nltk.trigrams(tokens))))) + [('do', 'not', 'like')] + +Sometimes a filter is a function on the whole ngram, rather than each word, +such as if we may permit 'and' to appear in the middle of a trigram, but +not on either edge: + + >>> finder.apply_ngram_filter(lambda w1, w2, w3: 'and' in (w1, w3)) + >>> len(finder.score_ngrams(trigram_measures.raw_freq)) + 6 + +Finally, it is often important to remove low frequency candidates, as we +lack sufficient evidence about their significance as collocations: + + >>> finder.apply_freq_filter(2) + >>> len(finder.score_ngrams(trigram_measures.raw_freq)) + 1 + +Association measures +~~~~~~~~~~~~~~~~~~~~ + +A number of measures are available to score collocations or other associations. +The arguments to measure functions are marginals of a contingency table, in the +bigram case (n_ii, (n_ix, n_xi), n_xx):: + + w1 ~w1 + ------ ------ + w2 | n_ii | n_oi | = n_xi + ------ ------ + ~w2 | n_io | n_oo | + ------ ------ + = n_ix TOTAL = n_xx + +We test their calculation using some known values presented in Manning and +Schutze's text and other papers. 
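To make the meaning of these marginals concrete, pointwise mutual information can be computed directly from them as ``log2(n_ii * n_xx / (n_ix * n_xi))``. The following is a minimal sketch (not the NLTK implementation itself) that reproduces the PMI example given below::

    from math import log2

    def pmi_from_marginals(n_ii, n_ix, n_xi, n_xx):
        # PMI = log2( P(w1, w2) / (P(w1) * P(w2)) ), written in terms of the
        # contingency-table marginals described above
        return log2(n_ii * n_xx / (n_ix * n_xi))

    # reproduces bigram_measures.pmi(20, (42, 20), 14307668) == 18.38 (see below)
    print('%0.2f' % pmi_from_marginals(20, 42, 20, 14307668))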
+ +Student's t: examples from Manning and Schutze 5.3.2 + + >>> print('%0.4f' % bigram_measures.student_t(8, (15828, 4675), 14307668)) + 0.9999 + >>> print('%0.4f' % bigram_measures.student_t(20, (42, 20), 14307668)) + 4.4721 + +Chi-square: examples from Manning and Schutze 5.3.3 + + >>> print('%0.2f' % bigram_measures.chi_sq(8, (15828, 4675), 14307668)) + 1.55 + >>> print('%0.0f' % bigram_measures.chi_sq(59, (67, 65), 571007)) + 456400 + +Likelihood ratios: examples from Dunning, CL, 1993 + + >>> print('%0.2f' % bigram_measures.likelihood_ratio(110, (2552, 221), 31777)) + 270.72 + >>> print('%0.2f' % bigram_measures.likelihood_ratio(8, (13, 32), 31777)) + 95.29 + +Pointwise Mutual Information: examples from Manning and Schutze 5.4 + + >>> print('%0.2f' % bigram_measures.pmi(20, (42, 20), 14307668)) + 18.38 + >>> print('%0.2f' % bigram_measures.pmi(20, (15019, 15629), 14307668)) + 0.29 + +TODO: Find authoritative results for trigrams. + +Using contingency table values +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +While frequency counts make marginals readily available for collocation +finding, it is common to find published contingency table values. The +collocations package therefore provides a wrapper, ContingencyMeasures, which +wraps an association measures class, providing association measures which +take contingency values as arguments, (n_ii, n_io, n_oi, n_oo) in the +bigram case. + + >>> from nltk.metrics import ContingencyMeasures + >>> cont_bigram_measures = ContingencyMeasures(bigram_measures) + >>> print('%0.2f' % cont_bigram_measures.likelihood_ratio(8, 5, 24, 31740)) + 95.29 + >>> print('%0.2f' % cont_bigram_measures.chi_sq(8, 15820, 4667, 14287173)) + 1.55 + +Ranking and correlation +~~~~~~~~~~~~~~~~~~~~~~~ + +It is useful to consider the results of finding collocations as a ranking, and +the rankings output using different association measures can be compared using +the Spearman correlation coefficient. + +Ranks can be assigned to a sorted list of results trivially by assigning +strictly increasing ranks to each result: + + >>> from nltk.metrics.spearman import * + >>> results_list = ['item1', 'item2', 'item3', 'item4', 'item5'] + >>> print(list(ranks_from_sequence(results_list))) + [('item1', 0), ('item2', 1), ('item3', 2), ('item4', 3), ('item5', 4)] + +If scores are available for each result, we may allow sufficiently similar +results (differing by no more than rank_gap) to be assigned the same rank: + + >>> results_scored = [('item1', 50.0), ('item2', 40.0), ('item3', 38.0), + ... ('item4', 35.0), ('item5', 14.0)] + >>> print(list(ranks_from_scores(results_scored, rank_gap=5))) + [('item1', 0), ('item2', 1), ('item3', 1), ('item4', 1), ('item5', 4)] + +The Spearman correlation coefficient gives a number from -1.0 to 1.0 comparing +two rankings. A coefficient of 1.0 indicates identical rankings; -1.0 indicates +exact opposite rankings. + + >>> print('%0.1f' % spearman_correlation( + ... ranks_from_sequence(results_list), + ... ranks_from_sequence(results_list))) + 1.0 + >>> print('%0.1f' % spearman_correlation( + ... ranks_from_sequence(reversed(results_list)), + ... ranks_from_sequence(results_list))) + -1.0 + >>> results_list2 = ['item2', 'item3', 'item1', 'item5', 'item4'] + >>> print('%0.1f' % spearman_correlation( + ... ranks_from_sequence(results_list), + ... ranks_from_sequence(results_list2))) + 0.6 + >>> print('%0.1f' % spearman_correlation( + ... ranks_from_sequence(reversed(results_list)), + ... 
ranks_from_sequence(results_list2))) + -0.6 + + diff --git a/venv/lib/python3.7/site-packages/nltk/test/compat.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/compat.doctest similarity index 100% rename from venv/lib/python3.7/site-packages/nltk/test/compat.doctest rename to venv.bak/lib/python3.7/site-packages/nltk/test/compat.doctest diff --git a/venv/lib/python3.7/site-packages/nltk/test/compat_fixt.py b/venv.bak/lib/python3.7/site-packages/nltk/test/compat_fixt.py similarity index 100% rename from venv/lib/python3.7/site-packages/nltk/test/compat_fixt.py rename to venv.bak/lib/python3.7/site-packages/nltk/test/compat_fixt.py diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/concordance.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/concordance.doctest new file mode 100644 index 0000000..8f11fc8 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/concordance.doctest @@ -0,0 +1,68 @@ +.. Copyright (C) 2001-2016 NLTK Project +.. For license information, see LICENSE.TXT + +================================== +Concordance Example +================================== + +A concordance view shows us every occurrence of a given +word, together with some context. Here we look up the word monstrous +in Moby Dick by entering text1 followed by a period, then the term +concordance, and then placing "monstrous" in parentheses: + +>>> from nltk.corpus import gutenberg +>>> from nltk.text import Text +>>> corpus = gutenberg.words('melville-moby_dick.txt') +>>> text = Text(corpus) + +>>> text.concordance("monstrous") # doctest:+NORMALIZE_WHITESPACE +Displaying 11 of 11 matches: +ong the former , one was of a most monstrous size . ... This came towards us , +ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r +ll over with a heathenish array of monstrous clubs and spears . Some were thick +d as you gazed , and wondered what monstrous cannibal and savage could ever hav +that has survived the flood ; most monstrous and most mountainous ! That Himmal +they might scout at Moby Dick as a monstrous fable , or still worse and more de +th of Radney .'" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l +ing Scenes . In connexion with the monstrous pictures of whales , I am strongly +ere to enter upon those still more monstrous stories of them which are to be fo +ght have been rummaged out of this monstrous cabinet there is no telling . But +of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u + +>>> text.concordance("monstrous") # doctest:+ELLIPSIS, +NORMALIZE_WHITESPACE +Displaying 11 of 11 matches: +ong the former , one was of a most monstrous size . ... This came towards us , +ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r +ll over with a heathenish array of monstrous clubs and spears . Some were thick +... + +================================= +Concordance List +================================= + +Often we need to store the results of concordance for further usage. +To do so, call the concordance function with the stdout argument set +to false: + +>>> from nltk.corpus import gutenberg +>>> from nltk.text import Text +>>> corpus = gutenberg.words('melville-moby_dick.txt') +>>> text = Text(corpus) +>>> con_list = text.concordance_list("monstrous") +>>> con_list[2].line +'ll over with a heathenish array of monstrous clubs and spears . 
Some were thick' +>>> len(con_list) +11 + +================================= +Patching Issue #2088 +================================= + +Patching https://github.com/nltk/nltk/issues/2088 +The left slice of the left context should be clip to 0 if the `i-context` < 0. + +>>> from nltk import Text, word_tokenize +>>> jane_eyre = 'Chapter 1\nTHERE was no possibility of taking a walk that day. We had been wandering, indeed, in the leafless shrubbery an hour in the morning; but since dinner (Mrs. Reed, when there was no company, dined early) the cold winter wind had brought with it clouds so sombre, and a rain so penetrating, that further outdoor exercise was now out of the question.' +>>> text = Text(word_tokenize(jane_eyre)) +>>> text.concordance_list('taking')[0].left +['Chapter', '1', 'THERE', 'was', 'no', 'possibility', 'of'] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/corpus.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/corpus.doctest new file mode 100644 index 0000000..5509fe2 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/corpus.doctest @@ -0,0 +1,2200 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +================ + Corpus Readers +================ + +The `nltk.corpus` package defines a collection of *corpus reader* +classes, which can be used to access the contents of a diverse set of +corpora. The list of available corpora is given at: + +http://www.nltk.org/nltk_data/ + +Each corpus reader class is specialized to handle a specific +corpus format. In addition, the `nltk.corpus` package automatically +creates a set of corpus reader instances that can be used to access +the corpora in the NLTK data package. +Section `Corpus Reader Objects`_ ("Corpus Reader Objects") describes +the corpus reader instances that can be used to read the corpora in +the NLTK data package. Section `Corpus Reader Classes`_ ("Corpus +Reader Classes") describes the corpus reader classes themselves, and +discusses the issues involved in creating new corpus reader objects +and new corpus reader classes. Section `Regression Tests`_ +("Regression Tests") contains regression tests for the corpus readers +and associated functions and classes. + +.. contents:: **Table of Contents** + :depth: 2 + :backlinks: none + +--------------------- +Corpus Reader Objects +--------------------- + +Overview +======== + +NLTK includes a diverse set of corpora which can be +read using the ``nltk.corpus`` package. Each corpus is accessed by +means of a "corpus reader" object from ``nltk.corpus``: + + >>> import nltk.corpus + >>> # The Brown corpus: + >>> print(str(nltk.corpus.brown).replace('\\\\','/')) + + >>> # The Penn Treebank Corpus: + >>> print(str(nltk.corpus.treebank).replace('\\\\','/')) + + >>> # The Name Genders Corpus: + >>> print(str(nltk.corpus.names).replace('\\\\','/')) + + >>> # The Inaugural Address Corpus: + >>> print(str(nltk.corpus.inaugural).replace('\\\\','/')) + + +Most corpora consist of a set of files, each containing a document (or +other pieces of text). A list of identifiers for these files is +accessed via the ``fileids()`` method of the corpus reader: + + >>> nltk.corpus.treebank.fileids() # doctest: +ELLIPSIS + ['wsj_0001.mrg', 'wsj_0002.mrg', 'wsj_0003.mrg', 'wsj_0004.mrg', ...] + >>> nltk.corpus.inaugural.fileids() # doctest: +ELLIPSIS + ['1789-Washington.txt', '1793-Washington.txt', '1797-Adams.txt', ...] 
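Because ``fileids()`` returns an ordinary Python list, it can be sliced and iterated over directly. A small sketch (using the ``words()`` method introduced in the next paragraph) that prints a word count for the first few inaugural addresses::

    import nltk

    for fid in nltk.corpus.inaugural.fileids()[:3]:
        # each fileid can be passed back to the reader's access methods
        print(fid, len(nltk.corpus.inaugural.words(fid)))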
+ +Each corpus reader provides a variety of methods to read data from the +corpus, depending on the format of the corpus. For example, plaintext +corpora support methods to read the corpus as raw text, a list of +words, a list of sentences, or a list of paragraphs. + + >>> from nltk.corpus import inaugural + >>> inaugural.raw('1789-Washington.txt') # doctest: +ELLIPSIS + 'Fellow-Citizens of the Senate ...' + >>> inaugural.words('1789-Washington.txt') + ['Fellow', '-', 'Citizens', 'of', 'the', ...] + >>> inaugural.sents('1789-Washington.txt') # doctest: +ELLIPSIS + [['Fellow', '-', 'Citizens'...], ['Among', 'the', 'vicissitudes'...]...] + >>> inaugural.paras('1789-Washington.txt') # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + [[['Fellow', '-', 'Citizens'...]], + [['Among', 'the', 'vicissitudes'...], + ['On', 'the', 'one', 'hand', ',', 'I'...]...]...] + +Each of these reader methods may be given a single document's item +name or a list of document item names. When given a list of document +item names, the reader methods will concatenate together the contents +of the individual documents. + + >>> l1 = len(inaugural.words('1789-Washington.txt')) + >>> l2 = len(inaugural.words('1793-Washington.txt')) + >>> l3 = len(inaugural.words(['1789-Washington.txt', '1793-Washington.txt'])) + >>> print('%s+%s == %s' % (l1, l2, l3)) + 1538+147 == 1685 + +If the reader methods are called without any arguments, they will +typically load all documents in the corpus. + + >>> len(inaugural.words()) + 145735 + +If a corpus contains a README file, it can be accessed with a ``readme()`` method: + + >>> inaugural.readme()[:32] + 'C-Span Inaugural Address Corpus\n' + +Plaintext Corpora +================= + +Here are the first few words from each of NLTK's plaintext corpora: + + >>> nltk.corpus.abc.words() + ['PM', 'denies', 'knowledge', 'of', 'AWB', ...] + >>> nltk.corpus.genesis.words() + [u'In', u'the', u'beginning', u'God', u'created', ...] + >>> nltk.corpus.gutenberg.words(fileids='austen-emma.txt') + ['[', 'Emma', 'by', 'Jane', 'Austen', '1816', ...] + >>> nltk.corpus.inaugural.words() + ['Fellow', '-', 'Citizens', 'of', 'the', ...] + >>> nltk.corpus.state_union.words() + ['PRESIDENT', 'HARRY', 'S', '.', 'TRUMAN', "'", ...] + >>> nltk.corpus.webtext.words() + ['Cookie', 'Manager', ':', '"', 'Don', "'", 't', ...] + +Tagged Corpora +============== + +In addition to the plaintext corpora, NLTK's data package also +contains a wide variety of annotated corpora. For example, the Brown +Corpus is annotated with part-of-speech tags, and defines additional +methods ``tagged_*()`` which words as `(word,tag)` tuples, rather +than just bare word strings. + + >>> from nltk.corpus import brown + >>> print(brown.words()) + ['The', 'Fulton', 'County', 'Grand', 'Jury', ...] + >>> print(brown.tagged_words()) + [('The', 'AT'), ('Fulton', 'NP-TL'), ...] + >>> print(brown.sents()) # doctest: +ELLIPSIS + [['The', 'Fulton', 'County'...], ['The', 'jury', 'further'...], ...] + >>> print(brown.tagged_sents()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + [[('The', 'AT'), ('Fulton', 'NP-TL')...], + [('The', 'AT'), ('jury', 'NN'), ('further', 'RBR')...]...] + >>> print(brown.paras(categories='reviews')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + [[['It', 'is', 'not', 'news', 'that', 'Nathan', 'Milstein'...], + ['Certainly', 'not', 'in', 'Orchestra', 'Hall', 'where'...]], + [['There', 'was', 'about', 'that', 'song', 'something', ...], + ['Not', 'the', 'noblest', 'performance', 'we', 'have', ...], ...], ...] 
+ >>> print(brown.tagged_paras(categories='reviews')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + [[[('It', 'PPS'), ('is', 'BEZ'), ('not', '*'), ...], + [('Certainly', 'RB'), ('not', '*'), ('in', 'IN'), ...]], + [[('There', 'EX'), ('was', 'BEDZ'), ('about', 'IN'), ...], + [('Not', '*'), ('the', 'AT'), ('noblest', 'JJT'), ...], ...], ...] + +Similarly, the Indian Language POS-Tagged Corpus includes samples of +Indian text annotated with part-of-speech tags: + + >>> from nltk.corpus import indian + >>> print(indian.words()) # doctest: +SKIP + ['\xe0\xa6\xae\xe0\xa6\xb9\xe0\xa6\xbf\...', + '\xe0\xa6\xb8\xe0\xa6\xa8\xe0\xa7\x8d\xe0...', ...] + >>> print(indian.tagged_words()) # doctest: +SKIP + [('\xe0\xa6\xae\xe0\xa6\xb9\xe0\xa6\xbf...', 'NN'), + ('\xe0\xa6\xb8\xe0\xa6\xa8\xe0\xa7\x8d\xe0...', 'NN'), ...] + +Several tagged corpora support access to a simplified, universal tagset, e.g. where all nouns +tags are collapsed to a single category ``NOUN``: + + >>> print(brown.tagged_sents(tagset='universal')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + [[('The', 'DET'), ('Fulton', 'NOUN'), ('County', 'NOUN'), ('Grand', 'ADJ'), ('Jury', 'NOUN'), ...], + [('The', 'DET'), ('jury', 'NOUN'), ('further', 'ADV'), ('said', 'VERB'), ('in', 'ADP'), ...]...] + >>> from nltk.corpus import conll2000, switchboard + >>> print(conll2000.tagged_words(tagset='universal')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + [('Confidence', 'NOUN'), ('in', 'ADP'), ...] + +Use ``nltk.app.pos_concordance()`` to access a GUI for searching tagged corpora. + +Chunked Corpora +=============== + +The CoNLL corpora also provide chunk structures, which are encoded as +flat trees. The CoNLL 2000 Corpus includes phrasal chunks; and the +CoNLL 2002 Corpus includes named entity chunks. + + >>> from nltk.corpus import conll2000, conll2002 + >>> print(conll2000.sents()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + [['Confidence', 'in', 'the', 'pound', 'is', 'widely', ...], + ['Chancellor', 'of', 'the', 'Exchequer', ...], ...] + >>> for tree in conll2000.chunked_sents()[:2]: + ... print(tree) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + (S + (NP Confidence/NN) + (PP in/IN) + (NP the/DT pound/NN) + (VP is/VBZ widely/RB expected/VBN to/TO take/VB) + (NP another/DT sharp/JJ dive/NN) + if/IN + ...) + (S + Chancellor/NNP + (PP of/IN) + (NP the/DT Exchequer/NNP) + ...) + >>> print(conll2002.sents()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + [[u'Sao', u'Paulo', u'(', u'Brasil', u')', u',', ...], [u'-'], ...] + >>> for tree in conll2002.chunked_sents()[:2]: + ... print(tree) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + (S + (LOC Sao/NC Paulo/VMI) + (/Fpa + (LOC Brasil/NC) + )/Fpt + ...) + (S -/Fg) + +.. note:: Since the CONLL corpora do not contain paragraph break + information, these readers do not support the ``para()`` method.) + +.. warning:: if you call the conll corpora reader methods without any + arguments, they will return the contents of the entire corpus, + *including* the 'test' portions of the corpus.) + +SemCor is a subset of the Brown corpus tagged with WordNet senses and +named entities. Both kinds of lexical items include multiword units, +which are encoded as chunks (senses and part-of-speech tags pertain +to the entire chunk). + + >>> from nltk.corpus import semcor + >>> semcor.words() + ['The', 'Fulton', 'County', 'Grand', 'Jury', ...] + >>> semcor.chunks() + [['The'], ['Fulton', 'County', 'Grand', 'Jury'], ...] 
+ >>> semcor.sents() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + [['The', 'Fulton', 'County', 'Grand', 'Jury', 'said', ...], + ['The', 'jury', 'further', 'said', ...], ...] + >>> semcor.chunk_sents() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + [[['The'], ['Fulton', 'County', 'Grand', 'Jury'], ['said'], ... + ['.']], [['The'], ['jury'], ['further'], ['said'], ... ['.']], ...] + >>> list(map(str, semcor.tagged_chunks(tag='both')[:3])) + ['(DT The)', "(Lemma('group.n.01.group') (NE (NNP Fulton County Grand Jury)))", "(Lemma('state.v.01.say') (VB said))"] + >>> [[str(c) for c in s] for s in semcor.tagged_sents(tag='both')[:2]] + [['(DT The)', "(Lemma('group.n.01.group') (NE (NNP Fulton County Grand Jury)))", ... + '(None .)'], ['(DT The)', ... '(None .)']] + + +The IEER corpus is another chunked corpus. This corpus is unusual in +that each corpus item contains multiple documents. (This reflects the +fact that each corpus file contains multiple documents.) The IEER +corpus defines the `parsed_docs` method, which returns the documents +in a given item as `IEERDocument` objects: + + >>> from nltk.corpus import ieer + >>> ieer.fileids() # doctest: +NORMALIZE_WHITESPACE + ['APW_19980314', 'APW_19980424', 'APW_19980429', + 'NYT_19980315', 'NYT_19980403', 'NYT_19980407'] + >>> docs = ieer.parsed_docs('APW_19980314') + >>> print(docs[0]) + + >>> print(docs[0].docno) + APW19980314.0391 + >>> print(docs[0].doctype) + NEWS STORY + >>> print(docs[0].date_time) + 03/14/1998 10:36:00 + >>> print(docs[0].headline) + (DOCUMENT Kenyans protest tax hikes) + >>> print(docs[0].text) # doctest: +ELLIPSIS + (DOCUMENT + (LOCATION NAIROBI) + , + (LOCATION Kenya) + ( + (ORGANIZATION AP) + ) + _ + (CARDINAL Thousands) + of + laborers, + ... + on + (DATE Saturday) + ...) + +Parsed Corpora +============== + +The Treebank corpora provide a syntactic parse for each sentence. The +NLTK data package includes a 10% sample of the Penn Treebank (in +``treebank``), as well as the Sinica Treebank (in ``sinica_treebank``). + +Reading the Penn Treebank (Wall Street Journal sample): + + >>> from nltk.corpus import treebank + >>> print(treebank.fileids()) # doctest: +ELLIPSIS + ['wsj_0001.mrg', 'wsj_0002.mrg', 'wsj_0003.mrg', 'wsj_0004.mrg', ...] + >>> print(treebank.words('wsj_0003.mrg')) + ['A', 'form', 'of', 'asbestos', 'once', 'used', ...] + >>> print(treebank.tagged_words('wsj_0003.mrg')) + [('A', 'DT'), ('form', 'NN'), ('of', 'IN'), ...] + >>> print(treebank.parsed_sents('wsj_0003.mrg')[0]) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + (S + (S-TPC-1 + (NP-SBJ + (NP (NP (DT A) (NN form)) (PP (IN of) (NP (NN asbestos)))) + (RRC ...)...)...) + ... + (VP (VBD reported) (SBAR (-NONE- 0) (S (-NONE- *T*-1)))) + (. .)) + +If you have access to a full installation of the Penn Treebank, NLTK +can be configured to load it as well. Download the ``ptb`` package, +and in the directory ``nltk_data/corpora/ptb`` place the ``BROWN`` +and ``WSJ`` directories of the Treebank installation (symlinks work +as well). Then use the ``ptb`` module instead of ``treebank``: + + >>> from nltk.corpus import ptb + >>> print(ptb.fileids()) # doctest: +SKIP + ['BROWN/CF/CF01.MRG', 'BROWN/CF/CF02.MRG', 'BROWN/CF/CF03.MRG', 'BROWN/CF/CF04.MRG', ...] + >>> print(ptb.words('WSJ/00/WSJ_0003.MRG')) # doctest: +SKIP + ['A', 'form', 'of', 'asbestos', 'once', 'used', '*', ...] + >>> print(ptb.tagged_words('WSJ/00/WSJ_0003.MRG')) # doctest: +SKIP + [('A', 'DT'), ('form', 'NN'), ('of', 'IN'), ...] + +...and so forth, like ``treebank`` but with extended fileids. 
Categories +specified in ``allcats.txt`` can be used to filter by genre; they consist +of ``news`` (for WSJ articles) and names of the Brown subcategories +(``fiction``, ``humor``, ``romance``, etc.): + + >>> ptb.categories() # doctest: +SKIP + ['adventure', 'belles_lettres', 'fiction', 'humor', 'lore', 'mystery', 'news', 'romance', 'science_fiction'] + >>> print(ptb.fileids('news')) # doctest: +SKIP + ['WSJ/00/WSJ_0001.MRG', 'WSJ/00/WSJ_0002.MRG', 'WSJ/00/WSJ_0003.MRG', ...] + >>> print(ptb.words(categories=['humor','fiction'])) # doctest: +SKIP + ['Thirty-three', 'Scotty', 'did', 'not', 'go', 'back', ...] + +As PropBank and NomBank depend on the (WSJ portion of the) Penn Treebank, +the modules ``propbank_ptb`` and ``nombank_ptb`` are provided for access +to a full PTB installation. + +Reading the Sinica Treebank: + + >>> from nltk.corpus import sinica_treebank + >>> print(sinica_treebank.sents()) # doctest: +SKIP + [['\xe4\xb8\x80'], ['\xe5\x8f\x8b\xe6\x83\x85'], ...] + >>> sinica_treebank.parsed_sents()[25] # doctest: +SKIP + Tree('S', + [Tree('NP', + [Tree('Nba', ['\xe5\x98\x89\xe7\x8f\x8d'])]), + Tree('V\xe2\x80\xa7\xe5\x9c\xb0', + [Tree('VA11', ['\xe4\xb8\x8d\xe5\x81\x9c']), + Tree('DE', ['\xe7\x9a\x84'])]), + Tree('VA4', ['\xe5\x93\xad\xe6\xb3\xa3'])]) + +Reading the CoNLL 2007 Dependency Treebanks: + + >>> from nltk.corpus import conll2007 + >>> conll2007.sents('esp.train')[0] # doctest: +SKIP + ['El', 'aumento', 'del', 'índice', 'de', 'desempleo', ...] + >>> conll2007.parsed_sents('esp.train')[0] # doctest: +SKIP + + >>> print(conll2007.parsed_sents('esp.train')[0].tree()) # doctest: +SKIP + (fortaleció + (aumento El (del (índice (de (desempleo estadounidense))))) + hoy + considerablemente + (al + (euro + (cotizaba + , + que + (a (15.35 las GMT)) + se + (en (mercado el (de divisas) (de Fráncfort))) + (a 0,9452_dólares) + (frente_a , (0,9349_dólares los (de (mañana esta))))))) + .) + +Word Lists and Lexicons +======================= + +The NLTK data package also includes a number of lexicons and word +lists. These are accessed just like text corpora. The following +examples illustrate the use of the wordlist corpora: + + >>> from nltk.corpus import names, stopwords, words + >>> words.fileids() + ['en', 'en-basic'] + >>> words.words('en') # doctest: +ELLIPSIS + ['A', 'a', 'aa', 'aal', 'aalii', 'aam', 'Aani', 'aardvark', 'aardwolf', ...] + + >>> stopwords.fileids() # doctest: +ELLIPSIS + ['arabic', 'azerbaijani', 'danish', 'dutch', 'english', 'finnish', 'french', ...] + >>> stopwords.words('portuguese') # doctest: +ELLIPSIS + ['de', 'a', 'o', 'que', 'e', 'do', 'da', 'em', 'um', 'para', ...] + >>> names.fileids() + ['female.txt', 'male.txt'] + >>> names.words('male.txt') # doctest: +ELLIPSIS + ['Aamir', 'Aaron', 'Abbey', 'Abbie', 'Abbot', 'Abbott', ...] + >>> names.words('female.txt') # doctest: +ELLIPSIS + ['Abagael', 'Abagail', 'Abbe', 'Abbey', 'Abbi', 'Abbie', ...] + +The CMU Pronunciation Dictionary corpus contains pronounciation +transcriptions for over 100,000 words. It can be accessed as a list +of entries (where each entry consists of a word, an identifier, and a +transcription) or as a dictionary from words to lists of +transcriptions. Transcriptions are encoded as tuples of phoneme +strings. 
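One common use of these transcriptions is counting syllables: vowel phonemes carry a stress digit (``0``, ``1`` or ``2``), so counting them gives a syllable count. A minimal sketch, where ``syllable_count`` is a hypothetical helper rather than part of the corpus reader (the entries it relies on are shown below)::

    from nltk.corpus import cmudict

    pron = cmudict.dict()   # maps each word to a list of transcriptions

    def syllable_count(word):
        # hypothetical helper: count the phonemes of the first transcription
        # that end in a stress digit, e.g. 'AE1' or 'EY2'
        return sum(1 for phone in pron[word.lower()][0] if phone[-1].isdigit())

For the entry ``('acetate', ['AE1', 'S', 'AH0', 'T', 'EY2', 'T'])`` shown below, this helper gives 3.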
+ + >>> from nltk.corpus import cmudict + >>> print(cmudict.entries()[653:659]) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + [('acetate', ['AE1', 'S', 'AH0', 'T', 'EY2', 'T']), + ('acetic', ['AH0', 'S', 'EH1', 'T', 'IH0', 'K']), + ('acetic', ['AH0', 'S', 'IY1', 'T', 'IH0', 'K']), + ('aceto', ['AA0', 'S', 'EH1', 'T', 'OW0']), + ('acetochlor', ['AA0', 'S', 'EH1', 'T', 'OW0', 'K', 'L', 'AO2', 'R']), + ('acetone', ['AE1', 'S', 'AH0', 'T', 'OW2', 'N'])] + >>> # Load the entire cmudict corpus into a Python dictionary: + >>> transcr = cmudict.dict() + >>> print([transcr[w][0] for w in 'Natural Language Tool Kit'.lower().split()]) # doctest: +NORMALIZE_WHITESPACE + [['N', 'AE1', 'CH', 'ER0', 'AH0', 'L'], + ['L', 'AE1', 'NG', 'G', 'W', 'AH0', 'JH'], + ['T', 'UW1', 'L'], + ['K', 'IH1', 'T']] + + +WordNet +======= + +Please see the separate WordNet howto. + +FrameNet +======== + +Please see the separate FrameNet howto. + +PropBank +======== + +Please see the separate PropBank howto. + +SentiWordNet +============ + +Please see the separate SentiWordNet howto. + +Categorized Corpora +=================== + +Several corpora included with NLTK contain documents that have been categorized for +topic, genre, polarity, etc. In addition to the standard corpus interface, these +corpora provide access to the list of categories and the mapping between the documents +and their categories (in both directions). Access the categories using the ``categories()`` +method, e.g.: + + >>> from nltk.corpus import brown, movie_reviews, reuters + >>> brown.categories() # doctest: +NORMALIZE_WHITESPACE + ['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'humor', + 'learned', 'lore', 'mystery', 'news', 'religion', 'reviews', 'romance', 'science_fiction'] + >>> movie_reviews.categories() + ['neg', 'pos'] + >>> reuters.categories() # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS + ['acq', 'alum', 'barley', 'bop', 'carcass', 'castor-oil', 'cocoa', + 'coconut', 'coconut-oil', 'coffee', 'copper', 'copra-cake', 'corn', + 'cotton', 'cotton-oil', 'cpi', 'cpu', 'crude', 'dfl', 'dlr', ...] + +This method has an optional argument that specifies a document or a list +of documents, allowing us to map from (one or more) documents to (one or more) categories: + + >>> brown.categories('ca01') + ['news'] + >>> brown.categories(['ca01','cb01']) + ['editorial', 'news'] + >>> reuters.categories('training/9865') + ['barley', 'corn', 'grain', 'wheat'] + >>> reuters.categories(['training/9865', 'training/9880']) + ['barley', 'corn', 'grain', 'money-fx', 'wheat'] + +We can go back the other way using the optional argument of the ``fileids()`` method: + + >>> reuters.fileids('barley') # doctest: +ELLIPSIS + ['test/15618', 'test/15649', 'test/15676', 'test/15728', 'test/15871', ...] + +Both the ``categories()`` and ``fileids()`` methods return a sorted list containing +no duplicates. + +In addition to mapping between categories and documents, these corpora permit +direct access to their contents via the categories. Instead of accessing a subset +of a corpus by specifying one or more fileids, we can identify one or more categories, e.g.: + + >>> brown.tagged_words(categories='news') + [('The', 'AT'), ('Fulton', 'NP-TL'), ...] 
+ >>> brown.sents(categories=['editorial','reviews']) # doctest: +NORMALIZE_WHITESPACE + [['Assembly', 'session', 'brought', 'much', 'good'], ['The', 'General', + 'Assembly', ',', 'which', 'adjourns', 'today', ',', 'has', 'performed', + 'in', 'an', 'atmosphere', 'of', 'crisis', 'and', 'struggle', 'from', + 'the', 'day', 'it', 'convened', '.'], ...] + +Note that it is an error to specify both documents and categories. + +In the context of a text categorization system, we can easily test if the +category assigned to a document is correct as follows: + + >>> def classify(doc): return 'news' # Trivial classifier + >>> doc = 'ca01' + >>> classify(doc) in brown.categories(doc) + True + + +Other Corpora +============= + +comparative_sentences +--------------------- +A list of sentences from various sources, especially reviews and articles. Each +line contains one sentence; sentences were separated by using a sentence tokenizer. +Comparative sentences have been annotated with their type, entities, features and +keywords. + + >>> from nltk.corpus import comparative_sentences + >>> comparison = comparative_sentences.comparisons()[0] + >>> comparison.text + ['its', 'fast-forward', 'and', 'rewind', 'work', 'much', 'more', 'smoothly', + 'and', 'consistently', 'than', 'those', 'of', 'other', 'models', 'i', "'ve", + 'had', '.'] + >>> comparison.entity_2 + 'models' + >>> (comparison.feature, comparison.keyword) + ('rewind', 'more') + >>> len(comparative_sentences.comparisons()) + 853 + +opinion_lexicon +--------------- +A list of positive and negative opinion words or sentiment words for English. + + >>> from nltk.corpus import opinion_lexicon + >>> opinion_lexicon.words()[:4] + ['2-faced', '2-faces', 'abnormal', 'abolish'] + +The OpinionLexiconCorpusReader also provides shortcuts to retrieve positive/negative +words: + + >>> opinion_lexicon.negative()[:4] + ['2-faced', '2-faces', 'abnormal', 'abolish'] + +Note that words from `words()` method in opinion_lexicon are sorted by file id, +not alphabetically: + + >>> opinion_lexicon.words()[0:10] + ['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably', + 'abominate', 'abomination', 'abort', 'aborted'] + >>> sorted(opinion_lexicon.words())[0:10] + ['2-faced', '2-faces', 'a+', 'abnormal', 'abolish', 'abominable', 'abominably', + 'abominate', 'abomination', 'abort'] + +ppattach +-------- +The Prepositional Phrase Attachment corpus is a corpus of +prepositional phrase attachment decisions. Each instance in the +corpus is encoded as a ``PPAttachment`` object: + + >>> from nltk.corpus import ppattach + >>> ppattach.attachments('training') # doctest: +NORMALIZE_WHITESPACE + [PPAttachment(sent='0', verb='join', noun1='board', + prep='as', noun2='director', attachment='V'), + PPAttachment(sent='1', verb='is', noun1='chairman', + prep='of', noun2='N.V.', attachment='N'), + ...] + >>> inst = ppattach.attachments('training')[0] + >>> (inst.sent, inst.verb, inst.noun1, inst.prep, inst.noun2) + ('0', 'join', 'board', 'as', 'director') + >>> inst.attachment + 'V' + +product_reviews_1 and product_reviews_2 +--------------------------------------- +These two datasets respectively contain annotated customer reviews of 5 and 9 +products from amazon.com. 
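As a small sketch of how these annotations can be aggregated (not part of the corpus reader; it assumes, as in the examples below, that every opinion score is a signed integer string such as ``'+2'``), the mean score of every annotated feature of one product can be computed as follows::

    from collections import defaultdict
    from nltk.corpus import product_reviews_1

    scores = defaultdict(list)
    for feature, score in product_reviews_1.features('Canon_G3.txt'):
        scores[feature].append(int(score))      # e.g. int('+3') == 3

    mean_score = {feature: sum(vals) / len(vals)
                  for feature, vals in scores.items()}

The doctest further down computes the same statistic for the single feature ``'picture'``.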
+ + >>> from nltk.corpus import product_reviews_1 + >>> camera_reviews = product_reviews_1.reviews('Canon_G3.txt') + >>> review = camera_reviews[0] + >>> review.sents()[0] + ['i', 'recently', 'purchased', 'the', 'canon', 'powershot', 'g3', 'and', 'am', + 'extremely', 'satisfied', 'with', 'the', 'purchase', '.'] + >>> review.features() + [('canon powershot g3', '+3'), ('use', '+2'), ('picture', '+2'), + ('picture quality', '+1'), ('picture quality', '+1'), ('camera', '+2'), + ('use', '+2'), ('feature', '+1'), ('picture quality', '+3'), ('use', '+1'), + ('option', '+1')] + +It is also possible to reach the same information directly from the stream: + + >>> product_reviews_1.features('Canon_G3.txt') + [('canon powershot g3', '+3'), ('use', '+2'), ...] + +We can compute stats for specific product features: + + >>> n_reviews = len([(feat,score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture']) + >>> tot = sum([int(score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture']) + >>> # We use float for backward compatibility with division in Python2.7 + >>> mean = tot/float(n_reviews) + >>> print(n_reviews, tot, mean) + 15 24 1.6 + +pros_cons +--------- +A list of pros/cons sentences for determining context (aspect) dependent +sentiment words, which are then applied to sentiment analysis of comparative +sentences. + + >>> from nltk.corpus import pros_cons + >>> pros_cons.sents(categories='Cons') + [['East', 'batteries', '!', 'On', '-', 'off', 'switch', 'too', 'easy', + 'to', 'maneuver', '.'], ['Eats', '...', 'no', ',', 'GULPS', 'batteries'], + ...] + >>> pros_cons.words('IntegratedPros.txt') + ['Easy', 'to', 'use', ',', 'economical', '!', ...] + +semcor +------ +The Brown Corpus, annotated with WordNet senses. + + >>> from nltk.corpus import semcor + >>> semcor.words('brown2/tagfiles/br-n12.xml') # doctest: +ELLIPSIS + ['When', 'several', 'minutes', 'had', 'passed', ...] + >>> sent = semcor.xml('brown2/tagfiles/br-n12.xml').findall('context/p/s')[0] + >>> for wordform in sent.getchildren(): + ... print(wordform.text, end=' ') + ... for key in sorted(wordform.keys()): + ... print(key + '=' + wordform.get(key), end=' ') + ... print() + ... + When cmd=ignore pos=WRB + several cmd=done lemma=several lexsn=5:00:00:some(a):00 pos=JJ wnsn=1 + minutes cmd=done lemma=minute lexsn=1:28:00:: pos=NN wnsn=1 + had cmd=done ot=notag pos=VBD + passed cmd=done lemma=pass lexsn=2:38:03:: pos=VB wnsn=4 + and cmd=ignore pos=CC + Curt cmd=done lemma=person lexsn=1:03:00:: pn=person pos=NNP rdf=person wnsn=1 + had cmd=done ot=notag pos=VBD + n't cmd=done lemma=n't lexsn=4:02:00:: pos=RB wnsn=0 + emerged cmd=done lemma=emerge lexsn=2:30:00:: pos=VB wnsn=1 + from cmd=ignore pos=IN + the cmd=ignore pos=DT + livery_stable cmd=done lemma=livery_stable lexsn=1:06:00:: pos=NN wnsn=1 + , + Brenner cmd=done lemma=person lexsn=1:03:00:: pn=person pos=NNP rdf=person wnsn=1 + re-entered cmd=done lemma=re-enter lexsn=2:38:00:: pos=VB wnsn=1 + the cmd=ignore pos=DT + hotel cmd=done lemma=hotel lexsn=1:06:00:: pos=NN wnsn=1 + and cmd=ignore pos=CC + faced cmd=done lemma=face lexsn=2:42:02:: pos=VB wnsn=4 + Summers cmd=done lemma=person lexsn=1:03:00:: pn=person pos=NNP rdf=person wnsn=1 + across cmd=ignore pos=IN + the cmd=ignore pos=DT + counter cmd=done lemma=counter lexsn=1:06:00:: pos=NN wnsn=1 + . + +senseval +-------- +The Senseval 2 corpus is a word sense disambiguation corpus. Each +item in the corpus corresponds to a single ambiguous word. 
For each +of these words, the corpus contains a list of instances, corresponding +to occurrences of that word. Each instance provides the word; a list +of word senses that apply to the word occurrence; and the word's +context. + + >>> from nltk.corpus import senseval + >>> senseval.fileids() + ['hard.pos', 'interest.pos', 'line.pos', 'serve.pos'] + >>> senseval.instances('hard.pos') + ... # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + [SensevalInstance(word='hard-a', + position=20, + context=[('``', '``'), ('he', 'PRP'), ...('hard', 'JJ'), ...], + senses=('HARD1',)), + SensevalInstance(word='hard-a', + position=10, + context=[('clever', 'NNP'), ...('hard', 'JJ'), ('time', 'NN'), ...], + senses=('HARD1',)), ...] + +The following code looks at instances of the word 'interest', and +displays their local context (2 words on each side) and word sense(s): + + >>> for inst in senseval.instances('interest.pos')[:10]: + ... p = inst.position + ... left = ' '.join(w for (w,t) in inst.context[p-2:p]) + ... word = ' '.join(w for (w,t) in inst.context[p:p+1]) + ... right = ' '.join(w for (w,t) in inst.context[p+1:p+3]) + ... senses = ' '.join(inst.senses) + ... print('%20s |%10s | %-15s -> %s' % (left, word, right, senses)) + declines in | interest | rates . -> interest_6 + indicate declining | interest | rates because -> interest_6 + in short-term | interest | rates . -> interest_6 + 4 % | interest | in this -> interest_5 + company with | interests | in the -> interest_5 + , plus | interest | . -> interest_6 + set the | interest | rate on -> interest_6 + 's own | interest | , prompted -> interest_4 + principal and | interest | is the -> interest_6 + increase its | interest | to 70 -> interest_5 + +sentence_polarity +----------------- +The Sentence Polarity dataset contains 5331 positive and 5331 negative processed +sentences. + + >>> from nltk.corpus import sentence_polarity + >>> sentence_polarity.sents() + [['simplistic', ',', 'silly', 'and', 'tedious', '.'], ["it's", 'so', 'laddish', + 'and', 'juvenile', ',', 'only', 'teenage', 'boys', 'could', 'possibly', 'find', + 'it', 'funny', '.'], ...] + >>> sentence_polarity.categories() + ['neg', 'pos'] + >>> sentence_polarity.sents()[1] + ["it's", 'so', 'laddish', 'and', 'juvenile', ',', 'only', 'teenage', 'boys', + 'could', 'possibly', 'find', 'it', 'funny', '.'] + +shakespeare +----------- +The Shakespeare corpus contains a set of Shakespeare plays, formatted +as XML files. These corpora are returned as ElementTree objects: + + >>> from nltk.corpus import shakespeare + >>> from xml.etree import ElementTree + >>> shakespeare.fileids() # doctest: +ELLIPSIS + ['a_and_c.xml', 'dream.xml', 'hamlet.xml', 'j_caesar.xml', ...] + >>> play = shakespeare.xml('dream.xml') + >>> print(play) # doctest: +ELLIPSIS + + >>> print('%s: %s' % (play[0].tag, play[0].text)) + TITLE: A Midsummer Night's Dream + >>> personae = [persona.text for persona in + ... play.findall('PERSONAE/PERSONA')] + >>> print(personae) # doctest: +ELLIPSIS + ['THESEUS, Duke of Athens.', 'EGEUS, father to Hermia.', ...] + >>> # Find and print speakers not listed as personae + >>> names = [persona.split(',')[0] for persona in personae] + >>> speakers = set(speaker.text for speaker in + ... 
play.findall('*/*/*/SPEAKER')) + >>> print(sorted(speakers.difference(names))) # doctest: +NORMALIZE_WHITESPACE + ['ALL', 'COBWEB', 'DEMETRIUS', 'Fairy', 'HERNIA', 'LYSANDER', + 'Lion', 'MOTH', 'MUSTARDSEED', 'Moonshine', 'PEASEBLOSSOM', + 'Prologue', 'Pyramus', 'Thisbe', 'Wall'] + +subjectivity +----------- +The Subjectivity Dataset contains 5000 subjective and 5000 objective processed +sentences. + + >>> from nltk.corpus import subjectivity + >>> subjectivity.categories() + ['obj', 'subj'] + >>> subjectivity.sents()[23] + ['television', 'made', 'him', 'famous', ',', 'but', 'his', 'biggest', 'hits', + 'happened', 'off', 'screen', '.'] + >>> subjectivity.words(categories='subj') + ['smart', 'and', 'alert', ',', 'thirteen', ...] + +toolbox +------- +The Toolbox corpus distributed with NLTK contains a sample lexicon and +several sample texts from the Rotokas language. The Toolbox corpus +reader returns Toolbox files as XML ElementTree objects. The +following example loads the Rotokas dictionary, and figures out the +distribution of part-of-speech tags for reduplicated words. + +.. doctest: +SKIP + + >>> from nltk.corpus import toolbox + >>> from nltk.probability import FreqDist + >>> from xml.etree import ElementTree + >>> import re + >>> rotokas = toolbox.xml('rotokas.dic') + >>> redup_pos_freqdist = FreqDist() + >>> # Note: we skip over the first record, which is actually + >>> # the header. + >>> for record in rotokas[1:]: + ... lexeme = record.find('lx').text + ... if re.match(r'(.*)\1$', lexeme): + ... redup_pos_freqdist[record.find('ps').text] += 1 + >>> for item, count in redup_pos_freqdist.most_common(): + ... print(item, count) + V 41 + N 14 + ??? 4 + +This example displays some records from a Rotokas text: + +.. doctest: +SKIP + + >>> river = toolbox.xml('rotokas/river.txt', key='ref') + >>> for record in river.findall('record')[:3]: + ... for piece in record: + ... if len(piece.text) > 60: + ... print('%-6s %s...' % (piece.tag, piece.text[:57])) + ... else: + ... print('%-6s %s' % (piece.tag, piece.text)) + ref Paragraph 1 + t ``Viapau oisio ra ovaupasi ... + m viapau oisio ra ovau -pa -si ... + g NEG this way/like this and forget -PROG -2/3.DL... + p NEG ??? CONJ V.I -SUFF.V.3 -SUFF.V... + f ``No ken lus tingting wanema samting papa i bin tok,'' Na... + fe ``Don't forget what Dad said,'' yelled Naomi. + ref 2 + t Osa Ira ora Reviti viapau uvupasiva. + m osa Ira ora Reviti viapau uvu -pa -si ... + g as/like name and name NEG hear/smell -PROG -2/3... + p CONJ N.PN CONJ N.PN NEG V.T -SUFF.V.3 -SUF... + f Tasol Ila na David no bin harim toktok. + fe But Ila and David took no notice. + ref 3 + t Ikaupaoro rokosiva ... + m ikau -pa -oro roko -si -va ... + g run/hurry -PROG -SIM go down -2/3.DL.M -RP ... + p V.T -SUFF.V.3 -SUFF.V.4 ADV -SUFF.V.4 -SUFF.VT.... + f Tupela i bin hariap i go long wara . + fe They raced to the river. + +timit +----- +The NLTK data package includes a fragment of the TIMIT +Acoustic-Phonetic Continuous Speech Corpus. This corpus is broken +down into small speech samples, each of which is available as a wave +file, a phonetic transcription, and a tokenized word list. + + >>> from nltk.corpus import timit + >>> print(timit.utteranceids()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + ['dr1-fvmh0/sa1', 'dr1-fvmh0/sa2', 'dr1-fvmh0/si1466', + 'dr1-fvmh0/si2096', 'dr1-fvmh0/si836', 'dr1-fvmh0/sx116', + 'dr1-fvmh0/sx206', 'dr1-fvmh0/sx26', 'dr1-fvmh0/sx296', ...] 
+ + >>> item = timit.utteranceids()[5] + >>> print(timit.phones(item)) # doctest: +NORMALIZE_WHITESPACE + ['h#', 'k', 'l', 'ae', 's', 'pcl', 'p', 'dh', 'ax', + 's', 'kcl', 'k', 'r', 'ux', 'ix', 'nx', 'y', 'ax', + 'l', 'eh', 'f', 'tcl', 't', 'hh', 'ae', 'n', 'dcl', + 'd', 'h#'] + >>> print(timit.words(item)) + ['clasp', 'the', 'screw', 'in', 'your', 'left', 'hand'] + >>> timit.play(item) # doctest: +SKIP + +The corpus reader can combine the word segmentation information with +the phonemes to produce a single tree structure: + + >>> for tree in timit.phone_trees(item): + ... print(tree) + (S + h# + (clasp k l ae s pcl p) + (the dh ax) + (screw s kcl k r ux) + (in ix nx) + (your y ax) + (left l eh f tcl t) + (hand hh ae n dcl d) + h#) + +The start time and stop time of each phoneme, word, and sentence are +also available: + + >>> print(timit.phone_times(item)) # doctest: +ELLIPSIS + [('h#', 0, 2190), ('k', 2190, 3430), ('l', 3430, 4326), ...] + >>> print(timit.word_times(item)) # doctest: +ELLIPSIS + [('clasp', 2190, 8804), ('the', 8804, 9734), ...] + >>> print(timit.sent_times(item)) + [('Clasp the screw in your left hand.', 0, 32154)] + +We can use these times to play selected pieces of a speech sample: + + >>> timit.play(item, 2190, 8804) # 'clasp' # doctest: +SKIP + +The corpus reader can also be queried for information about the +speaker and sentence identifier for a given speech sample: + + >>> print(timit.spkrid(item)) + dr1-fvmh0 + >>> print(timit.sentid(item)) + sx116 + >>> print(timit.spkrinfo(timit.spkrid(item))) # doctest: +NORMALIZE_WHITESPACE + SpeakerInfo(id='VMH0', + sex='F', + dr='1', + use='TRN', + recdate='03/11/86', + birthdate='01/08/60', + ht='5\'05"', + race='WHT', + edu='BS', + comments='BEST NEW ENGLAND ACCENT SO FAR') + + >>> # List the speech samples from the same speaker: + >>> timit.utteranceids(spkrid=timit.spkrid(item)) # doctest: +ELLIPSIS + ['dr1-fvmh0/sa1', 'dr1-fvmh0/sa2', 'dr1-fvmh0/si1466', ...] + +twitter_samples +--------------- + +Twitter is well-known microblog service that allows public data to be +collected via APIs. NLTK's twitter corpus currently contains a sample of 20k Tweets +retrieved from the Twitter Streaming API. + + >>> from nltk.corpus import twitter_samples + >>> twitter_samples.fileids() + ['negative_tweets.json', 'positive_tweets.json', 'tweets.20150430-223406.json'] + +We follow standard practice in storing full Tweets as line-separated +JSON. These data structures can be accessed via `tweets.docs()`. However, in general it +is more practical to focus just on the text field of the Tweets, which +are accessed via the `strings()` method. + + >>> twitter_samples.strings('tweets.20150430-223406.json') + ['RT @KirkKus: Indirect cost of the UK being in the EU is estimated to be costing Britain \xa3170 billion per year! #BetterOffOut #UKIP', ...] + +The default tokenizer for Tweets is specialised for 'casual' text, and +the `tokenized()` method returns a list of lists of tokens. + + >>> twitter_samples.tokenized('tweets.20150430-223406.json') + [['RT', '@KirkKus', ':', 'Indirect', 'cost', 'of', 'the', 'UK', 'being', 'in', ...], + ['VIDEO', ':', 'Sturgeon', 'on', 'post-election', 'deals', 'http://t.co/BTJwrpbmOY'], ...] + +rte +--- +The RTE (Recognizing Textual Entailment) corpus was derived from the +RTE1, RTE2 and RTE3 datasets (dev and test data), and consists of a +list of XML-formatted 'text'/'hypothesis' pairs. 
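As a rough illustration of how such pairs might be used, here is a sketch of a naive word-overlap baseline (this is not an NLTK API; the ``text``, ``hyp`` and ``value`` attributes it relies on are shown in the examples below, and the 0.58 threshold is an arbitrary choice)::

    from nltk import word_tokenize
    from nltk.corpus import rte

    def overlap_entails(pair, threshold=0.58):
        # predict entailment (1) when most hypothesis tokens also occur in the text
        text = {w.lower() for w in word_tokenize(pair.text)}
        hyp = {w.lower() for w in word_tokenize(pair.hyp)}
        return int(len(hyp & text) / len(hyp) >= threshold)

    pairs = rte.pairs(['rte1_dev.xml'])
    accuracy = sum(overlap_entails(p) == p.value for p in pairs) / len(pairs)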
+ + >>> from nltk.corpus import rte + >>> print(rte.fileids()) # doctest: +ELLIPSIS + ['rte1_dev.xml', 'rte1_test.xml', 'rte2_dev.xml', ..., 'rte3_test.xml'] + >>> rtepairs = rte.pairs(['rte2_test.xml', 'rte3_test.xml']) + >>> print(rtepairs) # doctest: +ELLIPSIS + [, , , ...] + +In the gold standard test sets, each pair is labeled according to +whether or not the text 'entails' the hypothesis; the +entailment value is mapped to an integer 1 (True) or 0 (False). + + >>> rtepairs[5] + + >>> rtepairs[5].text # doctest: +NORMALIZE_WHITESPACE + 'His wife Strida won a seat in parliament after forging an alliance + with the main anti-Syrian coalition in the recent election.' + >>> rtepairs[5].hyp + 'Strida elected to parliament.' + >>> rtepairs[5].value + 1 + +The RTE corpus also supports an ``xml()`` method which produces ElementTrees. + + >>> xmltree = rte.xml('rte3_dev.xml') + >>> xmltree # doctest: +SKIP + + >>> xmltree[7].findtext('t') # doctest: +NORMALIZE_WHITESPACE + "Mrs. Bush's approval ratings have remained very high, above 80%, + even as her husband's have recently dropped below 50%." + +verbnet +------- +The VerbNet corpus is a lexicon that divides verbs into classes, based +on their syntax-semantics linking behavior. The basic elements in the +lexicon are verb lemmas, such as 'abandon' and 'accept', and verb +classes, which have identifiers such as 'remove-10.1' and +'admire-31.2-1'. These class identifiers consist of a representative +verb selected from the class, followed by a numerical identifier. The +list of verb lemmas, and the list of class identifiers, can be +retrieved with the following methods: + + >>> from nltk.corpus import verbnet + >>> verbnet.lemmas()[20:25] + ['accelerate', 'accept', 'acclaim', 'accompany', 'accrue'] + >>> verbnet.classids()[:5] + ['accompany-51.7', 'admire-31.2', 'admire-31.2-1', 'admit-65', 'adopt-93'] + +The `classids()` method may also be used to retrieve the classes that +a given lemma belongs to: + + >>> verbnet.classids('accept') + ['approve-77', 'characterize-29.2-1-1', 'obtain-13.5.2'] + +The `classids()` method may additionally be used to retrieve all classes +within verbnet if nothing is passed: + + >>> verbnet.classids() + ['accompany-51.7', 'admire-31.2', 'admire-31.2-1', 'admit-65', 'adopt-93', 'advise-37.9', 'advise-37.9-1', 'allow-64', 'amalgamate-22.2', 'amalgamate-22.2-1', 'amalgamate-22.2-1-1', 'amalgamate-22.2-2', 'amalgamate-22.2-2-1', 'amalgamate-22.2-3', 'amalgamate-22.2-3-1', 'amalgamate-22.2-3-1-1', 'amalgamate-22.2-3-2', 'amuse-31.1', 'animal_sounds-38', 'appeal-31.4', 'appeal-31.4-1', 'appeal-31.4-2', 'appeal-31.4-3', 'appear-48.1.1', 'appoint-29.1', 'approve-77', 'assessment-34', 'assuming_position-50', 'avoid-52', 'banish-10.2', 'battle-36.4', 'battle-36.4-1', 'begin-55.1', 'begin-55.1-1', 'being_dressed-41.3.3', 'bend-45.2', 'berry-13.7', 'bill-54.5', 'body_internal_motion-49', 'body_internal_states-40.6', 'braid-41.2.2', 'break-45.1', 'breathe-40.1.2', 'breathe-40.1.2-1', 'bring-11.3', 'bring-11.3-1', 'build-26.1', 'build-26.1-1', 'bulge-47.5.3', 'bump-18.4', 'bump-18.4-1', 'butter-9.9', 'calibratable_cos-45.6', 'calibratable_cos-45.6-1', 'calve-28', 'captain-29.8', 'captain-29.8-1', 'captain-29.8-1-1', 'care-88', 'care-88-1', 'carry-11.4', 'carry-11.4-1', 'carry-11.4-1-1', 'carve-21.2', 'carve-21.2-1', 'carve-21.2-2', 'change_bodily_state-40.8.4', 'characterize-29.2', 'characterize-29.2-1', 'characterize-29.2-1-1', 'characterize-29.2-1-2', 'chase-51.6', 'cheat-10.6', 'cheat-10.6-1', 'cheat-10.6-1-1', 'chew-39.2', 
'chew-39.2-1', 'chew-39.2-2', 'chit_chat-37.6', 'clear-10.3', 'clear-10.3-1', 'cling-22.5', 'coil-9.6', 'coil-9.6-1', 'coloring-24', 'complain-37.8', 'complete-55.2', 'concealment-16', 'concealment-16-1', 'confess-37.10', 'confine-92', 'confine-92-1', 'conjecture-29.5', 'conjecture-29.5-1', 'conjecture-29.5-2', 'consider-29.9', 'consider-29.9-1', 'consider-29.9-1-1', 'consider-29.9-1-1-1', 'consider-29.9-2', 'conspire-71', 'consume-66', 'consume-66-1', 'contiguous_location-47.8', 'contiguous_location-47.8-1', 'contiguous_location-47.8-2', 'continue-55.3', 'contribute-13.2', 'contribute-13.2-1', 'contribute-13.2-1-1', 'contribute-13.2-1-1-1', 'contribute-13.2-2', 'contribute-13.2-2-1', 'convert-26.6.2', 'convert-26.6.2-1', 'cooking-45.3', 'cooperate-73', 'cooperate-73-1', 'cooperate-73-2', 'cooperate-73-3', 'cope-83', 'cope-83-1', 'cope-83-1-1', 'correlate-86', 'correspond-36.1', 'correspond-36.1-1', 'correspond-36.1-1-1', 'cost-54.2', 'crane-40.3.2', 'create-26.4', 'create-26.4-1', 'curtsey-40.3.3', 'cut-21.1', 'cut-21.1-1', 'debone-10.8', 'declare-29.4', 'declare-29.4-1', 'declare-29.4-1-1', 'declare-29.4-1-1-1', 'declare-29.4-1-1-2', 'declare-29.4-1-1-3', 'declare-29.4-2', 'dedicate-79', 'defend-85', 'destroy-44', 'devour-39.4', 'devour-39.4-1', 'devour-39.4-2', 'differ-23.4', 'dine-39.5', 'disappearance-48.2', 'disassemble-23.3', 'discover-84', 'discover-84-1', 'discover-84-1-1', 'dress-41.1.1', 'dressing_well-41.3.2', 'drive-11.5', 'drive-11.5-1', 'dub-29.3', 'dub-29.3-1', 'eat-39.1', 'eat-39.1-1', 'eat-39.1-2', 'enforce-63', 'engender-27', 'entity_specific_cos-45.5', 'entity_specific_modes_being-47.2', 'equip-13.4.2', 'equip-13.4.2-1', 'equip-13.4.2-1-1', 'escape-51.1', 'escape-51.1-1', 'escape-51.1-2', 'escape-51.1-2-1', 'exceed-90', 'exchange-13.6', 'exchange-13.6-1', 'exchange-13.6-1-1', 'exhale-40.1.3', 'exhale-40.1.3-1', 'exhale-40.1.3-2', 'exist-47.1', 'exist-47.1-1', 'exist-47.1-1-1', 'feeding-39.7', 'ferret-35.6', 'fill-9.8', 'fill-9.8-1', 'fit-54.3', 'flinch-40.5', 'floss-41.2.1', 'focus-87', 'forbid-67', 'force-59', 'force-59-1', 'free-80', 'free-80-1', 'fulfilling-13.4.1', 'fulfilling-13.4.1-1', 'fulfilling-13.4.1-2', 'funnel-9.3', 'funnel-9.3-1', 'funnel-9.3-2', 'funnel-9.3-2-1', 'future_having-13.3', 'get-13.5.1', 'get-13.5.1-1', 'give-13.1', 'give-13.1-1', 'gobble-39.3', 'gobble-39.3-1', 'gobble-39.3-2', 'gorge-39.6', 'groom-41.1.2', 'grow-26.2', 'help-72', 'help-72-1', 'herd-47.5.2', 'hiccup-40.1.1', 'hit-18.1', 'hit-18.1-1', 'hold-15.1', 'hold-15.1-1', 'hunt-35.1', 'hurt-40.8.3', 'hurt-40.8.3-1', 'hurt-40.8.3-1-1', 'hurt-40.8.3-2', 'illustrate-25.3', 'image_impression-25.1', 'indicate-78', 'indicate-78-1', 'indicate-78-1-1', 'inquire-37.1.2', 'instr_communication-37.4', 'investigate-35.4', 'judgement-33', 'keep-15.2', 'knead-26.5', 'learn-14', 'learn-14-1', 'learn-14-2', 'learn-14-2-1', 'leave-51.2', 'leave-51.2-1', 'lecture-37.11', 'lecture-37.11-1', 'lecture-37.11-1-1', 'lecture-37.11-2', 'light_emission-43.1', 'limit-76', 'linger-53.1', 'linger-53.1-1', 'lodge-46', 'long-32.2', 'long-32.2-1', 'long-32.2-2', 'manner_speaking-37.3', 'marry-36.2', 'marvel-31.3', 'marvel-31.3-1', 'marvel-31.3-2', 'marvel-31.3-3', 'marvel-31.3-4', 'marvel-31.3-5', 'marvel-31.3-6', 'marvel-31.3-7', 'marvel-31.3-8', 'marvel-31.3-9', 'masquerade-29.6', 'masquerade-29.6-1', 'masquerade-29.6-2', 'matter-91', 'meander-47.7', 'meet-36.3', 'meet-36.3-1', 'meet-36.3-2', 'mine-10.9', 'mix-22.1', 'mix-22.1-1', 'mix-22.1-1-1', 'mix-22.1-2', 'mix-22.1-2-1', 'modes_of_being_with_motion-47.3', 
'murder-42.1', 'murder-42.1-1', 'neglect-75', 'neglect-75-1', 'neglect-75-1-1', 'neglect-75-2', 'nonvehicle-51.4.2', 'nonverbal_expression-40.2', 'obtain-13.5.2', 'obtain-13.5.2-1', 'occurrence-48.3', 'order-60', 'order-60-1', 'orphan-29.7', 'other_cos-45.4', 'pain-40.8.1', 'pay-68', 'peer-30.3', 'pelt-17.2', 'performance-26.7', 'performance-26.7-1', 'performance-26.7-1-1', 'performance-26.7-2', 'performance-26.7-2-1', 'pit-10.7', 'pocket-9.10', 'pocket-9.10-1', 'poison-42.2', 'poke-19', 'pour-9.5', 'preparing-26.3', 'preparing-26.3-1', 'preparing-26.3-2', 'price-54.4', 'push-12', 'push-12-1', 'push-12-1-1', 'put-9.1', 'put-9.1-1', 'put-9.1-2', 'put_direction-9.4', 'put_spatial-9.2', 'put_spatial-9.2-1', 'reach-51.8', 'reflexive_appearance-48.1.2', 'refrain-69', 'register-54.1', 'rely-70', 'remove-10.1', 'risk-94', 'risk-94-1', 'roll-51.3.1', 'rummage-35.5', 'run-51.3.2', 'rush-53.2', 'say-37.7', 'say-37.7-1', 'say-37.7-1-1', 'say-37.7-2', 'scribble-25.2', 'search-35.2', 'see-30.1', 'see-30.1-1', 'see-30.1-1-1', 'send-11.1', 'send-11.1-1', 'separate-23.1', 'separate-23.1-1', 'separate-23.1-2', 'settle-89', 'shake-22.3', 'shake-22.3-1', 'shake-22.3-1-1', 'shake-22.3-2', 'shake-22.3-2-1', 'sight-30.2', 'simple_dressing-41.3.1', 'slide-11.2', 'slide-11.2-1-1', 'smell_emission-43.3', 'snooze-40.4', 'sound_emission-43.2', 'sound_existence-47.4', 'spank-18.3', 'spatial_configuration-47.6', 'split-23.2', 'spray-9.7', 'spray-9.7-1', 'spray-9.7-1-1', 'spray-9.7-2', 'stalk-35.3', 'steal-10.5', 'stimulus_subject-30.4', 'stop-55.4', 'stop-55.4-1', 'substance_emission-43.4', 'succeed-74', 'succeed-74-1', 'succeed-74-1-1', 'succeed-74-2', 'suffocate-40.7', 'suspect-81', 'swarm-47.5.1', 'swarm-47.5.1-1', 'swarm-47.5.1-2', 'swarm-47.5.1-2-1', 'swat-18.2', 'talk-37.5', 'tape-22.4', 'tape-22.4-1', 'tell-37.2', 'throw-17.1', 'throw-17.1-1', 'throw-17.1-1-1', 'tingle-40.8.2', 'touch-20', 'touch-20-1', 'transcribe-25.4', 'transfer_mesg-37.1.1', 'transfer_mesg-37.1.1-1', 'transfer_mesg-37.1.1-1-1', 'try-61', 'turn-26.6.1', 'turn-26.6.1-1', 'urge-58', 'vehicle-51.4.1', 'vehicle-51.4.1-1', 'waltz-51.5', 'want-32.1', 'want-32.1-1', 'want-32.1-1-1', 'weather-57', 'weekend-56', 'wink-40.3.1', 'wink-40.3.1-1', 'wipe_instr-10.4.2', 'wipe_instr-10.4.2-1', 'wipe_manner-10.4.1', 'wipe_manner-10.4.1-1', 'wish-62', 'withdraw-82', 'withdraw-82-1', 'withdraw-82-2', 'withdraw-82-3'] + +The primary object in the lexicon is a class record, which is stored +as an ElementTree xml object. The class record for a given class +identifier is returned by the `vnclass()` method: + + >>> verbnet.vnclass('remove-10.1') # doctest: +ELLIPSIS + + +The `vnclass()` method also accepts "short" identifiers, such as '10.1': + + >>> verbnet.vnclass('10.1') # doctest: +ELLIPSIS + + +See the Verbnet documentation, or the Verbnet files, for information +about the structure of this xml. As an example, we can retrieve a +list of thematic roles for a given Verbnet class: + + >>> vn_31_2 = verbnet.vnclass('admire-31.2') + >>> for themrole in vn_31_2.findall('THEMROLES/THEMROLE'): + ... print(themrole.attrib['type'], end=' ') + ... for selrestr in themrole.findall('SELRESTRS/SELRESTR'): + ... print('[%(Value)s%(type)s]' % selrestr.attrib, end=' ') + ... print() + Theme + Experiencer [+animate] + Predicate + +The Verbnet corpus also provides a variety of pretty printing +functions that can be used to display the xml contents in a more +concise form. 
The simplest such method is `pprint()`: + + >>> print(verbnet.pprint('57')) + weather-57 + Subclasses: (none) + Members: blow clear drizzle fog freeze gust hail howl lightning mist + mizzle pelt pour precipitate rain roar shower sleet snow spit spot + sprinkle storm swelter teem thaw thunder + Thematic roles: + * Theme[+concrete +force] + Frames: + Intransitive (Expletive Subject) + Example: It's raining. + Syntax: LEX[it] LEX[[+be]] VERB + Semantics: + * weather(during(E), Weather_type, ?Theme) + NP (Expletive Subject, Theme Object) + Example: It's raining cats and dogs. + Syntax: LEX[it] LEX[[+be]] VERB NP[Theme] + Semantics: + * weather(during(E), Weather_type, Theme) + PP (Expletive Subject, Theme-PP) + Example: It was pelting with rain. + Syntax: LEX[it[+be]] VERB PREP[with] NP[Theme] + Semantics: + * weather(during(E), Weather_type, Theme) + +Verbnet gives us frames that link the syntax and semantics using an example. +These frames are part of the corpus and we can use `frames()` to get a frame +for a given verbnet class. + + >>> frame = verbnet.frames('57') + >>> frame == [{'semantics': [{'arguments': [{'value': 'during(E)', 'type': 'Event'}, {'value': 'Weather_type', 'type': 'VerbSpecific'}, {'value': '?Theme', 'type': 'ThemRole'}], 'predicate_value': 'weather'}], 'example': "It's raining.", 'syntax': [{'pos_tag': 'LEX', 'modifiers': {'value': 'it', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'LEX', 'modifiers': {'value': '[+be]', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'VERB', 'modifiers': {'value': '', 'synrestrs': [], 'selrestrs': []}}], 'description': {'primary': 'Intransitive', 'secondary': 'Expletive Subject'}}, {'semantics': [{'arguments': [{'value': 'during(E)', 'type': 'Event'}, {'value': 'Weather_type', 'type': 'VerbSpecific'}, {'value': 'Theme', 'type': 'ThemRole'}], 'predicate_value': 'weather'}], 'example': "It's raining cats and dogs.", 'syntax': [{'pos_tag': 'LEX', 'modifiers': {'value': 'it', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'LEX', 'modifiers': {'value': '[+be]', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'VERB', 'modifiers': {'value': '', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'NP', 'modifiers': {'value': 'Theme', 'synrestrs': [], 'selrestrs': []}}], 'description': {'primary': 'NP', 'secondary': 'Expletive Subject, Theme Object'}}, {'semantics': [{'arguments': [{'value': 'during(E)', 'type': 'Event'}, {'value': 'Weather_type', 'type': 'VerbSpecific'}, {'value': 'Theme', 'type': 'ThemRole'}], 'predicate_value': 'weather'}], 'example': 'It was pelting with rain.', 'syntax': [{'pos_tag': 'LEX', 'modifiers': {'value': 'it[+be]', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'VERB', 'modifiers': {'value': '', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'PREP', 'modifiers': {'value': 'with', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'NP', 'modifiers': {'value': 'Theme', 'synrestrs': [], 'selrestrs': []}}], 'description': {'primary': 'PP', 'secondary': 'Expletive Subject, Theme-PP'}}] + True + +Verbnet corpus lets us access thematic roles individually using `themroles()`. + + >>> themroles = verbnet.themroles('57') + >>> themroles == [{'modifiers': [{'type': 'concrete', 'value': '+'}, {'type': 'force', 'value': '+'}], 'type': 'Theme'}] + True + +Verbnet classes may also have subclasses sharing similar syntactic and semantic properties +while having differences with the superclass. The Verbnet corpus allows us to access these +subclasses using `subclasses()`. 
+ + >>> print(verbnet.subclasses('9.1')) #Testing for 9.1 since '57' does not have subclasses + ['put-9.1-1', 'put-9.1-2'] + + +nps_chat +-------- + +The NPS Chat Corpus, Release 1.0 consists of over 10,000 posts in age-specific +chat rooms, which have been anonymized, POS-tagged and dialogue-act tagged. + + >>> print(nltk.corpus.nps_chat.words()) + ['now', 'im', 'left', 'with', 'this', 'gay', ...] + >>> print(nltk.corpus.nps_chat.tagged_words()) + [('now', 'RB'), ('im', 'PRP'), ('left', 'VBD'), ...] + >>> print(nltk.corpus.nps_chat.tagged_posts()) # doctest: +NORMALIZE_WHITESPACE + [[('now', 'RB'), ('im', 'PRP'), ('left', 'VBD'), ('with', 'IN'), + ('this', 'DT'), ('gay', 'JJ'), ('name', 'NN')], [(':P', 'UH')], ...] + +We can access the XML elements corresponding to individual posts. These elements +have ``class`` and ``user`` attributes that we can access using ``p.attrib['class']`` +and ``p.attrib['user']``. They also have text content, accessed using ``p.text``. + + >>> print(nltk.corpus.nps_chat.xml_posts()) # doctest: +ELLIPSIS + [, , ...] + >>> posts = nltk.corpus.nps_chat.xml_posts() + >>> sorted(nltk.FreqDist(p.attrib['class'] for p in posts).keys()) + ['Accept', 'Bye', 'Clarify', 'Continuer', 'Emotion', 'Emphasis', + 'Greet', 'Other', 'Reject', 'Statement', 'System', 'nAnswer', + 'whQuestion', 'yAnswer', 'ynQuestion'] + >>> posts[0].text + 'now im left with this gay name' + +In addition to the above methods for accessing tagged text, we can navigate +the XML structure directly, as follows: + + >>> tokens = posts[0].findall('terminals/t') + >>> [t.attrib['pos'] + "/" + t.attrib['word'] for t in tokens] + ['RB/now', 'PRP/im', 'VBD/left', 'IN/with', 'DT/this', 'JJ/gay', 'NN/name'] + +multext_east +------------ + +The Multext-East Corpus consists of POS-tagged versions of George Orwell's book +1984 in 12 languages: English, Czech, Hungarian, Macedonian, Slovenian, Serbian, +Slovak, Romanian, Estonian, Farsi, Bulgarian and Polish. +The corpus can be accessed using the usual methods for tagged corpora. The tagset +can be transformed from the Multext-East specific MSD tags to the Universal tagset +using the "tagset" parameter of all functions returning tagged parts of the corpus. + + >>> print(nltk.corpus.multext_east.words("oana-en.xml")) + ['It', 'was', 'a', 'bright', ...] + >>> print(nltk.corpus.multext_east.tagged_words("oana-en.xml")) + [('It', '#Pp3ns'), ('was', '#Vmis3s'), ('a', '#Di'), ...] + >>> print(nltk.corpus.multext_east.tagged_sents("oana-en.xml", "universal")) + [[('It', 'PRON'), ('was', 'VERB'), ('a', 'DET'), ...] + + + +--------------------- +Corpus Reader Classes +--------------------- + +NLTK's *corpus reader* classes are used to access the contents of a +diverse set of corpora. Each corpus reader class is specialized to +handle a specific corpus format. Examples include the +`PlaintextCorpusReader`, which handles corpora that consist of a set +of unannotated text files, and the `BracketParseCorpusReader`, which +handles corpora that consist of files containing +parenthesis-delineated parse trees. + +Automatically Created Corpus Reader Instances +============================================= + +When the `nltk.corpus` module is imported, it automatically creates a +set of corpus reader instances that can be used to access the corpora +in the NLTK data distribution. 
Here is a small sample of those +corpus reader instances: + + >>> import nltk + >>> nltk.corpus.brown # doctest: +ELLIPSIS + + >>> nltk.corpus.treebank # doctest: +ELLIPSIS + + >>> nltk.corpus.names # doctest: +ELLIPSIS + + >>> nltk.corpus.genesis # doctest: +ELLIPSIS + + >>> nltk.corpus.inaugural # doctest: +ELLIPSIS + + +This sample illustrates that different corpus reader classes are used +to read different corpora; but that the same corpus reader class may +be used for more than one corpus (e.g., ``genesis`` and ``inaugural``). + +Creating New Corpus Reader Instances +==================================== + +Although the `nltk.corpus` module automatically creates corpus reader +instances for the corpora in the NLTK data distribution, you may +sometimes need to create your own corpus reader. In particular, you +would need to create your own corpus reader if you want... + +- To access a corpus that is not included in the NLTK data + distribution. + +- To access a full copy of a corpus for which the NLTK data + distribution only provides a sample. + +- To access a corpus using a customized corpus reader (e.g., with + a customized tokenizer). + +To create a new corpus reader, you will first need to look up the +signature for that corpus reader's constructor. Different corpus +readers have different constructor signatures, but most of the +constructor signatures have the basic form:: + + SomeCorpusReader(root, files, ...options...) + +Where ``root`` is an absolute path to the directory containing the +corpus data files; ``files`` is either a list of file names (relative +to ``root``) or a regexp specifying which files should be included; +and ``options`` are additional reader-specific options. For example, +we can create a customized corpus reader for the genesis corpus that +uses a different sentence tokenizer as follows: + + >>> # Find the directory where the corpus lives. + >>> genesis_dir = nltk.data.find('corpora/genesis') + >>> # Create our custom sentence tokenizer. + >>> my_sent_tokenizer = nltk.RegexpTokenizer('[^.!?]+') + >>> # Create the new corpus reader object. + >>> my_genesis = nltk.corpus.PlaintextCorpusReader( + ... genesis_dir, '.*\.txt', sent_tokenizer=my_sent_tokenizer) + >>> # Use the new corpus reader object. + >>> print(my_genesis.sents('english-kjv.txt')[0]) # doctest: +NORMALIZE_WHITESPACE + ['In', 'the', 'beginning', 'God', 'created', 'the', 'heaven', + 'and', 'the', 'earth'] + +If you wish to read your own plaintext corpus, which is stored in the +directory '/usr/share/some-corpus', then you can create a corpus +reader for it with:: + + >>> my_corpus = nltk.corpus.PlaintextCorpusReader( + ... '/usr/share/some-corpus', '.*\.txt') # doctest: +SKIP + +For a complete list of corpus reader subclasses, see the API +documentation for `nltk.corpus.reader`. + +Corpus Types +============ + +Corpora vary widely in the types of content they include. This is +reflected in the fact that the base class `CorpusReader` only defines +a few general-purpose methods for listing and accessing the files that +make up a corpus. It is up to the subclasses to define *data access +methods* that provide access to the information in the corpus. +However, corpus reader subclasses should be consistent in their +definitions of these data access methods wherever possible. + +At a high level, corpora can be divided into three basic types: + +- A *token corpus* contains information about specific occurrences of + language use (or linguistic tokens), such as dialogues or written + texts. 
Examples of token corpora are collections of written text + and collections of speech. + +- A *type corpus*, or *lexicon*, contains information about a coherent + set of lexical items (or linguistic types). Examples of lexicons + are dictionaries and word lists. + +- A *language description corpus* contains information about a set of + non-lexical linguistic constructs, such as grammar rules. + +However, many individual corpora blur the distinctions between these +types. For example, corpora that are primarily lexicons may include +token data in the form of example sentences; and corpora that are +primarily token corpora may be accompanied by one or more word lists +or other lexical data sets. + +Because corpora vary so widely in their information content, we have +decided that it would not be wise to use separate corpus reader base +classes for different corpus types. Instead, we simply try to make +the corpus readers consistent wherever possible, but let them differ +where the underlying data itself differs. + +Common Corpus Reader Methods +============================ + +As mentioned above, there are only a handful of methods that all +corpus readers are guaranteed to implement. These methods provide +access to the files that contain the corpus data. Every corpus is +assumed to consist of one or more files, all located in a common root +directory (or in subdirectories of that root directory). The absolute +path to the root directory is stored in the ``root`` property: + + >>> import os + >>> str(nltk.corpus.genesis.root).replace(os.path.sep,'/') # doctest: +ELLIPSIS + '.../nltk_data/corpora/genesis' + +Each file within the corpus is identified by a platform-independent +identifier, which is basically a path string that uses ``/`` as the +path separator. I.e., this identifier can be converted to a relative +path as follows: + + >>> some_corpus_file_id = nltk.corpus.reuters.fileids()[0] + >>> import os.path + >>> os.path.normpath(some_corpus_file_id).replace(os.path.sep,'/') + 'test/14826' + +To get a list of all data files that make up a corpus, use the +``fileids()`` method. In some corpora, these files will not all contain +the same type of data; for example, for the ``nltk.corpus.timit`` +corpus, ``fileids()`` will return a list including text files, word +segmentation files, phonetic transcription files, sound files, and +metadata files. For corpora with diverse file types, the ``fileids()`` +method will often take one or more optional arguments, which can be +used to get a list of the files with a specific file type: + + >>> nltk.corpus.timit.fileids() # doctest: +ELLIPSIS + ['dr1-fvmh0/sa1.phn', 'dr1-fvmh0/sa1.txt', 'dr1-fvmh0/sa1.wav', ...] + >>> nltk.corpus.timit.fileids('phn') # doctest: +ELLIPSIS + ['dr1-fvmh0/sa1.phn', 'dr1-fvmh0/sa2.phn', 'dr1-fvmh0/si1466.phn', ...] + +In some corpora, the files are divided into distinct categories. For +these corpora, the ``fileids()`` method takes an optional argument, +which can be used to get a list of the files within a specific category: + + >>> nltk.corpus.brown.fileids('hobbies') # doctest: +ELLIPSIS + ['ce01', 'ce02', 'ce03', 'ce04', 'ce05', 'ce06', 'ce07', ...] 
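+
+As a hedged aside (not part of the original examples; it only combines the
+``categories()`` and ``fileids()`` methods already used in this document, and
+the output is skipped because it depends on which corpora you have
+installed), the same mechanism can be turned around to count the files in
+each category:
+
+    >>> for category in nltk.corpus.brown.categories()[:3]: # doctest: +SKIP
+    ...     print(category, len(nltk.corpus.brown.fileids(category)))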
+ +The ``abspath()`` method can be used to find the absolute path to a +corpus file, given its file identifier: + + >>> str(nltk.corpus.brown.abspath('ce06')).replace(os.path.sep,'/') # doctest: +ELLIPSIS + '.../corpora/brown/ce06' + +The ``abspaths()`` method can be used to find the absolute paths for +one corpus file, a list of corpus files, or (if no fileids are specified), +all corpus files. + +This method is mainly useful as a helper method when defining corpus +data access methods, since data access methods can usually be called +with a string argument (to get a view for a specific file), with a +list argument (to get a view for a specific list of files), or with no +argument (to get a view for the whole corpus). + +Data Access Methods +=================== + +Individual corpus reader subclasses typically extend this basic set of +file-access methods with one or more *data access methods*, which provide +easy access to the data contained in the corpus. The signatures for +data access methods often have the basic form:: + + corpus_reader.some_data access(fileids=None, ...options...) + +Where ``fileids`` can be a single file identifier string (to get a view +for a specific file); a list of file identifier strings (to get a view +for a specific list of files); or None (to get a view for the entire +corpus). Some of the common data access methods, and their return +types, are: + + - I{corpus}.words(): list of str + - I{corpus}.sents(): list of (list of str) + - I{corpus}.paras(): list of (list of (list of str)) + - I{corpus}.tagged_words(): list of (str,str) tuple + - I{corpus}.tagged_sents(): list of (list of (str,str)) + - I{corpus}.tagged_paras(): list of (list of (list of (str,str))) + - I{corpus}.chunked_sents(): list of (Tree w/ (str,str) leaves) + - I{corpus}.parsed_sents(): list of (Tree with str leaves) + - I{corpus}.parsed_paras(): list of (list of (Tree with str leaves)) + - I{corpus}.xml(): A single xml ElementTree + - I{corpus}.raw(): str (unprocessed corpus contents) + +For example, the `words()` method is supported by many different +corpora, and returns a flat list of word strings: + + >>> nltk.corpus.brown.words() + ['The', 'Fulton', 'County', 'Grand', 'Jury', ...] + >>> nltk.corpus.treebank.words() + ['Pierre', 'Vinken', ',', '61', 'years', 'old', ...] + >>> nltk.corpus.conll2002.words() + [u'Sao', u'Paulo', u'(', u'Brasil', u')', u',', u'23', ...] + >>> nltk.corpus.genesis.words() + [u'In', u'the', u'beginning', u'God', u'created', ...] + +On the other hand, the `tagged_words()` method is only supported by +corpora that include part-of-speech annotations: + + >>> nltk.corpus.brown.tagged_words() + [('The', 'AT'), ('Fulton', 'NP-TL'), ...] + >>> nltk.corpus.treebank.tagged_words() + [('Pierre', 'NNP'), ('Vinken', 'NNP'), ...] + >>> nltk.corpus.conll2002.tagged_words() + [(u'Sao', u'NC'), (u'Paulo', u'VMI'), (u'(', u'Fpa'), ...] + >>> nltk.corpus.genesis.tagged_words() + Traceback (most recent call last): + ... + AttributeError: 'PlaintextCorpusReader' object has no attribute 'tagged_words' + +Although most corpus readers use file identifiers to index their +content, some corpora use different identifiers instead. For example, +the data access methods for the ``timit`` corpus uses *utterance +identifiers* to select which corpus items should be returned: + + >>> nltk.corpus.timit.utteranceids() # doctest: +ELLIPSIS + ['dr1-fvmh0/sa1', 'dr1-fvmh0/sa2', 'dr1-fvmh0/si1466', ...] 
+ >>> nltk.corpus.timit.words('dr1-fvmh0/sa2') + ["don't", 'ask', 'me', 'to', 'carry', 'an', 'oily', 'rag', 'like', 'that'] + +Attempting to call ``timit``\ 's data access methods with a file +identifier will result in an exception: + + >>> nltk.corpus.timit.fileids() # doctest: +ELLIPSIS + ['dr1-fvmh0/sa1.phn', 'dr1-fvmh0/sa1.txt', 'dr1-fvmh0/sa1.wav', ...] + >>> nltk.corpus.timit.words('dr1-fvmh0/sa1.txt') # doctest: +SKIP + Traceback (most recent call last): + ... + IOError: No such file or directory: '.../dr1-fvmh0/sa1.txt.wrd' + +As another example, the ``propbank`` corpus defines the ``roleset()`` +method, which expects a roleset identifier, not a file identifier: + + >>> roleset = nltk.corpus.propbank.roleset('eat.01') + >>> from xml.etree import ElementTree as ET + >>> print(ET.tostring(roleset).decode('utf8')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + + + ...... + ... + ... + +Stream Backed Corpus Views +========================== +An important feature of NLTK's corpus readers is that many of them +access the underlying data files using "corpus views." A *corpus +view* is an object that acts like a simple data structure (such as a +list), but does not store the data elements in memory; instead, data +elements are read from the underlying data files on an as-needed +basis. + +By only loading items from the file on an as-needed basis, corpus +views maintain both memory efficiency and responsiveness. The memory +efficiency of corpus readers is important because some corpora contain +very large amounts of data, and storing the entire data set in memory +could overwhelm many machines. The responsiveness is important when +experimenting with corpora in interactive sessions and in in-class +demonstrations. + +The most common corpus view is the `StreamBackedCorpusView`, which +acts as a read-only list of tokens. Two additional corpus view +classes, `ConcatenatedCorpusView` and `LazySubsequence`, make it +possible to create concatenations and take slices of +`StreamBackedCorpusView` objects without actually storing the +resulting list-like object's elements in memory. + +In the future, we may add additional corpus views that act like other +basic data structures, such as dictionaries. + +Writing New Corpus Readers +========================== + +In order to add support for new corpus formats, it is necessary to +define new corpus reader classes. For many corpus formats, writing +new corpus readers is relatively straight-forward. In this section, +we'll describe what's involved in creating a new corpus reader. If +you do create a new corpus reader, we encourage you to contribute it +back to the NLTK project. + +Don't Reinvent the Wheel +------------------------ +Before you start writing a new corpus reader, you should check to be +sure that the desired format can't be read using an existing corpus +reader with appropriate constructor arguments. For example, although +the `TaggedCorpusReader` assumes that words and tags are separated by +``/`` characters by default, an alternative tag-separation character +can be specified via the ``sep`` constructor argument. You should +also check whether the new corpus format can be handled by subclassing +an existing corpus reader, and tweaking a few methods or variables. + +Design +------ +If you decide to write a new corpus reader from scratch, then you +should first decide which data access methods you want the reader to +provide, and what their signatures should be. 
You should look at +existing corpus readers that process corpora with similar data +contents, and try to be consistent with those corpus readers whenever +possible. + +You should also consider what sets of identifiers are appropriate for +the corpus format. Where it's practical, file identifiers should be +used. However, for some corpora, it may make sense to use additional +sets of identifiers. Each set of identifiers should have a distinct +name (e.g., fileids, utteranceids, rolesets); and you should be consistent +in using that name to refer to that identifier. Do not use parameter +names like ``id``, which leave it unclear what type of identifier is +required. + +Once you've decided what data access methods and identifiers are +appropriate for your corpus, you should decide if there are any +customizable parameters that you'd like the corpus reader to handle. +These parameters make it possible to use a single corpus reader to +handle a wider variety of corpora. The ``sep`` argument for +`TaggedCorpusReader`, mentioned above, is an example of a customizable +corpus reader parameter. + +Implementation +-------------- + +Constructor +~~~~~~~~~~~ +If your corpus reader implements any customizable parameters, then +you'll need to override the constructor. Typically, the new +constructor will first call its base class's constructor, and then +store the customizable parameters. For example, the +`ConllChunkCorpusReader`\ 's constructor is defined as follows: + + def __init__(self, root, fileids, chunk_types, encoding='utf8', + tagset=None, separator=None): + ConllCorpusReader.__init__( + self, root, fileids, ('words', 'pos', 'chunk'), + chunk_types=chunk_types, encoding=encoding, + tagset=tagset, separator=separator) + +If your corpus reader does not implement any customization parameters, +then you can often just inherit the base class's constructor. + +Data Access Methods +~~~~~~~~~~~~~~~~~~~ + +The most common type of data access method takes an argument +identifying which files to access, and returns a view covering those +files. This argument may be a single file identifier string (to get a +view for a specific file); a list of file identifier strings (to get a +view for a specific list of files); or None (to get a view for the +entire corpus). The method's implementation converts this argument to +a list of path names using the `abspaths()` method, which handles all +three value types (string, list, and None): + + >>> print(str(nltk.corpus.brown.abspaths()).replace('\\\\','/')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + [FileSystemPathPointer('.../corpora/brown/ca01'), + FileSystemPathPointer('.../corpora/brown/ca02'), ...] + >>> print(str(nltk.corpus.brown.abspaths('ce06')).replace('\\\\','/')) # doctest: +ELLIPSIS + [FileSystemPathPointer('.../corpora/brown/ce06')] + >>> print(str(nltk.corpus.brown.abspaths(['ce06', 'ce07'])).replace('\\\\','/')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + [FileSystemPathPointer('.../corpora/brown/ce06'), + FileSystemPathPointer('.../corpora/brown/ce07')] + +An example of this type of method is the `words()` method, defined by +the `PlaintextCorpusReader` as follows: + + >>> def words(self, fileids=None): + ... return concat([self.CorpusView(fileid, self._read_word_block) + ... for fileid in self.abspaths(fileids)]) + +This method first uses `abspaths()` to convert ``fileids`` to a list of +absolute paths. 
It then creates a corpus view for each file, using +the `PlaintextCorpusReader._read_word_block()` method to read elements +from the data file (see the discussion of corpus views below). +Finally, it combines these corpus views using the +`nltk.corpus.reader.util.concat()` function. + +When writing a corpus reader for a corpus that is never expected to be +very large, it can sometimes be appropriate to read the files +directly, rather than using a corpus view. For example, the +`WordListCorpusView` class defines its `words()` method as follows: + + >>> def words(self, fileids=None): + ... return concat([[w for w in open(fileid).read().split('\n') if w] + ... for fileid in self.abspaths(fileids)]) + +(This is usually more appropriate for lexicons than for token corpora.) + +If the type of data returned by a data access method is one for which +NLTK has a conventional representation (e.g., words, tagged words, and +parse trees), then you should use that representation. Otherwise, you +may find it necessary to define your own representation. For data +structures that are relatively corpus-specific, it's usually best to +define new classes for these elements. For example, the ``propbank`` +corpus defines the `PropbankInstance` class to store the semantic role +labeling instances described by the corpus; and the ``ppattach`` +corpus defines the `PPAttachment` class to store the prepositional +attachment instances described by the corpus. + +Corpus Views +~~~~~~~~~~~~ +.. (Much of the content for this section is taken from the + StreamBackedCorpusView docstring.) + +The heart of a `StreamBackedCorpusView` is its *block reader* +function, which reads zero or more tokens from a stream, and returns +them as a list. A very simple example of a block reader is: + + >>> def simple_block_reader(stream): + ... return stream.readline().split() + +This simple block reader reads a single line at a time, and returns a +single token (consisting of a string) for each whitespace-separated +substring on the line. A `StreamBackedCorpusView` built from this +block reader will act like a read-only list of all the +whitespace-separated tokens in an underlying file. + +When deciding how to define the block reader for a given corpus, +careful consideration should be given to the size of blocks handled by +the block reader. Smaller block sizes will increase the memory +requirements of the corpus view's internal data structures (by 2 +integers per block). On the other hand, larger block sizes may +decrease performance for random access to the corpus. (But note that +larger block sizes will *not* decrease performance for iteration.) + +Internally, the `StreamBackedCorpusView` class maintains a partial +mapping from token index to file position, with one entry per block. +When a token with a given index *i* is requested, the corpus view +constructs it as follows: + +1. First, it searches the toknum/filepos mapping for the token index + closest to (but less than or equal to) *i*. + +2. Then, starting at the file position corresponding to that index, it + reads one block at a time using the block reader until it reaches + the requested token. + +The toknum/filepos mapping is created lazily: it is initially empty, +but every time a new block is read, the block's initial token is added +to the mapping. (Thus, the toknum/filepos map has one entry per +block.) + +You can create your own corpus view in one of two ways: + +1. 
Call the `StreamBackedCorpusView` constructor, and provide your + block reader function via the ``block_reader`` argument. + +2. Subclass `StreamBackedCorpusView`, and override the + `read_block()` method. + +The first option is usually easier, but the second option can allow +you to write a single `read_block` method whose behavior can be +customized by different parameters to the subclass's constructor. For +an example of this design pattern, see the `TaggedCorpusView` class, +which is used by `TaggedCorpusView`. + +---------------- +Regression Tests +---------------- + +The following helper functions are used to create and then delete +testing corpora that are stored in temporary directories. These +testing corpora are used to make sure the readers work correctly. + + >>> import tempfile, os.path, textwrap + >>> def make_testcorpus(ext='', **fileids): + ... root = tempfile.mkdtemp() + ... for fileid, contents in fileids.items(): + ... fileid += ext + ... f = open(os.path.join(root, fileid), 'w') + ... f.write(textwrap.dedent(contents)) + ... f.close() + ... return root + >>> def del_testcorpus(root): + ... for fileid in os.listdir(root): + ... os.remove(os.path.join(root, fileid)) + ... os.rmdir(root) + +Plaintext Corpus Reader +======================= +The plaintext corpus reader is used to access corpora that consist of +unprocessed plaintext data. It assumes that paragraph breaks are +indicated by blank lines. Sentences and words can be tokenized using +the default tokenizers, or by custom tokenizers specified as +parameters to the constructor. + + >>> root = make_testcorpus(ext='.txt', + ... a="""\ + ... This is the first sentence. Here is another + ... sentence! And here's a third sentence. + ... + ... This is the second paragraph. Tokenization is currently + ... fairly simple, so the period in Mr. gets tokenized. + ... """, + ... b="""This is the second file.""") + + >>> from nltk.corpus.reader.plaintext import PlaintextCorpusReader + +The list of documents can be specified explicitly, or implicitly (using a +regexp). The ``ext`` argument specifies a file extension. + + >>> corpus = PlaintextCorpusReader(root, ['a.txt', 'b.txt']) + >>> corpus.fileids() + ['a.txt', 'b.txt'] + >>> corpus = PlaintextCorpusReader(root, '.*\.txt') + >>> corpus.fileids() + ['a.txt', 'b.txt'] + +The directory containing the corpus is corpus.root: + + >>> str(corpus.root) == str(root) + True + +We can get a list of words, or the raw string: + + >>> corpus.words() + ['This', 'is', 'the', 'first', 'sentence', '.', ...] + >>> corpus.raw()[:40] + 'This is the first sentence. Here is ano' + +Check that reading individual documents works, and reading all documents at +once works: + + >>> len(corpus.words()), [len(corpus.words(d)) for d in corpus.fileids()] + (46, [40, 6]) + >>> corpus.words('a.txt') + ['This', 'is', 'the', 'first', 'sentence', '.', ...] + >>> corpus.words('b.txt') + ['This', 'is', 'the', 'second', 'file', '.'] + >>> corpus.words()[:4], corpus.words()[-4:] + (['This', 'is', 'the', 'first'], ['the', 'second', 'file', '.']) + +We're done with the test corpus: + + >>> del_testcorpus(root) + +Test the plaintext corpora that come with nltk: + + >>> from nltk.corpus import abc, genesis, inaugural + >>> from nltk.corpus import state_union, webtext + >>> for corpus in (abc, genesis, inaugural, state_union, + ... webtext): + ... print(str(corpus).replace('\\\\','/')) + ... print(' ', repr(corpus.fileids())[:60]) + ... 
print(' ', repr(corpus.words()[:10])[:60]) + + ['rural.txt', 'science.txt'] + ['PM', 'denies', 'knowledge', 'of', 'AWB', ... + + ['english-kjv.txt', 'english-web.txt', 'finnish.txt', ... + ['In', 'the', 'beginning', 'God', 'created', 'the', ... + + ['1789-Washington.txt', '1793-Washington.txt', ... + ['Fellow', '-', 'Citizens', 'of', 'the', 'Senate', ... + + ['1945-Truman.txt', '1946-Truman.txt', ... + ['PRESIDENT', 'HARRY', 'S', '.', 'TRUMAN', "'", ... + + ['firefox.txt', 'grail.txt', 'overheard.txt', ... + ['Cookie', 'Manager', ':', '"', 'Don', "'", 't', ... + + +Tagged Corpus Reader +==================== +The Tagged Corpus reader can give us words, sentences, and paragraphs, +each tagged or untagged. All of the read methods can take one item +(in which case they return the contents of that file) or a list of +documents (in which case they concatenate the contents of those files). +By default, they apply to all documents in the corpus. + + >>> root = make_testcorpus( + ... a="""\ + ... This/det is/verb the/det first/adj sentence/noun ./punc + ... Here/det is/verb another/adj sentence/noun ./punc + ... Note/verb that/comp you/pron can/verb use/verb \ + ... any/noun tag/noun set/noun + ... + ... This/det is/verb the/det second/adj paragraph/noun ./punc + ... word/n without/adj a/det tag/noun :/: hello ./punc + ... """, + ... b="""\ + ... This/det is/verb the/det second/adj file/noun ./punc + ... """) + + >>> from nltk.corpus.reader.tagged import TaggedCorpusReader + >>> corpus = TaggedCorpusReader(root, list('ab')) + >>> corpus.fileids() + ['a', 'b'] + >>> str(corpus.root) == str(root) + True + >>> corpus.words() + ['This', 'is', 'the', 'first', 'sentence', '.', ...] + >>> corpus.sents() # doctest: +ELLIPSIS + [['This', 'is', 'the', 'first', ...], ['Here', 'is', 'another'...], ...] + >>> corpus.paras() # doctest: +ELLIPSIS + [[['This', ...], ['Here', ...], ...], [['This', ...], ...], ...] + >>> corpus.tagged_words() # doctest: +ELLIPSIS + [('This', 'DET'), ('is', 'VERB'), ('the', 'DET'), ...] + >>> corpus.tagged_sents() # doctest: +ELLIPSIS + [[('This', 'DET'), ('is', 'VERB'), ...], [('Here', 'DET'), ...], ...] + >>> corpus.tagged_paras() # doctest: +ELLIPSIS + [[[('This', 'DET'), ...], ...], [[('This', 'DET'), ...], ...], ...] + >>> corpus.raw()[:40] + 'This/det is/verb the/det first/adj sente' + >>> len(corpus.words()), [len(corpus.words(d)) for d in corpus.fileids()] + (38, [32, 6]) + >>> len(corpus.sents()), [len(corpus.sents(d)) for d in corpus.fileids()] + (6, [5, 1]) + >>> len(corpus.paras()), [len(corpus.paras(d)) for d in corpus.fileids()] + (3, [2, 1]) + >>> print(corpus.words('a')) + ['This', 'is', 'the', 'first', 'sentence', '.', ...] + >>> print(corpus.words('b')) + ['This', 'is', 'the', 'second', 'file', '.'] + >>> del_testcorpus(root) + +The Brown Corpus uses the tagged corpus reader: + + >>> from nltk.corpus import brown + >>> brown.fileids() # doctest: +ELLIPSIS + ['ca01', 'ca02', 'ca03', 'ca04', 'ca05', 'ca06', 'ca07', ...] + >>> brown.categories() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + ['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'humor', + 'learned', 'lore', 'mystery', 'news', 'religion', 'reviews', 'romance', 'science_fiction'] + >>> print(repr(brown.root).replace('\\\\','/')) # doctest: +ELLIPSIS + FileSystemPathPointer('.../corpora/brown') + >>> brown.words() + ['The', 'Fulton', 'County', 'Grand', 'Jury', ...] + >>> brown.sents() # doctest: +ELLIPSIS + [['The', 'Fulton', 'County', 'Grand', ...], ...] 
+ >>> brown.paras() # doctest: +ELLIPSIS + [[['The', 'Fulton', 'County', ...]], [['The', 'jury', ...]], ...] + >>> brown.tagged_words() # doctest: +ELLIPSIS + [('The', 'AT'), ('Fulton', 'NP-TL'), ...] + >>> brown.tagged_sents() # doctest: +ELLIPSIS + [[('The', 'AT'), ('Fulton', 'NP-TL'), ('County', 'NN-TL'), ...], ...] + >>> brown.tagged_paras() # doctest: +ELLIPSIS + [[[('The', 'AT'), ...]], [[('The', 'AT'), ...]], ...] + +Verbnet Corpus Reader +===================== + +Make sure we're picking up the right number of elements: + + >>> from nltk.corpus import verbnet + >>> len(verbnet.lemmas()) + 3621 + >>> len(verbnet.wordnetids()) + 4953 + >>> len(verbnet.classids()) + 429 + +Selecting classids based on various selectors: + + >>> verbnet.classids(lemma='take') # doctest: +NORMALIZE_WHITESPACE + ['bring-11.3', 'characterize-29.2', 'convert-26.6.2', 'cost-54.2', + 'fit-54.3', 'performance-26.7-2', 'steal-10.5'] + >>> verbnet.classids(wordnetid='lead%2:38:01') + ['accompany-51.7'] + >>> verbnet.classids(fileid='approve-77.xml') + ['approve-77'] + >>> verbnet.classids(classid='admire-31.2') # subclasses + ['admire-31.2-1'] + +vnclass() accepts filenames, long ids, and short ids: + + >>> a = ElementTree.tostring(verbnet.vnclass('admire-31.2.xml')) + >>> b = ElementTree.tostring(verbnet.vnclass('admire-31.2')) + >>> c = ElementTree.tostring(verbnet.vnclass('31.2')) + >>> a == b == c + True + +fileids() can be used to get files based on verbnet class ids: + + >>> verbnet.fileids('admire-31.2') + ['admire-31.2.xml'] + >>> verbnet.fileids(['admire-31.2', 'obtain-13.5.2']) + ['admire-31.2.xml', 'obtain-13.5.2.xml'] + >>> verbnet.fileids('badidentifier') + Traceback (most recent call last): + . . . + ValueError: vnclass identifier 'badidentifier' not found + +longid() and shortid() can be used to convert identifiers: + + >>> verbnet.longid('31.2') + 'admire-31.2' + >>> verbnet.longid('admire-31.2') + 'admire-31.2' + >>> verbnet.shortid('31.2') + '31.2' + >>> verbnet.shortid('admire-31.2') + '31.2' + >>> verbnet.longid('badidentifier') + Traceback (most recent call last): + . . . + ValueError: vnclass identifier 'badidentifier' not found + >>> verbnet.shortid('badidentifier') + Traceback (most recent call last): + . . . + ValueError: vnclass identifier 'badidentifier' not found + +Corpus View Regression Tests +============================ + +Select some corpus files to play with: + + >>> import nltk.data + >>> # A very short file (160 chars): + >>> f1 = nltk.data.find('corpora/inaugural/README') + >>> # A relatively short file (791 chars): + >>> f2 = nltk.data.find('corpora/inaugural/1793-Washington.txt') + >>> # A longer file (32k chars): + >>> f3 = nltk.data.find('corpora/inaugural/1909-Taft.txt') + >>> fileids = [f1, f2, f3] + + +Concatenation +------------- +Check that concatenation works as intended. + + >>> from nltk.corpus.reader.util import * + + >>> c1 = StreamBackedCorpusView(f1, read_whitespace_block, encoding='utf-8') + >>> c2 = StreamBackedCorpusView(f2, read_whitespace_block, encoding='utf-8') + >>> c3 = StreamBackedCorpusView(f3, read_whitespace_block, encoding='utf-8') + >>> c123 = c1+c2+c3 + >>> print(c123) + ['C-Span', 'Inaugural', 'Address', 'Corpus', 'US', ...] + + >>> l1 = f1.open(encoding='utf-8').read().split() + >>> l2 = f2.open(encoding='utf-8').read().split() + >>> l3 = f3.open(encoding='utf-8').read().split() + >>> l123 = l1+l2+l3 + + >>> list(c123) == l123 + True + + >>> (c1+c2+c3)[100] == l123[100] + True + +Slicing +------- +First, do some tests with fairly small slices. 
These will all +generate tuple values. + + >>> from nltk.util import LazySubsequence + >>> c1 = StreamBackedCorpusView(f1, read_whitespace_block, encoding='utf-8') + >>> l1 = f1.open(encoding='utf-8').read().split() + >>> print(len(c1)) + 21 + >>> len(c1) < LazySubsequence.MIN_SIZE + True + +Choose a list of indices, based on the length, that covers the +important corner cases: + + >>> indices = [-60, -30, -22, -21, -20, -1, + ... 0, 1, 10, 20, 21, 22, 30, 60] + +Test slicing with explicit start & stop value: + + >>> for s in indices: + ... for e in indices: + ... assert list(c1[s:e]) == l1[s:e] + +Test slicing with stop=None: + + >>> for s in indices: + ... assert list(c1[s:]) == l1[s:] + +Test slicing with start=None: + + >>> for e in indices: + ... assert list(c1[:e]) == l1[:e] + +Test slicing with start=stop=None: + + >>> list(c1[:]) == list(l1[:]) + True + +Next, we'll do some tests with much longer slices. These will +generate LazySubsequence objects. + + >>> c3 = StreamBackedCorpusView(f3, read_whitespace_block, encoding='utf-8') + >>> l3 = f3.open(encoding='utf-8').read().split() + >>> print(len(c3)) + 5430 + >>> len(c3) > LazySubsequence.MIN_SIZE*2 + True + +Choose a list of indices, based on the length, that covers the +important corner cases: + + >>> indices = [-12000, -6000, -5431, -5430, -5429, -3000, -200, -1, + ... 0, 1, 200, 3000, 5000, 5429, 5430, 5431, 6000, 12000] + +Test slicing with explicit start & stop value: + + >>> for s in indices: + ... for e in indices: + ... assert list(c3[s:e]) == l3[s:e] + +Test slicing with stop=None: + + >>> for s in indices: + ... assert list(c3[s:]) == l3[s:] + +Test slicing with start=None: + + >>> for e in indices: + ... assert list(c3[:e]) == l3[:e] + +Test slicing with start=stop=None: + + >>> list(c3[:]) == list(l3[:]) + True + +Multiple Iterators +------------------ +If multiple iterators are created for the same corpus view, their +iteration can be interleaved: + + >>> c3 = StreamBackedCorpusView(f3, read_whitespace_block) + >>> iterators = [c3.iterate_from(n) for n in [0,15,30,45]] + >>> for i in range(15): + ... for iterator in iterators: + ... print('%-15s' % next(iterator), end=' ') + ... print() + My a duties in + fellow heavy of a + citizens: weight the proper + Anyone of office sense + who responsibility. upon of + has If which the + taken not, he obligation + the he is which + oath has about the + I no to oath + have conception enter, imposes. + just of or The + taken the he office + must powers is of + feel and lacking an + +SeekableUnicodeStreamReader +=========================== + +The file-like objects provided by the ``codecs`` module unfortunately +suffer from a bug that prevents them from working correctly with +corpus view objects. In particular, although the expose ``seek()`` +and ``tell()`` methods, those methods do not exhibit the expected +behavior, because they are not synchronized with the internal buffers +that are kept by the file-like objects. For example, the ``tell()`` +method will return the file position at the end of the buffers (whose +contents have not yet been returned by the stream); and therefore this +file position can not be used to return to the 'current' location in +the stream (since ``seek()`` has no way to reconstruct the buffers). + +To get around these problems, we define a new class, +`SeekableUnicodeStreamReader`, to act as a file-like interface to +files containing encoded unicode data. This class is loosely based on +the ``codecs.StreamReader`` class. 
To construct a new reader, we call +the constructor with an underlying stream and an encoding name: + + >>> from io import StringIO, BytesIO + >>> from nltk.data import SeekableUnicodeStreamReader + >>> stream = BytesIO(b"""\ + ... This is a test file. + ... It is encoded in ascii. + ... """.decode('ascii').encode('ascii')) + >>> reader = SeekableUnicodeStreamReader(stream, 'ascii') + +`SeekableUnicodeStreamReader`\ s support all of the normal operations +supplied by a read-only stream. Note that all of the read operations +return ``unicode`` objects (not ``str`` objects). + + >>> reader.read() # read the entire file. + u'This is a test file.\nIt is encoded in ascii.\n' + >>> reader.seek(0) # rewind to the start. + >>> reader.read(5) # read at most 5 bytes. + u'This ' + >>> reader.readline() # read to the end of the line. + u'is a test file.\n' + >>> reader.seek(0) # rewind to the start. + >>> for line in reader: + ... print(repr(line)) # iterate over lines + u'This is a test file.\n' + u'It is encoded in ascii.\n' + >>> reader.seek(0) # rewind to the start. + >>> reader.readlines() # read a list of line strings + [u'This is a test file.\n', u'It is encoded in ascii.\n'] + >>> reader.close() + +Size argument to ``read()`` +--------------------------- +The ``size`` argument to ``read()`` specifies the maximum number of +*bytes* to read, not the maximum number of *characters*. Thus, for +encodings that use multiple bytes per character, it may return fewer +characters than the ``size`` argument: + + >>> stream = BytesIO(b"""\ + ... This is a test file. + ... It is encoded in utf-16. + ... """.decode('ascii').encode('utf-16')) + >>> reader = SeekableUnicodeStreamReader(stream, 'utf-16') + >>> reader.read(10) + u'This ' + +If a read block ends in the middle of the byte string encoding a +single character, then that byte string is stored in an internal +buffer, and re-used on the next call to ``read()``. However, if the +size argument is too small to read even a single character, even +though at least one character is available, then the ``read()`` method +will read additional bytes until it can return a single character. +This ensures that the ``read()`` method does not return an empty +string, which could be mistaken for indicating the end of the file. + + >>> reader.seek(0) # rewind to the start. + >>> reader.read(1) # we actually need to read 4 bytes + u'T' + >>> int(reader.tell()) + 4 + +The ``readline()`` method may read more than a single line of text, in +which case it stores the text that it does not return in a buffer. If +this buffer is not empty, then its contents will be included in the +value returned by the next call to ``read()``, regardless of the +``size`` argument, since they are available without reading any new +bytes from the stream: + + >>> reader.seek(0) # rewind to the start. + >>> reader.readline() # stores extra text in a buffer + u'This is a test file.\n' + >>> print(reader.linebuffer) # examine the buffer contents + [u'It is encoded i'] + >>> reader.read(0) # returns the contents of the buffer + u'It is encoded i' + >>> print(reader.linebuffer) # examine the buffer contents + None + +Seek and Tell +------------- +In addition to these basic read operations, +`SeekableUnicodeStreamReader` also supports the ``seek()`` and +``tell()`` operations. However, some care must still be taken when +using these operations. In particular, the only file offsets that +should be passed to ``seek()`` are ``0`` and any offset that has been +returned by ``tell``. 
+ + >>> stream = BytesIO(b"""\ + ... This is a test file. + ... It is encoded in utf-16. + ... """.decode('ascii').encode('utf-16')) + >>> reader = SeekableUnicodeStreamReader(stream, 'utf-16') + >>> reader.read(20) + u'This is a ' + >>> pos = reader.tell(); print(pos) + 22 + >>> reader.read(20) + u'test file.' + >>> reader.seek(pos) # rewind to the position from tell. + >>> reader.read(20) + u'test file.' + +The ``seek()`` and ``tell()`` methods work property even when +``readline()`` is used. + + >>> stream = BytesIO(b"""\ + ... This is a test file. + ... It is encoded in utf-16. + ... """.decode('ascii').encode('utf-16')) + >>> reader = SeekableUnicodeStreamReader(stream, 'utf-16') + >>> reader.readline() + u'This is a test file.\n' + >>> pos = reader.tell(); print(pos) + 44 + >>> reader.readline() + u'It is encoded in utf-16.\n' + >>> reader.seek(pos) # rewind to the position from tell. + >>> reader.readline() + u'It is encoded in utf-16.\n' + + +Squashed Bugs +============= + +svn 5276 fixed a bug in the comment-stripping behavior of +parse_sexpr_block. + + >>> from io import StringIO + >>> from nltk.corpus.reader.util import read_sexpr_block + >>> f = StringIO(b""" + ... (a b c) + ... # This line is a comment. + ... (d e f\ng h)""".decode('ascii')) + >>> print(read_sexpr_block(f, block_size=38, comment_char='#')) + ['(a b c)'] + >>> print(read_sexpr_block(f, block_size=38, comment_char='#')) + ['(d e f\ng h)'] + +svn 5277 fixed a bug in parse_sexpr_block, which would cause it to +enter an infinite loop if a file ended mid-sexpr, or ended with a +token that was not followed by whitespace. A related bug caused +an infinite loop if the corpus ended in an unmatched close paren -- +this was fixed in svn 5279 + + >>> f = StringIO(b""" + ... This file ends mid-sexpr + ... (hello (world""".decode('ascii')) + >>> for i in range(3): print(read_sexpr_block(f)) + ['This', 'file', 'ends', 'mid-sexpr'] + ['(hello (world'] + [] + + >>> f = StringIO(b"This file has no trailing whitespace.".decode('ascii')) + >>> for i in range(3): print(read_sexpr_block(f)) + ['This', 'file', 'has', 'no', 'trailing'] + ['whitespace.'] + [] + + >>> # Bug fixed in 5279: + >>> f = StringIO(b"a b c)".decode('ascii')) + >>> for i in range(3): print(read_sexpr_block(f)) + ['a', 'b'] + ['c)'] + [] + + +svn 5624 & 5265 fixed a bug in ConcatenatedCorpusView, which caused it +to return the wrong items when indexed starting at any index beyond +the first file. + + >>> import nltk + >>> sents = nltk.corpus.brown.sents() + >>> print(sents[6000]) + ['Cholesterol', 'and', 'thyroid'] + >>> print(sents[6000]) + ['Cholesterol', 'and', 'thyroid'] + +svn 5728 fixed a bug in Categorized*CorpusReader, which caused them +to return words from *all* files when just one file was specified. + + >>> from nltk.corpus import reuters + >>> reuters.words('training/13085') + ['SNYDER', '&', 'lt', ';', 'SOI', '>', 'MAKES', ...] + >>> reuters.words('training/5082') + ['SHEPPARD', 'RESOURCES', 'TO', 'MERGE', 'WITH', ...] + +svn 7227 fixed a bug in the qc corpus reader, which prevented +access to its tuples() method + + >>> from nltk.corpus import qc + >>> qc.tuples('test.txt') + [('NUM:dist', 'How far is it from Denver to Aspen ?'), ('LOC:city', 'What county is Modesto , California in ?'), ...] 
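+
+As a small follow-up (a hedged sketch, not part of the regression suite; it
+only reuses the ``qc.tuples()`` call above together with ``nltk.FreqDist``,
+and is skipped because the counts depend on the installed corpus), the
+category labels in these tuples can be tallied directly:
+
+    >>> from nltk import FreqDist
+    >>> FreqDist(label for label, question in qc.tuples('test.txt')).most_common(5) # doctest: +SKIP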
+ + + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/corpus_fixt.py b/venv.bak/lib/python3.7/site-packages/nltk/test/corpus_fixt.py new file mode 100644 index 0000000..ce0cd83 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/corpus_fixt.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from nltk.corpus import teardown_module diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/crubadan.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/crubadan.doctest new file mode 100644 index 0000000..011af25 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/crubadan.doctest @@ -0,0 +1,65 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +Crubadan Corpus Reader +====================== + +Crubadan is an NLTK corpus reader for ngram files provided +by the Crubadan project. It supports several languages. + + >>> from nltk.corpus import crubadan + >>> crubadan.langs() # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + ['abk', 'abn',..., 'zpa', 'zul'] + +---------------------------------------- +Language code mapping and helper methods +---------------------------------------- + +The web crawler that generates the 3-gram frequencies works at the +level of "writing systems" rather than languages. Writing systems +are assigned internal 2-3 letter codes that require mapping to the +standard ISO 639-3 codes. For more information, please refer to +the README in nltk_data/crubadan folder after installing it. + +To translate ISO 639-3 codes to "Crubadan Code": + + >>> crubadan.iso_to_crubadan('eng') + 'en' + >>> crubadan.iso_to_crubadan('fra') + 'fr' + >>> crubadan.iso_to_crubadan('aaa') + +In reverse, print ISO 639-3 code if we have the Crubadan Code: + + >>> crubadan.crubadan_to_iso('en') + 'eng' + >>> crubadan.crubadan_to_iso('fr') + 'fra' + >>> crubadan.crubadan_to_iso('aa') + +--------------------------- +Accessing ngram frequencies +--------------------------- + +On initialization the reader will create a dictionary of every +language supported by the Crubadan project, mapping the ISO 639-3 +language code to its corresponding ngram frequency. + +You can access individual language FreqDist and the ngrams within them as follows: + + >>> english_fd = crubadan.lang_freq('eng') + >>> english_fd['the'] + 728135 + +Above accesses the FreqDist of English and returns the frequency of the ngram 'the'. +A ngram that isn't found within the language will return 0: + + >>> english_fd['sometest'] + 0 + +A language that isn't supported will raise an exception: + + >>> crubadan.lang_freq('elvish') + Traceback (most recent call last): + ... + RuntimeError: Unsupported language. diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/data.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/data.doctest new file mode 100644 index 0000000..184c512 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/data.doctest @@ -0,0 +1,379 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +========================================= + Loading Resources From the Data Package +========================================= + + >>> import nltk.data + +Overview +~~~~~~~~ +The `nltk.data` module contains functions that can be used to load +NLTK resource files, such as corpora, grammars, and saved processing +objects. 
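+
+For orientation before the details below (a hedged sketch; the resource path
+is one of the sample grammars used later in this document, and both calls are
+skipped because the results depend on which resources you have downloaded),
+the two most common entry points are ``find()``, which locates a resource on
+disk, and ``load()``, which locates, reads, and parses it:
+
+    >>> nltk.data.find('grammars/sample_grammars/toy.cfg') # doctest: +SKIP
+    >>> nltk.data.load('grammars/sample_grammars/toy.cfg') # doctest: +SKIP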
+ +Loading Data Files +~~~~~~~~~~~~~~~~~~ +Resources are loaded using the function `nltk.data.load()`, which +takes as its first argument a URL specifying what file should be +loaded. The ``nltk:`` protocol loads files from the NLTK data +distribution: + + >>> from __future__ import print_function + >>> tokenizer = nltk.data.load('nltk:tokenizers/punkt/english.pickle') + >>> tokenizer.tokenize('Hello. This is a test. It works!') + ['Hello.', 'This is a test.', 'It works!'] + +It is important to note that there should be no space following the +colon (':') in the URL; 'nltk: tokenizers/punkt/english.pickle' will +not work! + +The ``nltk:`` protocol is used by default if no protocol is specified: + + >>> nltk.data.load('tokenizers/punkt/english.pickle') # doctest: +ELLIPSIS + + +But it is also possible to load resources from ``http:``, ``ftp:``, +and ``file:`` URLs, e.g. ``cfg = nltk.data.load('http://example.com/path/to/toy.cfg')`` + + >>> # Load a grammar using an absolute path. + >>> url = 'file:%s' % nltk.data.find('grammars/sample_grammars/toy.cfg') + >>> url.replace('\\', '/') # doctest: +ELLIPSIS + 'file:...toy.cfg' + >>> print(nltk.data.load(url)) # doctest: +ELLIPSIS + Grammar with 14 productions (start state = S) + S -> NP VP + PP -> P NP + ... + P -> 'on' + P -> 'in' + +The second argument to the `nltk.data.load()` function specifies the +file format, which determines how the file's contents are processed +before they are returned by ``load()``. The formats that are +currently supported by the data module are described by the dictionary +`nltk.data.FORMATS`: + + >>> for format, descr in sorted(nltk.data.FORMATS.items()): + ... print('{0:<7} {1:}'.format(format, descr)) # doctest: +NORMALIZE_WHITESPACE + cfg A context free grammar. + fcfg A feature CFG. + fol A list of first order logic expressions, parsed with + nltk.sem.logic.Expression.fromstring. + json A serialized python object, stored using the json module. + logic A list of first order logic expressions, parsed with + nltk.sem.logic.LogicParser. Requires an additional logic_parser + parameter + pcfg A probabilistic CFG. + pickle A serialized python object, stored using the pickle + module. + raw The raw (byte string) contents of a file. + text The raw (unicode string) contents of a file. + val A semantic valuation, parsed by + nltk.sem.Valuation.fromstring. + yaml A serialized python object, stored using the yaml module. + +`nltk.data.load()` will raise a ValueError if a bad format name is +specified: + + >>> nltk.data.load('grammars/sample_grammars/toy.cfg', 'bar') + Traceback (most recent call last): + . . . + ValueError: Unknown format type! + +By default, the ``"auto"`` format is used, which chooses a format +based on the filename's extension. The mapping from file extensions +to format names is specified by `nltk.data.AUTO_FORMATS`: + + >>> for ext, format in sorted(nltk.data.AUTO_FORMATS.items()): + ... print('.%-7s -> %s' % (ext, format)) + .cfg -> cfg + .fcfg -> fcfg + .fol -> fol + .json -> json + .logic -> logic + .pcfg -> pcfg + .pickle -> pickle + .text -> text + .txt -> text + .val -> val + .yaml -> yaml + +If `nltk.data.load()` is unable to determine the format based on the +filename's extension, it will raise a ValueError: + + >>> nltk.data.load('foo.bar') + Traceback (most recent call last): + . . . + ValueError: Could not determine format for foo.bar based on its file + extension; use the "format" argument to specify the format explicitly. 
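+
+As the error message suggests (a hedged sketch; it assumes the hypothetical
+``foo.bar`` file exists and contains plain text), the workaround is simply to
+name the format yourself:
+
+    >>> nltk.data.load('foo.bar', format='text') # doctest: +SKIP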
+ +Note that by explicitly specifying the ``format`` argument, you can +override the load method's default processing behavior. For example, +to get the raw contents of any file, simply use ``format="raw"``: + + >>> s = nltk.data.load('grammars/sample_grammars/toy.cfg', 'text') + >>> print(s) # doctest: +ELLIPSIS + S -> NP VP + PP -> P NP + NP -> Det N | NP PP + VP -> V NP | VP PP + ... + +Making Local Copies +~~~~~~~~~~~~~~~~~~~ +.. This will not be visible in the html output: create a tempdir to + play in. + >>> import tempfile, os + >>> tempdir = tempfile.mkdtemp() + >>> old_dir = os.path.abspath('.') + >>> os.chdir(tempdir) + +The function `nltk.data.retrieve()` copies a given resource to a local +file. This can be useful, for example, if you want to edit one of the +sample grammars. + + >>> nltk.data.retrieve('grammars/sample_grammars/toy.cfg') + Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'toy.cfg' + + >>> # Simulate editing the grammar. + >>> with open('toy.cfg') as inp: + ... s = inp.read().replace('NP', 'DP') + >>> with open('toy.cfg', 'w') as out: + ... _bytes_written = out.write(s) + + >>> # Load the edited grammar, & display it. + >>> cfg = nltk.data.load('file:///' + os.path.abspath('toy.cfg')) + >>> print(cfg) # doctest: +ELLIPSIS + Grammar with 14 productions (start state = S) + S -> DP VP + PP -> P DP + ... + P -> 'on' + P -> 'in' + +The second argument to `nltk.data.retrieve()` specifies the filename +for the new copy of the file. By default, the source file's filename +is used. + + >>> nltk.data.retrieve('grammars/sample_grammars/toy.cfg', 'mytoy.cfg') + Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'mytoy.cfg' + >>> os.path.isfile('./mytoy.cfg') + True + >>> nltk.data.retrieve('grammars/sample_grammars/np.fcfg') + Retrieving 'nltk:grammars/sample_grammars/np.fcfg', saving to 'np.fcfg' + >>> os.path.isfile('./np.fcfg') + True + +If a file with the specified (or default) filename already exists in +the current directory, then `nltk.data.retrieve()` will raise a +ValueError exception. It will *not* overwrite the file: + + >>> os.path.isfile('./toy.cfg') + True + >>> nltk.data.retrieve('grammars/sample_grammars/toy.cfg') # doctest: +ELLIPSIS + Traceback (most recent call last): + . . . + ValueError: File '...toy.cfg' already exists! + +.. This will not be visible in the html output: clean up the tempdir. + >>> os.chdir(old_dir) + >>> for f in os.listdir(tempdir): + ... os.remove(os.path.join(tempdir, f)) + >>> os.rmdir(tempdir) + +Finding Files in the NLTK Data Package +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The `nltk.data.find()` function searches the NLTK data package for a +given file, and returns a pointer to that file. This pointer can +either be a `FileSystemPathPointer` (whose `path` attribute gives the +absolute path of the file); or a `ZipFilePathPointer`, specifying a +zipfile and the name of an entry within that zipfile. Both pointer +types define the `open()` method, which can be used to read the string +contents of the file. 
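+
+The sketch below is illustrative only (it is not run as a doctest) and assumes
+the same ``abc`` corpus used in the examples that follow is installed; it shows
+one way to check which kind of pointer ``find()`` returned before reading from
+it::
+
+    import nltk.data
+    from nltk.data import FileSystemPathPointer, ZipFilePathPointer
+
+    ptr = nltk.data.find('corpora/abc/rural.txt')
+    if isinstance(ptr, ZipFilePathPointer):
+        print('entry inside a zipfile:', ptr)
+    elif isinstance(ptr, FileSystemPathPointer):
+        print('ordinary file:', ptr.path)
+
+    # Either way, open() returns a readable stream over the raw bytes.
+    stream = ptr.open()
+    print(stream.read(60))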
+ + >>> path = nltk.data.find('corpora/abc/rural.txt') + >>> str(path) # doctest: +ELLIPSIS + '...rural.txt' + >>> print(path.open().read(60).decode()) + PM denies knowledge of AWB kickbacks + The Prime Minister has + +Alternatively, the `nltk.data.load()` function can be used with the +keyword argument ``format="raw"``: + + >>> s = nltk.data.load('corpora/abc/rural.txt', format='raw')[:60] + >>> print(s.decode()) + PM denies knowledge of AWB kickbacks + The Prime Minister has + +Alternatively, you can use the keyword argument ``format="text"``: + + >>> s = nltk.data.load('corpora/abc/rural.txt', format='text')[:60] + >>> print(s) + PM denies knowledge of AWB kickbacks + The Prime Minister has + +Resource Caching +~~~~~~~~~~~~~~~~ + +NLTK uses a weakref dictionary to maintain a cache of resources that +have been loaded. If you load a resource that is already stored in +the cache, then the cached copy will be returned. This behavior can +be seen by the trace output generated when verbose=True: + + >>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg', verbose=True) + <> + >>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg', verbose=True) + <> + +If you wish to load a resource from its source, bypassing the cache, +use the ``cache=False`` argument to `nltk.data.load()`. This can be +useful, for example, if the resource is loaded from a local file, and +you are actively editing that file: + + >>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg',cache=False,verbose=True) + <> + +The cache *no longer* uses weak references. A resource will not be +automatically expunged from the cache when no more objects are using +it. In the following example, when we clear the variable ``feat0``, +the reference count for the feature grammar object drops to zero. +However, the object remains cached: + + >>> del feat0 + >>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg', + ... verbose=True) + <> + +You can clear the entire contents of the cache, using +`nltk.data.clear_cache()`: + + >>> nltk.data.clear_cache() + +Retrieving other Data Sources +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + >>> formulas = nltk.data.load('grammars/book_grammars/background.fol') + >>> for f in formulas: print(str(f)) + all x.(boxerdog(x) -> dog(x)) + all x.(boxer(x) -> person(x)) + all x.-(dog(x) & person(x)) + all x.(married(x) <-> exists y.marry(x,y)) + all x.(bark(x) -> dog(x)) + all x y.(marry(x,y) -> (person(x) & person(y))) + -(Vincent = Mia) + -(Vincent = Fido) + -(Mia = Fido) + +Regression Tests +~~~~~~~~~~~~~~~~ +Create a temp dir for tests that write files: + + >>> import tempfile, os + >>> tempdir = tempfile.mkdtemp() + >>> old_dir = os.path.abspath('.') + >>> os.chdir(tempdir) + +The `retrieve()` function accepts all url types: + + >>> urls = ['https://raw.githubusercontent.com/nltk/nltk/develop/nltk/test/toy.cfg', + ... 'file:%s' % nltk.data.find('grammars/sample_grammars/toy.cfg'), + ... 'nltk:grammars/sample_grammars/toy.cfg', + ... 'grammars/sample_grammars/toy.cfg'] + >>> for i, url in enumerate(urls): + ... nltk.data.retrieve(url, 'toy-%d.cfg' % i) # doctest: +ELLIPSIS + Retrieving 'https://raw.githubusercontent.com/nltk/nltk/develop/nltk/test/toy.cfg', saving to 'toy-0.cfg' + Retrieving 'file:...toy.cfg', saving to 'toy-1.cfg' + Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'toy-2.cfg' + Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'toy-3.cfg' + +Clean up the temp dir: + + >>> os.chdir(old_dir) + >>> for f in os.listdir(tempdir): + ... 
os.remove(os.path.join(tempdir, f)) + >>> os.rmdir(tempdir) + +Lazy Loader +----------- +A lazy loader is a wrapper object that defers loading a resource until +it is accessed or used in any way. This is mainly intended for +internal use by NLTK's corpus readers. + + >>> # Create a lazy loader for toy.cfg. + >>> ll = nltk.data.LazyLoader('grammars/sample_grammars/toy.cfg') + + >>> # Show that it's not loaded yet: + >>> object.__repr__(ll) # doctest: +ELLIPSIS + '' + + >>> # printing it is enough to cause it to be loaded: + >>> print(ll) + + + >>> # Show that it's now been loaded: + >>> object.__repr__(ll) # doctest: +ELLIPSIS + '' + + + >>> # Test that accessing an attribute also loads it: + >>> ll = nltk.data.LazyLoader('grammars/sample_grammars/toy.cfg') + >>> ll.start() + S + >>> object.__repr__(ll) # doctest: +ELLIPSIS + '' + +Buffered Gzip Reading and Writing +--------------------------------- +Write performance to gzip-compressed is extremely poor when the files become large. +File creation can become a bottleneck in those cases. + +Read performance from large gzipped pickle files was improved in data.py by +buffering the reads. A similar fix can be applied to writes by buffering +the writes to a StringIO object first. + +This is mainly intended for internal use. The test simply tests that reading +and writing work as intended and does not test how much improvement buffering +provides. + + >>> from nltk.compat import StringIO + >>> test = nltk.data.BufferedGzipFile('testbuf.gz', 'wb', size=2**10) + >>> ans = [] + >>> for i in range(10000): + ... ans.append(str(i).encode('ascii')) + ... test.write(str(i).encode('ascii')) + >>> test.close() + >>> test = nltk.data.BufferedGzipFile('testbuf.gz', 'rb') + >>> test.read() == b''.join(ans) + True + >>> test.close() + >>> import os + >>> os.unlink('testbuf.gz') + +JSON Encoding and Decoding +-------------------------- +JSON serialization is used instead of pickle for some classes. + + >>> from nltk import jsontags + >>> from nltk.jsontags import JSONTaggedEncoder, JSONTaggedDecoder, register_tag + >>> @jsontags.register_tag + ... class JSONSerializable: + ... json_tag = 'JSONSerializable' + ... + ... def __init__(self, n): + ... self.n = n + ... + ... def encode_json_obj(self): + ... return self.n + ... + ... @classmethod + ... def decode_json_obj(cls, obj): + ... n = obj + ... return cls(n) + ... + >>> JSONTaggedEncoder().encode(JSONSerializable(1)) + '{"!JSONSerializable": 1}' + >>> JSONTaggedDecoder().decode('{"!JSONSerializable": 1}').n + 1 + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/dependency.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/dependency.doctest new file mode 100755 index 0000000..31590c4 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/dependency.doctest @@ -0,0 +1,241 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +=================== +Dependency Grammars +=================== + + >>> from nltk.grammar import DependencyGrammar + >>> from nltk.parse import ( + ... DependencyGraph, + ... ProjectiveDependencyParser, + ... NonprojectiveDependencyParser, + ... ) + +CoNLL Data +---------- + + >>> treebank_data = """Pierre NNP 2 NMOD + ... Vinken NNP 8 SUB + ... , , 2 P + ... 61 CD 5 NMOD + ... years NNS 6 AMOD + ... old JJ 2 NMOD + ... , , 2 P + ... will MD 0 ROOT + ... join VB 8 VC + ... the DT 11 NMOD + ... board NN 9 OBJ + ... as IN 9 VMOD + ... a DT 15 NMOD + ... nonexecutive JJ 15 NMOD + ... director NN 12 PMOD + ... Nov. 
NNP 9 VMOD + ... 29 CD 16 NMOD + ... . . 9 VMOD + ... """ + + >>> dg = DependencyGraph(treebank_data) + >>> dg.tree().pprint() + (will + (Vinken Pierre , (old (years 61)) ,) + (join (board the) (as (director a nonexecutive)) (Nov. 29) .)) + >>> for head, rel, dep in dg.triples(): + ... print( + ... '({h[0]}, {h[1]}), {r}, ({d[0]}, {d[1]})' + ... .format(h=head, r=rel, d=dep) + ... ) + (will, MD), SUB, (Vinken, NNP) + (Vinken, NNP), NMOD, (Pierre, NNP) + (Vinken, NNP), P, (,, ,) + (Vinken, NNP), NMOD, (old, JJ) + (old, JJ), AMOD, (years, NNS) + (years, NNS), NMOD, (61, CD) + (Vinken, NNP), P, (,, ,) + (will, MD), VC, (join, VB) + (join, VB), OBJ, (board, NN) + (board, NN), NMOD, (the, DT) + (join, VB), VMOD, (as, IN) + (as, IN), PMOD, (director, NN) + (director, NN), NMOD, (a, DT) + (director, NN), NMOD, (nonexecutive, JJ) + (join, VB), VMOD, (Nov., NNP) + (Nov., NNP), NMOD, (29, CD) + (join, VB), VMOD, (., .) + +Using a custom cell extractor. + + >>> def custom_extractor(cells): + ... _, tag, head, rel = cells + ... return 'spam', 'spam', tag, tag, '', head, rel + >>> dg = DependencyGraph(treebank_data, cell_extractor=custom_extractor) + >>> dg.tree().pprint() + (spam + (spam spam spam (spam (spam spam)) spam) + (spam (spam spam) (spam (spam spam spam)) (spam spam) spam)) + +Custom cell extractors can take in and return an index. + + >>> def custom_extractor(cells, index): + ... word, tag, head, rel = cells + ... return (index, '{}-{}'.format(word, index), word, + ... tag, tag, '', head, rel) + >>> dg = DependencyGraph(treebank_data, cell_extractor=custom_extractor) + >>> dg.tree().pprint() + (will-8 + (Vinken-2 Pierre-1 ,-3 (old-6 (years-5 61-4)) ,-7) + (join-9 + (board-11 the-10) + (as-12 (director-15 a-13 nonexecutive-14)) + (Nov.-16 29-17) + .-18)) + +Using the dependency-parsed version of the Penn Treebank corpus sample. + + >>> from nltk.corpus import dependency_treebank + >>> t = dependency_treebank.parsed_sents()[0] + >>> print(t.to_conll(3)) # doctest: +NORMALIZE_WHITESPACE + Pierre NNP 2 + Vinken NNP 8 + , , 2 + 61 CD 5 + years NNS 6 + old JJ 2 + , , 2 + will MD 0 + join VB 8 + the DT 11 + board NN 9 + as IN 9 + a DT 15 + nonexecutive JJ 15 + director NN 12 + Nov. NNP 9 + 29 CD 16 + . . 8 + +Using the output of zpar (like Malt-TAB but with zero-based indexing) + + >>> zpar_data = """ + ... Pierre NNP 1 NMOD + ... Vinken NNP 7 SUB + ... , , 1 P + ... 61 CD 4 NMOD + ... years NNS 5 AMOD + ... old JJ 1 NMOD + ... , , 1 P + ... will MD -1 ROOT + ... join VB 7 VC + ... the DT 10 NMOD + ... board NN 8 OBJ + ... as IN 8 VMOD + ... a DT 14 NMOD + ... nonexecutive JJ 14 NMOD + ... director NN 11 PMOD + ... Nov. NNP 8 VMOD + ... 29 CD 15 NMOD + ... . . 7 P + ... """ + + >>> zdg = DependencyGraph(zpar_data, zero_based=True) + >>> print(zdg.tree()) + (will + (Vinken Pierre , (old (years 61)) ,) + (join (board the) (as (director a nonexecutive)) (Nov. 29)) + .) + + +Projective Dependency Parsing +----------------------------- + + >>> grammar = DependencyGrammar.fromstring(""" + ... 'fell' -> 'price' | 'stock' + ... 'price' -> 'of' 'the' + ... 'of' -> 'stock' + ... 'stock' -> 'the' + ... """) + >>> print(grammar) + Dependency grammar with 5 productions + 'fell' -> 'price' + 'fell' -> 'stock' + 'price' -> 'of' 'the' + 'of' -> 'stock' + 'stock' -> 'the' + + >>> dp = ProjectiveDependencyParser(grammar) + >>> for t in sorted(dp.parse(['the', 'price', 'of', 'the', 'stock', 'fell'])): + ... 
print(t) + (fell (price the (of (stock the)))) + (fell (price the of) (stock the)) + (fell (price the of the) stock) + +Non-Projective Dependency Parsing +--------------------------------- + + >>> grammar = DependencyGrammar.fromstring(""" + ... 'taught' -> 'play' | 'man' + ... 'man' -> 'the' + ... 'play' -> 'golf' | 'dog' | 'to' + ... 'dog' -> 'his' + ... """) + >>> print(grammar) + Dependency grammar with 7 productions + 'taught' -> 'play' + 'taught' -> 'man' + 'man' -> 'the' + 'play' -> 'golf' + 'play' -> 'dog' + 'play' -> 'to' + 'dog' -> 'his' + + >>> dp = NonprojectiveDependencyParser(grammar) + >>> g, = dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf']) + + >>> print(g.root['word']) + taught + + >>> for _, node in sorted(g.nodes.items()): + ... if node['word'] is not None: + ... print('{address} {word}: {d}'.format(d=node['deps'][''], **node)) + 1 the: [] + 2 man: [1] + 3 taught: [2, 7] + 4 his: [] + 5 dog: [4] + 6 to: [] + 7 play: [5, 6, 8] + 8 golf: [] + + >>> print(g.tree()) + (taught (man the) (play (dog his) to golf)) + +Integration with MALT parser +============================ + +In case the top relation is different from the default, we can set it. In case +of MALT parser, it's set to `'null'`. + +>>> dg_str = """1 I _ NN NN _ 2 nn _ _ +... 2 shot _ NN NN _ 0 null _ _ +... 3 an _ AT AT _ 2 dep _ _ +... 4 elephant _ NN NN _ 7 nn _ _ +... 5 in _ NN NN _ 7 nn _ _ +... 6 my _ NN NN _ 7 nn _ _ +... 7 pajamas _ NNS NNS _ 3 dobj _ _ +... """ +>>> dg = DependencyGraph(dg_str, top_relation_label='null') + +>>> len(dg.nodes) +8 + +>>> dg.root['word'], dg.root['address'] +('shot', 2) + +>>> print(dg.to_conll(10)) # doctest: +NORMALIZE_WHITESPACE +1 I _ NN NN _ 2 nn _ _ +2 shot _ NN NN _ 0 null _ _ +3 an _ AT AT _ 2 dep _ _ +4 elephant _ NN NN _ 7 nn _ _ +5 in _ NN NN _ 7 nn _ _ +6 my _ NN NN _ 7 nn _ _ +7 pajamas _ NNS NNS _ 3 dobj _ _ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/discourse.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/discourse.doctest new file mode 100644 index 0000000..df18fde --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/discourse.doctest @@ -0,0 +1,546 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +================== +Discourse Checking +================== + + >>> from nltk import * + >>> from nltk.sem import logic + >>> logic._counter._value = 0 + +Introduction +============ + +The NLTK discourse module makes it possible to test consistency and +redundancy of simple discourses, using theorem-proving and +model-building from `nltk.inference`. + +The ``DiscourseTester`` constructor takes a list of sentences as a +parameter. + + >>> dt = DiscourseTester(['a boxer walks', 'every boxer chases a girl']) + +The ``DiscourseTester`` parses each sentence into a list of logical +forms. Once we have created ``DiscourseTester`` object, we can +inspect various properties of the discourse. First off, we might want +to double-check what sentences are currently stored as the discourse. + + >>> dt.sentences() + s0: a boxer walks + s1: every boxer chases a girl + +As you will see, each sentence receives an identifier `s`\ :subscript:`i`. 
+We might also want to check what grammar the ``DiscourseTester`` is +using (by default, ``book_grammars/discourse.fcfg``): + + >>> dt.grammar() # doctest: +ELLIPSIS + % start S + # Grammar Rules + S[SEM = ] -> NP[NUM=?n,SEM=?subj] VP[NUM=?n,SEM=?vp] + NP[NUM=?n,SEM= ] -> Det[NUM=?n,SEM=?det] Nom[NUM=?n,SEM=?nom] + NP[LOC=?l,NUM=?n,SEM=?np] -> PropN[LOC=?l,NUM=?n,SEM=?np] + ... + +A different grammar can be invoked by using the optional ``gramfile`` +parameter when a ``DiscourseTester`` object is created. + +Readings and Threads +==================== + +Depending on +the grammar used, we may find some sentences have more than one +logical form. To check this, use the ``readings()`` method. Given a +sentence identifier of the form `s`\ :subscript:`i`, each reading of +that sentence is given an identifier `s`\ :sub:`i`-`r`\ :sub:`j`. + + + >>> dt.readings() + + s0 readings: + + s0-r0: exists z1.(boxer(z1) & walk(z1)) + s0-r1: exists z1.(boxerdog(z1) & walk(z1)) + + s1 readings: + + s1-r0: all z2.(boxer(z2) -> exists z3.(girl(z3) & chase(z2,z3))) + s1-r1: all z1.(boxerdog(z1) -> exists z2.(girl(z2) & chase(z1,z2))) + + +In this case, the only source of ambiguity lies in the word *boxer*, +which receives two translations: ``boxer`` and ``boxerdog``. The +intention is that one of these corresponds to the ``person`` sense and +one to the ``dog`` sense. In principle, we would also expect to see a +quantifier scope ambiguity in ``s1``. However, the simple grammar we +are using, namely `sem4.fcfg `_, doesn't support quantifier +scope ambiguity. + +We can also investigate the readings of a specific sentence: + + >>> dt.readings('a boxer walks') + The sentence 'a boxer walks' has these readings: + exists x.(boxer(x) & walk(x)) + exists x.(boxerdog(x) & walk(x)) + +Given that each sentence is two-ways ambiguous, we potentially have +four different discourse 'threads', taking all combinations of +readings. To see these, specify the ``threaded=True`` parameter on +the ``readings()`` method. Again, each thread is assigned an +identifier of the form `d`\ :sub:`i`. Following the identifier is a +list of the readings that constitute that thread. + + >>> dt.readings(threaded=True) # doctest: +NORMALIZE_WHITESPACE + d0: ['s0-r0', 's1-r0'] + d1: ['s0-r0', 's1-r1'] + d2: ['s0-r1', 's1-r0'] + d3: ['s0-r1', 's1-r1'] + +Of course, this simple-minded approach doesn't scale: a discourse with, say, three +sentences, each of which has 3 readings, will generate 27 different +threads. It is an interesting exercise to consider how to manage +discourse ambiguity more efficiently. + +Checking Consistency +==================== + +Now, we can check whether some or all of the discourse threads are +consistent, using the ``models()`` method. With no parameter, this +method will try to find a model for every discourse thread in the +current discourse. However, we can also specify just one thread, say ``d1``. + + >>> dt.models('d1') + -------------------------------------------------------------------------------- + Model for Discourse Thread d1 + -------------------------------------------------------------------------------- + % number = 1 + % seconds = 0 + + % Interpretation of size 2 + + c1 = 0. + + f1(0) = 0. + f1(1) = 0. + + boxer(0). + - boxer(1). + + - boxerdog(0). + - boxerdog(1). + + - girl(0). + - girl(1). + + walk(0). + - walk(1). + + - chase(0,0). + - chase(0,1). + - chase(1,0). + - chase(1,1). 
+ + Consistent discourse: d1 ['s0-r0', 's1-r1']: + s0-r0: exists z1.(boxer(z1) & walk(z1)) + s1-r1: all z1.(boxerdog(z1) -> exists z2.(girl(z2) & chase(z1,z2))) + + +There are various formats for rendering **Mace4** models --- here, +we have used the 'cooked' format (which is intended to be +human-readable). There are a number of points to note. + +#. The entities in the domain are all treated as non-negative + integers. In this case, there are only two entities, ``0`` and + ``1``. + +#. The ``-`` symbol indicates negation. So ``0`` is the only + ``boxerdog`` and the only thing that ``walk``\ s. Nothing is a + ``boxer``, or a ``girl`` or in the ``chase`` relation. Thus the + universal sentence is vacuously true. + +#. ``c1`` is an introduced constant that denotes ``0``. + +#. ``f1`` is a Skolem function, but it plays no significant role in + this model. + + +We might want to now add another sentence to the discourse, and there +is method ``add_sentence()`` for doing just this. + + >>> dt.add_sentence('John is a boxer') + >>> dt.sentences() + s0: a boxer walks + s1: every boxer chases a girl + s2: John is a boxer + +We can now test all the properties as before; here, we just show a +couple of them. + + >>> dt.readings() + + s0 readings: + + s0-r0: exists z1.(boxer(z1) & walk(z1)) + s0-r1: exists z1.(boxerdog(z1) & walk(z1)) + + s1 readings: + + s1-r0: all z1.(boxer(z1) -> exists z2.(girl(z2) & chase(z1,z2))) + s1-r1: all z1.(boxerdog(z1) -> exists z2.(girl(z2) & chase(z1,z2))) + + s2 readings: + + s2-r0: boxer(John) + s2-r1: boxerdog(John) + >>> dt.readings(threaded=True) # doctest: +NORMALIZE_WHITESPACE + d0: ['s0-r0', 's1-r0', 's2-r0'] + d1: ['s0-r0', 's1-r0', 's2-r1'] + d2: ['s0-r0', 's1-r1', 's2-r0'] + d3: ['s0-r0', 's1-r1', 's2-r1'] + d4: ['s0-r1', 's1-r0', 's2-r0'] + d5: ['s0-r1', 's1-r0', 's2-r1'] + d6: ['s0-r1', 's1-r1', 's2-r0'] + d7: ['s0-r1', 's1-r1', 's2-r1'] + +If you are interested in a particular thread, the ``expand_threads()`` +method will remind you of what readings it consists of: + + >>> thread = dt.expand_threads('d1') + >>> for rid, reading in thread: + ... print(rid, str(reading.normalize())) + s0-r0 exists z1.(boxer(z1) & walk(z1)) + s1-r0 all z1.(boxer(z1) -> exists z2.(girl(z2) & chase(z1,z2))) + s2-r1 boxerdog(John) + +Suppose we have already defined a discourse, as follows: + + >>> dt = DiscourseTester(['A student dances', 'Every student is a person']) + +Now, when we add a new sentence, is it consistent with what we already +have? The `` consistchk=True`` parameter of ``add_sentence()`` allows +us to check: + + >>> dt.add_sentence('No person dances', consistchk=True) + Inconsistent discourse: d0 ['s0-r0', 's1-r0', 's2-r0']: + s0-r0: exists z1.(student(z1) & dance(z1)) + s1-r0: all z1.(student(z1) -> person(z1)) + s2-r0: -exists z1.(person(z1) & dance(z1)) + + >>> dt.readings() + + s0 readings: + + s0-r0: exists z1.(student(z1) & dance(z1)) + + s1 readings: + + s1-r0: all z1.(student(z1) -> person(z1)) + + s2 readings: + + s2-r0: -exists z1.(person(z1) & dance(z1)) + +So let's retract the inconsistent sentence: + + >>> dt.retract_sentence('No person dances', verbose=True) # doctest: +NORMALIZE_WHITESPACE + Current sentences are + s0: A student dances + s1: Every student is a person + +We can now verify that result is consistent. 
+ + >>> dt.models() + -------------------------------------------------------------------------------- + Model for Discourse Thread d0 + -------------------------------------------------------------------------------- + % number = 1 + % seconds = 0 + + % Interpretation of size 2 + + c1 = 0. + + dance(0). + - dance(1). + + person(0). + - person(1). + + student(0). + - student(1). + + Consistent discourse: d0 ['s0-r0', 's1-r0']: + s0-r0: exists z1.(student(z1) & dance(z1)) + s1-r0: all z1.(student(z1) -> person(z1)) + + +Checking Informativity +====================== + +Let's assume that we are still trying to extend the discourse *A +student dances.* *Every student is a person.* We add a new sentence, +but this time, we check whether it is informative with respect to what +has gone before. + + >>> dt.add_sentence('A person dances', informchk=True) + Sentence 'A person dances' under reading 'exists x.(person(x) & dance(x))': + Not informative relative to thread 'd0' + +In fact, we are just checking whether the new sentence is entailed by +the preceding discourse. + + >>> dt.models() + -------------------------------------------------------------------------------- + Model for Discourse Thread d0 + -------------------------------------------------------------------------------- + % number = 1 + % seconds = 0 + + % Interpretation of size 2 + + c1 = 0. + + c2 = 0. + + dance(0). + - dance(1). + + person(0). + - person(1). + + student(0). + - student(1). + + Consistent discourse: d0 ['s0-r0', 's1-r0', 's2-r0']: + s0-r0: exists z1.(student(z1) & dance(z1)) + s1-r0: all z1.(student(z1) -> person(z1)) + s2-r0: exists z1.(person(z1) & dance(z1)) + + + + +Adding Background Knowledge +=========================== + +Let's build a new discourse, and look at the readings of the component sentences: + + >>> dt = DiscourseTester(['Vincent is a boxer', 'Fido is a boxer', 'Vincent is married', 'Fido barks']) + >>> dt.readings() + + s0 readings: + + s0-r0: boxer(Vincent) + s0-r1: boxerdog(Vincent) + + s1 readings: + + s1-r0: boxer(Fido) + s1-r1: boxerdog(Fido) + + s2 readings: + + s2-r0: married(Vincent) + + s3 readings: + + s3-r0: bark(Fido) + +This gives us a lot of threads: + + >>> dt.readings(threaded=True) # doctest: +NORMALIZE_WHITESPACE + d0: ['s0-r0', 's1-r0', 's2-r0', 's3-r0'] + d1: ['s0-r0', 's1-r1', 's2-r0', 's3-r0'] + d2: ['s0-r1', 's1-r0', 's2-r0', 's3-r0'] + d3: ['s0-r1', 's1-r1', 's2-r0', 's3-r0'] + + +We can eliminate some of the readings, and hence some of the threads, +by adding background information. + + >>> import nltk.data + >>> bg = nltk.data.load('grammars/book_grammars/background.fol') + >>> dt.add_background(bg) + >>> dt.background() + all x.(boxerdog(x) -> dog(x)) + all x.(boxer(x) -> person(x)) + all x.-(dog(x) & person(x)) + all x.(married(x) <-> exists y.marry(x,y)) + all x.(bark(x) -> dog(x)) + all x y.(marry(x,y) -> (person(x) & person(y))) + -(Vincent = Mia) + -(Vincent = Fido) + -(Mia = Fido) + +The background information allows us to reject three of the threads as +inconsistent. To see what remains, use the ``filter=True`` parameter +on ``readings()``. + + >>> dt.readings(filter=True) # doctest: +NORMALIZE_WHITESPACE + d1: ['s0-r0', 's1-r1', 's2-r0', 's3-r0'] + +The ``models()`` method gives us more information about the surviving thread. + + >>> dt.models() + -------------------------------------------------------------------------------- + Model for Discourse Thread d0 + -------------------------------------------------------------------------------- + No model found! 
+ + -------------------------------------------------------------------------------- + Model for Discourse Thread d1 + -------------------------------------------------------------------------------- + % number = 1 + % seconds = 0 + + % Interpretation of size 3 + + Fido = 0. + + Mia = 1. + + Vincent = 2. + + f1(0) = 0. + f1(1) = 0. + f1(2) = 2. + + bark(0). + - bark(1). + - bark(2). + + - boxer(0). + - boxer(1). + boxer(2). + + boxerdog(0). + - boxerdog(1). + - boxerdog(2). + + dog(0). + - dog(1). + - dog(2). + + - married(0). + - married(1). + married(2). + + - person(0). + - person(1). + person(2). + + - marry(0,0). + - marry(0,1). + - marry(0,2). + - marry(1,0). + - marry(1,1). + - marry(1,2). + - marry(2,0). + - marry(2,1). + marry(2,2). + + -------------------------------------------------------------------------------- + Model for Discourse Thread d2 + -------------------------------------------------------------------------------- + No model found! + + -------------------------------------------------------------------------------- + Model for Discourse Thread d3 + -------------------------------------------------------------------------------- + No model found! + + Inconsistent discourse: d0 ['s0-r0', 's1-r0', 's2-r0', 's3-r0']: + s0-r0: boxer(Vincent) + s1-r0: boxer(Fido) + s2-r0: married(Vincent) + s3-r0: bark(Fido) + + Consistent discourse: d1 ['s0-r0', 's1-r1', 's2-r0', 's3-r0']: + s0-r0: boxer(Vincent) + s1-r1: boxerdog(Fido) + s2-r0: married(Vincent) + s3-r0: bark(Fido) + + Inconsistent discourse: d2 ['s0-r1', 's1-r0', 's2-r0', 's3-r0']: + s0-r1: boxerdog(Vincent) + s1-r0: boxer(Fido) + s2-r0: married(Vincent) + s3-r0: bark(Fido) + + Inconsistent discourse: d3 ['s0-r1', 's1-r1', 's2-r0', 's3-r0']: + s0-r1: boxerdog(Vincent) + s1-r1: boxerdog(Fido) + s2-r0: married(Vincent) + s3-r0: bark(Fido) + + + +.. This will not be visible in the html output: create a tempdir to + play in. + >>> import tempfile, os + >>> tempdir = tempfile.mkdtemp() + >>> old_dir = os.path.abspath('.') + >>> os.chdir(tempdir) + +In order to play around with your own version of background knowledge, +you might want to start off with a local copy of ``background.fol``: + + >>> nltk.data.retrieve('grammars/book_grammars/background.fol') + Retrieving 'nltk:grammars/book_grammars/background.fol', saving to 'background.fol' + +After you have modified the file, the ``load_fol()`` function will parse +the strings in the file into expressions of ``nltk.sem.logic``. + + >>> from nltk.inference.discourse import load_fol + >>> mybg = load_fol(open('background.fol').read()) + +The result can be loaded as an argument of ``add_background()`` in the +manner shown earlier. + +.. This will not be visible in the html output: clean up the tempdir. + >>> os.chdir(old_dir) + >>> for f in os.listdir(tempdir): + ... os.remove(os.path.join(tempdir, f)) + >>> os.rmdir(tempdir) + >>> nltk.data.clear_cache() + + +Regression Testing from book +============================ + + >>> logic._counter._value = 0 + + >>> from nltk.tag import RegexpTagger + >>> tagger = RegexpTagger( + ... [('^(chases|runs)$', 'VB'), + ... ('^(a)$', 'ex_quant'), + ... ('^(every)$', 'univ_quant'), + ... ('^(dog|boy)$', 'NN'), + ... ('^(He)$', 'PRP') + ... 
]) + >>> rc = DrtGlueReadingCommand(depparser=MaltParser(tagger=tagger)) + >>> dt = DiscourseTester(map(str.split, ['Every dog chases a boy', 'He runs']), rc) + >>> dt.readings() + + s0 readings: + + s0-r0: ([z2],[boy(z2), (([z5],[dog(z5)]) -> ([],[chases(z5,z2)]))]) + s0-r1: ([],[(([z1],[dog(z1)]) -> ([z2],[boy(z2), chases(z1,z2)]))]) + + s1 readings: + + s1-r0: ([z1],[PRO(z1), runs(z1)]) + >>> dt.readings(show_thread_readings=True) + d0: ['s0-r0', 's1-r0'] : ([z1,z2],[boy(z1), (([z3],[dog(z3)]) -> ([],[chases(z3,z1)])), (z2 = z1), runs(z2)]) + d1: ['s0-r1', 's1-r0'] : INVALID: AnaphoraResolutionException + >>> dt.readings(filter=True, show_thread_readings=True) + d0: ['s0-r0', 's1-r0'] : ([z1,z3],[boy(z1), (([z2],[dog(z2)]) -> ([],[chases(z2,z1)])), (z3 = z1), runs(z3)]) + + >>> logic._counter._value = 0 + + >>> from nltk.parse import FeatureEarleyChartParser + >>> from nltk.sem.drt import DrtParser + >>> grammar = nltk.data.load('grammars/book_grammars/drt.fcfg', logic_parser=DrtParser()) + >>> parser = FeatureEarleyChartParser(grammar, trace=0) + >>> trees = parser.parse('Angus owns a dog'.split()) + >>> print(list(trees)[0].label()['SEM'].simplify().normalize()) + ([z1,z2],[Angus(z1), dog(z2), own(z1,z2)]) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/discourse_fixt.py b/venv.bak/lib/python3.7/site-packages/nltk/test/discourse_fixt.py new file mode 100644 index 0000000..d3ab46f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/discourse_fixt.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + + +# FIXME: the entire discourse.doctest is skipped if Prover9/Mace4 is +# not installed, but there are pure-python parts that don't need Prover9. +def setup_module(module): + from nose import SkipTest + from nltk.inference.mace import Mace + + try: + m = Mace() + m._find_binary('mace4') + except LookupError: + raise SkipTest("Mace4/Prover9 is not available so discourse.doctest is skipped") diff --git a/venv/lib/python3.7/site-packages/nltk/test/doctest_nose_plugin.py b/venv.bak/lib/python3.7/site-packages/nltk/test/doctest_nose_plugin.py similarity index 100% rename from venv/lib/python3.7/site-packages/nltk/test/doctest_nose_plugin.py rename to venv.bak/lib/python3.7/site-packages/nltk/test/doctest_nose_plugin.py diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/drt.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/drt.doctest new file mode 100644 index 0000000..6163052 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/drt.doctest @@ -0,0 +1,517 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +================================ + Discourse Representation Theory +================================ + + >>> from nltk.sem import logic + >>> from nltk.inference import TableauProver + +Overview +======== + +A DRS can be created with the ``DRS()`` constructor. This takes two arguments: a list of +discourse referents and list of conditions. . + + >>> from nltk.sem.drt import * + >>> dexpr = DrtExpression.fromstring + >>> man_x = dexpr('man(x)') + >>> walk_x = dexpr('walk(x)') + >>> x = dexpr('x') + >>> print(DRS([x], [man_x, walk_x])) + ([x],[man(x), walk(x)]) + +The ``parse()`` method can also be applied directly to DRS +expressions, which allows them to be specified more +easily. + + >>> drs1 = dexpr('([x],[man(x),walk(x)])') + >>> print(drs1) + ([x],[man(x), walk(x)]) + +DRSs can be *merged* using the ``+`` operator. 
+ + >>> drs2 = dexpr('([y],[woman(y),stop(y)])') + >>> drs3 = drs1 + drs2 + >>> print(drs3) + (([x],[man(x), walk(x)]) + ([y],[woman(y), stop(y)])) + >>> print(drs3.simplify()) + ([x,y],[man(x), walk(x), woman(y), stop(y)]) + +We can embed DRSs as components of an ``implies`` condition. + + >>> s = '([], [(%s -> %s)])' % (drs1, drs2) + >>> print(dexpr(s)) + ([],[(([x],[man(x), walk(x)]) -> ([y],[woman(y), stop(y)]))]) + +The ``fol()`` method converts DRSs into FOL formulae. + + >>> print(dexpr(r'([x],[man(x), walks(x)])').fol()) + exists x.(man(x) & walks(x)) + >>> print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])').fol()) + all x.(man(x) -> walks(x)) + +In order to visualize a DRS, the ``pretty_format()`` method can be used. + + >>> print(drs3.pretty_format()) + _________ __________ + | x | | y | + (|---------| + |----------|) + | man(x) | | woman(y) | + | walk(x) | | stop(y) | + |_________| |__________| + + +Parse to semantics +------------------ + +.. + >>> logic._counter._value = 0 + +DRSs can be used for building compositional semantics in a feature +based grammar. To specify that we want to use DRSs, the appropriate +logic parser needs be passed as a parameter to ``load_earley()`` + + >>> from nltk.parse import load_parser + >>> from nltk.sem.drt import DrtParser + >>> parser = load_parser('grammars/book_grammars/drt.fcfg', trace=0, logic_parser=DrtParser()) + >>> for tree in parser.parse('a dog barks'.split()): + ... print(tree.label()['SEM'].simplify()) + ... + ([x],[dog(x), bark(x)]) + +Alternatively, a ``FeatStructReader`` can be passed with the ``logic_parser`` set on it + + >>> from nltk.featstruct import FeatStructReader + >>> from nltk.grammar import FeatStructNonterminal + >>> parser = load_parser('grammars/book_grammars/drt.fcfg', trace=0, fstruct_reader=FeatStructReader(fdict_class=FeatStructNonterminal, logic_parser=DrtParser())) + >>> for tree in parser.parse('every girl chases a dog'.split()): + ... print(tree.label()['SEM'].simplify().normalize()) + ... 
+ ([],[(([z1],[girl(z1)]) -> ([z2],[dog(z2), chase(z1,z2)]))]) + + + +Unit Tests +========== + +Parser +------ + + >>> print(dexpr(r'([x,y],[sees(x,y)])')) + ([x,y],[sees(x,y)]) + >>> print(dexpr(r'([x],[man(x), walks(x)])')) + ([x],[man(x), walks(x)]) + >>> print(dexpr(r'\x.([],[man(x), walks(x)])')) + \x.([],[man(x), walks(x)]) + >>> print(dexpr(r'\x.\y.([],[sees(x,y)])')) + \x y.([],[sees(x,y)]) + + >>> print(dexpr(r'([x,y],[(x = y)])')) + ([x,y],[(x = y)]) + >>> print(dexpr(r'([x,y],[(x != y)])')) + ([x,y],[-(x = y)]) + + >>> print(dexpr(r'\x.([],[walks(x)])(john)')) + (\x.([],[walks(x)]))(john) + >>> print(dexpr(r'\R.\x.([],[big(x,R)])(\y.([],[mouse(y)]))')) + (\R x.([],[big(x,R)]))(\y.([],[mouse(y)])) + + >>> print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))')) + (([x],[walks(x)]) + ([y],[runs(y)])) + >>> print(dexpr(r'(([x,y],[walks(x), jumps(y)]) + (([z],[twos(z)]) + ([w],[runs(w)])))')) + (([x,y],[walks(x), jumps(y)]) + ([z],[twos(z)]) + ([w],[runs(w)])) + >>> print(dexpr(r'((([],[walks(x)]) + ([],[twos(x)])) + ([],[runs(x)]))')) + (([],[walks(x)]) + ([],[twos(x)]) + ([],[runs(x)])) + >>> print(dexpr(r'((([],[walks(x)]) + ([],[runs(x)])) + (([],[threes(x)]) + ([],[fours(x)])))')) + (([],[walks(x)]) + ([],[runs(x)]) + ([],[threes(x)]) + ([],[fours(x)])) + + >>> print(dexpr(r'(([],[walks(x)]) -> ([],[runs(x)]))')) + (([],[walks(x)]) -> ([],[runs(x)])) + + >>> print(dexpr(r'([x],[PRO(x), sees(John,x)])')) + ([x],[PRO(x), sees(John,x)]) + >>> print(dexpr(r'([x],[man(x), -([],[walks(x)])])')) + ([x],[man(x), -([],[walks(x)])]) + >>> print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])')) + ([],[(([x],[man(x)]) -> ([],[walks(x)]))]) + + >>> print(dexpr(r'DRS([x],[walk(x)])')) + ([x],[walk(x)]) + >>> print(dexpr(r'DRS([x][walk(x)])')) + ([x],[walk(x)]) + >>> print(dexpr(r'([x][walk(x)])')) + ([x],[walk(x)]) + +``simplify()`` +-------------- + + >>> print(dexpr(r'\x.([],[man(x), walks(x)])(john)').simplify()) + ([],[man(john), walks(john)]) + >>> print(dexpr(r'\x.\y.([z],[dog(z),sees(x,y)])(john)(mary)').simplify()) + ([z],[dog(z), sees(john,mary)]) + >>> print(dexpr(r'\R x.([],[big(x,R)])(\y.([],[mouse(y)]))').simplify()) + \x.([],[big(x,\y.([],[mouse(y)]))]) + + >>> print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))').simplify()) + ([x,y],[walks(x), runs(y)]) + >>> print(dexpr(r'(([x,y],[walks(x), jumps(y)]) + (([z],[twos(z)]) + ([w],[runs(w)])))').simplify()) + ([w,x,y,z],[walks(x), jumps(y), twos(z), runs(w)]) + >>> print(dexpr(r'((([],[walks(x)]) + ([],[runs(x)]) + ([],[threes(x)]) + ([],[fours(x)])))').simplify()) + ([],[walks(x), runs(x), threes(x), fours(x)]) + >>> dexpr(r'([x],[man(x)])+([x],[walks(x)])').simplify() == \ + ... dexpr(r'([x,z1],[man(x), walks(z1)])') + True + >>> dexpr(r'([y],[boy(y), (([x],[dog(x)]) -> ([],[chase(x,y)]))])+([x],[run(x)])').simplify() == \ + ... dexpr(r'([y,z1],[boy(y), (([x],[dog(x)]) -> ([],[chase(x,y)])), run(z1)])') + True + + >>> dexpr(r'\Q.(([x],[john(x),walks(x)]) + Q)(([x],[PRO(x),leaves(x)]))').simplify() == \ + ... 
dexpr(r'([x,z1],[john(x), walks(x), PRO(z1), leaves(z1)])') + True + + >>> logic._counter._value = 0 + >>> print(dexpr('([],[(([x],[dog(x)]) -> ([e,y],[boy(y), chase(e), subj(e,x), obj(e,y)]))])+([e,x],[PRO(x), run(e), subj(e,x)])').simplify().normalize().normalize()) + ([e02,z5],[(([z3],[dog(z3)]) -> ([e01,z4],[boy(z4), chase(e01), subj(e01,z3), obj(e01,z4)])), PRO(z5), run(e02), subj(e02,z5)]) + +``fol()`` +----------- + + >>> print(dexpr(r'([x,y],[sees(x,y)])').fol()) + exists x y.sees(x,y) + >>> print(dexpr(r'([x],[man(x), walks(x)])').fol()) + exists x.(man(x) & walks(x)) + >>> print(dexpr(r'\x.([],[man(x), walks(x)])').fol()) + \x.(man(x) & walks(x)) + >>> print(dexpr(r'\x y.([],[sees(x,y)])').fol()) + \x y.sees(x,y) + + >>> print(dexpr(r'\x.([],[walks(x)])(john)').fol()) + \x.walks(x)(john) + >>> print(dexpr(r'\R x.([],[big(x,R)])(\y.([],[mouse(y)]))').fol()) + (\R x.big(x,R))(\y.mouse(y)) + + >>> print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))').fol()) + (exists x.walks(x) & exists y.runs(y)) + + >>> print(dexpr(r'(([],[walks(x)]) -> ([],[runs(x)]))').fol()) + (walks(x) -> runs(x)) + + >>> print(dexpr(r'([x],[PRO(x), sees(John,x)])').fol()) + exists x.(PRO(x) & sees(John,x)) + >>> print(dexpr(r'([x],[man(x), -([],[walks(x)])])').fol()) + exists x.(man(x) & -walks(x)) + >>> print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])').fol()) + all x.(man(x) -> walks(x)) + + >>> print(dexpr(r'([x],[man(x) | walks(x)])').fol()) + exists x.(man(x) | walks(x)) + >>> print(dexpr(r'P(x) + ([x],[walks(x)])').fol()) + (P(x) & exists x.walks(x)) + +``resolve_anaphora()`` +---------------------- + + >>> from nltk.sem.drt import AnaphoraResolutionException + + >>> print(resolve_anaphora(dexpr(r'([x,y,z],[dog(x), cat(y), walks(z), PRO(z)])'))) + ([x,y,z],[dog(x), cat(y), walks(z), (z = [x,y])]) + >>> print(resolve_anaphora(dexpr(r'([],[(([x],[dog(x)]) -> ([y],[walks(y), PRO(y)]))])'))) + ([],[(([x],[dog(x)]) -> ([y],[walks(y), (y = x)]))]) + >>> print(resolve_anaphora(dexpr(r'(([x,y],[]) + ([],[PRO(x)]))')).simplify()) + ([x,y],[(x = y)]) + >>> try: print(resolve_anaphora(dexpr(r'([x],[walks(x), PRO(x)])'))) + ... except AnaphoraResolutionException as e: print(e) + Variable 'x' does not resolve to anything. + >>> print(resolve_anaphora(dexpr('([e01,z6,z7],[boy(z6), PRO(z7), run(e01), subj(e01,z7)])'))) + ([e01,z6,z7],[boy(z6), (z7 = z6), run(e01), subj(e01,z7)]) + +``equiv()``: +---------------- + + >>> a = dexpr(r'([x],[man(x), walks(x)])') + >>> b = dexpr(r'([x],[walks(x), man(x)])') + >>> print(a.equiv(b, TableauProver())) + True + + +``replace()``: +-------------- + + >>> a = dexpr(r'a') + >>> w = dexpr(r'w') + >>> x = dexpr(r'x') + >>> y = dexpr(r'y') + >>> z = dexpr(r'z') + + +replace bound +------------- + + >>> print(dexpr(r'([x],[give(x,y,z)])').replace(x.variable, a, False)) + ([x],[give(x,y,z)]) + >>> print(dexpr(r'([x],[give(x,y,z)])').replace(x.variable, a, True)) + ([a],[give(a,y,z)]) + +replace unbound +--------------- + + >>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, a, False)) + ([x],[give(x,a,z)]) + >>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, a, True)) + ([x],[give(x,a,z)]) + +replace unbound with bound +-------------------------- + + >>> dexpr(r'([x],[give(x,y,z)])').replace(y.variable, x, False) == \ + ... dexpr('([z1],[give(z1,x,z)])') + True + >>> dexpr(r'([x],[give(x,y,z)])').replace(y.variable, x, True) == \ + ... 
dexpr('([z1],[give(z1,x,z)])') + True + +replace unbound with unbound +---------------------------- + + >>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, z, False)) + ([x],[give(x,z,z)]) + >>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, z, True)) + ([x],[give(x,z,z)]) + + +replace unbound +--------------- + + >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, False)) + (([x],[P(x,y,a)]) + ([y],[Q(x,y,a)])) + >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, True)) + (([x],[P(x,y,a)]) + ([y],[Q(x,y,a)])) + +replace bound +------------- + + >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(x.variable, a, False)) + (([x],[P(x,y,z)]) + ([y],[Q(x,y,z)])) + >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(x.variable, a, True)) + (([a],[P(a,y,z)]) + ([y],[Q(a,y,z)])) + +replace unbound with unbound +---------------------------- + + >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, False)) + (([x],[P(x,y,a)]) + ([y],[Q(x,y,a)])) + >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, True)) + (([x],[P(x,y,a)]) + ([y],[Q(x,y,a)])) + +replace unbound with bound on same side +--------------------------------------- + + >>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(z.variable, x, False) == \ + ... dexpr(r'(([z1],[P(z1,y,x)]) + ([y],[Q(z1,y,w)]))') + True + >>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(z.variable, x, True) == \ + ... dexpr(r'(([z1],[P(z1,y,x)]) + ([y],[Q(z1,y,w)]))') + True + +replace unbound with bound on other side +---------------------------------------- + + >>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(w.variable, x, False) == \ + ... dexpr(r'(([z1],[P(z1,y,z)]) + ([y],[Q(z1,y,x)]))') + True + >>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(w.variable, x, True) == \ + ... dexpr(r'(([z1],[P(z1,y,z)]) + ([y],[Q(z1,y,x)]))') + True + +replace unbound with double bound +--------------------------------- + + >>> dexpr(r'([x],[P(x,y,z)])+([x],[Q(x,y,w)])').replace(z.variable, x, False) == \ + ... dexpr(r'(([z1],[P(z1,y,x)]) + ([z1],[Q(z1,y,w)]))') + True + >>> dexpr(r'([x],[P(x,y,z)])+([x],[Q(x,y,w)])').replace(z.variable, x, True) == \ + ... 
dexpr(r'(([z1],[P(z1,y,x)]) + ([z1],[Q(z1,y,w)]))') + True + + +regression tests +---------------- + + >>> d = dexpr('([x],[A(c), ([y],[B(x,y,z,a)])->([z],[C(x,y,z,a)])])') + >>> print(d) + ([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))]) + >>> print(d.pretty_format()) + ____________________________________ + | x | + |------------------------------------| + | A(c) | + | ____________ ____________ | + | | y | | z | | + | (|------------| -> |------------|) | + | | B(x,y,z,a) | | C(x,y,z,a) | | + | |____________| |____________| | + |____________________________________| + >>> print(str(d)) + ([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))]) + >>> print(d.fol()) + exists x.(A(c) & all y.(B(x,y,z,a) -> exists z.C(x,y,z,a))) + >>> print(d.replace(Variable('a'), DrtVariableExpression(Variable('r')))) + ([x],[A(c), (([y],[B(x,y,z,r)]) -> ([z],[C(x,y,z,r)]))]) + >>> print(d.replace(Variable('x'), DrtVariableExpression(Variable('r')))) + ([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))]) + >>> print(d.replace(Variable('y'), DrtVariableExpression(Variable('r')))) + ([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))]) + >>> print(d.replace(Variable('z'), DrtVariableExpression(Variable('r')))) + ([x],[A(c), (([y],[B(x,y,r,a)]) -> ([z],[C(x,y,z,a)]))]) + >>> print(d.replace(Variable('x'), DrtVariableExpression(Variable('r')), True)) + ([r],[A(c), (([y],[B(r,y,z,a)]) -> ([z],[C(r,y,z,a)]))]) + >>> print(d.replace(Variable('y'), DrtVariableExpression(Variable('r')), True)) + ([x],[A(c), (([r],[B(x,r,z,a)]) -> ([z],[C(x,r,z,a)]))]) + >>> print(d.replace(Variable('z'), DrtVariableExpression(Variable('r')), True)) + ([x],[A(c), (([y],[B(x,y,r,a)]) -> ([r],[C(x,y,r,a)]))]) + >>> print(d == dexpr('([l],[A(c), ([m],[B(l,m,z,a)])->([n],[C(l,m,n,a)])])')) + True + >>> d = dexpr('([],[([x,y],[B(x,y,h), ([a,b],[dee(x,a,g)])])->([z,w],[cee(x,y,f), ([c,d],[E(x,c,d,e)])])])') + >>> sorted(d.free()) + [Variable('B'), Variable('E'), Variable('e'), Variable('f'), Variable('g'), Variable('h')] + >>> sorted(d.variables()) + [Variable('B'), Variable('E'), Variable('e'), Variable('f'), Variable('g'), Variable('h')] + >>> sorted(d.get_refs(True)) + [Variable('a'), Variable('b'), Variable('c'), Variable('d'), Variable('w'), Variable('x'), Variable('y'), Variable('z')] + >>> sorted(d.conds[0].get_refs(False)) + [Variable('x'), Variable('y')] + >>> print(dexpr('([x,y],[A(x,y), (x=y), ([],[B(x,y)])->([],[C(x,y)]), ([x,y],[D(x,y)])->([],[E(x,y)]), ([],[F(x,y)])->([x,y],[G(x,y)])])').eliminate_equality()) + ([x],[A(x,x), (([],[B(x,x)]) -> ([],[C(x,x)])), (([x,y],[D(x,y)]) -> ([],[E(x,y)])), (([],[F(x,x)]) -> ([x,y],[G(x,y)]))]) + >>> print(dexpr('([x,y],[A(x,y), (x=y)]) -> ([],[B(x,y)])').eliminate_equality()) + (([x],[A(x,x)]) -> ([],[B(x,x)])) + >>> print(dexpr('([x,y],[A(x,y)]) -> ([],[B(x,y), (x=y)])').eliminate_equality()) + (([x,y],[A(x,y)]) -> ([],[B(x,x)])) + >>> print(dexpr('([x,y],[A(x,y), (x=y), ([],[B(x,y)])])').eliminate_equality()) + ([x],[A(x,x), ([],[B(x,x)])]) + >>> print(dexpr('([x,y],[A(x,y), ([],[B(x,y), (x=y)])])').eliminate_equality()) + ([x,y],[A(x,y), ([],[B(x,x)])]) + >>> print(dexpr('([z8 z9 z10],[A(z8), z8=z10, z9=z10, B(z9), C(z10), D(z10)])').eliminate_equality()) + ([z9],[A(z9), B(z9), C(z9), D(z9)]) + + >>> print(dexpr('([x,y],[A(x,y), (x=y), ([],[B(x,y)]), ([x,y],[C(x,y)])])').eliminate_equality()) + ([x],[A(x,x), ([],[B(x,x)]), ([x,y],[C(x,y)])]) + >>> print(dexpr('([x,y],[A(x,y)]) + ([],[B(x,y), (x=y)]) + ([],[C(x,y)])').eliminate_equality()) + ([x],[A(x,x), B(x,x), C(x,x)]) 
+ >>> print(dexpr('([x,y],[B(x,y)])+([x,y],[C(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x')))) + (([x,y],[B(x,y)]) + ([x,y],[C(x,y)])) + >>> print(dexpr('(([x,y],[B(x,y)])+([],[C(x,y)]))+([],[D(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x')))) + (([x,y],[B(x,y)]) + ([],[C(x,y)]) + ([],[D(x,y)])) + >>> print(dexpr('(([],[B(x,y)])+([],[C(x,y)]))+([],[D(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x')))) + (([],[B(x,x)]) + ([],[C(x,x)]) + ([],[D(x,x)])) + >>> print(dexpr('(([],[B(x,y), ([x,y],[A(x,y)])])+([],[C(x,y)]))+([],[D(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x'))).normalize()) + (([],[B(z3,z1), ([z2,z3],[A(z3,z2)])]) + ([],[C(z3,z1)]) + ([],[D(z3,z1)])) + + +Parse errors +============ + + >>> def parse_error(drtstring): + ... try: dexpr(drtstring) + ... except logic.LogicalExpressionException as e: print(e) + + >>> parse_error(r'') + End of input found. Expression expected. + + ^ + >>> parse_error(r'(') + End of input found. Expression expected. + ( + ^ + >>> parse_error(r'()') + Unexpected token: ')'. Expression expected. + () + ^ + >>> parse_error(r'([') + End of input found. Expected token ']'. + ([ + ^ + >>> parse_error(r'([,') + ',' is an illegal variable name. Constants may not be quantified. + ([, + ^ + >>> parse_error(r'([x,') + End of input found. Variable expected. + ([x, + ^ + >>> parse_error(r'([]') + End of input found. Expected token '['. + ([] + ^ + >>> parse_error(r'([][') + End of input found. Expected token ']'. + ([][ + ^ + >>> parse_error(r'([][,') + Unexpected token: ','. Expression expected. + ([][, + ^ + >>> parse_error(r'([][]') + End of input found. Expected token ')'. + ([][] + ^ + >>> parse_error(r'([x][man(x)]) |') + End of input found. Expression expected. + ([x][man(x)]) | + ^ + +Pretty Printing +=============== + + >>> dexpr(r"([],[])").pretty_print() + __ + | | + |--| + |__| + + >>> dexpr(r"([],[([x],[big(x), dog(x)]) -> ([],[bark(x)]) -([x],[walk(x)])])").pretty_print() + _____________________________ + | | + |-----------------------------| + | ________ _________ | + | | x | | | | + | (|--------| -> |---------|) | + | | big(x) | | bark(x) | | + | | dog(x) | |_________| | + | |________| | + | _________ | + | | x | | + | __ |---------| | + | | | walk(x) | | + | |_________| | + |_____________________________| + + >>> dexpr(r"([x,y],[x=y]) + ([z],[dog(z), walk(z)])").pretty_print() + _________ _________ + | x y | | z | + (|---------| + |---------|) + | (x = y) | | dog(z) | + |_________| | walk(z) | + |_________| + + >>> dexpr(r"([],[([x],[]) | ([y],[]) | ([z],[dog(z), walk(z)])])").pretty_print() + _______________________________ + | | + |-------------------------------| + | ___ ___ _________ | + | | x | | y | | z | | + | (|---| | |---| | |---------|) | + | |___| |___| | dog(z) | | + | | walk(z) | | + | |_________| | + |_______________________________| + + >>> dexpr(r"\P.\Q.(([x],[]) + P(x) + Q(x))(\x.([],[dog(x)]))").pretty_print() + ___ ________ + \ | x | \ | | + /\ P Q.(|---| + P(x) + Q(x))( /\ x.|--------|) + |___| | dog(x) | + |________| + + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/featgram.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/featgram.doctest new file mode 100644 index 0000000..a1775f8 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/featgram.doctest @@ -0,0 +1,607 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. 
For license information, see LICENSE.TXT + +========================= + Feature Grammar Parsing +========================= + +.. include:: ../../../nltk_book/definitions.rst + +Grammars can be parsed from strings. + + >>> from __future__ import print_function + >>> import nltk + >>> from nltk import grammar, parse + >>> g = """ + ... % start DP + ... DP[AGR=?a] -> D[AGR=?a] N[AGR=?a] + ... D[AGR=[NUM='sg', PERS=3]] -> 'this' | 'that' + ... D[AGR=[NUM='pl', PERS=3]] -> 'these' | 'those' + ... D[AGR=[NUM='pl', PERS=1]] -> 'we' + ... D[AGR=[PERS=2]] -> 'you' + ... N[AGR=[NUM='sg', GND='m']] -> 'boy' + ... N[AGR=[NUM='pl', GND='m']] -> 'boys' + ... N[AGR=[NUM='sg', GND='f']] -> 'girl' + ... N[AGR=[NUM='pl', GND='f']] -> 'girls' + ... N[AGR=[NUM='sg']] -> 'student' + ... N[AGR=[NUM='pl']] -> 'students' + ... """ + >>> grammar = grammar.FeatureGrammar.fromstring(g) + >>> tokens = 'these girls'.split() + >>> parser = parse.FeatureEarleyChartParser(grammar) + >>> trees = parser.parse(tokens) + >>> for tree in trees: print(tree) + (DP[AGR=[GND='f', NUM='pl', PERS=3]] + (D[AGR=[NUM='pl', PERS=3]] these) + (N[AGR=[GND='f', NUM='pl']] girls)) + +In general, when we are trying to develop even a very small grammar, +it is convenient to put the rules in a file where they can be edited, +tested and revised. Let's assume that we have saved feat0cfg_ as a file named +``'feat0.fcfg'`` and placed it in the NLTK ``data`` directory. We can +inspect it as follows: + +.. _feat0cfg: http://nltk.svn.sourceforge.net/svnroot/nltk/trunk/nltk/data/grammars/feat0.fcfg + + >>> nltk.data.show_cfg('grammars/book_grammars/feat0.fcfg') + % start S + # ################### + # Grammar Productions + # ################### + # S expansion productions + S -> NP[NUM=?n] VP[NUM=?n] + # NP expansion productions + NP[NUM=?n] -> N[NUM=?n] + NP[NUM=?n] -> PropN[NUM=?n] + NP[NUM=?n] -> Det[NUM=?n] N[NUM=?n] + NP[NUM=pl] -> N[NUM=pl] + # VP expansion productions + VP[TENSE=?t, NUM=?n] -> IV[TENSE=?t, NUM=?n] + VP[TENSE=?t, NUM=?n] -> TV[TENSE=?t, NUM=?n] NP + # ################### + # Lexical Productions + # ################### + Det[NUM=sg] -> 'this' | 'every' + Det[NUM=pl] -> 'these' | 'all' + Det -> 'the' | 'some' | 'several' + PropN[NUM=sg]-> 'Kim' | 'Jody' + N[NUM=sg] -> 'dog' | 'girl' | 'car' | 'child' + N[NUM=pl] -> 'dogs' | 'girls' | 'cars' | 'children' + IV[TENSE=pres, NUM=sg] -> 'disappears' | 'walks' + TV[TENSE=pres, NUM=sg] -> 'sees' | 'likes' + IV[TENSE=pres, NUM=pl] -> 'disappear' | 'walk' + TV[TENSE=pres, NUM=pl] -> 'see' | 'like' + IV[TENSE=past] -> 'disappeared' | 'walked' + TV[TENSE=past] -> 'saw' | 'liked' + +Assuming we have saved feat0cfg_ as a file named +``'feat0.fcfg'``, the function ``parse.load_parser`` allows us to +read the grammar into NLTK, ready for use in parsing. + + + >>> cp = parse.load_parser('grammars/book_grammars/feat0.fcfg', trace=1) + >>> sent = 'Kim likes children' + >>> tokens = sent.split() + >>> tokens + ['Kim', 'likes', 'children'] + >>> trees = cp.parse(tokens) + |.Kim .like.chil.| + |[----] . .| [0:1] 'Kim' + |. [----] .| [1:2] 'likes' + |. . [----]| [2:3] 'children' + |[----] . .| [0:1] PropN[NUM='sg'] -> 'Kim' * + |[----] . .| [0:1] NP[NUM='sg'] -> PropN[NUM='sg'] * + |[----> . .| [0:1] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'sg'} + |. [----] .| [1:2] TV[NUM='sg', TENSE='pres'] -> 'likes' * + |. [----> .| [1:2] VP[NUM=?n, TENSE=?t] -> TV[NUM=?n, TENSE=?t] * NP[] {?n: 'sg', ?t: 'pres'} + |. . [----]| [2:3] N[NUM='pl'] -> 'children' * + |. . [----]| [2:3] NP[NUM='pl'] -> N[NUM='pl'] * + |. . 
[---->| [2:3] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'pl'} + |. [---------]| [1:3] VP[NUM='sg', TENSE='pres'] -> TV[NUM='sg', TENSE='pres'] NP[] * + |[==============]| [0:3] S[] -> NP[NUM='sg'] VP[NUM='sg'] * + >>> for tree in trees: print(tree) + (S[] + (NP[NUM='sg'] (PropN[NUM='sg'] Kim)) + (VP[NUM='sg', TENSE='pres'] + (TV[NUM='sg', TENSE='pres'] likes) + (NP[NUM='pl'] (N[NUM='pl'] children)))) + +The parser works directly with +the underspecified productions given by the grammar. That is, the +Predictor rule does not attempt to compile out all admissible feature +combinations before trying to expand the non-terminals on the left hand +side of a production. However, when the Scanner matches an input word +against a lexical production that has been predicted, the new edge will +typically contain fully specified features; e.g., the edge +[PropN[`num`:feat: = `sg`:fval:] |rarr| 'Kim', (0, 1)]. Recall from +Chapter 8 that the Fundamental (or Completer) Rule in +standard CFGs is used to combine an incomplete edge that's expecting a +nonterminal *B* with a following, complete edge whose left hand side +matches *B*. In our current setting, rather than checking for a +complete match, we test whether the expected category *B* will +`unify`:dt: with the left hand side *B'* of a following complete +edge. We will explain in more detail in Section 9.2 how +unification works; for the moment, it is enough to know that as a +result of unification, any variable values of features in *B* will be +instantiated by constant values in the corresponding feature structure +in *B'*, and these instantiated values will be used in the new edge +added by the Completer. This instantiation can be seen, for example, +in the edge +[NP [`num`:feat:\ =\ `sg`:fval:] |rarr| PropN[`num`:feat:\ =\ `sg`:fval:] |dot|, (0, 1)] +in Example 9.2, where the feature `num`:feat: has been assigned the value `sg`:fval:. + +Feature structures in NLTK are ... Atomic feature values can be strings or +integers. + + >>> fs1 = nltk.FeatStruct(TENSE='past', NUM='sg') + >>> print(fs1) + [ NUM = 'sg' ] + [ TENSE = 'past' ] + +We can think of a feature structure as being like a Python dictionary, +and access its values by indexing in the usual way. + + >>> fs1 = nltk.FeatStruct(PER=3, NUM='pl', GND='fem') + >>> print(fs1['GND']) + fem + +We can also define feature structures which have complex values, as +discussed earlier. + + >>> fs2 = nltk.FeatStruct(POS='N', AGR=fs1) + >>> print(fs2) + [ [ GND = 'fem' ] ] + [ AGR = [ NUM = 'pl' ] ] + [ [ PER = 3 ] ] + [ ] + [ POS = 'N' ] + >>> print(fs2['AGR']) + [ GND = 'fem' ] + [ NUM = 'pl' ] + [ PER = 3 ] + >>> print(fs2['AGR']['PER']) + 3 + +Feature structures can also be constructed using the ``parse()`` +method of the ``nltk.FeatStruct`` class. Note that in this case, atomic +feature values do not need to be enclosed in quotes. + + >>> f1 = nltk.FeatStruct("[NUMBER = sg]") + >>> f2 = nltk.FeatStruct("[PERSON = 3]") + >>> print(nltk.unify(f1, f2)) + [ NUMBER = 'sg' ] + [ PERSON = 3 ] + + >>> f1 = nltk.FeatStruct("[A = [B = b, D = d]]") + >>> f2 = nltk.FeatStruct("[A = [C = c, D = d]]") + >>> print(nltk.unify(f1, f2)) + [ [ B = 'b' ] ] + [ A = [ C = 'c' ] ] + [ [ D = 'd' ] ] + + +Feature Structures as Graphs +---------------------------- + +Feature structures are not inherently tied to linguistic objects; they are +general purpose structures for representing knowledge. 
For example, we +could encode information about a person in a feature structure: + + >>> person01 = nltk.FeatStruct("[NAME=Lee, TELNO='01 27 86 42 96',AGE=33]") + >>> print(person01) + [ AGE = 33 ] + [ NAME = 'Lee' ] + [ TELNO = '01 27 86 42 96' ] + +There are a number of notations for representing reentrancy in +matrix-style representations of feature structures. In NLTK, we adopt +the following convention: the first occurrence of a shared feature structure +is prefixed with an integer in parentheses, such as ``(1)``, and any +subsequent reference to that structure uses the notation +``->(1)``, as shown below. + + + >>> fs = nltk.FeatStruct("""[NAME=Lee, ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'], + ... SPOUSE=[NAME=Kim, ADDRESS->(1)]]""") + >>> print(fs) + [ ADDRESS = (1) [ NUMBER = 74 ] ] + [ [ STREET = 'rue Pascal' ] ] + [ ] + [ NAME = 'Lee' ] + [ ] + [ SPOUSE = [ ADDRESS -> (1) ] ] + [ [ NAME = 'Kim' ] ] + +There can be any number of tags within a single feature structure. + + >>> fs3 = nltk.FeatStruct("[A=(1)[B=b], C=(2)[], D->(1), E->(2)]") + >>> print(fs3) + [ A = (1) [ B = 'b' ] ] + [ ] + [ C = (2) [] ] + [ ] + [ D -> (1) ] + [ E -> (2) ] + >>> fs1 = nltk.FeatStruct(NUMBER=74, STREET='rue Pascal') + >>> fs2 = nltk.FeatStruct(CITY='Paris') + >>> print(nltk.unify(fs1, fs2)) + [ CITY = 'Paris' ] + [ NUMBER = 74 ] + [ STREET = 'rue Pascal' ] + +Unification is symmetric: + + >>> nltk.unify(fs1, fs2) == nltk.unify(fs2, fs1) + True + +Unification is commutative: + + >>> fs3 = nltk.FeatStruct(TELNO='01 27 86 42 96') + >>> nltk.unify(nltk.unify(fs1, fs2), fs3) == nltk.unify(fs1, nltk.unify(fs2, fs3)) + True + +Unification between `FS`:math:\ :subscript:`0` and `FS`:math:\ +:subscript:`1` will fail if the two feature structures share a path |pi|, +but the value of |pi| in `FS`:math:\ :subscript:`0` is a distinct +atom from the value of |pi| in `FS`:math:\ :subscript:`1`. In NLTK, +this is implemented by setting the result of unification to be +``None``. + + >>> fs0 = nltk.FeatStruct(A='a') + >>> fs1 = nltk.FeatStruct(A='b') + >>> print(nltk.unify(fs0, fs1)) + None + +Now, if we look at how unification interacts with structure-sharing, +things become really interesting. + + + + >>> fs0 = nltk.FeatStruct("""[NAME=Lee, + ... ADDRESS=[NUMBER=74, + ... STREET='rue Pascal'], + ... SPOUSE= [NAME=Kim, + ... ADDRESS=[NUMBER=74, + ... STREET='rue Pascal']]]""") + >>> print(fs0) + [ ADDRESS = [ NUMBER = 74 ] ] + [ [ STREET = 'rue Pascal' ] ] + [ ] + [ NAME = 'Lee' ] + [ ] + [ [ ADDRESS = [ NUMBER = 74 ] ] ] + [ SPOUSE = [ [ STREET = 'rue Pascal' ] ] ] + [ [ ] ] + [ [ NAME = 'Kim' ] ] + + + >>> fs1 = nltk.FeatStruct("[SPOUSE=[ADDRESS=[CITY=Paris]]]") + >>> print(nltk.unify(fs0, fs1)) + [ ADDRESS = [ NUMBER = 74 ] ] + [ [ STREET = 'rue Pascal' ] ] + [ ] + [ NAME = 'Lee' ] + [ ] + [ [ [ CITY = 'Paris' ] ] ] + [ [ ADDRESS = [ NUMBER = 74 ] ] ] + [ SPOUSE = [ [ STREET = 'rue Pascal' ] ] ] + [ [ ] ] + [ [ NAME = 'Kim' ] ] + + >>> fs2 = nltk.FeatStruct("""[NAME=Lee, ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'], + ... 
SPOUSE=[NAME=Kim, ADDRESS->(1)]]""") + + + >>> print(fs2) + [ ADDRESS = (1) [ NUMBER = 74 ] ] + [ [ STREET = 'rue Pascal' ] ] + [ ] + [ NAME = 'Lee' ] + [ ] + [ SPOUSE = [ ADDRESS -> (1) ] ] + [ [ NAME = 'Kim' ] ] + + + >>> print(nltk.unify(fs2, fs1)) + [ [ CITY = 'Paris' ] ] + [ ADDRESS = (1) [ NUMBER = 74 ] ] + [ [ STREET = 'rue Pascal' ] ] + [ ] + [ NAME = 'Lee' ] + [ ] + [ SPOUSE = [ ADDRESS -> (1) ] ] + [ [ NAME = 'Kim' ] ] + + + >>> fs1 = nltk.FeatStruct("[ADDRESS1=[NUMBER=74, STREET='rue Pascal']]") + >>> fs2 = nltk.FeatStruct("[ADDRESS1=?x, ADDRESS2=?x]") + >>> print(fs2) + [ ADDRESS1 = ?x ] + [ ADDRESS2 = ?x ] + >>> print(nltk.unify(fs1, fs2)) + [ ADDRESS1 = (1) [ NUMBER = 74 ] ] + [ [ STREET = 'rue Pascal' ] ] + [ ] + [ ADDRESS2 -> (1) ] + + + + + >>> sent = 'who do you claim that you like' + >>> tokens = sent.split() + >>> cp = parse.load_parser('grammars/book_grammars/feat1.fcfg', trace=1) + >>> trees = cp.parse(tokens) + |.w.d.y.c.t.y.l.| + |[-] . . . . . .| [0:1] 'who' + |. [-] . . . . .| [1:2] 'do' + |. . [-] . . . .| [2:3] 'you' + |. . . [-] . . .| [3:4] 'claim' + |. . . . [-] . .| [4:5] 'that' + |. . . . . [-] .| [5:6] 'you' + |. . . . . . [-]| [6:7] 'like' + |# . . . . . . .| [0:0] NP[]/NP[] -> * + |. # . . . . . .| [1:1] NP[]/NP[] -> * + |. . # . . . . .| [2:2] NP[]/NP[] -> * + |. . . # . . . .| [3:3] NP[]/NP[] -> * + |. . . . # . . .| [4:4] NP[]/NP[] -> * + |. . . . . # . .| [5:5] NP[]/NP[] -> * + |. . . . . . # .| [6:6] NP[]/NP[] -> * + |. . . . . . . #| [7:7] NP[]/NP[] -> * + |[-] . . . . . .| [0:1] NP[+WH] -> 'who' * + |[-> . . . . . .| [0:1] S[-INV] -> NP[] * VP[] {} + |[-> . . . . . .| [0:1] S[-INV]/?x[] -> NP[] * VP[]/?x[] {} + |[-> . . . . . .| [0:1] S[-INV] -> NP[] * S[]/NP[] {} + |. [-] . . . . .| [1:2] V[+AUX] -> 'do' * + |. [-> . . . . .| [1:2] S[+INV] -> V[+AUX] * NP[] VP[] {} + |. [-> . . . . .| [1:2] S[+INV]/?x[] -> V[+AUX] * NP[] VP[]/?x[] {} + |. [-> . . . . .| [1:2] VP[] -> V[+AUX] * VP[] {} + |. [-> . . . . .| [1:2] VP[]/?x[] -> V[+AUX] * VP[]/?x[] {} + |. . [-] . . . .| [2:3] NP[-WH] -> 'you' * + |. . [-> . . . .| [2:3] S[-INV] -> NP[] * VP[] {} + |. . [-> . . . .| [2:3] S[-INV]/?x[] -> NP[] * VP[]/?x[] {} + |. . [-> . . . .| [2:3] S[-INV] -> NP[] * S[]/NP[] {} + |. [---> . . . .| [1:3] S[+INV] -> V[+AUX] NP[] * VP[] {} + |. [---> . . . .| [1:3] S[+INV]/?x[] -> V[+AUX] NP[] * VP[]/?x[] {} + |. . . [-] . . .| [3:4] V[-AUX, SUBCAT='clause'] -> 'claim' * + |. . . [-> . . .| [3:4] VP[] -> V[-AUX, SUBCAT='clause'] * SBar[] {} + |. . . [-> . . .| [3:4] VP[]/?x[] -> V[-AUX, SUBCAT='clause'] * SBar[]/?x[] {} + |. . . . [-] . .| [4:5] Comp[] -> 'that' * + |. . . . [-> . .| [4:5] SBar[] -> Comp[] * S[-INV] {} + |. . . . [-> . .| [4:5] SBar[]/?x[] -> Comp[] * S[-INV]/?x[] {} + |. . . . . [-] .| [5:6] NP[-WH] -> 'you' * + |. . . . . [-> .| [5:6] S[-INV] -> NP[] * VP[] {} + |. . . . . [-> .| [5:6] S[-INV]/?x[] -> NP[] * VP[]/?x[] {} + |. . . . . [-> .| [5:6] S[-INV] -> NP[] * S[]/NP[] {} + |. . . . . . [-]| [6:7] V[-AUX, SUBCAT='trans'] -> 'like' * + |. . . . . . [->| [6:7] VP[] -> V[-AUX, SUBCAT='trans'] * NP[] {} + |. . . . . . [->| [6:7] VP[]/?x[] -> V[-AUX, SUBCAT='trans'] * NP[]/?x[] {} + |. . . . . . [-]| [6:7] VP[]/NP[] -> V[-AUX, SUBCAT='trans'] NP[]/NP[] * + |. . . . . [---]| [5:7] S[-INV]/NP[] -> NP[] VP[]/NP[] * + |. . . . [-----]| [4:7] SBar[]/NP[] -> Comp[] S[-INV]/NP[] * + |. . . [-------]| [3:7] VP[]/NP[] -> V[-AUX, SUBCAT='clause'] SBar[]/NP[] * + |. . [---------]| [2:7] S[-INV]/NP[] -> NP[] VP[]/NP[] * + |. 
[-----------]| [1:7] S[+INV]/NP[] -> V[+AUX] NP[] VP[]/NP[] * + |[=============]| [0:7] S[-INV] -> NP[] S[]/NP[] * + + >>> trees = list(trees) + >>> for tree in trees: print(tree) + (S[-INV] + (NP[+WH] who) + (S[+INV]/NP[] + (V[+AUX] do) + (NP[-WH] you) + (VP[]/NP[] + (V[-AUX, SUBCAT='clause'] claim) + (SBar[]/NP[] + (Comp[] that) + (S[-INV]/NP[] + (NP[-WH] you) + (VP[]/NP[] (V[-AUX, SUBCAT='trans'] like) (NP[]/NP[] ))))))) + +A different parser should give the same parse trees, but perhaps in a different order: + + >>> cp2 = parse.load_parser('grammars/book_grammars/feat1.fcfg', trace=1, + ... parser=parse.FeatureEarleyChartParser) + >>> trees2 = cp2.parse(tokens) + |.w.d.y.c.t.y.l.| + |[-] . . . . . .| [0:1] 'who' + |. [-] . . . . .| [1:2] 'do' + |. . [-] . . . .| [2:3] 'you' + |. . . [-] . . .| [3:4] 'claim' + |. . . . [-] . .| [4:5] 'that' + |. . . . . [-] .| [5:6] 'you' + |. . . . . . [-]| [6:7] 'like' + |> . . . . . . .| [0:0] S[-INV] -> * NP[] VP[] {} + |> . . . . . . .| [0:0] S[-INV]/?x[] -> * NP[] VP[]/?x[] {} + |> . . . . . . .| [0:0] S[-INV] -> * NP[] S[]/NP[] {} + |> . . . . . . .| [0:0] S[-INV] -> * Adv[+NEG] S[+INV] {} + |> . . . . . . .| [0:0] S[+INV] -> * V[+AUX] NP[] VP[] {} + |> . . . . . . .| [0:0] S[+INV]/?x[] -> * V[+AUX] NP[] VP[]/?x[] {} + |> . . . . . . .| [0:0] NP[+WH] -> * 'who' {} + |[-] . . . . . .| [0:1] NP[+WH] -> 'who' * + |[-> . . . . . .| [0:1] S[-INV] -> NP[] * VP[] {} + |[-> . . . . . .| [0:1] S[-INV]/?x[] -> NP[] * VP[]/?x[] {} + |[-> . . . . . .| [0:1] S[-INV] -> NP[] * S[]/NP[] {} + |. > . . . . . .| [1:1] S[-INV]/?x[] -> * NP[] VP[]/?x[] {} + |. > . . . . . .| [1:1] S[+INV]/?x[] -> * V[+AUX] NP[] VP[]/?x[] {} + |. > . . . . . .| [1:1] V[+AUX] -> * 'do' {} + |. > . . . . . .| [1:1] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {} + |. > . . . . . .| [1:1] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {} + |. > . . . . . .| [1:1] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {} + |. > . . . . . .| [1:1] VP[] -> * V[-AUX, SUBCAT='intrans'] {} + |. > . . . . . .| [1:1] VP[] -> * V[-AUX, SUBCAT='trans'] NP[] {} + |. > . . . . . .| [1:1] VP[] -> * V[-AUX, SUBCAT='clause'] SBar[] {} + |. > . . . . . .| [1:1] VP[] -> * V[+AUX] VP[] {} + |. [-] . . . . .| [1:2] V[+AUX] -> 'do' * + |. [-> . . . . .| [1:2] S[+INV]/?x[] -> V[+AUX] * NP[] VP[]/?x[] {} + |. [-> . . . . .| [1:2] VP[]/?x[] -> V[+AUX] * VP[]/?x[] {} + |. [-> . . . . .| [1:2] VP[] -> V[+AUX] * VP[] {} + |. . > . . . . .| [2:2] VP[] -> * V[-AUX, SUBCAT='intrans'] {} + |. . > . . . . .| [2:2] VP[] -> * V[-AUX, SUBCAT='trans'] NP[] {} + |. . > . . . . .| [2:2] VP[] -> * V[-AUX, SUBCAT='clause'] SBar[] {} + |. . > . . . . .| [2:2] VP[] -> * V[+AUX] VP[] {} + |. . > . . . . .| [2:2] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {} + |. . > . . . . .| [2:2] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {} + |. . > . . . . .| [2:2] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {} + |. . > . . . . .| [2:2] NP[-WH] -> * 'you' {} + |. . [-] . . . .| [2:3] NP[-WH] -> 'you' * + |. [---> . . . .| [1:3] S[+INV]/?x[] -> V[+AUX] NP[] * VP[]/?x[] {} + |. . . > . . . .| [3:3] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {} + |. . . > . . . .| [3:3] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {} + |. . . > . . . .| [3:3] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {} + |. . . > . . . .| [3:3] V[-AUX, SUBCAT='clause'] -> * 'claim' {} + |. . . [-] . . .| [3:4] V[-AUX, SUBCAT='clause'] -> 'claim' * + |. . . [-> . . .| [3:4] VP[]/?x[] -> V[-AUX, SUBCAT='clause'] * SBar[]/?x[] {} + |. . . . > . . 
.| [4:4] SBar[]/?x[] -> * Comp[] S[-INV]/?x[] {} + |. . . . > . . .| [4:4] Comp[] -> * 'that' {} + |. . . . [-] . .| [4:5] Comp[] -> 'that' * + |. . . . [-> . .| [4:5] SBar[]/?x[] -> Comp[] * S[-INV]/?x[] {} + |. . . . . > . .| [5:5] S[-INV]/?x[] -> * NP[] VP[]/?x[] {} + |. . . . . > . .| [5:5] NP[-WH] -> * 'you' {} + |. . . . . [-] .| [5:6] NP[-WH] -> 'you' * + |. . . . . [-> .| [5:6] S[-INV]/?x[] -> NP[] * VP[]/?x[] {} + |. . . . . . > .| [6:6] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {} + |. . . . . . > .| [6:6] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {} + |. . . . . . > .| [6:6] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {} + |. . . . . . > .| [6:6] V[-AUX, SUBCAT='trans'] -> * 'like' {} + |. . . . . . [-]| [6:7] V[-AUX, SUBCAT='trans'] -> 'like' * + |. . . . . . [->| [6:7] VP[]/?x[] -> V[-AUX, SUBCAT='trans'] * NP[]/?x[] {} + |. . . . . . . #| [7:7] NP[]/NP[] -> * + |. . . . . . [-]| [6:7] VP[]/NP[] -> V[-AUX, SUBCAT='trans'] NP[]/NP[] * + |. . . . . [---]| [5:7] S[-INV]/NP[] -> NP[] VP[]/NP[] * + |. . . . [-----]| [4:7] SBar[]/NP[] -> Comp[] S[-INV]/NP[] * + |. . . [-------]| [3:7] VP[]/NP[] -> V[-AUX, SUBCAT='clause'] SBar[]/NP[] * + |. [-----------]| [1:7] S[+INV]/NP[] -> V[+AUX] NP[] VP[]/NP[] * + |[=============]| [0:7] S[-INV] -> NP[] S[]/NP[] * + + >>> sorted(trees) == sorted(trees2) + True + + +Let's load a German grammar: + + >>> cp = parse.load_parser('grammars/book_grammars/german.fcfg', trace=0) + >>> sent = 'die Katze sieht den Hund' + >>> tokens = sent.split() + >>> trees = cp.parse(tokens) + >>> for tree in trees: print(tree) + (S[] + (NP[AGR=[GND='fem', NUM='sg', PER=3], CASE='nom'] + (Det[AGR=[GND='fem', NUM='sg', PER=3], CASE='nom'] die) + (N[AGR=[GND='fem', NUM='sg', PER=3]] Katze)) + (VP[AGR=[NUM='sg', PER=3]] + (TV[AGR=[NUM='sg', PER=3], OBJCASE='acc'] sieht) + (NP[AGR=[GND='masc', NUM='sg', PER=3], CASE='acc'] + (Det[AGR=[GND='masc', NUM='sg', PER=3], CASE='acc'] den) + (N[AGR=[GND='masc', NUM='sg', PER=3]] Hund)))) + +Grammar with Binding Operators +------------------------------ +The `bindop.fcfg`_ grammar is a semantic grammar that uses lambda +calculus. Each element has a core semantics, which is a single lambda +calculus expression; and a set of binding operators, which bind +variables. + +.. _bindop.fcfg: http://nltk.svn.sourceforge.net/svnroot/nltk/trunk/nltk/data/grammars/bindop.fcfg + +In order to make the binding operators work right, they need to +instantiate their bound variable every time they are added to the +chart. To do this, we use a special subclass of `Chart`, called +`InstantiateVarsChart`. + + >>> from nltk.parse.featurechart import InstantiateVarsChart + >>> cp = parse.load_parser('grammars/sample_grammars/bindop.fcfg', trace=1, + ... 
chart_class=InstantiateVarsChart) + >>> print(cp.grammar()) + Grammar with 15 productions (start state = S[]) + S[SEM=[BO={?b1+?b2}, CORE=]] -> NP[SEM=[BO=?b1, CORE=?subj]] VP[SEM=[BO=?b2, CORE=?vp]] + VP[SEM=[BO={?b1+?b2}, CORE=]] -> TV[SEM=[BO=?b1, CORE=?v]] NP[SEM=[BO=?b2, CORE=?obj]] + VP[SEM=?s] -> IV[SEM=?s] + NP[SEM=[BO={?b1+?b2+{bo(?det(?n),@x)}}, CORE=<@x>]] -> Det[SEM=[BO=?b1, CORE=?det]] N[SEM=[BO=?b2, CORE=?n]] + Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] -> 'a' + N[SEM=[BO={/}, CORE=]] -> 'dog' + N[SEM=[BO={/}, CORE=]] -> 'cat' + N[SEM=[BO={/}, CORE=]] -> 'mouse' + IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'barks' + IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'eats' + IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'walks' + TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] -> 'feeds' + TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] -> 'walks' + NP[SEM=[BO={bo(\P.P(John),@x)}, CORE=<@x>]] -> 'john' + NP[SEM=[BO={bo(\P.P(John),@x)}, CORE=<@x>]] -> 'alex' + +A simple intransitive sentence: + + >>> from nltk.sem import logic + >>> logic._counter._value = 100 + + >>> trees = cp.parse('john barks'.split()) + |. john.barks.| + |[-----] .| [0:1] 'john' + |. [-----]| [1:2] 'barks' + |[-----] .| [0:1] NP[SEM=[BO={bo(\P.P(John),z101)}, CORE=]] -> 'john' * + |[-----> .| [0:1] S[SEM=[BO={?b1+?b2}, CORE=]] -> NP[SEM=[BO=?b1, CORE=?subj]] * VP[SEM=[BO=?b2, CORE=?vp]] {?b1: {bo(\P.P(John),z2)}, ?subj: } + |. [-----]| [1:2] IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'barks' * + |. [-----]| [1:2] VP[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] * + |[===========]| [0:2] S[SEM=[BO={bo(\P.P(John),z2)}, CORE=]] -> NP[SEM=[BO={bo(\P.P(John),z2)}, CORE=]] VP[SEM=[BO={/}, CORE=<\x.bark(x)>]] * + >>> for tree in trees: print(tree) + (S[SEM=[BO={bo(\P.P(John),z2)}, CORE=]] + (NP[SEM=[BO={bo(\P.P(John),z101)}, CORE=]] john) + (VP[SEM=[BO={/}, CORE=<\x.bark(x)>]] + (IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] barks))) + +A transitive sentence: + + >>> trees = cp.parse('john feeds a dog'.split()) + |.joh.fee. a .dog.| + |[---] . . .| [0:1] 'john' + |. [---] . .| [1:2] 'feeds' + |. . [---] .| [2:3] 'a' + |. . . [---]| [3:4] 'dog' + |[---] . . .| [0:1] NP[SEM=[BO={bo(\P.P(John),z102)}, CORE=]] -> 'john' * + |[---> . . .| [0:1] S[SEM=[BO={?b1+?b2}, CORE=]] -> NP[SEM=[BO=?b1, CORE=?subj]] * VP[SEM=[BO=?b2, CORE=?vp]] {?b1: {bo(\P.P(John),z2)}, ?subj: } + |. [---] . .| [1:2] TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] -> 'feeds' * + |. [---> . .| [1:2] VP[SEM=[BO={?b1+?b2}, CORE=]] -> TV[SEM=[BO=?b1, CORE=?v]] * NP[SEM=[BO=?b2, CORE=?obj]] {?b1: {/}, ?v: } + |. . [---] .| [2:3] Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] -> 'a' * + |. . [---> .| [2:3] NP[SEM=[BO={?b1+?b2+{bo(?det(?n),@x)}}, CORE=<@x>]] -> Det[SEM=[BO=?b1, CORE=?det]] * N[SEM=[BO=?b2, CORE=?n]] {?b1: {/}, ?det: } + |. . . [---]| [3:4] N[SEM=[BO={/}, CORE=]] -> 'dog' * + |. . [-------]| [2:4] NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z103)}, CORE=]] -> Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] N[SEM=[BO={/}, CORE=]] * + |. . [------->| [2:4] S[SEM=[BO={?b1+?b2}, CORE=]] -> NP[SEM=[BO=?b1, CORE=?subj]] * VP[SEM=[BO=?b2, CORE=?vp]] {?b1: {bo(\P.exists x.(dog(x) & P(x)),z2)}, ?subj: } + |. 
[-----------]| [1:4] VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<\y.feed(y,z2)>]] -> TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=]] * + |[===============]| [0:4] S[SEM=[BO={bo(\P.P(John),z2), bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=]] -> NP[SEM=[BO={bo(\P.P(John),z2)}, CORE=]] VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=<\y.feed(y,z3)>]] * + + >>> for tree in trees: print(tree) + (S[SEM=[BO={bo(\P.P(John),z2), bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=]] + (NP[SEM=[BO={bo(\P.P(John),z102)}, CORE=]] john) + (VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<\y.feed(y,z2)>]] + (TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] feeds) + (NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z103)}, CORE=]] + (Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] a) + (N[SEM=[BO={/}, CORE=]] dog)))) + +Turn down the verbosity: + + >>> cp = parse.load_parser('grammars/sample_grammars/bindop.fcfg', trace=0, + ... chart_class=InstantiateVarsChart) + +Reuse the same lexical item twice: + + >>> trees = cp.parse('john feeds john'.split()) + >>> for tree in trees: print(tree) + (S[SEM=[BO={bo(\P.P(John),z2), bo(\P.P(John),z3)}, CORE=]] + (NP[SEM=[BO={bo(\P.P(John),z104)}, CORE=]] john) + (VP[SEM=[BO={bo(\P.P(John),z2)}, CORE=<\y.feed(y,z2)>]] + (TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] feeds) + (NP[SEM=[BO={bo(\P.P(John),z105)}, CORE=]] john))) + + >>> trees = cp.parse('a dog feeds a dog'.split()) + >>> for tree in trees: print(tree) + (S[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2), bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=]] + (NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z106)}, CORE=]] + (Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] a) + (N[SEM=[BO={/}, CORE=]] dog)) + (VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<\y.feed(y,z2)>]] + (TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] feeds) + (NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z107)}, CORE=]] + (Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] a) + (N[SEM=[BO={/}, CORE=]] dog)))) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/featstruct.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/featstruct.doctest new file mode 100644 index 0000000..8c35dad --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/featstruct.doctest @@ -0,0 +1,1229 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +================================== + Feature Structures & Unification +================================== + >>> from __future__ import print_function + >>> from nltk.featstruct import FeatStruct + >>> from nltk.sem.logic import Variable, VariableExpression, Expression + +.. note:: For now, featstruct uses the older lambdalogic semantics + module. Eventually, it should be updated to use the new first + order predicate logic module. + +Overview +~~~~~~~~ +A feature structure is a mapping from feature identifiers to feature +values, where feature values can be simple values (like strings or +ints), nested feature structures, or variables: + + >>> fs1 = FeatStruct(number='singular', person=3) + >>> print(fs1) + [ number = 'singular' ] + [ person = 3 ] + +Feature structure may be nested: + + >>> fs2 = FeatStruct(type='NP', agr=fs1) + >>> print(fs2) + [ agr = [ number = 'singular' ] ] + [ [ person = 3 ] ] + [ ] + [ type = 'NP' ] + +Variables are used to indicate that two features should be assigned +the same value. 
For example, the following feature structure requires +that the feature fs3['agr']['number'] be bound to the same value as the +feature fs3['subj']['number']. + + >>> fs3 = FeatStruct(agr=FeatStruct(number=Variable('?n')), + ... subj=FeatStruct(number=Variable('?n'))) + >>> print(fs3) + [ agr = [ number = ?n ] ] + [ ] + [ subj = [ number = ?n ] ] + +Feature structures are typically used to represent partial information +about objects. A feature name that is not mapped to a value stands +for a feature whose value is unknown (*not* a feature without a +value). Two feature structures that represent (potentially +overlapping) information about the same object can be combined by +*unification*. + + >>> print(fs2.unify(fs3)) + [ agr = [ number = 'singular' ] ] + [ [ person = 3 ] ] + [ ] + [ subj = [ number = 'singular' ] ] + [ ] + [ type = 'NP' ] + +When two inconsistent feature structures are unified, the unification +fails and returns ``None``. + + >>> fs4 = FeatStruct(agr=FeatStruct(person=1)) + >>> print(fs4.unify(fs2)) + None + >>> print(fs2.unify(fs4)) + None + +.. + >>> del fs1, fs2, fs3, fs4 # clean-up + +Feature Structure Types +----------------------- +There are actually two types of feature structure: + +- *feature dictionaries*, implemented by `FeatDict`, act like + Python dictionaries. Feature identifiers may be strings or + instances of the `Feature` class. +- *feature lists*, implemented by `FeatList`, act like Python + lists. Feature identifiers are integers. + +When you construct a feature structure using the `FeatStruct` +constructor, it will automatically decide which type is appropriate: + + >>> type(FeatStruct(number='singular')) + + >>> type(FeatStruct([1,2,3])) + + +Usually, we will just use feature dictionaries; but sometimes feature +lists can be useful too. Two feature lists will unify with each other +only if they have equal lengths, and all of their feature values +match. If you wish to write a feature list that contains 'unknown' +values, you must use variables: + + >>> fs1 = FeatStruct([1,2,Variable('?y')]) + >>> fs2 = FeatStruct([1,Variable('?x'),3]) + >>> fs1.unify(fs2) + [1, 2, 3] + +.. + >>> del fs1, fs2 # clean-up + +Parsing Feature Structure Strings +--------------------------------- +Feature structures can be constructed directly from strings. Often, +this is more convenient than constructing them directly. NLTK can +parse most feature strings to produce the corresponding feature +structures. (But you must restrict your base feature values to +strings, ints, logic expressions (`nltk.sem.logic.Expression`), and a +few other types discussed below). 
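+
+For example, a structure read from a string should compare equal to one
+built directly with the constructor (a quick sketch: the unquoted value
+``sg`` is read as a string, and ``3`` as an integer):
+
+ >>> FeatStruct('[NUM=sg, PER=3]') == FeatStruct(NUM='sg', PER=3)
+ True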
+ +Feature dictionaries are written like Python dictionaries, except that +keys are not put in quotes; and square brackets (``[]``) are used +instead of braces (``{}``): + + >>> FeatStruct('[tense="past", agr=[number="sing", person=3]]') + [agr=[number='sing', person=3], tense='past'] + +If a feature value is a single alphanumeric word, then it does not +need to be quoted -- it will be automatically treated as a string: + + >>> FeatStruct('[tense=past, agr=[number=sing, person=3]]') + [agr=[number='sing', person=3], tense='past'] + +Feature lists are written like python lists: + + >>> FeatStruct('[1, 2, 3]') + [1, 2, 3] + +The expression ``[]`` is treated as an empty feature dictionary, not +an empty feature list: + + >>> type(FeatStruct('[]')) + + +Feature Paths +------------- +Features can be specified using *feature paths*, or tuples of feature +identifiers that specify path through the nested feature structures to +a value. + + >>> fs1 = FeatStruct('[x=1, y=[1,2,[z=3]]]') + >>> fs1['y'] + [1, 2, [z=3]] + >>> fs1['y', 2] + [z=3] + >>> fs1['y', 2, 'z'] + 3 + +.. + >>> del fs1 # clean-up + +Reentrance +---------- +Feature structures may contain reentrant feature values. A *reentrant +feature value* is a single feature structure that can be accessed via +multiple feature paths. + + >>> fs1 = FeatStruct(x='val') + >>> fs2 = FeatStruct(a=fs1, b=fs1) + >>> print(fs2) + [ a = (1) [ x = 'val' ] ] + [ ] + [ b -> (1) ] + >>> fs2 + [a=(1)[x='val'], b->(1)] + +As you can see, reentrane is displayed by marking a feature structure +with a unique identifier, in this case ``(1)``, the first time it is +encountered; and then using the special form ``var -> id`` whenever it +is encountered again. You can use the same notation to directly +create reentrant feature structures from strings. + + >>> FeatStruct('[a=(1)[], b->(1), c=[d->(1)]]') + [a=(1)[], b->(1), c=[d->(1)]] + +Reentrant feature structures may contain cycles: + + >>> fs3 = FeatStruct('(1)[a->(1)]') + >>> fs3['a', 'a', 'a', 'a'] + (1)[a->(1)] + >>> fs3['a', 'a', 'a', 'a'] is fs3 + True + +Unification preserves the reentrance relations imposed by both of the +unified feature structures. In the feature structure resulting from +unification, any modifications to a reentrant feature value will be +visible using any of its feature paths. + + >>> fs3.unify(FeatStruct('[a=[b=12], c=33]')) + (1)[a->(1), b=12, c=33] + +.. + >>> del fs1, fs2, fs3 # clean-up + +Feature Structure Equality +-------------------------- +Two feature structures are considered equal if they assign the same +values to all features, *and* they contain the same reentrances. + + >>> fs1 = FeatStruct('[a=(1)[x=1], b->(1)]') + >>> fs2 = FeatStruct('[a=(1)[x=1], b->(1)]') + >>> fs3 = FeatStruct('[a=[x=1], b=[x=1]]') + >>> fs1 == fs1, fs1 is fs1 + (True, True) + >>> fs1 == fs2, fs1 is fs2 + (True, False) + >>> fs1 == fs3, fs1 is fs3 + (False, False) + +Note that this differs from how Python dictionaries and lists define +equality -- in particular, Python dictionaries and lists ignore +reentrance relations. To test two feature structures for equality +while ignoring reentrance relations, use the `equal_values()` method: + + >>> fs1.equal_values(fs1) + True + >>> fs1.equal_values(fs2) + True + >>> fs1.equal_values(fs3) + True + +.. + >>> del fs1, fs2, fs3 # clean-up + +Feature Value Sets & Feature Value Tuples +----------------------------------------- +`nltk.featstruct` defines two new data types that are intended to be +used as feature values: `FeatureValueTuple` and `FeatureValueSet`. 
+Both of these types are considered base values -- i.e., unification +does *not* apply to them. However, variable binding *does* apply to +any values that they contain. + +Feature value tuples are written with parentheses: + + >>> fs1 = FeatStruct('[x=(?x, ?y)]') + >>> fs1 + [x=(?x, ?y)] + >>> fs1.substitute_bindings({Variable('?x'): 1, Variable('?y'): 2}) + [x=(1, 2)] + +Feature sets are written with braces: + + >>> fs1 = FeatStruct('[x={?x, ?y}]') + >>> fs1 + [x={?x, ?y}] + >>> fs1.substitute_bindings({Variable('?x'): 1, Variable('?y'): 2}) + [x={1, 2}] + +In addition to the basic feature value tuple & set classes, nltk +defines feature value unions (for sets) and feature value +concatenations (for tuples). These are written using '+', and can be +used to combine sets & tuples: + + >>> fs1 = FeatStruct('[x=((1, 2)+?z), z=?z]') + >>> fs1 + [x=((1, 2)+?z), z=?z] + >>> fs1.unify(FeatStruct('[z=(3, 4, 5)]')) + [x=(1, 2, 3, 4, 5), z=(3, 4, 5)] + +Thus, feature value tuples and sets can be used to build up tuples +and sets of values over the corse of unification. For example, when +parsing sentences using a semantic feature grammar, feature sets or +feature tuples can be used to build a list of semantic predicates as +the sentence is parsed. + +As was mentioned above, unification does not apply to feature value +tuples and sets. One reason for this that it's impossible to define a +single correct answer for unification when concatenation is used. +Consider the following example: + + >>> fs1 = FeatStruct('[x=(1, 2, 3, 4)]') + >>> fs2 = FeatStruct('[x=(?a+?b), a=?a, b=?b]') + +If unification applied to feature tuples, then the unification +algorithm would have to arbitrarily choose how to divide the tuple +(1,2,3,4) into two parts. Instead, the unification algorithm refuses +to make this decision, and simply unifies based on value. Because +(1,2,3,4) is not equal to (?a+?b), fs1 and fs2 will not unify: + + >>> print(fs1.unify(fs2)) + None + +If you need a list-like structure that unification does apply to, use +`FeatList`. + +.. + >>> del fs1, fs2 # clean-up + +Light-weight Feature Structures +------------------------------- +Many of the functions defined by `nltk.featstruct` can be applied +directly to simple Python dictionaries and lists, rather than to +full-fledged `FeatDict` and `FeatList` objects. In other words, +Python ``dicts`` and ``lists`` can be used as "light-weight" feature +structures. + + >>> # Note: pprint prints dicts sorted + >>> from pprint import pprint + >>> from nltk.featstruct import unify + >>> pprint(unify(dict(x=1, y=dict()), dict(a='a', y=dict(b='b')))) + {'a': 'a', 'x': 1, 'y': {'b': 'b'}} + +However, you should keep in mind the following caveats: + +- Python dictionaries & lists ignore reentrance when checking for + equality between values. But two FeatStructs with different + reentrances are considered nonequal, even if all their base + values are equal. + +- FeatStructs can be easily frozen, allowing them to be used as + keys in hash tables. Python dictionaries and lists can not. + +- FeatStructs display reentrance in their string representations; + Python dictionaries and lists do not. + +- FeatStructs may *not* be mixed with Python dictionaries and lists + (e.g., when performing unification). + +- FeatStructs provide a number of useful methods, such as `walk()` + and `cyclic()`, which are not available for Python dicts & lists. 
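+
+For instance, the point about hashing can be illustrated directly: once
+frozen, a ``FeatStruct`` can serve as a dictionary key, whereas an
+equivalent plain ``dict`` cannot (a minimal sketch):
+
+ >>> frozen = FeatStruct(x=1)
+ >>> frozen.freeze()
+ >>> index = {frozen: 'value'} # frozen FeatStructs are hashable
+ >>> index = {dict(x=1): 'value'} # plain dicts are not
+ Traceback (most recent call last):
+ . . .
+ TypeError: unhashable type: 'dict'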
+ +In general, if your feature structures will contain any reentrances, +or if you plan to use them as dictionary keys, it is strongly +recommended that you use full-fledged `FeatStruct` objects. + +Custom Feature Values +--------------------- +The abstract base class `CustomFeatureValue` can be used to define new +base value types that have custom unification methods. For example, +the following feature value type encodes a range, and defines +unification as taking the intersection on the ranges: + + >>> from functools import total_ordering + >>> from nltk.featstruct import CustomFeatureValue, UnificationFailure + >>> @total_ordering + ... class Range(CustomFeatureValue): + ... def __init__(self, low, high): + ... assert low <= high + ... self.low = low + ... self.high = high + ... def unify(self, other): + ... if not isinstance(other, Range): + ... return UnificationFailure + ... low = max(self.low, other.low) + ... high = min(self.high, other.high) + ... if low <= high: return Range(low, high) + ... else: return UnificationFailure + ... def __repr__(self): + ... return '(%s>> fs1 = FeatStruct(x=Range(5,8), y=FeatStruct(z=Range(7,22))) + >>> print(fs1.unify(FeatStruct(x=Range(6, 22)))) + [ x = (6>> print(fs1.unify(FeatStruct(x=Range(9, 12)))) + None + >>> print(fs1.unify(FeatStruct(x=12))) + None + >>> print(fs1.unify(FeatStruct('[x=?x, y=[z=?x]]'))) + [ x = (7>> fs1 = FeatStruct(a=1, b=2, c=3) + >>> fs2 = FeatStruct(x=fs1, y='x') + +Feature structures support all dictionary methods (excluding the class +method `dict.fromkeys()`). Non-mutating methods: + + >>> sorted(fs2.keys()) # keys() + ['x', 'y'] + >>> sorted(fs2.values()) # values() + [[a=1, b=2, c=3], 'x'] + >>> sorted(fs2.items()) # items() + [('x', [a=1, b=2, c=3]), ('y', 'x')] + >>> sorted(fs2) # __iter__() + ['x', 'y'] + >>> 'a' in fs2, 'x' in fs2 # __contains__() + (False, True) + >>> fs2.has_key('a'), fs2.has_key('x') # has_key() + (False, True) + >>> fs2['x'], fs2['y'] # __getitem__() + ([a=1, b=2, c=3], 'x') + >>> fs2['a'] # __getitem__() + Traceback (most recent call last): + . . . + KeyError: 'a' + >>> fs2.get('x'), fs2.get('y'), fs2.get('a') # get() + ([a=1, b=2, c=3], 'x', None) + >>> fs2.get('x', 'hello'), fs2.get('a', 'hello') # get() + ([a=1, b=2, c=3], 'hello') + >>> len(fs1), len(fs2) # __len__ + (3, 2) + >>> fs2.copy() # copy() + [x=[a=1, b=2, c=3], y='x'] + >>> fs2.copy() is fs2 # copy() + False + +Note: by default, `FeatStruct.copy()` does a deep copy. Use +`FeatStruct.copy(deep=False)` for a shallow copy. + +.. + >>> del fs1, fs2 # clean-up. + +Dictionary access methods (mutating) +------------------------------------ + >>> fs1 = FeatStruct(a=1, b=2, c=3) + >>> fs2 = FeatStruct(x=fs1, y='x') + +Setting features (`__setitem__()`) + + >>> fs1['c'] = 5 + >>> fs1 + [a=1, b=2, c=5] + >>> fs1['x'] = 12 + >>> fs1 + [a=1, b=2, c=5, x=12] + >>> fs2['x', 'a'] = 2 + >>> fs2 + [x=[a=2, b=2, c=5, x=12], y='x'] + >>> fs1 + [a=2, b=2, c=5, x=12] + +Deleting features (`__delitem__()`) + + >>> del fs1['x'] + >>> fs1 + [a=2, b=2, c=5] + >>> del fs2['x', 'a'] + >>> fs1 + [b=2, c=5] + +`setdefault()`: + + >>> fs1.setdefault('b', 99) + 2 + >>> fs1 + [b=2, c=5] + >>> fs1.setdefault('x', 99) + 99 + >>> fs1 + [b=2, c=5, x=99] + +`update()`: + + >>> fs2.update({'a':'A', 'b':'B'}, c='C') + >>> fs2 + [a='A', b='B', c='C', x=[b=2, c=5, x=99], y='x'] + +`pop()`: + + >>> fs2.pop('a') + 'A' + >>> fs2 + [b='B', c='C', x=[b=2, c=5, x=99], y='x'] + >>> fs2.pop('a') + Traceback (most recent call last): + . . . 
+ KeyError: 'a' + >>> fs2.pop('a', 'foo') + 'foo' + >>> fs2 + [b='B', c='C', x=[b=2, c=5, x=99], y='x'] + +`clear()`: + + >>> fs1.clear() + >>> fs1 + [] + >>> fs2 + [b='B', c='C', x=[], y='x'] + +`popitem()`: + + >>> sorted([fs2.popitem() for i in range(len(fs2))]) + [('b', 'B'), ('c', 'C'), ('x', []), ('y', 'x')] + >>> fs2 + [] + +Once a feature structure has been frozen, it may not be mutated. + + >>> fs1 = FeatStruct('[x=1, y=2, z=[a=3]]') + >>> fs1.freeze() + >>> fs1.frozen() + True + >>> fs1['z'].frozen() + True + + >>> fs1['x'] = 5 + Traceback (most recent call last): + . . . + ValueError: Frozen FeatStructs may not be modified. + >>> del fs1['x'] + Traceback (most recent call last): + . . . + ValueError: Frozen FeatStructs may not be modified. + >>> fs1.clear() + Traceback (most recent call last): + . . . + ValueError: Frozen FeatStructs may not be modified. + >>> fs1.pop('x') + Traceback (most recent call last): + . . . + ValueError: Frozen FeatStructs may not be modified. + >>> fs1.popitem() + Traceback (most recent call last): + . . . + ValueError: Frozen FeatStructs may not be modified. + >>> fs1.setdefault('x') + Traceback (most recent call last): + . . . + ValueError: Frozen FeatStructs may not be modified. + >>> fs1.update(z=22) + Traceback (most recent call last): + . . . + ValueError: Frozen FeatStructs may not be modified. + +.. + >>> del fs1, fs2 # clean-up. + +Feature Paths +------------- +Make sure that __getitem__ with feature paths works as intended: + + >>> fs1 = FeatStruct(a=1, b=2, + ... c=FeatStruct( + ... d=FeatStruct(e=12), + ... f=FeatStruct(g=55, h='hello'))) + >>> fs1[()] + [a=1, b=2, c=[d=[e=12], f=[g=55, h='hello']]] + >>> fs1['a'], fs1[('a',)] + (1, 1) + >>> fs1['c','d','e'] + 12 + >>> fs1['c','f','g'] + 55 + +Feature paths that select unknown features raise KeyError: + + >>> fs1['c', 'f', 'e'] + Traceback (most recent call last): + . . . + KeyError: ('c', 'f', 'e') + >>> fs1['q', 'p'] + Traceback (most recent call last): + . . . + KeyError: ('q', 'p') + +Feature paths that try to go 'through' a feature that's not a feature +structure raise KeyError: + + >>> fs1['a', 'b'] + Traceback (most recent call last): + . . . + KeyError: ('a', 'b') + +Feature paths can go through reentrant structures: + + >>> fs2 = FeatStruct('(1)[a=[b=[c->(1), d=5], e=11]]') + >>> fs2['a', 'b', 'c', 'a', 'e'] + 11 + >>> fs2['a', 'b', 'c', 'a', 'b', 'd'] + 5 + >>> fs2[tuple('abcabcabcabcabcabcabcabcabcabca')] + (1)[b=[c=[a->(1)], d=5], e=11] + +Indexing requires strings, `Feature`\s, or tuples; other types raise a +TypeError: + + >>> fs2[12] + Traceback (most recent call last): + . . . + TypeError: Expected feature name or path. Got 12. + >>> fs2[list('abc')] + Traceback (most recent call last): + . . . + TypeError: Expected feature name or path. Got ['a', 'b', 'c']. + +Feature paths can also be used with `get()`, `has_key()`, and +`__contains__()`. + + >>> fpath1 = tuple('abcabc') + >>> fpath2 = tuple('abcabz') + >>> fs2.get(fpath1), fs2.get(fpath2) + ((1)[a=[b=[c->(1), d=5], e=11]], None) + >>> fpath1 in fs2, fpath2 in fs2 + (True, False) + >>> fs2.has_key(fpath1), fs2.has_key(fpath2) + (True, False) + +.. + >>> del fs1, fs2 # clean-up + +Reading Feature Structures +-------------------------- + +Empty feature struct: + + >>> FeatStruct('[]') + [] + +Test features with integer values: + + >>> FeatStruct('[a=12, b=-33, c=0]') + [a=12, b=-33, c=0] + +Test features with string values. Either single or double quotes may +be used. 
Strings are evaluated just like python strings -- in +particular, you can use escape sequences and 'u' and 'r' prefixes, and +triple-quoted strings. + + >>> FeatStruct('[a="", b="hello", c="\'", d=\'\', e=\'"\']') + [a='', b='hello', c="'", d='', e='"'] + >>> FeatStruct(r'[a="\\", b="\"", c="\x6f\\y", d="12"]') + [a='\\', b='"', c='o\\y', d='12'] + >>> FeatStruct(r'[b=r"a\b\c"]') + [b='a\\b\\c'] + >>> FeatStruct('[x="""a"""]') + [x='a'] + +Test parsing of reentrant feature structures. + + >>> FeatStruct('[a=(1)[], b->(1)]') + [a=(1)[], b->(1)] + >>> FeatStruct('[a=(1)[x=1, y=2], b->(1)]') + [a=(1)[x=1, y=2], b->(1)] + +Test parsing of cyclic feature structures. + + >>> FeatStruct('[a=(1)[b->(1)]]') + [a=(1)[b->(1)]] + >>> FeatStruct('(1)[a=[b=[c->(1)]]]') + (1)[a=[b=[c->(1)]]] + +Strings of the form "+name" and "-name" may be used to specify boolean +values. + + >>> FeatStruct('[-bar, +baz, +foo]') + [-bar, +baz, +foo] + +None, True, and False are recognized as values: + + >>> FeatStruct('[bar=True, baz=False, foo=None]') + [+bar, -baz, foo=None] + +Special features: + + >>> FeatStruct('NP/VP') + NP[]/VP[] + >>> FeatStruct('?x/?x') + ?x[]/?x[] + >>> print(FeatStruct('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]')) + [ *type* = 'VP' ] + [ ] + [ [ *type* = 'NP' ] ] + [ *slash* = [ agr = ?x ] ] + [ [ pl = True ] ] + [ ] + [ agr = ?x ] + [ fin = True ] + [ tense = 'past' ] + +Here the slash feature gets coerced: + >>> FeatStruct('[*slash*=a, x=b, *type*="NP"]') + NP[x='b']/a[] + + >>> FeatStruct('NP[sem=]/NP') + NP[sem=]/NP[] + >>> FeatStruct('S[sem=]') + S[sem=] + >>> print(FeatStruct('NP[sem=]/NP')) + [ *type* = 'NP' ] + [ ] + [ *slash* = [ *type* = 'NP' ] ] + [ ] + [ sem = ] + +Playing with ranges: + + >>> from nltk.featstruct import RangeFeature, FeatStructReader + >>> width = RangeFeature('width') + >>> reader = FeatStructReader([width]) + >>> fs1 = reader.fromstring('[*width*=-5:12]') + >>> fs2 = reader.fromstring('[*width*=2:123]') + >>> fs3 = reader.fromstring('[*width*=-7:-2]') + >>> fs1.unify(fs2) + [*width*=(2, 12)] + >>> fs1.unify(fs3) + [*width*=(-5, -2)] + >>> print(fs2.unify(fs3)) # no overlap in width. + None + +The slash feature has a default value of 'False': + + >>> print(FeatStruct('NP[]/VP').unify(FeatStruct('NP[]'), trace=1)) + + Unification trace: + / NP[]/VP[] + |\ NP[] + | + | Unify feature: *type* + | / 'NP' + | |\ 'NP' + | | + | +-->'NP' + | + | Unify feature: *slash* + | / VP[] + | |\ False + | | + X X <-- FAIL + None + +The demo structures from category.py. They all parse, but they don't +do quite the right thing, -- ?x vs x. + + >>> FeatStruct(pos='n', agr=FeatStruct(number='pl', gender='f')) + [agr=[gender='f', number='pl'], pos='n'] + >>> FeatStruct(r'NP[sem=]/NP') + NP[sem=]/NP[] + >>> FeatStruct(r'S[sem=]') + S[sem=] + >>> FeatStruct('?x/?x') + ?x[]/?x[] + >>> FeatStruct('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]') + VP[agr=?x, +fin, tense='past']/NP[agr=?x, +pl] + >>> FeatStruct('S[sem = ]') + S[sem=] + + >>> FeatStruct('S') + S[] + +The parser also includes support for reading sets and tuples. + + >>> FeatStruct('[x={1,2,2,2}, y={/}]') + [x={1, 2}, y={/}] + >>> FeatStruct('[x=(1,2,2,2), y=()]') + [x=(1, 2, 2, 2), y=()] + >>> print(FeatStruct('[x=(1,[z=(1,2,?x)],?z,{/})]')) + [ x = (1, [ z = (1, 2, ?x) ], ?z, {/}) ] + +Note that we can't put a featstruct inside a tuple, because doing so +would hash it, and it's not frozen yet: + + >>> print(FeatStruct('[x={[]}]')) + Traceback (most recent call last): + . . . 
+ TypeError: FeatStructs must be frozen before they can be hashed. + +There's a special syntax for taking the union of sets: "{...+...}". +The elements should only be variables or sets. + + >>> FeatStruct('[x={?a+?b+{1,2,3}}]') + [x={?a+?b+{1, 2, 3}}] + +There's a special syntax for taking the concatenation of tuples: +"(...+...)". The elements should only be variables or tuples. + + >>> FeatStruct('[x=(?a+?b+(1,2,3))]') + [x=(?a+?b+(1, 2, 3))] + +Parsing gives helpful messages if your string contains an error. + + >>> FeatStruct('[a=, b=5]]') + Traceback (most recent call last): + . . . + ValueError: Error parsing feature structure + [a=, b=5]] + ^ Expected value + >>> FeatStruct('[a=12 22, b=33]') + Traceback (most recent call last): + . . . + ValueError: Error parsing feature structure + [a=12 22, b=33] + ^ Expected comma + >>> FeatStruct('[a=5] [b=6]') + Traceback (most recent call last): + . . . + ValueError: Error parsing feature structure + [a=5] [b=6] + ^ Expected end of string + >>> FeatStruct(' *++*') + Traceback (most recent call last): + . . . + ValueError: Error parsing feature structure + *++* + ^ Expected open bracket or identifier + >>> FeatStruct('[x->(1)]') + Traceback (most recent call last): + . . . + ValueError: Error parsing feature structure + [x->(1)] + ^ Expected bound identifier + >>> FeatStruct('[x->y]') + Traceback (most recent call last): + . . . + ValueError: Error parsing feature structure + [x->y] + ^ Expected identifier + >>> FeatStruct('') + Traceback (most recent call last): + . . . + ValueError: Error parsing feature structure + + ^ Expected open bracket or identifier + + +Unification +----------- +Very simple unifications give the expected results: + + >>> FeatStruct().unify(FeatStruct()) + [] + >>> FeatStruct(number='singular').unify(FeatStruct()) + [number='singular'] + >>> FeatStruct().unify(FeatStruct(number='singular')) + [number='singular'] + >>> FeatStruct(number='singular').unify(FeatStruct(person=3)) + [number='singular', person=3] + +Merging nested structures: + + >>> fs1 = FeatStruct('[A=[B=b]]') + >>> fs2 = FeatStruct('[A=[C=c]]') + >>> fs1.unify(fs2) + [A=[B='b', C='c']] + >>> fs2.unify(fs1) + [A=[B='b', C='c']] + +A basic case of reentrant unification + + >>> fs4 = FeatStruct('[A=(1)[B=b], E=[F->(1)]]') + >>> fs5 = FeatStruct("[A=[C='c'], E=[F=[D='d']]]") + >>> fs4.unify(fs5) + [A=(1)[B='b', C='c', D='d'], E=[F->(1)]] + >>> fs5.unify(fs4) + [A=(1)[B='b', C='c', D='d'], E=[F->(1)]] + +More than 2 paths to a value + + >>> fs1 = FeatStruct("[a=[],b=[],c=[],d=[]]") + >>> fs2 = FeatStruct('[a=(1)[], b->(1), c->(1), d->(1)]') + >>> fs1.unify(fs2) + [a=(1)[], b->(1), c->(1), d->(1)] + +fs1[a] gets unified with itself + + >>> fs1 = FeatStruct('[x=(1)[], y->(1)]') + >>> fs2 = FeatStruct('[x=(1)[], y->(1)]') + >>> fs1.unify(fs2) + [x=(1)[], y->(1)] + +Bound variables should get forwarded appropriately + + >>> fs1 = FeatStruct('[A=(1)[X=x], B->(1), C=?cvar, D=?dvar]') + >>> fs2 = FeatStruct('[A=(1)[Y=y], B=(2)[Z=z], C->(1), D->(2)]') + >>> fs1.unify(fs2) + [A=(1)[X='x', Y='y', Z='z'], B->(1), C->(1), D->(1)] + >>> fs2.unify(fs1) + [A=(1)[X='x', Y='y', Z='z'], B->(1), C->(1), D->(1)] + +Cyclic structure created by unification. 
+ + >>> fs1 = FeatStruct('[F=(1)[], G->(1)]') + >>> fs2 = FeatStruct('[F=[H=(2)[]], G->(2)]') + >>> fs3 = fs1.unify(fs2) + >>> fs3 + [F=(1)[H->(1)], G->(1)] + >>> fs3['F'] is fs3['G'] + True + >>> fs3['F'] is fs3['G']['H'] + True + >>> fs3['F'] is fs3['G']['H']['H'] + True + >>> fs3['F'] is fs3['F']['H']['H']['H']['H']['H']['H']['H']['H'] + True + +Cyclic structure created w/ variables. + + >>> fs1 = FeatStruct('[F=[H=?x]]') + >>> fs2 = FeatStruct('[F=?x]') + >>> fs3 = fs1.unify(fs2, rename_vars=False) + >>> fs3 + [F=(1)[H->(1)]] + >>> fs3['F'] is fs3['F']['H'] + True + >>> fs3['F'] is fs3['F']['H']['H'] + True + >>> fs3['F'] is fs3['F']['H']['H']['H']['H']['H']['H']['H']['H'] + True + +Unifying w/ a cyclic feature structure. + + >>> fs4 = FeatStruct('[F=[H=[H=[H=(1)[]]]], K->(1)]') + >>> fs3.unify(fs4) + [F=(1)[H->(1)], K->(1)] + >>> fs4.unify(fs3) + [F=(1)[H->(1)], K->(1)] + +Variable bindings should preserve reentrance. + + >>> bindings = {} + >>> fs1 = FeatStruct("[a=?x]") + >>> fs2 = fs1.unify(FeatStruct("[a=[]]"), bindings) + >>> fs2['a'] is bindings[Variable('?x')] + True + >>> fs2.unify(FeatStruct("[b=?x]"), bindings) + [a=(1)[], b->(1)] + +Aliased variable tests + + >>> fs1 = FeatStruct("[a=?x, b=?x]") + >>> fs2 = FeatStruct("[b=?y, c=?y]") + >>> bindings = {} + >>> fs3 = fs1.unify(fs2, bindings) + >>> fs3 + [a=?x, b=?x, c=?x] + >>> bindings + {Variable('?y'): Variable('?x')} + >>> fs3.unify(FeatStruct("[a=1]")) + [a=1, b=1, c=1] + +If we keep track of the bindings, then we can use the same variable +over multiple calls to unify. + + >>> bindings = {} + >>> fs1 = FeatStruct('[a=?x]') + >>> fs2 = fs1.unify(FeatStruct('[a=[]]'), bindings) + >>> fs2.unify(FeatStruct('[b=?x]'), bindings) + [a=(1)[], b->(1)] + >>> bindings + {Variable('?x'): []} + +.. + >>> del fs1, fs2, fs3, fs4, fs5 # clean-up + +Unification Bindings +-------------------- + + >>> bindings = {} + >>> fs1 = FeatStruct('[a=?x]') + >>> fs2 = FeatStruct('[a=12]') + >>> fs3 = FeatStruct('[b=?x]') + >>> fs1.unify(fs2, bindings) + [a=12] + >>> bindings + {Variable('?x'): 12} + >>> fs3.substitute_bindings(bindings) + [b=12] + >>> fs3 # substitute_bindings didn't mutate fs3. + [b=?x] + >>> fs2.unify(fs3, bindings) + [a=12, b=12] + + >>> bindings = {} + >>> fs1 = FeatStruct('[a=?x, b=1]') + >>> fs2 = FeatStruct('[a=5, b=?x]') + >>> fs1.unify(fs2, bindings) + [a=5, b=1] + >>> sorted(bindings.items()) + [(Variable('?x'), 5), (Variable('?x2'), 1)] + +.. + >>> del fs1, fs2, fs3 # clean-up + +Expressions +----------- + + >>> e = Expression.fromstring('\\P y.P(z,y)') + >>> fs1 = FeatStruct(x=e, y=Variable('z')) + >>> fs2 = FeatStruct(y=VariableExpression(Variable('John'))) + >>> fs1.unify(fs2) + [x=<\P y.P(John,y)>, y=] + +Remove Variables +---------------- + + >>> FeatStruct('[a=?x, b=12, c=[d=?y]]').remove_variables() + [b=12, c=[]] + >>> FeatStruct('(1)[a=[b=?x,c->(1)]]').remove_variables() + (1)[a=[c->(1)]] + +Equality & Hashing +------------------ +The `equal_values` method checks whether two feature structures assign +the same value to every feature. If the optional argument +``check_reentrances`` is supplied, then it also returns false if there +is any difference in the reentrances. + + >>> a = FeatStruct('(1)[x->(1)]') + >>> b = FeatStruct('(1)[x->(1)]') + >>> c = FeatStruct('(1)[x=[x->(1)]]') + >>> d = FeatStruct('[x=(1)[x->(1)]]') + >>> e = FeatStruct('(1)[x=[x->(1), y=1], y=1]') + >>> def compare(x,y): + ... assert x.equal_values(y, True) == y.equal_values(x, True) + ... 
assert x.equal_values(y, False) == y.equal_values(x, False) + ... if x.equal_values(y, True): + ... assert x.equal_values(y, False) + ... print('equal values, same reentrance') + ... elif x.equal_values(y, False): + ... print('equal values, different reentrance') + ... else: + ... print('different values') + + >>> compare(a, a) + equal values, same reentrance + >>> compare(a, b) + equal values, same reentrance + >>> compare(a, c) + equal values, different reentrance + >>> compare(a, d) + equal values, different reentrance + >>> compare(c, d) + equal values, different reentrance + >>> compare(a, e) + different values + >>> compare(c, e) + different values + >>> compare(d, e) + different values + >>> compare(e, e) + equal values, same reentrance + +Feature structures may not be hashed until they are frozen: + + >>> hash(a) + Traceback (most recent call last): + . . . + TypeError: FeatStructs must be frozen before they can be hashed. + >>> a.freeze() + >>> v = hash(a) + +Feature structures define hash consistently. The following example +looks at the hash value for each (fs1,fs2) pair; if their hash values +are not equal, then they must not be equal. If their hash values are +equal, then display a message, and indicate whether their values are +indeed equal. Note that c and d currently have the same hash value, +even though they are not equal. That is not a bug, strictly speaking, +but it wouldn't be a bad thing if it changed. + + >>> for fstruct in (a, b, c, d, e): + ... fstruct.freeze() + >>> for fs1_name in 'abcde': + ... for fs2_name in 'abcde': + ... fs1 = locals()[fs1_name] + ... fs2 = locals()[fs2_name] + ... if hash(fs1) != hash(fs2): + ... assert fs1 != fs2 + ... else: + ... print('%s and %s have the same hash value,' % + ... (fs1_name, fs2_name)) + ... if fs1 == fs2: print('and are equal') + ... else: print('and are not equal') + a and a have the same hash value, and are equal + a and b have the same hash value, and are equal + b and a have the same hash value, and are equal + b and b have the same hash value, and are equal + c and c have the same hash value, and are equal + c and d have the same hash value, and are not equal + d and c have the same hash value, and are not equal + d and d have the same hash value, and are equal + e and e have the same hash value, and are equal + +.. + >>> del a, b, c, d, e, v # clean-up + +Tracing +------- + + >>> fs1 = FeatStruct('[a=[b=(1)[], c=?x], d->(1), e=[f=?x]]') + >>> fs2 = FeatStruct('[a=(1)[c="C"], e=[g->(1)]]') + >>> fs1.unify(fs2, trace=True) + + Unification trace: + / [a=[b=(1)[], c=?x], d->(1), e=[f=?x]] + |\ [a=(1)[c='C'], e=[g->(1)]] + | + | Unify feature: a + | / [b=[], c=?x] + | |\ [c='C'] + | | + | | Unify feature: a.c + | | / ?x + | | |\ 'C' + | | | + | | +-->Variable('?x') + | | + | +-->[b=[], c=?x] + | Bindings: {?x: 'C'} + | + | Unify feature: e + | / [f=?x] + | |\ [g=[c='C']] + | | + | +-->[f=?x, g=[b=[], c=?x]] + | Bindings: {?x: 'C'} + | + +-->[a=(1)[b=(2)[], c='C'], d->(2), e=[f='C', g->(1)]] + Bindings: {?x: 'C'} + [a=(1)[b=(2)[], c='C'], d->(2), e=[f='C', g->(1)]] + >>> + >>> fs1 = FeatStruct('[a=?x, b=?z, c=?z]') + >>> fs2 = FeatStruct('[a=?y, b=?y, c=?q]') + >>> #fs1.unify(fs2, trace=True) + >>> + +.. 
+ >>> del fs1, fs2 # clean-up + +Unification on Dicts & Lists +---------------------------- +It's possible to do unification on dictionaries: + + >>> from nltk.featstruct import unify + >>> pprint(unify(dict(x=1, y=dict(z=2)), dict(x=1, q=5)), width=1) + {'q': 5, 'x': 1, 'y': {'z': 2}} + +It's possible to do unification on lists as well: + + >>> unify([1, 2, 3], [1, Variable('x'), 3]) + [1, 2, 3] + +Mixing dicts and lists is fine: + + >>> pprint(unify([dict(x=1, y=dict(z=2)),3], [dict(x=1, q=5),3]), + ... width=1) + [{'q': 5, 'x': 1, 'y': {'z': 2}}, 3] + +Mixing dicts and FeatStructs is discouraged: + + >>> unify(dict(x=1), FeatStruct(x=1)) + Traceback (most recent call last): + . . . + ValueError: Mixing FeatStruct objects with Python dicts and lists is not supported. + +But you can do it if you really want, by explicitly stating that both +dictionaries and FeatStructs should be treated as feature structures: + + >>> unify(dict(x=1), FeatStruct(x=1), fs_class=(dict, FeatStruct)) + {'x': 1} + +Finding Conflicts +----------------- + + >>> from nltk.featstruct import conflicts + >>> fs1 = FeatStruct('[a=[b=(1)[c=2], d->(1), e=[f->(1)]]]') + >>> fs2 = FeatStruct('[a=[b=[c=[x=5]], d=[c=2], e=[f=[c=3]]]]') + >>> for path in conflicts(fs1, fs2): + ... print('%-8s: %r vs %r' % ('.'.join(path), fs1[path], fs2[path])) + a.b.c : 2 vs [x=5] + a.e.f.c : 2 vs 3 + +.. + >>> del fs1, fs2 # clean-up + +Retracting Bindings +------------------- + + >>> from nltk.featstruct import retract_bindings + >>> bindings = {} + >>> fs1 = FeatStruct('[a=?x, b=[c=?y]]') + >>> fs2 = FeatStruct('[a=(1)[c=[d=1]], b->(1)]') + >>> fs3 = fs1.unify(fs2, bindings) + >>> print(fs3) + [ a = (1) [ c = [ d = 1 ] ] ] + [ ] + [ b -> (1) ] + >>> pprint(bindings) + {Variable('?x'): [c=[d=1]], Variable('?y'): [d=1]} + >>> retract_bindings(fs3, bindings) + [a=?x, b=?x] + >>> pprint(bindings) + {Variable('?x'): [c=?y], Variable('?y'): [d=1]} + +Squashed Bugs +~~~~~~~~~~~~~ +In svn rev 5167, unifying two feature structures that used the same +variable would cause those variables to become aliased in the output. + + >>> fs1 = FeatStruct('[a=?x]') + >>> fs2 = FeatStruct('[b=?x]') + >>> fs1.unify(fs2) + [a=?x, b=?x2] + +There was a bug in svn revision 5172 that caused `rename_variables` to +rename variables to names that are already used. + + >>> FeatStruct('[a=?x, b=?x2]').rename_variables( + ... vars=[Variable('?x')]) + [a=?x3, b=?x2] + >>> fs1 = FeatStruct('[a=?x]') + >>> fs2 = FeatStruct('[a=?x, b=?x2]') + >>> fs1.unify(fs2) + [a=?x, b=?x2] + +There was a bug in svn rev 5167 that caused us to get the following +example wrong. Basically the problem was that we only followed +'forward' pointers for other, not self, when unifying two feature +structures. (nb: this test assumes that features are unified in +alphabetical order -- if they are not, it might pass even if the bug +is present.) + + >>> fs1 = FeatStruct('[a=[x=1], b=?x, c=?x]') + >>> fs2 = FeatStruct('[a=(1)[], b->(1), c=[x=2]]') + >>> print(fs1.unify(fs2)) + None + +.. + >>> del fs1, fs2 # clean-up diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/framenet.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/framenet.doctest new file mode 100644 index 0000000..6de3a41 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/framenet.doctest @@ -0,0 +1,288 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. 
For license information, see LICENSE.TXT + +======== +FrameNet +======== + +The FrameNet corpus is a lexical database of English that is both human- +and machine-readable, based on annotating examples of how words are used +in actual texts. FrameNet is based on a theory of meaning called Frame +Semantics, deriving from the work of Charles J. Fillmore and colleagues. +The basic idea is straightforward: that the meanings of most words can +best be understood on the basis of a semantic frame: a description of a +type of event, relation, or entity and the participants in it. For +example, the concept of cooking typically involves a person doing the +cooking (Cook), the food that is to be cooked (Food), something to hold +the food while cooking (Container) and a source of heat +(Heating_instrument). In the FrameNet project, this is represented as a +frame called Apply_heat, and the Cook, Food, Heating_instrument and +Container are called frame elements (FEs). Words that evoke this frame, +such as fry, bake, boil, and broil, are called lexical units (LUs) of +the Apply_heat frame. The job of FrameNet is to define the frames +and to annotate sentences to show how the FEs fit syntactically around +the word that evokes the frame. + +------ +Frames +------ + +A Frame is a script-like conceptual structure that describes a +particular type of situation, object, or event along with the +participants and props that are needed for that Frame. For +example, the "Apply_heat" frame describes a common situation +involving a Cook, some Food, and a Heating_Instrument, and is +evoked by words such as bake, blanch, boil, broil, brown, +simmer, steam, etc. + +We call the roles of a Frame "frame elements" (FEs) and the +frame-evoking words are called "lexical units" (LUs). + +FrameNet includes relations between Frames. Several types of +relations are defined, of which the most important are: + +- Inheritance: An IS-A relation. The child frame is a subtype + of the parent frame, and each FE in the parent is bound to + a corresponding FE in the child. An example is the + "Revenge" frame which inherits from the + "Rewards_and_punishments" frame. + +- Using: The child frame presupposes the parent frame as + background, e.g the "Speed" frame "uses" (or presupposes) + the "Motion" frame; however, not all parent FEs need to be + bound to child FEs. + +- Subframe: The child frame is a subevent of a complex event + represented by the parent, e.g. the "Criminal_process" frame + has subframes of "Arrest", "Arraignment", "Trial", and + "Sentencing". + +- Perspective_on: The child frame provides a particular + perspective on an un-perspectivized parent frame. A pair of + examples consists of the "Hiring" and "Get_a_job" frames, + which perspectivize the "Employment_start" frame from the + Employer's and the Employee's point of view, respectively. + +To get a list of all of the Frames in FrameNet, you can use the +`frames()` function. If you supply a regular expression pattern to the +`frames()` function, you will get a list of all Frames whose names match +that pattern: + + >>> from pprint import pprint + >>> from operator import itemgetter + >>> from nltk.corpus import framenet as fn + >>> from nltk.corpus.reader.framenet import PrettyList + >>> x = fn.frames(r'(?i)crim') + >>> x.sort(key=itemgetter('ID')) + >>> x + [, , ...] + >>> PrettyList(sorted(x, key=itemgetter('ID'))) + [, , ...] 
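+
+Since the result is an ordinary Python list, it can also be counted or
+filtered without printing every frame; the exact count depends on the
+installed FrameNet release, but there should be at least two matches for
+this pattern:
+
+ >>> len(fn.frames(r'(?i)crim')) >= 2
+ True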
+ +To get the details of a particular Frame, you can use the `frame()` +function passing in the frame number: + + >>> from pprint import pprint + >>> from nltk.corpus import framenet as fn + >>> f = fn.frame(202) + >>> f.ID + 202 + >>> f.name + 'Arrest' + >>> f.definition # doctest: +ELLIPSIS + "Authorities charge a Suspect, who is under suspicion of having committed a crime..." + >>> len(f.lexUnit) + 11 + >>> pprint(sorted([x for x in f.FE])) + ['Authorities', + 'Charges', + 'Co-participant', + 'Manner', + 'Means', + 'Offense', + 'Place', + 'Purpose', + 'Source_of_legal_authority', + 'Suspect', + 'Time', + 'Type'] + >>> pprint(f.frameRelations) + [ Child=Arrest>, Component=Arrest>, ...] + +The `frame()` function shown above returns a dict object containing +detailed information about the Frame. See the documentation on the +`frame()` function for the specifics. + +You can also search for Frames by their Lexical Units (LUs). The +`frames_by_lemma()` function returns a list of all frames that contain +LUs in which the 'name' attribute of the LU matchs the given regular +expression. Note that LU names are composed of "lemma.POS", where the +"lemma" part can be made up of either a single lexeme (e.g. 'run') or +multiple lexemes (e.g. 'a little') (see below). + + >>> PrettyList(sorted(fn.frames_by_lemma(r'(?i)a little'), key=itemgetter('ID'))) # doctest: +ELLIPSIS + [, ] + +------------- +Lexical Units +------------- + +A lexical unit (LU) is a pairing of a word with a meaning. For +example, the "Apply_heat" Frame describes a common situation +involving a Cook, some Food, and a Heating Instrument, and is +_evoked_ by words such as bake, blanch, boil, broil, brown, +simmer, steam, etc. These frame-evoking words are the LUs in the +Apply_heat frame. Each sense of a polysemous word is a different +LU. + +We have used the word "word" in talking about LUs. The reality +is actually rather complex. When we say that the word "bake" is +polysemous, we mean that the lemma "bake.v" (which has the +word-forms "bake", "bakes", "baked", and "baking") is linked to +three different frames: + +- Apply_heat: "Michelle baked the potatoes for 45 minutes." + +- Cooking_creation: "Michelle baked her mother a cake for her birthday." + +- Absorb_heat: "The potatoes have to bake for more than 30 minutes." + +These constitute three different LUs, with different +definitions. + +Multiword expressions such as "given name" and hyphenated words +like "shut-eye" can also be LUs. Idiomatic phrases such as +"middle of nowhere" and "give the slip (to)" are also defined as +LUs in the appropriate frames ("Isolated_places" and "Evading", +respectively), and their internal structure is not analyzed. + +Framenet provides multiple annotated examples of each sense of a +word (i.e. each LU). Moreover, the set of examples +(approximately 20 per LU) illustrates all of the combinatorial +possibilities of the lexical unit. + +Each LU is linked to a Frame, and hence to the other words which +evoke that Frame. This makes the FrameNet database similar to a +thesaurus, grouping together semantically similar words. + +In the simplest case, frame-evoking words are verbs such as +"fried" in: + + "Matilde fried the catfish in a heavy iron skillet." + +Sometimes event nouns may evoke a Frame. For example, +"reduction" evokes "Cause_change_of_scalar_position" in: + + "...the reduction of debt levels to $665 million from $2.6 billion." + +Adjectives may also evoke a Frame. 
For example, "asleep" may +evoke the "Sleep" frame as in: + + "They were asleep for hours." + +Many common nouns, such as artifacts like "hat" or "tower", +typically serve as dependents rather than clearly evoking their +own frames. + +Details for a specific lexical unit can be obtained using this class's +`lus()` function, which takes an optional regular expression +pattern that will be matched against the name of the lexical unit: + + >>> from pprint import pprint + >>> PrettyList(sorted(fn.lus(r'(?i)a little'), key=itemgetter('ID'))) + [, , ...] + +You can obtain detailed information on a particular LU by calling the +`lu()` function and passing in an LU's 'ID' number: + + >>> from pprint import pprint + >>> from nltk.corpus import framenet as fn + >>> fn.lu(256).name + 'foresee.v' + >>> fn.lu(256).definition + 'COD: be aware of beforehand; predict.' + >>> fn.lu(256).frame.name + 'Expectation' + >>> fn.lu(256).lexemes[0].name + 'foresee' + +Note that LU names take the form of a dotted string (e.g. "run.v" or "a +little.adv") in which a lemma preceeds the "." and a part of speech +(POS) follows the dot. The lemma may be composed of a single lexeme +(e.g. "run") or of multiple lexemes (e.g. "a little"). The list of +POSs used in the LUs is: + +v - verb +n - noun +a - adjective +adv - adverb +prep - preposition +num - numbers +intj - interjection +art - article +c - conjunction +scon - subordinating conjunction + +For more detailed information about the info that is contained in the +dict that is returned by the `lu()` function, see the documentation on +the `lu()` function. + +------------------- +Annotated Documents +------------------- + +The FrameNet corpus contains a small set of annotated documents. A list +of these documents can be obtained by calling the `docs()` function: + + >>> from pprint import pprint + >>> from nltk.corpus import framenet as fn + >>> d = fn.docs('BellRinging')[0] + >>> d.corpname + 'PropBank' + >>> d.sentence[49] # doctest: +ELLIPSIS + full-text sentence (...) in BellRinging: + + + [POS] 17 tags + + [POS_tagset] PENN + + [text] + [annotationSet] + + `` I live in hopes that the ringers themselves will be drawn into + ***** ******* ***** + Desir Cause_t Cause + [1] [3] [2] + + that fuller life . + ****** + Comple + [4] + (Desir=Desiring, Cause_t=Cause_to_make_noise, Cause=Cause_motion, Comple=Completeness) + + + >>> d.sentence[49].annotationSet[1] # doctest: +ELLIPSIS + annotation set (...): + + [status] MANUAL + + [LU] (6605) hope.n in Desiring + + [frame] (366) Desiring + + [GF] 2 relations + + [PT] 2 phrases + + [text] + [Target] + [FE] + [Noun] + + `` I live in hopes that the ringers themselves will be drawn into + - ^^^^ ^^ ***** ---------------------------------------------- + E supp su Event + + that fuller life . + ----------------- + + (E=Experiencer, su=supp) + + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/generate.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/generate.doctest new file mode 100644 index 0000000..4453518 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/generate.doctest @@ -0,0 +1,67 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. 
For license information, see LICENSE.TXT
+
+===============================================
+Generating sentences from context-free grammars
+===============================================
+
+An example grammar:
+
+    >>> from nltk.parse.generate import generate, demo_grammar
+    >>> from nltk import CFG
+    >>> grammar = CFG.fromstring(demo_grammar)
+    >>> print(grammar)
+    Grammar with 13 productions (start state = S)
+        S -> NP VP
+        NP -> Det N
+        PP -> P NP
+        VP -> 'slept'
+        VP -> 'saw' NP
+        VP -> 'walked' PP
+        Det -> 'the'
+        Det -> 'a'
+        N -> 'man'
+        N -> 'park'
+        N -> 'dog'
+        P -> 'in'
+        P -> 'with'
+
+The first 10 generated sentences:
+
+    >>> for sentence in generate(grammar, n=10):
+    ...     print(' '.join(sentence))
+    the man slept
+    the man saw the man
+    the man saw the park
+    the man saw the dog
+    the man saw a man
+    the man saw a park
+    the man saw a dog
+    the man walked in the man
+    the man walked in the park
+    the man walked in the dog
+
+All sentences of max depth 4:
+
+    >>> for sentence in generate(grammar, depth=4):
+    ...     print(' '.join(sentence))
+    the man slept
+    the park slept
+    the dog slept
+    a man slept
+    a park slept
+    a dog slept
+
+The number of sentences of different max depths:
+
+    >>> len(list(generate(grammar, depth=3)))
+    0
+    >>> len(list(generate(grammar, depth=4)))
+    6
+    >>> len(list(generate(grammar, depth=5)))
+    42
+    >>> len(list(generate(grammar, depth=6)))
+    114
+    >>> len(list(generate(grammar)))
+    114
+
diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/gensim.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/gensim.doctest
new file mode 100644
index 0000000..2e27597
--- /dev/null
+++ b/venv.bak/lib/python3.7/site-packages/nltk/test/gensim.doctest
@@ -0,0 +1,140 @@
+.. Copyright (C) 2001-2019 NLTK Project
+.. For license information, see LICENSE.TXT
+
+=======================================
+Demonstrate word embedding using Gensim
+=======================================
+
+We demonstrate three functions:
+- train word embeddings on the Brown Corpus;
+- load the pre-trained model and perform simple tasks with it; and
+- prune the pre-trained binary model.
+
+    >>> import gensim
+
+---------------
+Train the model
+---------------
+
+Here we train a word embedding using the Brown Corpus:
+
+    >>> from nltk.corpus import brown
+    >>> model = gensim.models.Word2Vec(brown.sents())
+
+Training may take some time, so once the model is trained it can be saved and reloaded:
+
+    >>> model.save('brown.embedding')
+    >>> new_model = gensim.models.Word2Vec.load('brown.embedding')
+
+The model maps each word in its vocabulary to an embedding vector, so we can easily look up the vector representation of a word.
+
+    >>> len(new_model['university'])
+    100
+
+Gensim also provides a number of supporting functions for working with word embeddings.
+For example, to compute the cosine similarity between two words:
+
+    >>> new_model.similarity('university','school') > 0.3
+    True
+
+---------------------------
+Using the pre-trained model
+---------------------------
+
+NLTK includes a pruned sample of a model trained on 100 billion words from the Google News Dataset.
+The full model is available from https://code.google.com/p/word2vec/ (about 3 GB).
+
+    >>> from nltk.data import find
+    >>> word2vec_sample = str(find('models/word2vec_sample/pruned.word2vec.txt'))
+    >>> model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_sample, binary=False)
+
+We pruned the model to only include the most common words (~44k words).
+ + >>> len(model.vocab) + 43981 + +Each word is represented in the space of 300 dimensions: + + >>> len(model['university']) + 300 + +Finding the top n words that are similar to a target word is simple. The result is the list of n words with the score. + + >>> model.most_similar(positive=['university'], topn = 3) + [(u'universities', 0.70039...), (u'faculty', 0.67809...), (u'undergraduate', 0.65870...)] + +Finding a word that is not in a list is also supported, although, implementing this by yourself is simple. + + >>> model.doesnt_match('breakfast cereal dinner lunch'.split()) + 'cereal' + +Mikolov et al. (2013) figured out that word embedding captures much of syntactic and semantic regularities. For example, +the vector 'King - Man + Woman' is close to 'Queen' and 'Germany - Berlin + Paris' is close to 'France'. + + >>> model.most_similar(positive=['woman','king'], negative=['man'], topn = 1) + [(u'queen', 0.71181...)] + + >>> model.most_similar(positive=['Paris','Germany'], negative=['Berlin'], topn = 1) + [(u'France', 0.78840...)] + +We can visualize the word embeddings using t-SNE (http://lvdmaaten.github.io/tsne/). For this demonstration, we visualize the first 1000 words. + +| import numpy as np +| labels = [] +| count = 0 +| max_count = 1000 +| X = np.zeros(shape=(max_count,len(model['university']))) +| +| for term in model.vocab: +| X[count] = model[term] +| labels.append(term) +| count+= 1 +| if count >= max_count: break +| +| # It is recommended to use PCA first to reduce to ~50 dimensions +| from sklearn.decomposition import PCA +| pca = PCA(n_components=50) +| X_50 = pca.fit_transform(X) +| +| # Using TSNE to further reduce to 2 dimensions +| from sklearn.manifold import TSNE +| model_tsne = TSNE(n_components=2, random_state=0) +| Y = model_tsne.fit_transform(X_50) +| +| # Show the scatter plot +| import matplotlib.pyplot as plt +| plt.scatter(Y[:,0], Y[:,1], 20) +| +| # Add labels +| for label, x, y in zip(labels, Y[:, 0], Y[:, 1]): +| plt.annotate(label, xy = (x,y), xytext = (0, 0), textcoords = 'offset points', size = 10) +| +| plt.show() + +------------------------------ +Prune the trained binary model +------------------------------ + +Here is the supporting code to extract part of the binary model (GoogleNews-vectors-negative300.bin.gz) from https://code.google.com/p/word2vec/ +We use this code to get the `word2vec_sample` model. 
+ +| import gensim +| from gensim.models.word2vec import Word2Vec +| # Load the binary model +| model = Word2Vec.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary = True); +| +| # Only output word that appear in the Brown corpus +| from nltk.corpus import brown +| words = set(brown.words()) +| print (len(words)) +| +| # Output presented word to a temporary file +| out_file = 'pruned.word2vec.txt' +| f = open(out_file,'wb') +| +| word_presented = words.intersection(model.vocab.keys()) +| f.write('{} {}\n'.format(len(word_presented),len(model['word']))) +| +| for word in word_presented: +| f.write('{} {}\n'.format(word, ' '.join(str(value) for value in model[word]))) +| +| f.close() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/gensim_fixt.py b/venv.bak/lib/python3.7/site-packages/nltk/test/gensim_fixt.py new file mode 100644 index 0000000..b1a6d2e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/gensim_fixt.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + + +def setup_module(module): + from nose import SkipTest + + try: + import gensim + except ImportError: + raise SkipTest("Gensim doctest requires gensim") diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/gluesemantics.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/gluesemantics.doctest new file mode 100644 index 0000000..7bf29a0 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/gluesemantics.doctest @@ -0,0 +1,384 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +============================================================================== + Glue Semantics +============================================================================== + +.. include:: ../../../nltk_book/definitions.rst + + +====================== +Linear logic +====================== + + >>> from nltk.sem import logic + >>> from nltk.sem.glue import * + >>> from nltk.sem.linearlogic import * + + >>> from nltk.sem.linearlogic import Expression + >>> read_expr = Expression.fromstring + +Parser + + >>> print(read_expr(r'f')) + f + >>> print(read_expr(r'(g -o f)')) + (g -o f) + >>> print(read_expr(r'(g -o (h -o f))')) + (g -o (h -o f)) + >>> print(read_expr(r'((g -o G) -o G)')) + ((g -o G) -o G) + >>> print(read_expr(r'(g -o f)(g)')) + (g -o f)(g) + >>> print(read_expr(r'((g -o G) -o G)((g -o f))')) + ((g -o G) -o G)((g -o f)) + +Simplify + + >>> print(read_expr(r'f').simplify()) + f + >>> print(read_expr(r'(g -o f)').simplify()) + (g -o f) + >>> print(read_expr(r'((g -o G) -o G)').simplify()) + ((g -o G) -o G) + >>> print(read_expr(r'(g -o f)(g)').simplify()) + f + >>> try: read_expr(r'(g -o f)(f)').simplify() + ... except LinearLogicApplicationException as e: print(e) + ... + Cannot apply (g -o f) to f. 
Cannot unify g with f given {} + >>> print(read_expr(r'(G -o f)(g)').simplify()) + f + >>> print(read_expr(r'((g -o G) -o G)((g -o f))').simplify()) + f + +Test BindingDict + + >>> h = ConstantExpression('h') + >>> g = ConstantExpression('g') + >>> f = ConstantExpression('f') + + >>> H = VariableExpression('H') + >>> G = VariableExpression('G') + >>> F = VariableExpression('F') + + >>> d1 = BindingDict({H: h}) + >>> d2 = BindingDict({F: f, G: F}) + >>> d12 = d1 + d2 + >>> all12 = ['%s: %s' % (v, d12[v]) for v in d12.d] + >>> all12.sort() + >>> print(all12) + ['F: f', 'G: f', 'H: h'] + + >>> BindingDict([(F,f),(G,g),(H,h)]) == BindingDict({F:f, G:g, H:h}) + True + + >>> d4 = BindingDict({F: f}) + >>> try: d4[F] = g + ... except VariableBindingException as e: print(e) + Variable F already bound to another value + +Test Unify + + >>> try: f.unify(g, BindingDict()) + ... except UnificationException as e: print(e) + ... + Cannot unify f with g given {} + + >>> f.unify(G, BindingDict()) == BindingDict({G: f}) + True + >>> try: f.unify(G, BindingDict({G: h})) + ... except UnificationException as e: print(e) + ... + Cannot unify f with G given {G: h} + >>> f.unify(G, BindingDict({G: f})) == BindingDict({G: f}) + True + >>> f.unify(G, BindingDict({H: f})) == BindingDict({G: f, H: f}) + True + + >>> G.unify(f, BindingDict()) == BindingDict({G: f}) + True + >>> try: G.unify(f, BindingDict({G: h})) + ... except UnificationException as e: print(e) + ... + Cannot unify G with f given {G: h} + >>> G.unify(f, BindingDict({G: f})) == BindingDict({G: f}) + True + >>> G.unify(f, BindingDict({H: f})) == BindingDict({G: f, H: f}) + True + + >>> G.unify(F, BindingDict()) == BindingDict({G: F}) + True + >>> try: G.unify(F, BindingDict({G: H})) + ... except UnificationException as e: print(e) + ... 
+ Cannot unify G with F given {G: H} + >>> G.unify(F, BindingDict({G: F})) == BindingDict({G: F}) + True + >>> G.unify(F, BindingDict({H: F})) == BindingDict({G: F, H: F}) + True + +Test Compile + + >>> print(read_expr('g').compile_pos(Counter(), GlueFormula)) + (, []) + >>> print(read_expr('(g -o f)').compile_pos(Counter(), GlueFormula)) + (, []) + >>> print(read_expr('(g -o (h -o f))').compile_pos(Counter(), GlueFormula)) + (, []) + + +====================== +Glue +====================== + +Demo of "John walks" +-------------------- + + >>> john = GlueFormula("John", "g") + >>> print(john) + John : g + >>> walks = GlueFormula(r"\x.walks(x)", "(g -o f)") + >>> print(walks) + \x.walks(x) : (g -o f) + >>> print(walks.applyto(john)) + \x.walks(x)(John) : (g -o f)(g) + >>> print(walks.applyto(john).simplify()) + walks(John) : f + + +Demo of "A dog walks" +--------------------- + + >>> a = GlueFormula("\P Q.some x.(P(x) and Q(x))", "((gv -o gr) -o ((g -o G) -o G))") + >>> print(a) + \P Q.exists x.(P(x) & Q(x)) : ((gv -o gr) -o ((g -o G) -o G)) + >>> man = GlueFormula(r"\x.man(x)", "(gv -o gr)") + >>> print(man) + \x.man(x) : (gv -o gr) + >>> walks = GlueFormula(r"\x.walks(x)", "(g -o f)") + >>> print(walks) + \x.walks(x) : (g -o f) + >>> a_man = a.applyto(man) + >>> print(a_man.simplify()) + \Q.exists x.(man(x) & Q(x)) : ((g -o G) -o G) + >>> a_man_walks = a_man.applyto(walks) + >>> print(a_man_walks.simplify()) + exists x.(man(x) & walks(x)) : f + + +Demo of 'every girl chases a dog' +--------------------------------- + +Individual words: + + >>> every = GlueFormula("\P Q.all x.(P(x) -> Q(x))", "((gv -o gr) -o ((g -o G) -o G))") + >>> print(every) + \P Q.all x.(P(x) -> Q(x)) : ((gv -o gr) -o ((g -o G) -o G)) + >>> girl = GlueFormula(r"\x.girl(x)", "(gv -o gr)") + >>> print(girl) + \x.girl(x) : (gv -o gr) + >>> chases = GlueFormula(r"\x y.chases(x,y)", "(g -o (h -o f))") + >>> print(chases) + \x y.chases(x,y) : (g -o (h -o f)) + >>> a = GlueFormula("\P Q.some x.(P(x) and Q(x))", "((hv -o hr) -o ((h -o H) -o H))") + >>> print(a) + \P Q.exists x.(P(x) & Q(x)) : ((hv -o hr) -o ((h -o H) -o H)) + >>> dog = GlueFormula(r"\x.dog(x)", "(hv -o hr)") + >>> print(dog) + \x.dog(x) : (hv -o hr) + +Noun Quantification can only be done one way: + + >>> every_girl = every.applyto(girl) + >>> print(every_girl.simplify()) + \Q.all x.(girl(x) -> Q(x)) : ((g -o G) -o G) + >>> a_dog = a.applyto(dog) + >>> print(a_dog.simplify()) + \Q.exists x.(dog(x) & Q(x)) : ((h -o H) -o H) + +The first reading is achieved by combining 'chases' with 'a dog' first. +Since 'a girl' requires something of the form '(h -o H)' we must +get rid of the 'g' in the glue of 'see'. We will do this with +the '-o elimination' rule. So, x1 will be our subject placeholder. + + >>> xPrime = GlueFormula("x1", "g") + >>> print(xPrime) + x1 : g + >>> xPrime_chases = chases.applyto(xPrime) + >>> print(xPrime_chases.simplify()) + \y.chases(x1,y) : (h -o f) + >>> xPrime_chases_a_dog = a_dog.applyto(xPrime_chases) + >>> print(xPrime_chases_a_dog.simplify()) + exists x.(dog(x) & chases(x1,x)) : f + +Now we can retract our subject placeholder using lambda-abstraction and +combine with the true subject. 
+ + >>> chases_a_dog = xPrime_chases_a_dog.lambda_abstract(xPrime) + >>> print(chases_a_dog.simplify()) + \x1.exists x.(dog(x) & chases(x1,x)) : (g -o f) + >>> every_girl_chases_a_dog = every_girl.applyto(chases_a_dog) + >>> r1 = every_girl_chases_a_dog.simplify() + >>> r2 = GlueFormula(r'all x.(girl(x) -> exists z1.(dog(z1) & chases(x,z1)))', 'f') + >>> r1 == r2 + True + +The second reading is achieved by combining 'every girl' with 'chases' first. + + >>> xPrime = GlueFormula("x1", "g") + >>> print(xPrime) + x1 : g + >>> xPrime_chases = chases.applyto(xPrime) + >>> print(xPrime_chases.simplify()) + \y.chases(x1,y) : (h -o f) + >>> yPrime = GlueFormula("x2", "h") + >>> print(yPrime) + x2 : h + >>> xPrime_chases_yPrime = xPrime_chases.applyto(yPrime) + >>> print(xPrime_chases_yPrime.simplify()) + chases(x1,x2) : f + >>> chases_yPrime = xPrime_chases_yPrime.lambda_abstract(xPrime) + >>> print(chases_yPrime.simplify()) + \x1.chases(x1,x2) : (g -o f) + >>> every_girl_chases_yPrime = every_girl.applyto(chases_yPrime) + >>> print(every_girl_chases_yPrime.simplify()) + all x.(girl(x) -> chases(x,x2)) : f + >>> every_girl_chases = every_girl_chases_yPrime.lambda_abstract(yPrime) + >>> print(every_girl_chases.simplify()) + \x2.all x.(girl(x) -> chases(x,x2)) : (h -o f) + >>> every_girl_chases_a_dog = a_dog.applyto(every_girl_chases) + >>> r1 = every_girl_chases_a_dog.simplify() + >>> r2 = GlueFormula(r'exists x.(dog(x) & all z2.(girl(z2) -> chases(z2,x)))', 'f') + >>> r1 == r2 + True + + +Compilation +----------- + + >>> for cp in GlueFormula('m', '(b -o a)').compile(Counter()): print(cp) + m : (b -o a) : {1} + >>> for cp in GlueFormula('m', '((c -o b) -o a)').compile(Counter()): print(cp) + v1 : c : {1} + m : (b[1] -o a) : {2} + >>> for cp in GlueFormula('m', '((d -o (c -o b)) -o a)').compile(Counter()): print(cp) + v1 : c : {1} + v2 : d : {2} + m : (b[1, 2] -o a) : {3} + >>> for cp in GlueFormula('m', '((d -o e) -o ((c -o b) -o a))').compile(Counter()): print(cp) + v1 : d : {1} + v2 : c : {2} + m : (e[1] -o (b[2] -o a)) : {3} + >>> for cp in GlueFormula('m', '(((d -o c) -o b) -o a)').compile(Counter()): print(cp) + v1 : (d -o c) : {1} + m : (b[1] -o a) : {2} + >>> for cp in GlueFormula('m', '((((e -o d) -o c) -o b) -o a)').compile(Counter()): print(cp) + v1 : e : {1} + v2 : (d[1] -o c) : {2} + m : (b[2] -o a) : {3} + + +Demo of 'a man walks' using Compilation +--------------------------------------- + +Premises + + >>> a = GlueFormula('\\P Q.some x.(P(x) and Q(x))', '((gv -o gr) -o ((g -o G) -o G))') + >>> print(a) + \P Q.exists x.(P(x) & Q(x)) : ((gv -o gr) -o ((g -o G) -o G)) + + >>> man = GlueFormula('\\x.man(x)', '(gv -o gr)') + >>> print(man) + \x.man(x) : (gv -o gr) + + >>> walks = GlueFormula('\\x.walks(x)', '(g -o f)') + >>> print(walks) + \x.walks(x) : (g -o f) + +Compiled Premises: + + >>> counter = Counter() + >>> ahc = a.compile(counter) + >>> g1 = ahc[0] + >>> print(g1) + v1 : gv : {1} + >>> g2 = ahc[1] + >>> print(g2) + v2 : g : {2} + >>> g3 = ahc[2] + >>> print(g3) + \P Q.exists x.(P(x) & Q(x)) : (gr[1] -o (G[2] -o G)) : {3} + >>> g4 = man.compile(counter)[0] + >>> print(g4) + \x.man(x) : (gv -o gr) : {4} + >>> g5 = walks.compile(counter)[0] + >>> print(g5) + \x.walks(x) : (g -o f) : {5} + +Derivation: + + >>> g14 = g4.applyto(g1) + >>> print(g14.simplify()) + man(v1) : gr : {1, 4} + >>> g134 = g3.applyto(g14) + >>> print(g134.simplify()) + \Q.exists x.(man(x) & Q(x)) : (G[2] -o G) : {1, 3, 4} + >>> g25 = g5.applyto(g2) + >>> print(g25.simplify()) + walks(v2) : f : {2, 5} + >>> 
g12345 = g134.applyto(g25) + >>> print(g12345.simplify()) + exists x.(man(x) & walks(x)) : f : {1, 2, 3, 4, 5} + +--------------------------------- +Dependency Graph to Glue Formulas +--------------------------------- + >>> from nltk.corpus.reader.dependency import DependencyGraph + + >>> depgraph = DependencyGraph("""1 John _ NNP NNP _ 2 SUBJ _ _ + ... 2 sees _ VB VB _ 0 ROOT _ _ + ... 3 a _ ex_quant ex_quant _ 4 SPEC _ _ + ... 4 dog _ NN NN _ 2 OBJ _ _ + ... """) + >>> gfl = GlueDict('nltk:grammars/sample_grammars/glue.semtype').to_glueformula_list(depgraph) + >>> print(gfl) # doctest: +SKIP + [\x y.sees(x,y) : (f -o (i -o g)), + \x.dog(x) : (iv -o ir), + \P Q.exists x.(P(x) & Q(x)) : ((iv -o ir) -o ((i -o I3) -o I3)), + \P Q.exists x.(P(x) & Q(x)) : ((fv -o fr) -o ((f -o F4) -o F4)), + \x.John(x) : (fv -o fr)] + >>> glue = Glue() + >>> for r in sorted([r.simplify().normalize() for r in glue.get_readings(glue.gfl_to_compiled(gfl))], key=str): + ... print(r) + exists z1.(John(z1) & exists z2.(dog(z2) & sees(z1,z2))) + exists z1.(dog(z1) & exists z2.(John(z2) & sees(z2,z1))) + +----------------------------------- +Dependency Graph to LFG f-structure +----------------------------------- + >>> from nltk.sem.lfg import FStructure + + >>> fstruct = FStructure.read_depgraph(depgraph) + + >>> print(fstruct) # doctest: +SKIP + f:[pred 'sees' + obj h:[pred 'dog' + spec 'a'] + subj g:[pred 'John']] + + >>> fstruct.to_depgraph().tree().pprint() + (sees (dog a) John) + +--------------------------------- +LFG f-structure to Glue +--------------------------------- + >>> fstruct.to_glueformula_list(GlueDict('nltk:grammars/sample_grammars/glue.semtype')) # doctest: +SKIP + [\x y.sees(x,y) : (i -o (g -o f)), + \x.dog(x) : (gv -o gr), + \P Q.exists x.(P(x) & Q(x)) : ((gv -o gr) -o ((g -o G3) -o G3)), + \P Q.exists x.(P(x) & Q(x)) : ((iv -o ir) -o ((i -o I4) -o I4)), + \x.John(x) : (iv -o ir)] + +.. see gluesemantics_malt.doctest for more diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/gluesemantics_malt.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/gluesemantics_malt.doctest new file mode 100644 index 0000000..1329794 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/gluesemantics_malt.doctest @@ -0,0 +1,68 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +.. see also: gluesemantics.doctest + +============================================================================== + Glue Semantics +============================================================================== + + >>> from nltk.sem.glue import * + >>> nltk.sem.logic._counter._value = 0 + +-------------------------------- +Initialize the Dependency Parser +-------------------------------- + >>> from nltk.parse.malt import MaltParser + + >>> tagger = RegexpTagger( + ... [('^(John|Mary)$', 'NNP'), + ... ('^(sees|chases)$', 'VB'), + ... ('^(a)$', 'ex_quant'), + ... ('^(every)$', 'univ_quant'), + ... ('^(girl|dog)$', 'NN') + ... ]) + >>> depparser = MaltParser(tagger=tagger) + +-------------------- +Automated Derivation +-------------------- + >>> glue = Glue(depparser=depparser) + >>> readings = glue.parse_to_meaning('every girl chases a dog'.split()) + >>> for reading in sorted([r.simplify().normalize() for r in readings], key=str): + ... 
print(reading.normalize()) + all z1.(girl(z1) -> exists z2.(dog(z2) & chases(z1,z2))) + exists z1.(dog(z1) & all z2.(girl(z2) -> chases(z2,z1))) + + >>> drtglue = DrtGlue(depparser=depparser) + >>> readings = drtglue.parse_to_meaning('every girl chases a dog'.split()) + >>> for reading in sorted([r.simplify().normalize() for r in readings], key=str): + ... print(reading) + ([],[(([z1],[girl(z1)]) -> ([z2],[dog(z2), chases(z1,z2)]))]) + ([z1],[dog(z1), (([z2],[girl(z2)]) -> ([],[chases(z2,z1)]))]) + +-------------- +With inference +-------------- + +Checking for equality of two DRSs is very useful when generating readings of a sentence. +For example, the ``glue`` module generates two readings for the sentence +*John sees Mary*: + + >>> from nltk.sem.glue import DrtGlue + >>> readings = drtglue.parse_to_meaning('John sees Mary'.split()) + >>> for drs in sorted([r.simplify().normalize() for r in readings], key=str): + ... print(drs) + ([z1,z2],[John(z1), Mary(z2), sees(z1,z2)]) + ([z1,z2],[Mary(z1), John(z2), sees(z2,z1)]) + +However, it is easy to tell that these two readings are logically the +same, and therefore one of them is superfluous. We can use the theorem prover +to determine this equivalence, and then delete one of them. A particular +theorem prover may be specified, or the argument may be left off to use the +default. + + >>> readings[0].equiv(readings[1]) + True + + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/gluesemantics_malt_fixt.py b/venv.bak/lib/python3.7/site-packages/nltk/test/gluesemantics_malt_fixt.py new file mode 100644 index 0000000..70e149a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/gluesemantics_malt_fixt.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + + +def setup_module(module): + from nose import SkipTest + from nltk.parse.malt import MaltParser + + try: + depparser = MaltParser('maltparser-1.7.2') + except LookupError: + raise SkipTest("MaltParser is not available") diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/grammar.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/grammar.doctest new file mode 100644 index 0000000..7cae9d9 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/grammar.doctest @@ -0,0 +1,48 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +=============== +Grammar Parsing +=============== + +Grammars can be parsed from strings: + + >>> from nltk import CFG + >>> grammar = CFG.fromstring(""" + ... S -> NP VP + ... PP -> P NP + ... NP -> Det N | NP PP + ... VP -> V NP | VP PP + ... Det -> 'a' | 'the' + ... N -> 'dog' | 'cat' + ... V -> 'chased' | 'sat' + ... P -> 'on' | 'in' + ... """) + >>> grammar + + >>> grammar.start() + S + >>> grammar.productions() # doctest: +NORMALIZE_WHITESPACE + [S -> NP VP, PP -> P NP, NP -> Det N, NP -> NP PP, VP -> V NP, VP -> VP PP, + Det -> 'a', Det -> 'the', N -> 'dog', N -> 'cat', V -> 'chased', V -> 'sat', + P -> 'on', P -> 'in'] + +Probabilistic CFGs: + + >>> from nltk import PCFG + >>> toy_pcfg1 = PCFG.fromstring(""" + ... S -> NP VP [1.0] + ... NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15] + ... Det -> 'the' [0.8] | 'my' [0.2] + ... N -> 'man' [0.5] | 'telescope' [0.5] + ... VP -> VP PP [0.1] | V NP [0.7] | V [0.2] + ... V -> 'ate' [0.35] | 'saw' [0.65] + ... PP -> P NP [1.0] + ... P -> 'with' [0.61] | 'under' [0.39] + ... 
""") + +Chomsky Normal Form grammar (Test for bug 474) + + >>> g = CFG.fromstring("VP^ -> VBP NP^") + >>> g.productions()[0].lhs() + VP^ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/grammartestsuites.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/grammartestsuites.doctest new file mode 100644 index 0000000..4221537 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/grammartestsuites.doctest @@ -0,0 +1,109 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +========================== + Test Suites for Grammars +========================== + +Sentences in the test suite are divided into two classes: + +- grammatical (*accept*) and +- ungrammatical (*reject*). + +If a sentence should parse accordng to the grammar, the value of +``trees`` will be a non-empty list. If a sentence should be rejected +according to the grammar, then the value of ``trees`` will be ``None``. + + >>> from nltk.parse import TestGrammar + >>> germantest1 = {} + >>> germantest1['doc'] = "Tests for person agreement" + >>> germantest1['accept'] = [ + ... 'ich komme', + ... 'ich sehe mich', + ... 'du kommst', + ... 'du siehst mich', + ... 'sie kommt', + ... 'sie sieht mich', + ... 'ihr kommt', + ... 'wir kommen', + ... 'sie kommen', + ... 'du magst mich', + ... 'er mag mich', + ... 'du folgst mir', + ... 'sie hilft mir', + ... ] + >>> germantest1['reject'] = [ + ... 'ich kommt', + ... 'ich kommst', + ... 'ich siehst mich', + ... 'du komme', + ... 'du sehe mich', + ... 'du kommt', + ... 'er komme', + ... 'er siehst mich', + ... 'wir komme', + ... 'wir kommst', + ... 'die Katzen kommst', + ... 'sie komme', + ... 'sie kommst', + ... 'du mag mich', + ... 'er magst mich', + ... 'du folgt mir', + ... 'sie hilfst mir', + ... ] + >>> germantest2 = {} + >>> germantest2['doc'] = "Tests for number agreement" + >>> germantest2['accept'] = [ + ... 'der Hund kommt', + ... 'die Hunde kommen', + ... 'ich komme', + ... 'wir kommen', + ... 'ich sehe die Katzen', + ... 'ich folge den Katzen', + ... 'ich sehe die Katzen', + ... 'ich folge den Katzen', + ... 'wir sehen die Katzen', + ... 'wir folgen den Katzen' + ... ] + >>> germantest2['reject'] = [ + ... 'ich kommen', + ... 'wir komme', + ... 'der Hunde kommt', + ... 'der Hunde kommen', + ... 'die Katzen kommt', + ... 'ich sehe der Hunde', + ... 'ich folge den Hund', + ... 'ich sehen der Hunde', + ... 'ich folgen den Hund', + ... 'wir sehe die Katzen', + ... 'wir folge den Katzen' + ... ] + >>> germantest3 = {} + >>> germantest3['doc'] = "Tests for case government and subcategorization" + >>> germantest3['accept'] = [ + ... 'der Hund sieht mich', + ... 'der Hund kommt', + ... 'ich sehe den Hund', + ... 'ich helfe dem Hund', + ... ] + >>> germantest3['reject'] = [ + ... 'ich sehe', + ... 'ich helfe', + ... 'ich komme den Hund', + ... 'ich sehe den Hund die Katzen', + ... 'du hilfst mich', + ... 'du siehst mir', + ... 'du siehst ich', + ... 'der Hunde kommt mich', + ... 'die Hunde sehe die Hunde', + ... 'der Hund sehe die Hunde', + ... 'ich hilft den Hund', + ... 'ich hilft der Hund', + ... 'ich sehe dem Hund', + ... ] + >>> germantestsuites = [germantest1, germantest2, germantest3] + >>> tester = TestGrammar('grammars/book_grammars/german.fcfg', germantestsuites) + >>> tester.run() + Tests for person agreement: All tests passed! + Tests for number agreement: All tests passed! + Tests for case government and subcategorization: All tests passed! 
diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/index.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/index.doctest new file mode 100644 index 0000000..7ce8167 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/index.doctest @@ -0,0 +1,100 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +.. _align howto: align.html +.. _ccg howto: ccg.html +.. _chat80 howto: chat80.html +.. _childes howto: childes.html +.. _chunk howto: chunk.html +.. _classify howto: classify.html +.. _collocations howto: collocations.html +.. _compat howto: compat.html +.. _corpus howto: corpus.html +.. _data howto: data.html +.. _dependency howto: dependency.html +.. _discourse howto: discourse.html +.. _drt howto: drt.html +.. _featgram howto: featgram.html +.. _featstruct howto: featstruct.html +.. _framenet howto: framenet.html +.. _generate howto: generate.html +.. _gluesemantics howto: gluesemantics.html +.. _gluesemantics_malt howto: gluesemantics_malt.html +.. _grammar howto: grammar.html +.. _grammartestsuites howto: grammartestsuites.html +.. _index howto: index.html +.. _inference howto: inference.html +.. _internals howto: internals.html +.. _japanese howto: japanese.html +.. _logic howto: logic.html +.. _metrics howto: metrics.html +.. _misc howto: misc.html +.. _nonmonotonic howto: nonmonotonic.html +.. _parse howto: parse.html +.. _portuguese_en howto: portuguese_en.html +.. _probability howto: probability.html +.. _propbank howto: propbank.html +.. _relextract howto: relextract.html +.. _resolution howto: resolution.html +.. _semantics howto: semantics.html +.. _simple howto: simple.html +.. _stem howto: stem.html +.. _tag howto: tag.html +.. _tokenize howto: tokenize.html +.. _toolbox howto: toolbox.html +.. _tree howto: tree.html +.. _treetransforms howto: treetransforms.html +.. _util howto: util.html +.. _wordnet howto: wordnet.html +.. _wordnet_lch howto: wordnet_lch.html + +=========== +NLTK HOWTOs +=========== + +* `align HOWTO`_ +* `ccg HOWTO`_ +* `chat80 HOWTO`_ +* `childes HOWTO`_ +* `chunk HOWTO`_ +* `classify HOWTO`_ +* `collocations HOWTO`_ +* `compat HOWTO`_ +* `corpus HOWTO`_ +* `data HOWTO`_ +* `dependency HOWTO`_ +* `discourse HOWTO`_ +* `drt HOWTO`_ +* `featgram HOWTO`_ +* `featstruct HOWTO`_ +* `framenet HOWTO`_ +* `generate HOWTO`_ +* `gluesemantics HOWTO`_ +* `gluesemantics_malt HOWTO`_ +* `grammar HOWTO`_ +* `grammartestsuites HOWTO`_ +* `index HOWTO`_ +* `inference HOWTO`_ +* `internals HOWTO`_ +* `japanese HOWTO`_ +* `logic HOWTO`_ +* `metrics HOWTO`_ +* `misc HOWTO`_ +* `nonmonotonic HOWTO`_ +* `parse HOWTO`_ +* `portuguese_en HOWTO`_ +* `probability HOWTO`_ +* `propbank HOWTO`_ +* `relextract HOWTO`_ +* `resolution HOWTO`_ +* `semantics HOWTO`_ +* `simple HOWTO`_ +* `stem HOWTO`_ +* `tag HOWTO`_ +* `tokenize HOWTO`_ +* `toolbox HOWTO`_ +* `tree HOWTO`_ +* `treetransforms HOWTO`_ +* `util HOWTO`_ +* `wordnet HOWTO`_ +* `wordnet_lch HOWTO`_ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/inference.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/inference.doctest new file mode 100644 index 0000000..c2a41a3 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/inference.doctest @@ -0,0 +1,534 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. 
For license information, see LICENSE.TXT + +==================================== +Logical Inference and Model Building +==================================== + + >>> from nltk import * + >>> from nltk.sem.drt import DrtParser + >>> from nltk.sem import logic + >>> logic._counter._value = 0 + +------------ +Introduction +------------ + +Within the area of automated reasoning, first order theorem proving +and model building (or model generation) have both received much +attention, and have given rise to highly sophisticated techniques. We +focus therefore on providing an NLTK interface to third party tools +for these tasks. In particular, the module ``nltk.inference`` can be +used to access both theorem provers and model builders. + +--------------------------------- +NLTK Interface to Theorem Provers +--------------------------------- + +The main class used to interface with a theorem prover is the ``Prover`` +class, found in ``nltk.api``. The ``prove()`` method takes three optional +arguments: a goal, a list of assumptions, and a ``verbose`` boolean to +indicate whether the proof should be printed to the console. The proof goal +and any assumptions need to be instances of the ``Expression`` class +specified by ``nltk.sem.logic``. There are currently three theorem provers +included with NLTK: ``Prover9``, ``TableauProver``, and +``ResolutionProver``. The first is an off-the-shelf prover, while the other +two are written in Python and included in the ``nltk.inference`` package. + + >>> from nltk.sem import Expression + >>> read_expr = Expression.fromstring + >>> p1 = read_expr('man(socrates)') + >>> p2 = read_expr('all x.(man(x) -> mortal(x))') + >>> c = read_expr('mortal(socrates)') + >>> Prover9().prove(c, [p1,p2]) + True + >>> TableauProver().prove(c, [p1,p2]) + True + >>> ResolutionProver().prove(c, [p1,p2], verbose=True) + [1] {-mortal(socrates)} A + [2] {man(socrates)} A + [3] {-man(z2), mortal(z2)} A + [4] {-man(socrates)} (1, 3) + [5] {mortal(socrates)} (2, 3) + [6] {} (1, 5) + + True + +--------------------- +The ``ProverCommand`` +--------------------- + +A ``ProverCommand`` is a stateful holder for a theorem +prover. The command stores a theorem prover instance (of type ``Prover``), +a goal, a list of assumptions, the result of the proof, and a string version +of the entire proof. Corresponding to the three included ``Prover`` +implementations, there are three ``ProverCommand`` implementations: +``Prover9Command``, ``TableauProverCommand``, and +``ResolutionProverCommand``. + +The ``ProverCommand``'s constructor takes its goal and assumptions. The +``prove()`` command executes the ``Prover`` and ``proof()`` +returns a String form of the proof +If the ``prove()`` method has not been called, +then the prover command will be unable to display a proof. + + >>> prover = ResolutionProverCommand(c, [p1,p2]) + >>> print(prover.proof()) # doctest: +ELLIPSIS + Traceback (most recent call last): + File "...", line 1212, in __run + compileflags, 1) in test.globs + File "", line 1, in + File "...", line ..., in proof + raise LookupError("You have to call prove() first to get a proof!") + LookupError: You have to call prove() first to get a proof! 
+ >>> prover.prove() + True + >>> print(prover.proof()) + [1] {-mortal(socrates)} A + [2] {man(socrates)} A + [3] {-man(z4), mortal(z4)} A + [4] {-man(socrates)} (1, 3) + [5] {mortal(socrates)} (2, 3) + [6] {} (1, 5) + + +The prover command stores the result of proving so that if ``prove()`` is +called again, then the command can return the result without executing the +prover again. This allows the user to access the result of the proof without +wasting time re-computing what it already knows. + + >>> prover.prove() + True + >>> prover.prove() + True + +The assumptions and goal may be accessed using the ``assumptions()`` and +``goal()`` methods, respectively. + + >>> prover.assumptions() + [, mortal(x))>] + >>> prover.goal() + + +The assumptions list may be modified using the ``add_assumptions()`` and +``retract_assumptions()`` methods. Both methods take a list of ``Expression`` +objects. Since adding or removing assumptions may change the result of the +proof, the stored result is cleared when either of these methods are called. +That means that ``proof()`` will be unavailable until ``prove()`` is called and +a call to ``prove()`` will execute the theorem prover. + + >>> prover.retract_assumptions([read_expr('man(socrates)')]) + >>> print(prover.proof()) # doctest: +ELLIPSIS + Traceback (most recent call last): + File "...", line 1212, in __run + compileflags, 1) in test.globs + File "", line 1, in + File "...", line ..., in proof + raise LookupError("You have to call prove() first to get a proof!") + LookupError: You have to call prove() first to get a proof! + >>> prover.prove() + False + >>> print(prover.proof()) + [1] {-mortal(socrates)} A + [2] {-man(z6), mortal(z6)} A + [3] {-man(socrates)} (1, 2) + + >>> prover.add_assumptions([read_expr('man(socrates)')]) + >>> prover.prove() + True + +------- +Prover9 +------- + +Prover9 Installation +~~~~~~~~~~~~~~~~~~~~ + +You can download Prover9 from http://www.cs.unm.edu/~mccune/prover9/. + +Extract the source code into a suitable directory and follow the +instructions in the Prover9 ``README.make`` file to compile the executables. +Install these into an appropriate location; the +``prover9_search`` variable is currently configured to look in the +following locations: + + >>> p = Prover9() + >>> p.binary_locations() # doctest: +NORMALIZE_WHITESPACE + ['/usr/local/bin/prover9', + '/usr/local/bin/prover9/bin', + '/usr/local/bin', + '/usr/bin', + '/usr/local/prover9', + '/usr/local/share/prover9'] + +Alternatively, the environment variable ``PROVER9HOME`` may be configured with +the binary's location. + +The path to the correct directory can be set manually in the following +manner: + + >>> config_prover9(path='/usr/local/bin') # doctest: +SKIP + [Found prover9: /usr/local/bin/prover9] + +If the executables cannot be found, ``Prover9`` will issue a warning message: + + >>> p.prove() # doctest: +SKIP + Traceback (most recent call last): + ... + LookupError: + =========================================================================== + NLTK was unable to find the prover9 executable! Use config_prover9() or + set the PROVER9HOME environment variable. + + >> config_prover9('/path/to/prover9') + + For more information, on prover9, see: + + =========================================================================== + + +Using Prover9 +~~~~~~~~~~~~~ + +The general case in theorem proving is to determine whether ``S |- g`` +holds, where ``S`` is a possibly empty set of assumptions, and ``g`` +is a proof goal. 
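+
+If the Prover9 binary is not installed, the same ``S |- g`` question can
+still be answered with the pure-Python provers introduced at the start of
+this HOWTO. A small sketch, reusing the Socrates example from above:
+
+    >>> s1 = read_expr('all x.(man(x) -> mortal(x))')
+    >>> s2 = read_expr('man(socrates)')
+    >>> TableauProver().prove(read_expr('mortal(socrates)'), [s1, s2])
+    True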
+ +As mentioned earlier, NLTK input to ``Prover9`` must be +``Expression``\ s of ``nltk.sem.logic``. A ``Prover9`` instance is +initialized with a proof goal and, possibly, some assumptions. The +``prove()`` method attempts to find a proof of the goal, given the +list of assumptions (in this case, none). + + >>> goal = read_expr('(man(x) <-> --man(x))') + >>> prover = Prover9Command(goal) + >>> prover.prove() + True + +Given a ``ProverCommand`` instance ``prover``, the method +``prover.proof()`` will return a String of the extensive proof information +provided by Prover9, shown in abbreviated form here:: + + ============================== Prover9 =============================== + Prover9 (32) version ... + Process ... was started by ... on ... + ... + The command was ".../prover9 -f ...". + ============================== end of head =========================== + + ============================== INPUT ================================= + + % Reading from file /var/... + + + formulas(goals). + (all x (man(x) -> man(x))). + end_of_list. + + ... + ============================== end of search ========================= + + THEOREM PROVED + + Exiting with 1 proof. + + Process 6317 exit (max_proofs) Mon Jan 21 15:23:28 2008 + + +As mentioned earlier, we may want to list some assumptions for +the proof, as shown here. + + >>> g = read_expr('mortal(socrates)') + >>> a1 = read_expr('all x.(man(x) -> mortal(x))') + >>> prover = Prover9Command(g, assumptions=[a1]) + >>> prover.print_assumptions() + all x.(man(x) -> mortal(x)) + +However, the assumptions are not sufficient to derive the goal: + + >>> print(prover.prove()) + False + +So let's add another assumption: + + >>> a2 = read_expr('man(socrates)') + >>> prover.add_assumptions([a2]) + >>> prover.print_assumptions() + all x.(man(x) -> mortal(x)) + man(socrates) + >>> print(prover.prove()) + True + +We can also show the assumptions in ``Prover9`` format. + + >>> prover.print_assumptions(output_format='Prover9') + all x (man(x) -> mortal(x)) + man(socrates) + + >>> prover.print_assumptions(output_format='Spass') + Traceback (most recent call last): + . . . + NameError: Unrecognized value for 'output_format': Spass + +Assumptions can be retracted from the list of assumptions. + + >>> prover.retract_assumptions([a1]) + >>> prover.print_assumptions() + man(socrates) + >>> prover.retract_assumptions([a1]) + +Statements can be loaded from a file and parsed. We can then add these +statements as new assumptions. + + >>> g = read_expr('all x.(boxer(x) -> -boxerdog(x))') + >>> prover = Prover9Command(g) + >>> prover.prove() + False + >>> import nltk.data + >>> new = nltk.data.load('grammars/sample_grammars/background0.fol') + >>> for a in new: + ... print(a) + all x.(boxerdog(x) -> dog(x)) + all x.(boxer(x) -> person(x)) + all x.-(dog(x) & person(x)) + exists x.boxer(x) + exists x.boxerdog(x) + >>> prover.add_assumptions(new) + >>> print(prover.prove()) + True + >>> print(prover.proof()) # doctest: +ELLIPSIS + ============================== prooftrans ============================ + Prover9 (...) version ... + Process ... was started by ... on ... + ... + The command was ".../prover9". + ============================== end of head =========================== + + ============================== end of input ========================== + + ============================== PROOF ================================= + + % -------- Comments from original proof -------- + % Proof 1 at ... seconds. + % Length of proof is 13. + % Level of proof is 4. 
+ % Maximum clause weight is 0.000. + % Given clauses 0. + + + 1 (all x (boxerdog(x) -> dog(x))). [assumption]. + 2 (all x (boxer(x) -> person(x))). [assumption]. + 3 (all x -(dog(x) & person(x))). [assumption]. + 6 (all x (boxer(x) -> -boxerdog(x))). [goal]. + 8 -boxerdog(x) | dog(x). [clausify(1)]. + 9 boxerdog(c3). [deny(6)]. + 11 -boxer(x) | person(x). [clausify(2)]. + 12 boxer(c3). [deny(6)]. + 14 -dog(x) | -person(x). [clausify(3)]. + 15 dog(c3). [resolve(9,a,8,a)]. + 18 person(c3). [resolve(12,a,11,a)]. + 19 -person(c3). [resolve(15,a,14,a)]. + 20 $F. [resolve(19,a,18,a)]. + + ============================== end of proof ========================== + +---------------------- +The equiv() method +---------------------- + +One application of the theorem prover functionality is to check if +two Expressions have the same meaning. +The ``equiv()`` method calls a theorem prover to determine whether two +Expressions are logically equivalent. + + >>> a = read_expr(r'exists x.(man(x) & walks(x))') + >>> b = read_expr(r'exists x.(walks(x) & man(x))') + >>> print(a.equiv(b)) + True + +The same method can be used on Discourse Representation Structures (DRSs). +In this case, each DRS is converted to a first order logic form, and then +passed to the theorem prover. + + >>> dp = DrtParser() + >>> a = dp.parse(r'([x],[man(x), walks(x)])') + >>> b = dp.parse(r'([x],[walks(x), man(x)])') + >>> print(a.equiv(b)) + True + + +-------------------------------- +NLTK Interface to Model Builders +-------------------------------- + +The top-level to model builders is parallel to that for +theorem-provers. The ``ModelBuilder`` interface is located +in ``nltk.inference.api``. It is currently only implemented by +``Mace``, which interfaces with the Mace4 model builder. + +Typically we use a model builder to show that some set of formulas has +a model, and is therefore consistent. One way of doing this is by +treating our candidate set of sentences as assumptions, and leaving +the goal unspecified. +Thus, the following interaction shows how both ``{a, c1}`` and ``{a, c2}`` +are consistent sets, since Mace succeeds in a building a +model for each of them, while ``{c1, c2}`` is inconsistent. + + >>> a3 = read_expr('exists x.(man(x) and walks(x))') + >>> c1 = read_expr('mortal(socrates)') + >>> c2 = read_expr('-mortal(socrates)') + >>> mace = Mace() + >>> print(mace.build_model(None, [a3, c1])) + True + >>> print(mace.build_model(None, [a3, c2])) + True + +We can also use the model builder as an adjunct to theorem prover. +Let's suppose we are trying to prove ``S |- g``, i.e. that ``g`` +is logically entailed by assumptions ``S = {s1, s2, ..., sn}``. +We can this same input to Mace4, and the model builder will try to +find a counterexample, that is, to show that ``g`` does *not* follow +from ``S``. So, given this input, Mace4 will try to find a model for +the set ``S' = {s1, s2, ..., sn, (not g)}``. If ``g`` fails to follow +from ``S``, then Mace4 may well return with a counterexample faster +than Prover9 concludes that it cannot find the required proof. +Conversely, if ``g`` *is* provable from ``S``, Mace4 may take a long +time unsuccessfully trying to find a counter model, and will eventually give up. + +In the following example, we see that the model builder does succeed +in building a model of the assumptions together with the negation of +the goal. That is, it succeeds in finding a model +where there is a woman that every man loves; Adam is a man; Eve is a +woman; but Adam does not love Eve. 
+ + >>> a4 = read_expr('exists y. (woman(y) & all x. (man(x) -> love(x,y)))') + >>> a5 = read_expr('man(adam)') + >>> a6 = read_expr('woman(eve)') + >>> g = read_expr('love(adam,eve)') + >>> print(mace.build_model(g, [a4, a5, a6])) + True + +The Model Builder will fail to find a model if the assumptions do entail +the goal. Mace will continue to look for models of ever-increasing sizes +until the end_size number is reached. By default, end_size is 500, +but it can be set manually for quicker response time. + + >>> a7 = read_expr('all x.(man(x) -> mortal(x))') + >>> a8 = read_expr('man(socrates)') + >>> g2 = read_expr('mortal(socrates)') + >>> print(Mace(end_size=50).build_model(g2, [a7, a8])) + False + +There is also a ``ModelBuilderCommand`` class that, like ``ProverCommand``, +stores a ``ModelBuilder``, a goal, assumptions, a result, and a model. The +only implementation in NLTK is ``MaceCommand``. + + +----- +Mace4 +----- + +Mace4 Installation +~~~~~~~~~~~~~~~~~~ + +Mace4 is packaged with Prover9, and can be downloaded from the same +source, namely http://www.cs.unm.edu/~mccune/prover9/. It is installed +in the same manner as Prover9. + +Using Mace4 +~~~~~~~~~~~ + +Check whether Mace4 can find a model. + + >>> a = read_expr('(see(mary,john) & -(mary = john))') + >>> mb = MaceCommand(assumptions=[a]) + >>> mb.build_model() + True + +Show the model in 'tabular' format. + + >>> print(mb.model(format='tabular')) + % number = 1 + % seconds = 0 + + % Interpretation of size 2 + + john : 0 + + mary : 1 + + see : + | 0 1 + ---+---- + 0 | 0 0 + 1 | 1 0 + + +Show the model in 'tabular' format. + + >>> print(mb.model(format='cooked')) + % number = 1 + % seconds = 0 + + % Interpretation of size 2 + + john = 0. + + mary = 1. + + - see(0,0). + - see(0,1). + see(1,0). + - see(1,1). + + +The property ``valuation`` accesses the stored ``Valuation``. + + >>> print(mb.valuation) + {'john': 'a', 'mary': 'b', 'see': {('b', 'a')}} + +We can return to our earlier example and inspect the model: + + >>> mb = MaceCommand(g, assumptions=[a4, a5, a6]) + >>> m = mb.build_model() + >>> print(mb.model(format='cooked')) + % number = 1 + % seconds = 0 + + % Interpretation of size 2 + + adam = 0. + + eve = 0. + + c1 = 1. + + man(0). + - man(1). + + woman(0). + woman(1). + + - love(0,0). + love(0,1). + - love(1,0). + - love(1,1). + + +Here, we can see that ``adam`` and ``eve`` have been assigned the same +individual, namely ``0`` as value; ``0`` is both a man and a woman; a second +individual ``1`` is also a woman; and ``0`` loves ``1``. Thus, this is +an interpretation in which there is a woman that every man loves but +Adam doesn't love Eve. + +Mace can also be used with propositional logic. 
+ + >>> p = read_expr('P') + >>> q = read_expr('Q') + >>> mb = MaceCommand(q, [p, p>-q]) + >>> mb.build_model() + True + >>> mb.valuation['P'] + True + >>> mb.valuation['Q'] + False diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/inference_fixt.py b/venv.bak/lib/python3.7/site-packages/nltk/test/inference_fixt.py new file mode 100644 index 0000000..3fe9d03 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/inference_fixt.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + + +def setup_module(module): + from nose import SkipTest + from nltk.inference.mace import Mace + + try: + m = Mace() + m._find_binary('mace4') + except LookupError: + raise SkipTest( + "Mace4/Prover9 is not available so inference.doctest was skipped" + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/internals.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/internals.doctest new file mode 100644 index 0000000..74c2bd9 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/internals.doctest @@ -0,0 +1,140 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +========================================== + Unit tests for the nltk.utilities module +========================================== + +overridden() +~~~~~~~~~~~~ + >>> from nltk.internals import overridden + +The typical use case is in defining methods for an interface or +abstract base class, in such a way that subclasses don't have to +implement all of the methods: + + >>> class EaterI(object): + ... '''Subclass must define eat() or batch_eat().''' + ... def eat(self, food): + ... if overridden(self.batch_eat): + ... return self.batch_eat([food])[0] + ... else: + ... raise NotImplementedError() + ... def batch_eat(self, foods): + ... return [self.eat(food) for food in foods] + +As long as a subclass implements one method, it will be used to +perform the other method: + + >>> class GoodEater1(EaterI): + ... def eat(self, food): + ... return 'yum' + >>> GoodEater1().eat('steak') + 'yum' + >>> GoodEater1().batch_eat(['steak', 'peas']) + ['yum', 'yum'] + + >>> class GoodEater2(EaterI): + ... def batch_eat(self, foods): + ... return ['yum' for food in foods] + >>> GoodEater2().eat('steak') + 'yum' + >>> GoodEater2().batch_eat(['steak', 'peas']) + ['yum', 'yum'] + +But if a subclass doesn't implement either one, then they'll get an +error when they try to call them. (nb this is better than infinite +recursion): + + >>> class BadEater1(EaterI): + ... pass + >>> BadEater1().eat('steak') + Traceback (most recent call last): + . . . + NotImplementedError + >>> BadEater1().batch_eat(['steak', 'peas']) + Traceback (most recent call last): + . . . + NotImplementedError + +Trying to use the abstract base class itself will also result in an +error: + + >>> class EaterI(EaterI): + ... pass + >>> EaterI().eat('steak') + Traceback (most recent call last): + . . . + NotImplementedError + >>> EaterI().batch_eat(['steak', 'peas']) + Traceback (most recent call last): + . . . + NotImplementedError + +It's ok to use intermediate abstract classes: + + >>> class AbstractEater(EaterI): + ... pass + + >>> class GoodEater3(AbstractEater): + ... def eat(self, food): + ... return 'yum' + ... + >>> GoodEater3().eat('steak') + 'yum' + >>> GoodEater3().batch_eat(['steak', 'peas']) + ['yum', 'yum'] + + >>> class GoodEater4(AbstractEater): + ... def batch_eat(self, foods): + ... 
return ['yum' for food in foods] + >>> GoodEater4().eat('steak') + 'yum' + >>> GoodEater4().batch_eat(['steak', 'peas']) + ['yum', 'yum'] + + >>> class BadEater2(AbstractEater): + ... pass + >>> BadEater2().eat('steak') + Traceback (most recent call last): + . . . + NotImplementedError + >>> BadEater2().batch_eat(['steak', 'peas']) + Traceback (most recent call last): + . . . + NotImplementedError + +Here's some extra tests: + + >>> class A(object): + ... def f(x): pass + >>> class B(A): + ... def f(x): pass + >>> class C(A): pass + >>> class D(B): pass + + >>> overridden(A().f) + False + >>> overridden(B().f) + True + >>> overridden(C().f) + False + >>> overridden(D().f) + True + +It works for classic classes, too: + + >>> class A: + ... def f(x): pass + >>> class B(A): + ... def f(x): pass + >>> class C(A): pass + >>> class D(B): pass + >>> overridden(A().f) + False + >>> overridden(B().f) + True + >>> overridden(C().f) + False + >>> overridden(D().f) + True diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/japanese.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/japanese.doctest new file mode 100644 index 0000000..181b080 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/japanese.doctest @@ -0,0 +1,48 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +============================ +Japanese Language Processing +============================ + + >>> from nltk import * + +------------- +Corpus Access +------------- + +KNB Corpus +---------- + + >>> from nltk.corpus import knbc + +Access the words: this should produce a list of strings: + + >>> type(knbc.words()[0]) is not bytes + True + +Access the sentences: this should produce a list of lists of strings: + + >>> type(knbc.sents()[0][0]) is not bytes + True + +Access the tagged words: this should produce a list of word, tag pairs: + + >>> type(knbc.tagged_words()[0]) + <... 'tuple'> + +Access the tagged sentences: this should produce a list of lists of word, tag pairs: + + >>> type(knbc.tagged_sents()[0][0]) + <... 'tuple'> + + +JEITA Corpus +------------ + + >>> from nltk.corpus import jeita + +Access the tagged words: this should produce a list of word, tag pairs, where a tag is a string: + + >>> type(jeita.tagged_words()[0][1]) is not bytes + True diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/lm.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/lm.doctest new file mode 100644 index 0000000..f3bde33 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/lm.doctest @@ -0,0 +1,131 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +.. -*- coding: utf-8 -*- + + +Regression Tests +================ + + +Issue 167 +--------- +https://github.com/nltk/nltk/issues/167 + + >>> from nltk.corpus import brown + >>> from nltk.lm.preprocessing import padded_everygram_pipeline + >>> ngram_order = 3 + >>> train_data, vocab_data = padded_everygram_pipeline( + ... ngram_order, + ... brown.sents(categories="news") + ... ) + + >>> from nltk.lm import WittenBellInterpolated + >>> lm = WittenBellInterpolated(ngram_order) + >>> lm.fit(train_data, vocab_data) + +Sentence containing an unseen word should result in infinite entropy because +Witten-Bell is based ultimately on MLE, which cannot handle unseen ngrams. +Crucially, it shouldn't raise any exceptions for unseen words. 
+ + >>> from nltk.util import ngrams + >>> sent = ngrams("This is a sentence with the word aaddvark".split(), 3) + >>> lm.entropy(sent) + inf + +If we remove all unseen ngrams from the sentence, we'll get a non-infinite value +for the entropy. + + >>> sent = ngrams("This is a sentence".split(), 3) + >>> lm.entropy(sent) + 17.41365588455936 + + +Issue 367 +--------- +https://github.com/nltk/nltk/issues/367 + +Reproducing Dan Blanchard's example: +https://github.com/nltk/nltk/issues/367#issuecomment-14646110 + + >>> from nltk.lm import Lidstone, Vocabulary + >>> word_seq = list('aaaababaaccbacb') + >>> ngram_order = 2 + >>> from nltk.util import everygrams + >>> train_data = [everygrams(word_seq, max_len=ngram_order)] + >>> V = Vocabulary(['a', 'b', 'c', '']) + >>> lm = Lidstone(0.2, ngram_order, vocabulary=V) + >>> lm.fit(train_data) + +For doctest to work we have to sort the vocabulary keys. + + >>> V_keys = sorted(V) + >>> round(sum(lm.score(w, ("b",)) for w in V_keys), 6) + 1.0 + >>> round(sum(lm.score(w, ("a",)) for w in V_keys), 6) + 1.0 + + >>> [lm.score(w, ("b",)) for w in V_keys] + [0.05, 0.05, 0.8, 0.05, 0.05] + >>> [round(lm.score(w, ("a",)), 4) for w in V_keys] + [0.0222, 0.0222, 0.4667, 0.2444, 0.2444] + + +Here's reproducing @afourney's comment: +https://github.com/nltk/nltk/issues/367#issuecomment-15686289 + + >>> sent = ['foo', 'foo', 'foo', 'foo', 'bar', 'baz'] + >>> ngram_order = 3 + >>> from nltk.lm.preprocessing import padded_everygram_pipeline + >>> train_data, vocab_data = padded_everygram_pipeline(ngram_order, [sent]) + >>> from nltk.lm import Lidstone + >>> lm = Lidstone(0.2, ngram_order) + >>> lm.fit(train_data, vocab_data) + +The vocabulary includes the "UNK" symbol as well as two padding symbols. + + >>> len(lm.vocab) + 6 + >>> word = "foo" + >>> context = ("bar", "baz") + +The raw counts. + + >>> lm.context_counts(context)[word] + 0 + >>> lm.context_counts(context).N() + 1 + +Counts with Lidstone smoothing. + + >>> lm.context_counts(context)[word] + lm.gamma + 0.2 + >>> lm.context_counts(context).N() + len(lm.vocab) * lm.gamma + 2.2 + +Without any backoff, just using Lidstone smoothing, P("foo" | "bar", "baz") should be: +0.2 / 2.2 ~= 0.090909 + + >>> round(lm.score(word, context), 6) + 0.090909 + + +Issue 380 +--------- +https://github.com/nltk/nltk/issues/380 + +Reproducing setup akin to this comment: +https://github.com/nltk/nltk/issues/380#issue-12879030 + +For speed take only the first 100 sentences of reuters. Shouldn't affect the test. + >>> from nltk.corpus import reuters + >>> sents = reuters.sents()[:100] + >>> ngram_order = 3 + >>> from nltk.lm.preprocessing import padded_everygram_pipeline + >>> train_data, vocab_data = padded_everygram_pipeline(ngram_order, sents) + + >>> from nltk.lm import Lidstone + >>> lm = Lidstone(0.2, ngram_order) + >>> lm.fit(train_data, vocab_data) + >>> lm.score("said", ("",)) < 1 + True diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/logic.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/logic.doctest new file mode 100644 index 0000000..ab27009 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/logic.doctest @@ -0,0 +1,1098 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +======================= +Logic & Lambda Calculus +======================= + +The `nltk.logic` package allows expressions of First-Order Logic (FOL) to be +parsed into ``Expression`` objects. 
In addition to FOL, the parser +handles lambda-abstraction with variables of higher order. + +-------- +Overview +-------- + + >>> from nltk.sem.logic import * + +The default inventory of logical constants is the following: + + >>> boolean_ops() # doctest: +NORMALIZE_WHITESPACE + negation - + conjunction & + disjunction | + implication -> + equivalence <-> + >>> equality_preds() # doctest: +NORMALIZE_WHITESPACE + equality = + inequality != + >>> binding_ops() # doctest: +NORMALIZE_WHITESPACE + existential exists + universal all + lambda \ + +---------------- +Regression Tests +---------------- + + +Untyped Logic ++++++++++++++ + +Process logical expressions conveniently: + + >>> read_expr = Expression.fromstring + +Test for equality under alpha-conversion +======================================== + + >>> e1 = read_expr('exists x.P(x)') + >>> print(e1) + exists x.P(x) + >>> e2 = e1.alpha_convert(Variable('z')) + >>> print(e2) + exists z.P(z) + >>> e1 == e2 + True + + + >>> l = read_expr(r'\X.\X.X(X)(1)').simplify() + >>> id = read_expr(r'\X.X(X)') + >>> l == id + True + +Test numerals +============= + + >>> zero = read_expr(r'\F x.x') + >>> one = read_expr(r'\F x.F(x)') + >>> two = read_expr(r'\F x.F(F(x))') + >>> three = read_expr(r'\F x.F(F(F(x)))') + >>> four = read_expr(r'\F x.F(F(F(F(x))))') + >>> succ = read_expr(r'\N F x.F(N(F,x))') + >>> plus = read_expr(r'\M N F x.M(F,N(F,x))') + >>> mult = read_expr(r'\M N F.M(N(F))') + >>> pred = read_expr(r'\N F x.(N(\G H.H(G(F)))(\u.x)(\u.u))') + >>> v1 = ApplicationExpression(succ, zero).simplify() + >>> v1 == one + True + >>> v2 = ApplicationExpression(succ, v1).simplify() + >>> v2 == two + True + >>> v3 = ApplicationExpression(ApplicationExpression(plus, v1), v2).simplify() + >>> v3 == three + True + >>> v4 = ApplicationExpression(ApplicationExpression(mult, v2), v2).simplify() + >>> v4 == four + True + >>> v5 = ApplicationExpression(pred, ApplicationExpression(pred, v4)).simplify() + >>> v5 == two + True + +Overloaded operators also exist, for convenience. + + >>> print(succ(zero).simplify() == one) + True + >>> print(plus(one,two).simplify() == three) + True + >>> print(mult(two,two).simplify() == four) + True + >>> print(pred(pred(four)).simplify() == two) + True + + >>> john = read_expr(r'john') + >>> man = read_expr(r'\x.man(x)') + >>> walk = read_expr(r'\x.walk(x)') + >>> man(john).simplify() + + >>> print(-walk(john).simplify()) + -walk(john) + >>> print((man(john) & walk(john)).simplify()) + (man(john) & walk(john)) + >>> print((man(john) | walk(john)).simplify()) + (man(john) | walk(john)) + >>> print((man(john) > walk(john)).simplify()) + (man(john) -> walk(john)) + >>> print((man(john) < walk(john)).simplify()) + (man(john) <-> walk(john)) + +Python's built-in lambda operator can also be used with Expressions + + >>> john = VariableExpression(Variable('john')) + >>> run_var = VariableExpression(Variable('run')) + >>> run = lambda x: run_var(x) + >>> run(john) + + + +``betaConversionTestSuite.pl`` +------------------------------ + +Tests based on Blackburn & Bos' book, *Representation and Inference +for Natural Language*. 
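+Each case that follows applies the same pattern: parse an expression
+containing a redex, beta-reduce it with ``simplify()``, and compare the
+result against a hand-reduced form. A minimal sketch of that pattern, using
+a made-up expression rather than one of the ported cases::
+
+    from nltk.sem.logic import Expression
+
+    read_expr = Expression.fromstring
+    redex = read_expr(r'\x.walk(x)(mia)')       # (\x.walk(x)) applied to mia
+    reduced = read_expr(r'walk(mia)')           # hand-reduced normal form
+    assert redex.simplify() == reduced.simplify()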
+ + >>> x1 = read_expr(r'\P.P(mia)(\x.walk(x))').simplify() + >>> x2 = read_expr(r'walk(mia)').simplify() + >>> x1 == x2 + True + + >>> x1 = read_expr(r'exists x.(man(x) & ((\P.exists x.(woman(x) & P(x)))(\y.love(x,y))))').simplify() + >>> x2 = read_expr(r'exists x.(man(x) & exists y.(woman(y) & love(x,y)))').simplify() + >>> x1 == x2 + True + >>> x1 = read_expr(r'\a.sleep(a)(mia)').simplify() + >>> x2 = read_expr(r'sleep(mia)').simplify() + >>> x1 == x2 + True + >>> x1 = read_expr(r'\a.\b.like(b,a)(mia)').simplify() + >>> x2 = read_expr(r'\b.like(b,mia)').simplify() + >>> x1 == x2 + True + >>> x1 = read_expr(r'\a.(\b.like(b,a)(vincent))').simplify() + >>> x2 = read_expr(r'\a.like(vincent,a)').simplify() + >>> x1 == x2 + True + >>> x1 = read_expr(r'\a.((\b.like(b,a)(vincent)) & sleep(a))').simplify() + >>> x2 = read_expr(r'\a.(like(vincent,a) & sleep(a))').simplify() + >>> x1 == x2 + True + + >>> x1 = read_expr(r'(\a.\b.like(b,a)(mia)(vincent))').simplify() + >>> x2 = read_expr(r'like(vincent,mia)').simplify() + >>> x1 == x2 + True + + >>> x1 = read_expr(r'P((\a.sleep(a)(vincent)))').simplify() + >>> x2 = read_expr(r'P(sleep(vincent))').simplify() + >>> x1 == x2 + True + + >>> x1 = read_expr(r'\A.A((\b.sleep(b)(vincent)))').simplify() + >>> x2 = read_expr(r'\A.A(sleep(vincent))').simplify() + >>> x1 == x2 + True + + >>> x1 = read_expr(r'\A.A(sleep(vincent))').simplify() + >>> x2 = read_expr(r'\A.A(sleep(vincent))').simplify() + >>> x1 == x2 + True + + >>> x1 = read_expr(r'(\A.A(vincent)(\b.sleep(b)))').simplify() + >>> x2 = read_expr(r'sleep(vincent)').simplify() + >>> x1 == x2 + True + + >>> x1 = read_expr(r'\A.believe(mia,A(vincent))(\b.sleep(b))').simplify() + >>> x2 = read_expr(r'believe(mia,sleep(vincent))').simplify() + >>> x1 == x2 + True + + >>> x1 = read_expr(r'(\A.(A(vincent) & A(mia)))(\b.sleep(b))').simplify() + >>> x2 = read_expr(r'(sleep(vincent) & sleep(mia))').simplify() + >>> x1 == x2 + True + + >>> x1 = read_expr(r'\A.\B.(\C.C(A(vincent))(\d.probably(d)) & (\C.C(B(mia))(\d.improbably(d))))(\f.walk(f))(\f.talk(f))').simplify() + >>> x2 = read_expr(r'(probably(walk(vincent)) & improbably(talk(mia)))').simplify() + >>> x1 == x2 + True + + >>> x1 = read_expr(r'(\a.\b.(\C.C(a,b)(\d.\f.love(d,f))))(jules)(mia)').simplify() + >>> x2 = read_expr(r'love(jules,mia)').simplify() + >>> x1 == x2 + True + + >>> x1 = read_expr(r'(\A.\B.exists c.(A(c) & B(c)))(\d.boxer(d),\d.sleep(d))').simplify() + >>> x2 = read_expr(r'exists c.(boxer(c) & sleep(c))').simplify() + >>> x1 == x2 + True + + >>> x1 = read_expr(r'\A.Z(A)(\c.\a.like(a,c))').simplify() + >>> x2 = read_expr(r'Z(\c.\a.like(a,c))').simplify() + >>> x1 == x2 + True + + >>> x1 = read_expr(r'\A.\b.A(b)(\c.\b.like(b,c))').simplify() + >>> x2 = read_expr(r'\b.(\c.\b.like(b,c)(b))').simplify() + >>> x1 == x2 + True + + >>> x1 = read_expr(r'(\a.\b.(\C.C(a,b)(\b.\a.loves(b,a))))(jules)(mia)').simplify() + >>> x2 = read_expr(r'loves(jules,mia)').simplify() + >>> x1 == x2 + True + + >>> x1 = read_expr(r'(\A.\b.(exists b.A(b) & A(b)))(\c.boxer(c))(vincent)').simplify() + >>> x2 = read_expr(r'((exists b.boxer(b)) & boxer(vincent))').simplify() + >>> x1 == x2 + True + +Test Parser +=========== + + >>> print(read_expr(r'john')) + john + >>> print(read_expr(r'x')) + x + >>> print(read_expr(r'-man(x)')) + -man(x) + >>> print(read_expr(r'--man(x)')) + --man(x) + >>> print(read_expr(r'(man(x))')) + man(x) + >>> print(read_expr(r'((man(x)))')) + man(x) + >>> print(read_expr(r'man(x) <-> tall(x)')) + (man(x) <-> tall(x)) + >>> 
print(read_expr(r'(man(x) <-> tall(x))')) + (man(x) <-> tall(x)) + >>> print(read_expr(r'(man(x) & tall(x) & walks(x))')) + (man(x) & tall(x) & walks(x)) + >>> print(read_expr(r'(man(x) & tall(x) & walks(x))').first) + (man(x) & tall(x)) + >>> print(read_expr(r'man(x) | tall(x) & walks(x)')) + (man(x) | (tall(x) & walks(x))) + >>> print(read_expr(r'((man(x) & tall(x)) | walks(x))')) + ((man(x) & tall(x)) | walks(x)) + >>> print(read_expr(r'man(x) & (tall(x) | walks(x))')) + (man(x) & (tall(x) | walks(x))) + >>> print(read_expr(r'(man(x) & (tall(x) | walks(x)))')) + (man(x) & (tall(x) | walks(x))) + >>> print(read_expr(r'P(x) -> Q(x) <-> R(x) | S(x) & T(x)')) + ((P(x) -> Q(x)) <-> (R(x) | (S(x) & T(x)))) + >>> print(read_expr(r'exists x.man(x)')) + exists x.man(x) + >>> print(read_expr(r'exists x.(man(x) & tall(x))')) + exists x.(man(x) & tall(x)) + >>> print(read_expr(r'exists x.(man(x) & tall(x) & walks(x))')) + exists x.(man(x) & tall(x) & walks(x)) + >>> print(read_expr(r'-P(x) & Q(x)')) + (-P(x) & Q(x)) + >>> read_expr(r'-P(x) & Q(x)') == read_expr(r'(-P(x)) & Q(x)') + True + >>> print(read_expr(r'\x.man(x)')) + \x.man(x) + >>> print(read_expr(r'\x.man(x)(john)')) + \x.man(x)(john) + >>> print(read_expr(r'\x.man(x)(john) & tall(x)')) + (\x.man(x)(john) & tall(x)) + >>> print(read_expr(r'\x.\y.sees(x,y)')) + \x y.sees(x,y) + >>> print(read_expr(r'\x y.sees(x,y)')) + \x y.sees(x,y) + >>> print(read_expr(r'\x.\y.sees(x,y)(a)')) + (\x y.sees(x,y))(a) + >>> print(read_expr(r'\x y.sees(x,y)(a)')) + (\x y.sees(x,y))(a) + >>> print(read_expr(r'\x.\y.sees(x,y)(a)(b)')) + ((\x y.sees(x,y))(a))(b) + >>> print(read_expr(r'\x y.sees(x,y)(a)(b)')) + ((\x y.sees(x,y))(a))(b) + >>> print(read_expr(r'\x.\y.sees(x,y)(a,b)')) + ((\x y.sees(x,y))(a))(b) + >>> print(read_expr(r'\x y.sees(x,y)(a,b)')) + ((\x y.sees(x,y))(a))(b) + >>> print(read_expr(r'((\x.\y.sees(x,y))(a))(b)')) + ((\x y.sees(x,y))(a))(b) + >>> print(read_expr(r'P(x)(y)(z)')) + P(x,y,z) + >>> print(read_expr(r'P(Q)')) + P(Q) + >>> print(read_expr(r'P(Q(x))')) + P(Q(x)) + >>> print(read_expr(r'(\x.exists y.walks(x,y))(x)')) + (\x.exists y.walks(x,y))(x) + >>> print(read_expr(r'exists x.(x = john)')) + exists x.(x = john) + >>> print(read_expr(r'((\P.\Q.exists x.(P(x) & Q(x)))(\x.dog(x)))(\x.bark(x))')) + ((\P Q.exists x.(P(x) & Q(x)))(\x.dog(x)))(\x.bark(x)) + >>> a = read_expr(r'exists c.exists b.A(b,c) & A(b,c)') + >>> b = read_expr(r'(exists c.(exists b.A(b,c))) & A(b,c)') + >>> print(a == b) + True + >>> a = read_expr(r'exists c.(exists b.A(b,c) & A(b,c))') + >>> b = read_expr(r'exists c.((exists b.A(b,c)) & A(b,c))') + >>> print(a == b) + True + >>> print(read_expr(r'exists x.x = y')) + exists x.(x = y) + >>> print(read_expr('A(B)(C)')) + A(B,C) + >>> print(read_expr('(A(B))(C)')) + A(B,C) + >>> print(read_expr('A((B)(C))')) + A(B(C)) + >>> print(read_expr('A(B(C))')) + A(B(C)) + >>> print(read_expr('(A)(B(C))')) + A(B(C)) + >>> print(read_expr('(((A)))(((B))(((C))))')) + A(B(C)) + >>> print(read_expr(r'A != B')) + -(A = B) + >>> print(read_expr('P(x) & x=y & P(y)')) + (P(x) & (x = y) & P(y)) + >>> try: print(read_expr(r'\walk.walk(x)')) + ... except LogicalExpressionException as e: print(e) + 'walk' is an illegal variable name. Constants may not be abstracted. + \walk.walk(x) + ^ + >>> try: print(read_expr(r'all walk.walk(john)')) + ... except LogicalExpressionException as e: print(e) + 'walk' is an illegal variable name. Constants may not be quantified. + all walk.walk(john) + ^ + >>> try: print(read_expr(r'x(john)')) + ... 
except LogicalExpressionException as e: print(e) + 'x' is an illegal predicate name. Individual variables may not be used as predicates. + x(john) + ^ + + >>> from nltk.sem.logic import LogicParser # hack to give access to custom quote chars + >>> lpq = LogicParser() + >>> lpq.quote_chars = [("'", "'", "\\", False)] + >>> print(lpq.parse(r"(man(x) & 'tall\'s,' (x) & walks (x) )")) + (man(x) & tall's,(x) & walks(x)) + >>> lpq.quote_chars = [("'", "'", "\\", True)] + >>> print(lpq.parse(r"'tall\'s,'")) + 'tall\'s,' + >>> print(lpq.parse(r"'spaced name(x)'")) + 'spaced name(x)' + >>> print(lpq.parse(r"-'tall\'s,'(x)")) + -'tall\'s,'(x) + >>> print(lpq.parse(r"(man(x) & 'tall\'s,' (x) & walks (x) )")) + (man(x) & 'tall\'s,'(x) & walks(x)) + + +Simplify +======== + + >>> print(read_expr(r'\x.man(x)(john)').simplify()) + man(john) + >>> print(read_expr(r'\x.((man(x)))(john)').simplify()) + man(john) + >>> print(read_expr(r'\x.\y.sees(x,y)(john, mary)').simplify()) + sees(john,mary) + >>> print(read_expr(r'\x y.sees(x,y)(john, mary)').simplify()) + sees(john,mary) + >>> print(read_expr(r'\x.\y.sees(x,y)(john)(mary)').simplify()) + sees(john,mary) + >>> print(read_expr(r'\x y.sees(x,y)(john)(mary)').simplify()) + sees(john,mary) + >>> print(read_expr(r'\x.\y.sees(x,y)(john)').simplify()) + \y.sees(john,y) + >>> print(read_expr(r'\x y.sees(x,y)(john)').simplify()) + \y.sees(john,y) + >>> print(read_expr(r'(\x.\y.sees(x,y)(john))(mary)').simplify()) + sees(john,mary) + >>> print(read_expr(r'(\x y.sees(x,y)(john))(mary)').simplify()) + sees(john,mary) + >>> print(read_expr(r'exists x.(man(x) & (\x.exists y.walks(x,y))(x))').simplify()) + exists x.(man(x) & exists y.walks(x,y)) + >>> e1 = read_expr(r'exists x.(man(x) & (\x.exists y.walks(x,y))(y))').simplify() + >>> e2 = read_expr(r'exists x.(man(x) & exists z1.walks(y,z1))') + >>> e1 == e2 + True + >>> print(read_expr(r'(\P Q.exists x.(P(x) & Q(x)))(\x.dog(x))').simplify()) + \Q.exists x.(dog(x) & Q(x)) + >>> print(read_expr(r'((\P.\Q.exists x.(P(x) & Q(x)))(\x.dog(x)))(\x.bark(x))').simplify()) + exists x.(dog(x) & bark(x)) + >>> print(read_expr(r'\P.(P(x)(y))(\a b.Q(a,b))').simplify()) + Q(x,y) + +Replace +======= + + >>> a = read_expr(r'a') + >>> x = read_expr(r'x') + >>> y = read_expr(r'y') + >>> z = read_expr(r'z') + + >>> print(read_expr(r'man(x)').replace(x.variable, a, False)) + man(a) + >>> print(read_expr(r'(man(x) & tall(x))').replace(x.variable, a, False)) + (man(a) & tall(a)) + >>> print(read_expr(r'exists x.man(x)').replace(x.variable, a, False)) + exists x.man(x) + >>> print(read_expr(r'exists x.man(x)').replace(x.variable, a, True)) + exists a.man(a) + >>> print(read_expr(r'exists x.give(x,y,z)').replace(y.variable, a, False)) + exists x.give(x,a,z) + >>> print(read_expr(r'exists x.give(x,y,z)').replace(y.variable, a, True)) + exists x.give(x,a,z) + >>> e1 = read_expr(r'exists x.give(x,y,z)').replace(y.variable, x, False) + >>> e2 = read_expr(r'exists z1.give(z1,x,z)') + >>> e1 == e2 + True + >>> e1 = read_expr(r'exists x.give(x,y,z)').replace(y.variable, x, True) + >>> e2 = read_expr(r'exists z1.give(z1,x,z)') + >>> e1 == e2 + True + >>> print(read_expr(r'\x y z.give(x,y,z)').replace(y.variable, a, False)) + \x y z.give(x,y,z) + >>> print(read_expr(r'\x y z.give(x,y,z)').replace(y.variable, a, True)) + \x a z.give(x,a,z) + >>> print(read_expr(r'\x.\y.give(x,y,z)').replace(z.variable, a, False)) + \x y.give(x,y,a) + >>> print(read_expr(r'\x.\y.give(x,y,z)').replace(z.variable, a, True)) + \x y.give(x,y,a) + >>> e1 = 
read_expr(r'\x.\y.give(x,y,z)').replace(z.variable, x, False) + >>> e2 = read_expr(r'\z1.\y.give(z1,y,x)') + >>> e1 == e2 + True + >>> e1 = read_expr(r'\x.\y.give(x,y,z)').replace(z.variable, x, True) + >>> e2 = read_expr(r'\z1.\y.give(z1,y,x)') + >>> e1 == e2 + True + >>> print(read_expr(r'\x.give(x,y,z)').replace(z.variable, y, False)) + \x.give(x,y,y) + >>> print(read_expr(r'\x.give(x,y,z)').replace(z.variable, y, True)) + \x.give(x,y,y) + + >>> from nltk.sem import logic + >>> logic._counter._value = 0 + >>> e1 = read_expr('e1') + >>> e2 = read_expr('e2') + >>> print(read_expr('exists e1 e2.(walk(e1) & talk(e2))').replace(e1.variable, e2, True)) + exists e2 e01.(walk(e2) & talk(e01)) + + +Variables / Free +================ + + >>> examples = [r'walk(john)', + ... r'walk(x)', + ... r'?vp(?np)', + ... r'see(john,mary)', + ... r'exists x.walk(x)', + ... r'\x.see(john,x)', + ... r'\x.see(john,x)(mary)', + ... r'P(x)', + ... r'\P.P(x)', + ... r'aa(x,bb(y),cc(z),P(w),u)', + ... r'bo(?det(?n),@x)'] + >>> examples = [read_expr(e) for e in examples] + + >>> for e in examples: + ... print('%-25s' % e, sorted(e.free())) + walk(john) [] + walk(x) [Variable('x')] + ?vp(?np) [] + see(john,mary) [] + exists x.walk(x) [] + \x.see(john,x) [] + (\x.see(john,x))(mary) [] + P(x) [Variable('P'), Variable('x')] + \P.P(x) [Variable('x')] + aa(x,bb(y),cc(z),P(w),u) [Variable('P'), Variable('u'), Variable('w'), Variable('x'), Variable('y'), Variable('z')] + bo(?det(?n),@x) [] + + >>> for e in examples: + ... print('%-25s' % e, sorted(e.constants())) + walk(john) [Variable('john')] + walk(x) [] + ?vp(?np) [Variable('?np')] + see(john,mary) [Variable('john'), Variable('mary')] + exists x.walk(x) [] + \x.see(john,x) [Variable('john')] + (\x.see(john,x))(mary) [Variable('john'), Variable('mary')] + P(x) [] + \P.P(x) [] + aa(x,bb(y),cc(z),P(w),u) [] + bo(?det(?n),@x) [Variable('?n'), Variable('@x')] + + >>> for e in examples: + ... print('%-25s' % e, sorted(e.predicates())) + walk(john) [Variable('walk')] + walk(x) [Variable('walk')] + ?vp(?np) [Variable('?vp')] + see(john,mary) [Variable('see')] + exists x.walk(x) [Variable('walk')] + \x.see(john,x) [Variable('see')] + (\x.see(john,x))(mary) [Variable('see')] + P(x) [] + \P.P(x) [] + aa(x,bb(y),cc(z),P(w),u) [Variable('aa'), Variable('bb'), Variable('cc')] + bo(?det(?n),@x) [Variable('?det'), Variable('bo')] + + >>> for e in examples: + ... print('%-25s' % e, sorted(e.variables())) + walk(john) [] + walk(x) [Variable('x')] + ?vp(?np) [Variable('?np'), Variable('?vp')] + see(john,mary) [] + exists x.walk(x) [] + \x.see(john,x) [] + (\x.see(john,x))(mary) [] + P(x) [Variable('P'), Variable('x')] + \P.P(x) [Variable('x')] + aa(x,bb(y),cc(z),P(w),u) [Variable('P'), Variable('u'), Variable('w'), Variable('x'), Variable('y'), Variable('z')] + bo(?det(?n),@x) [Variable('?det'), Variable('?n'), Variable('@x')] + + + +`normalize` + >>> print(read_expr(r'\e083.(walk(e083, z472) & talk(e092, z938))').normalize()) + \e01.(walk(e01,z3) & talk(e02,z4)) + +Typed Logic ++++++++++++ + + >>> from nltk.sem.logic import LogicParser + >>> tlp = LogicParser(True) + >>> print(tlp.parse(r'man(x)').type) + ? + >>> print(tlp.parse(r'walk(angus)').type) + ? 
+ >>> print(tlp.parse(r'-man(x)').type) + t + >>> print(tlp.parse(r'(man(x) <-> tall(x))').type) + t + >>> print(tlp.parse(r'exists x.(man(x) & tall(x))').type) + t + >>> print(tlp.parse(r'\x.man(x)').type) + + >>> print(tlp.parse(r'john').type) + e + >>> print(tlp.parse(r'\x y.sees(x,y)').type) + > + >>> print(tlp.parse(r'\x.man(x)(john)').type) + ? + >>> print(tlp.parse(r'\x.\y.sees(x,y)(john)').type) + + >>> print(tlp.parse(r'\x.\y.sees(x,y)(john)(mary)').type) + ? + >>> print(tlp.parse(r'\P.\Q.exists x.(P(x) & Q(x))').type) + <,<,t>> + >>> print(tlp.parse(r'\x.y').type) + + >>> print(tlp.parse(r'\P.P(x)').type) + <,?> + + >>> parsed = tlp.parse('see(john,mary)') + >>> print(parsed.type) + ? + >>> print(parsed.function) + see(john) + >>> print(parsed.function.type) + + >>> print(parsed.function.function) + see + >>> print(parsed.function.function.type) + > + + >>> parsed = tlp.parse('P(x,y)') + >>> print(parsed) + P(x,y) + >>> print(parsed.type) + ? + >>> print(parsed.function) + P(x) + >>> print(parsed.function.type) + + >>> print(parsed.function.function) + P + >>> print(parsed.function.function.type) + > + + >>> print(tlp.parse(r'P').type) + ? + + >>> print(tlp.parse(r'P', {'P': 't'}).type) + t + + >>> a = tlp.parse(r'P(x)') + >>> print(a.type) + ? + >>> print(a.function.type) + + >>> print(a.argument.type) + e + + >>> a = tlp.parse(r'-P(x)') + >>> print(a.type) + t + >>> print(a.term.type) + t + >>> print(a.term.function.type) + + >>> print(a.term.argument.type) + e + + >>> a = tlp.parse(r'P & Q') + >>> print(a.type) + t + >>> print(a.first.type) + t + >>> print(a.second.type) + t + + >>> a = tlp.parse(r'(P(x) & Q(x))') + >>> print(a.type) + t + >>> print(a.first.type) + t + >>> print(a.first.function.type) + + >>> print(a.first.argument.type) + e + >>> print(a.second.type) + t + >>> print(a.second.function.type) + + >>> print(a.second.argument.type) + e + + >>> a = tlp.parse(r'\x.P(x)') + >>> print(a.type) + + >>> print(a.term.function.type) + + >>> print(a.term.argument.type) + e + + >>> a = tlp.parse(r'\P.P(x)') + >>> print(a.type) + <,?> + >>> print(a.term.function.type) + + >>> print(a.term.argument.type) + e + + >>> a = tlp.parse(r'(\x.P(x)(john)) & Q(x)') + >>> print(a.type) + t + >>> print(a.first.type) + t + >>> print(a.first.function.type) + + >>> print(a.first.function.term.function.type) + + >>> print(a.first.function.term.argument.type) + e + >>> print(a.first.argument.type) + e + + >>> a = tlp.parse(r'\x y.P(x,y)(john)(mary) & Q(x)') + >>> print(a.type) + t + >>> print(a.first.type) + t + >>> print(a.first.function.type) + + >>> print(a.first.function.function.type) + > + + >>> a = tlp.parse(r'--P') + >>> print(a.type) + t + >>> print(a.term.type) + t + >>> print(a.term.term.type) + t + + >>> tlp.parse(r'\x y.P(x,y)').type + > + >>> tlp.parse(r'\x y.P(x,y)', {'P': '>'}).type + > + + >>> a = tlp.parse(r'\P y.P(john,y)(\x y.see(x,y))') + >>> a.type + + >>> a.function.type + <>,> + >>> a.function.term.term.function.function.type + > + >>> a.argument.type + > + + >>> a = tlp.parse(r'exists c f.(father(c) = f)') + >>> a.type + t + >>> a.term.term.type + t + >>> a.term.term.first.type + e + >>> a.term.term.first.function.type + + >>> a.term.term.second.type + e + +typecheck() + + >>> a = tlp.parse('P(x)') + >>> b = tlp.parse('Q(x)') + >>> a.type + ? + >>> c = a & b + >>> c.first.type + ? + >>> c.typecheck() # doctest: +ELLIPSIS + {...} + >>> c.first.type + t + + >>> a = tlp.parse('P(x)') + >>> b = tlp.parse('P(x) & Q(x)') + >>> a.type + ? 
+ >>> typecheck([a,b]) # doctest: +ELLIPSIS + {...} + >>> a.type + t + + >>> e = tlp.parse(r'man(x)') + >>> print(dict((k,str(v)) for k,v in e.typecheck().items()) == {'x': 'e', 'man': ''}) + True + >>> sig = {'man': ''} + >>> e = tlp.parse(r'man(x)', sig) + >>> print(e.function.type) + + >>> print(dict((k,str(v)) for k,v in e.typecheck().items()) == {'x': 'e', 'man': ''}) + True + >>> print(e.function.type) + + >>> print(dict((k,str(v)) for k,v in e.typecheck(sig).items()) == {'x': 'e', 'man': ''}) + True + +findtype() + + >>> print(tlp.parse(r'man(x)').findtype(Variable('man'))) + + >>> print(tlp.parse(r'see(x,y)').findtype(Variable('see'))) + > + >>> print(tlp.parse(r'P(Q(R(x)))').findtype(Variable('Q'))) + ? + +reading types from strings + + >>> Type.fromstring('e') + e + >>> Type.fromstring('') + + >>> Type.fromstring('<,>') + <,> + >>> Type.fromstring('<,?>') + <,?> + +alternative type format + + >>> Type.fromstring('e').str() + 'IND' + >>> Type.fromstring('').str() + '(IND -> ANY)' + >>> Type.fromstring('<,t>').str() + '((IND -> BOOL) -> BOOL)' + +Type.__eq__() + + >>> from nltk.sem.logic import * + + >>> e = ENTITY_TYPE + >>> t = TRUTH_TYPE + >>> a = ANY_TYPE + >>> et = ComplexType(e,t) + >>> eet = ComplexType(e,ComplexType(e,t)) + >>> at = ComplexType(a,t) + >>> ea = ComplexType(e,a) + >>> aa = ComplexType(a,a) + + >>> e == e + True + >>> t == t + True + >>> e == t + False + >>> a == t + False + >>> t == a + False + >>> a == a + True + >>> et == et + True + >>> a == et + False + >>> et == a + False + >>> a == ComplexType(a,aa) + True + >>> ComplexType(a,aa) == a + True + +matches() + + >>> e.matches(t) + False + >>> a.matches(t) + True + >>> t.matches(a) + True + >>> a.matches(et) + True + >>> et.matches(a) + True + >>> ea.matches(eet) + True + >>> eet.matches(ea) + True + >>> aa.matches(et) + True + >>> aa.matches(t) + True + +Type error during parsing +========================= + + >>> try: print(tlp.parse(r'exists x y.(P(x) & P(x,y))')) + ... except InconsistentTypeHierarchyException as e: print(e) + The variable 'P' was found in multiple places with different types. + >>> try: tlp.parse(r'\x y.see(x,y)(\x.man(x))') + ... except TypeException as e: print(e) + The function '\x y.see(x,y)' is of type '>' and cannot be applied to '\x.man(x)' of type ''. Its argument must match type 'e'. + >>> try: tlp.parse(r'\P x y.-P(x,y)(\x.-man(x))') + ... except TypeException as e: print(e) + The function '\P x y.-P(x,y)' is of type '<>,>>' and cannot be applied to '\x.-man(x)' of type ''. Its argument must match type '>'. + + >>> a = tlp.parse(r'-talk(x)') + >>> signature = a.typecheck() + >>> try: print(tlp.parse(r'-talk(x,y)', signature)) + ... except InconsistentTypeHierarchyException as e: print(e) + The variable 'talk' was found in multiple places with different types. + + >>> a = tlp.parse(r'-P(x)') + >>> b = tlp.parse(r'-P(x,y)') + >>> a.typecheck() # doctest: +ELLIPSIS + {...} + >>> b.typecheck() # doctest: +ELLIPSIS + {...} + >>> try: typecheck([a,b]) + ... except InconsistentTypeHierarchyException as e: print(e) + The variable 'P' was found in multiple places with different types. + + >>> a = tlp.parse(r'P(x)') + >>> b = tlp.parse(r'P(x,y)') + >>> signature = {'P': ''} + >>> a.typecheck(signature) # doctest: +ELLIPSIS + {...} + >>> try: typecheck([a,b], signature) + ... except InconsistentTypeHierarchyException as e: print(e) + The variable 'P' was found in multiple places with different types. + +Parse errors +============ + + >>> try: read_expr(r'') + ... 
except LogicalExpressionException as e: print(e) + End of input found. Expression expected. + + ^ + >>> try: read_expr(r'(') + ... except LogicalExpressionException as e: print(e) + End of input found. Expression expected. + ( + ^ + >>> try: read_expr(r')') + ... except LogicalExpressionException as e: print(e) + Unexpected token: ')'. Expression expected. + ) + ^ + >>> try: read_expr(r'()') + ... except LogicalExpressionException as e: print(e) + Unexpected token: ')'. Expression expected. + () + ^ + >>> try: read_expr(r'(P(x) & Q(x)') + ... except LogicalExpressionException as e: print(e) + End of input found. Expected token ')'. + (P(x) & Q(x) + ^ + >>> try: read_expr(r'(P(x) &') + ... except LogicalExpressionException as e: print(e) + End of input found. Expression expected. + (P(x) & + ^ + >>> try: read_expr(r'(P(x) | )') + ... except LogicalExpressionException as e: print(e) + Unexpected token: ')'. Expression expected. + (P(x) | ) + ^ + >>> try: read_expr(r'P(x) ->') + ... except LogicalExpressionException as e: print(e) + End of input found. Expression expected. + P(x) -> + ^ + >>> try: read_expr(r'P(x') + ... except LogicalExpressionException as e: print(e) + End of input found. Expected token ')'. + P(x + ^ + >>> try: read_expr(r'P(x,') + ... except LogicalExpressionException as e: print(e) + End of input found. Expression expected. + P(x, + ^ + >>> try: read_expr(r'P(x,)') + ... except LogicalExpressionException as e: print(e) + Unexpected token: ')'. Expression expected. + P(x,) + ^ + >>> try: read_expr(r'exists') + ... except LogicalExpressionException as e: print(e) + End of input found. Variable and Expression expected following quantifier 'exists'. + exists + ^ + >>> try: read_expr(r'exists x') + ... except LogicalExpressionException as e: print(e) + End of input found. Expression expected. + exists x + ^ + >>> try: read_expr(r'exists x.') + ... except LogicalExpressionException as e: print(e) + End of input found. Expression expected. + exists x. + ^ + >>> try: read_expr(r'\ ') + ... except LogicalExpressionException as e: print(e) + End of input found. Variable and Expression expected following lambda operator. + \ + ^ + >>> try: read_expr(r'\ x') + ... except LogicalExpressionException as e: print(e) + End of input found. Expression expected. + \ x + ^ + >>> try: read_expr(r'\ x y') + ... except LogicalExpressionException as e: print(e) + End of input found. Expression expected. + \ x y + ^ + >>> try: read_expr(r'\ x.') + ... except LogicalExpressionException as e: print(e) + End of input found. Expression expected. + \ x. + ^ + >>> try: read_expr(r'P(x)Q(x)') + ... except LogicalExpressionException as e: print(e) + Unexpected token: 'Q'. + P(x)Q(x) + ^ + >>> try: read_expr(r'(P(x)Q(x)') + ... except LogicalExpressionException as e: print(e) + Unexpected token: 'Q'. Expected token ')'. + (P(x)Q(x) + ^ + >>> try: read_expr(r'exists x y') + ... except LogicalExpressionException as e: print(e) + End of input found. Expression expected. + exists x y + ^ + >>> try: read_expr(r'exists x y.') + ... except LogicalExpressionException as e: print(e) + End of input found. Expression expected. + exists x y. + ^ + >>> try: read_expr(r'exists x -> y') + ... except LogicalExpressionException as e: print(e) + Unexpected token: '->'. Expression expected. + exists x -> y + ^ + + + >>> try: read_expr(r'A -> ((P(x) & Q(x)) -> Z') + ... except LogicalExpressionException as e: print(e) + End of input found. Expected token ')'. 
+ A -> ((P(x) & Q(x)) -> Z + ^ + >>> try: read_expr(r'A -> ((P(x) &) -> Z') + ... except LogicalExpressionException as e: print(e) + Unexpected token: ')'. Expression expected. + A -> ((P(x) &) -> Z + ^ + >>> try: read_expr(r'A -> ((P(x) | )) -> Z') + ... except LogicalExpressionException as e: print(e) + Unexpected token: ')'. Expression expected. + A -> ((P(x) | )) -> Z + ^ + >>> try: read_expr(r'A -> (P(x) ->) -> Z') + ... except LogicalExpressionException as e: print(e) + Unexpected token: ')'. Expression expected. + A -> (P(x) ->) -> Z + ^ + >>> try: read_expr(r'A -> (P(x) -> Z') + ... except LogicalExpressionException as e: print(e) + End of input found. Expected token ')'. + A -> (P(x) -> Z + ^ + >>> try: read_expr(r'A -> (P(x,) -> Z') + ... except LogicalExpressionException as e: print(e) + Unexpected token: ')'. Expression expected. + A -> (P(x,) -> Z + ^ + >>> try: read_expr(r'A -> (P(x,)) -> Z') + ... except LogicalExpressionException as e: print(e) + Unexpected token: ')'. Expression expected. + A -> (P(x,)) -> Z + ^ + >>> try: read_expr(r'A -> (exists) -> Z') + ... except LogicalExpressionException as e: print(e) + ')' is an illegal variable name. Constants may not be quantified. + A -> (exists) -> Z + ^ + >>> try: read_expr(r'A -> (exists x) -> Z') + ... except LogicalExpressionException as e: print(e) + Unexpected token: ')'. Expression expected. + A -> (exists x) -> Z + ^ + >>> try: read_expr(r'A -> (exists x.) -> Z') + ... except LogicalExpressionException as e: print(e) + Unexpected token: ')'. Expression expected. + A -> (exists x.) -> Z + ^ + >>> try: read_expr(r'A -> (\ ) -> Z') + ... except LogicalExpressionException as e: print(e) + ')' is an illegal variable name. Constants may not be abstracted. + A -> (\ ) -> Z + ^ + >>> try: read_expr(r'A -> (\ x) -> Z') + ... except LogicalExpressionException as e: print(e) + Unexpected token: ')'. Expression expected. + A -> (\ x) -> Z + ^ + >>> try: read_expr(r'A -> (\ x y) -> Z') + ... except LogicalExpressionException as e: print(e) + Unexpected token: ')'. Expression expected. + A -> (\ x y) -> Z + ^ + >>> try: read_expr(r'A -> (\ x.) -> Z') + ... except LogicalExpressionException as e: print(e) + Unexpected token: ')'. Expression expected. + A -> (\ x.) -> Z + ^ + >>> try: read_expr(r'A -> (P(x)Q(x)) -> Z') + ... except LogicalExpressionException as e: print(e) + Unexpected token: 'Q'. Expected token ')'. + A -> (P(x)Q(x)) -> Z + ^ + >>> try: read_expr(r'A -> ((P(x)Q(x)) -> Z') + ... except LogicalExpressionException as e: print(e) + Unexpected token: 'Q'. Expected token ')'. + A -> ((P(x)Q(x)) -> Z + ^ + >>> try: read_expr(r'A -> (all x y) -> Z') + ... except LogicalExpressionException as e: print(e) + Unexpected token: ')'. Expression expected. + A -> (all x y) -> Z + ^ + >>> try: read_expr(r'A -> (exists x y.) -> Z') + ... except LogicalExpressionException as e: print(e) + Unexpected token: ')'. Expression expected. + A -> (exists x y.) -> Z + ^ + >>> try: read_expr(r'A -> (exists x -> y) -> Z') + ... except LogicalExpressionException as e: print(e) + Unexpected token: '->'. Expression expected. + A -> (exists x -> y) -> Z + ^ + + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/meteor.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/meteor.doctest new file mode 100644 index 0000000..2a38009 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/meteor.doctest @@ -0,0 +1,45 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +.. 
-*- coding: utf-8 -*- + +============= +METEOR tests +============= + +No Allignment test +------------------ + + >>> from nltk.translate import meteor + +If the candidate has no alignment to any of the references, the METEOR score is 0. + + >>> round(meteor( + ... ['The candidate has no alignment to any of the references'], + ... 'John loves Mary' + ... ),4) + 0.0 + +Tests based on wikipedia examples +--------------------------------- + +Testing on `wikipedia examples `_ + + >>> same_res = round(meteor( + ... ['The cat sat on the mat'], + ... 'The cat sat on the mat' + ... ),4) + >>> abs(same_res - 0.9977) < 1e-2 + True + + >>> meteor( + ... ['The cat sat on the mat'], + ... 'on the mat sat the cat' + ... ) + 0.5 + + >>> round(meteor( + ... ['The cat sat on the mat'], + ... 'The cat was sat on the mat' + ... ),4) + 0.9654 diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/metrics.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/metrics.doctest new file mode 100644 index 0000000..bbfc3f3 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/metrics.doctest @@ -0,0 +1,293 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +======= +Metrics +======= + +The `nltk.metrics` package provides a variety of *evaluation measures* +which can be used for a wide variety of NLP tasks. + + >>> from __future__ import print_function + >>> from nltk.metrics import * + +------------------ +Standard IR Scores +------------------ + +We can use standard scores from information retrieval to test the +performance of taggers, chunkers, etc. + + >>> reference = 'DET NN VB DET JJ NN NN IN DET NN'.split() + >>> test = 'DET VB VB DET NN NN NN IN DET NN'.split() + >>> print(accuracy(reference, test)) + 0.8 + + +The following measures apply to sets: + + >>> reference_set = set(reference) + >>> test_set = set(test) + >>> precision(reference_set, test_set) + 1.0 + >>> print(recall(reference_set, test_set)) + 0.8 + >>> print(f_measure(reference_set, test_set)) + 0.88888888888... + +Measuring the likelihood of the data, given probability distributions: + + >>> from nltk import FreqDist, MLEProbDist + >>> pdist1 = MLEProbDist(FreqDist("aldjfalskfjaldsf")) + >>> pdist2 = MLEProbDist(FreqDist("aldjfalssjjlldss")) + >>> print(log_likelihood(['a', 'd'], [pdist1, pdist2])) + -2.7075187496... + + +---------------- +Distance Metrics +---------------- + +String edit distance (Levenshtein): + + >>> edit_distance("rain", "shine") + 3 + >>> edit_distance_align("shine", "shine") + [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)] + >>> edit_distance_align("rain", "brainy") + [(0, 0), (1, 1), (1, 2), (2, 3), (3, 4), (4, 5), (4, 6)] + >>> edit_distance_align("", "brainy") + [(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6)] + >>> edit_distance_align("", "") + [(0, 0)] + +Other distance measures: + + >>> s1 = set([1,2,3,4]) + >>> s2 = set([3,4,5]) + >>> binary_distance(s1, s2) + 1.0 + >>> print(jaccard_distance(s1, s2)) + 0.6 + >>> print(masi_distance(s1, s2)) + 0.868 + +---------------------- +Miscellaneous Measures +---------------------- + +Rank Correlation works with two dictionaries mapping keys to ranks. +The dictionaries should have the same set of keys. + + >>> spearman_correlation({'e':1, 't':2, 'a':3}, {'e':1, 'a':2, 't':3}) + 0.5 + +Windowdiff uses a sliding window in comparing two segmentations of the same input (e.g. tokenizations, chunkings). +Segmentations are represented using strings of zeros and ones. 
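+Informally, a window of size ``k`` slides over both boundary strings and the
+score is the fraction of window positions at which the two strings disagree
+on the number of boundaries inside the window. A rough pure-Python sketch of
+that idea (an illustration only, not the ``nltk.metrics`` implementation,
+which also handles weighting and other details)::
+
+    def windowdiff_sketch(seg1, seg2, k):
+        # seg1, seg2: equal-length strings of '0'/'1' boundary markers
+        positions = len(seg1) - k + 1
+        disagreements = 0
+        for i in range(positions):
+            b1 = seg1[i:i + k].count('1')   # boundaries in this window of seg1
+            b2 = seg2[i:i + k].count('1')   # boundaries in this window of seg2
+            if b1 != b2:
+                disagreements += 1
+        return disagreements / positions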
+ + >>> s1 = "000100000010" + >>> s2 = "000010000100" + >>> s3 = "100000010000" + >>> s4 = "000000000000" + >>> s5 = "111111111111" + >>> windowdiff(s1, s1, 3) + 0.0 + >>> abs(windowdiff(s1, s2, 3) - 0.3) < 1e-6 # windowdiff(s1, s2, 3) == 0.3 + True + >>> abs(windowdiff(s2, s3, 3) - 0.8) < 1e-6 # windowdiff(s2, s3, 3) == 0.8 + True + >>> windowdiff(s1, s4, 3) + 0.5 + >>> windowdiff(s1, s5, 3) + 1.0 + +---------------- +Confusion Matrix +---------------- + + >>> reference = 'This is the reference data. Testing 123. aoaeoeoe' + >>> test = 'Thos iz_the rifirenci data. Testeng 123. aoaeoeoe' + >>> print(ConfusionMatrix(reference, test)) + | . 1 2 3 T _ a c d e f g h i n o r s t z | + --+-------------------------------------------+ + |<8>. . . . . 1 . . . . . . . . . . . . . . | + . | .<2>. . . . . . . . . . . . . . . . . . . | + 1 | . .<1>. . . . . . . . . . . . . . . . . . | + 2 | . . .<1>. . . . . . . . . . . . . . . . . | + 3 | . . . .<1>. . . . . . . . . . . . . . . . | + T | . . . . .<2>. . . . . . . . . . . . . . . | + _ | . . . . . .<.>. . . . . . . . . . . . . . | + a | . . . . . . .<4>. . . . . . . . . . . . . | + c | . . . . . . . .<1>. . . . . . . . . . . . | + d | . . . . . . . . .<1>. . . . . . . . . . . | + e | . . . . . . . . . .<6>. . . 3 . . . . . . | + f | . . . . . . . . . . .<1>. . . . . . . . . | + g | . . . . . . . . . . . .<1>. . . . . . . . | + h | . . . . . . . . . . . . .<2>. . . . . . . | + i | . . . . . . . . . . 1 . . .<1>. 1 . . . . | + n | . . . . . . . . . . . . . . .<2>. . . . . | + o | . . . . . . . . . . . . . . . .<3>. . . . | + r | . . . . . . . . . . . . . . . . .<2>. . . | + s | . . . . . . . . . . . . . . . . . .<2>. 1 | + t | . . . . . . . . . . . . . . . . . . .<3>. | + z | . . . . . . . . . . . . . . . . . . . .<.>| + --+-------------------------------------------+ + (row = reference; col = test) + + + >>> cm = ConfusionMatrix(reference, test) + >>> print(cm.pretty_format(sort_by_count=True)) + | e a i o s t . T h n r 1 2 3 c d f g _ z | + --+-------------------------------------------+ + |<8>. . . . . . . . . . . . . . . . . . 1 . | + e | .<6>. 3 . . . . . . . . . . . . . . . . . | + a | . .<4>. . . . . . . . . . . . . . . . . . | + i | . 1 .<1>1 . . . . . . . . . . . . . . . . | + o | . . . .<3>. . . . . . . . . . . . . . . . | + s | . . . . .<2>. . . . . . . . . . . . . . 1 | + t | . . . . . .<3>. . . . . . . . . . . . . . | + . | . . . . . . .<2>. . . . . . . . . . . . . | + T | . . . . . . . .<2>. . . . . . . . . . . . | + h | . . . . . . . . .<2>. . . . . . . . . . . | + n | . . . . . . . . . .<2>. . . . . . . . . . | + r | . . . . . . . . . . .<2>. . . . . . . . . | + 1 | . . . . . . . . . . . .<1>. . . . . . . . | + 2 | . . . . . . . . . . . . .<1>. . . . . . . | + 3 | . . . . . . . . . . . . . .<1>. . . . . . | + c | . . . . . . . . . . . . . . .<1>. . . . . | + d | . . . . . . . . . . . . . . . .<1>. . . . | + f | . . . . . . . . . . . . . . . . .<1>. . . | + g | . . . . . . . . . . . . . . . . . .<1>. . | + _ | . . . . . . . . . . . . . . . . . . .<.>. | + z | . . . . . . . . . . . . . . . . . . . .<.>| + --+-------------------------------------------+ + (row = reference; col = test) + + + >>> print(cm.pretty_format(sort_by_count=True, truncate=10)) + | e a i o s t . T h | + --+---------------------+ + |<8>. . . . . . . . . | + e | .<6>. 3 . . . . . . | + a | . .<4>. . . . . . . | + i | . 1 .<1>1 . . . . . | + o | . . . .<3>. . . . . | + s | . . . . .<2>. . . . | + t | . . . . . .<3>. . . | + . | . . . . . . .<2>. . | + T | . . . . . . . 
.<2>. | + h | . . . . . . . . .<2>| + --+---------------------+ + (row = reference; col = test) + + + >>> print(cm.pretty_format(sort_by_count=True, truncate=10, values_in_chart=False)) + | 1 | + | 1 2 3 4 5 6 7 8 9 0 | + ---+---------------------+ + 1 |<8>. . . . . . . . . | + 2 | .<6>. 3 . . . . . . | + 3 | . .<4>. . . . . . . | + 4 | . 1 .<1>1 . . . . . | + 5 | . . . .<3>. . . . . | + 6 | . . . . .<2>. . . . | + 7 | . . . . . .<3>. . . | + 8 | . . . . . . .<2>. . | + 9 | . . . . . . . .<2>. | + 10 | . . . . . . . . .<2>| + ---+---------------------+ + (row = reference; col = test) + Value key: + 1: + 2: e + 3: a + 4: i + 5: o + 6: s + 7: t + 8: . + 9: T + 10: h + + + +-------------------- +Association measures +-------------------- + +These measures are useful to determine whether the coocurrence of two random +events is meaningful. They are used, for instance, to distinguish collocations +from other pairs of adjacent words. + +We bring some examples of bigram association calculations from Manning and +Schutze's SNLP, 2nd Ed. chapter 5. + + >>> n_new_companies, n_new, n_companies, N = 8, 15828, 4675, 14307668 + >>> bam = BigramAssocMeasures + >>> bam.raw_freq(20, (42, 20), N) == 20. / N + True + >>> bam.student_t(n_new_companies, (n_new, n_companies), N) + 0.999... + >>> bam.chi_sq(n_new_companies, (n_new, n_companies), N) + 1.54... + >>> bam.likelihood_ratio(150, (12593, 932), N) + 1291... + +For other associations, we ensure the ordering of the measures: + + >>> bam.mi_like(20, (42, 20), N) > bam.mi_like(20, (41, 27), N) + True + >>> bam.pmi(20, (42, 20), N) > bam.pmi(20, (41, 27), N) + True + >>> bam.phi_sq(20, (42, 20), N) > bam.phi_sq(20, (41, 27), N) + True + >>> bam.poisson_stirling(20, (42, 20), N) > bam.poisson_stirling(20, (41, 27), N) + True + >>> bam.jaccard(20, (42, 20), N) > bam.jaccard(20, (41, 27), N) + True + >>> bam.dice(20, (42, 20), N) > bam.dice(20, (41, 27), N) + True + >>> bam.fisher(20, (42, 20), N) > bam.fisher(20, (41, 27), N) # doctest: +SKIP + False + +For trigrams, we have to provide more count information: + + >>> n_w1_w2_w3 = 20 + >>> n_w1_w2, n_w1_w3, n_w2_w3 = 35, 60, 40 + >>> pair_counts = (n_w1_w2, n_w1_w3, n_w2_w3) + >>> n_w1, n_w2, n_w3 = 100, 200, 300 + >>> uni_counts = (n_w1, n_w2, n_w3) + >>> N = 14307668 + >>> tam = TrigramAssocMeasures + >>> tam.raw_freq(n_w1_w2_w3, pair_counts, uni_counts, N) == 1. 
* n_w1_w2_w3 / N + True + >>> uni_counts2 = (n_w1, n_w2, 100) + >>> tam.student_t(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.student_t(n_w1_w2_w3, pair_counts, uni_counts, N) + True + >>> tam.chi_sq(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.chi_sq(n_w1_w2_w3, pair_counts, uni_counts, N) + True + >>> tam.mi_like(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.mi_like(n_w1_w2_w3, pair_counts, uni_counts, N) + True + >>> tam.pmi(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.pmi(n_w1_w2_w3, pair_counts, uni_counts, N) + True + >>> tam.likelihood_ratio(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.likelihood_ratio(n_w1_w2_w3, pair_counts, uni_counts, N) + True + >>> tam.poisson_stirling(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.poisson_stirling(n_w1_w2_w3, pair_counts, uni_counts, N) + True + >>> tam.jaccard(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.jaccard(n_w1_w2_w3, pair_counts, uni_counts, N) + True + + +For fourgrams, we have to provide more count information: + + >>> n_w1_w2_w3_w4 = 5 + >>> n_w1_w2, n_w1_w3, n_w2_w3 = 35, 60, 40 + >>> n_w1_w2_w3, n_w2_w3_w4 = 20, 10 + >>> pair_counts = (n_w1_w2, n_w1_w3, n_w2_w3) + >>> triplet_counts = (n_w1_w2_w3, n_w2_w3_w4) + >>> n_w1, n_w2, n_w3, n_w4 = 100, 200, 300, 400 + >>> uni_counts = (n_w1, n_w2, n_w3, n_w4) + >>> N = 14307668 + >>> qam = QuadgramAssocMeasures + >>> qam.raw_freq(n_w1_w2_w3_w4, pair_counts, triplet_counts, uni_counts, N) == 1. * n_w1_w2_w3_w4 / N + True diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/misc.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/misc.doctest new file mode 100644 index 0000000..71343b3 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/misc.doctest @@ -0,0 +1,118 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +-------------------------------------------------------------------------------- +Unit tests for the miscellaneous sort functions. +-------------------------------------------------------------------------------- + + >>> from copy import deepcopy + >>> from nltk.misc.sort import * + +A (very) small list of unsorted integers. + + >>> test_data = [12, 67, 7, 28, 92, 56, 53, 720, 91, 57, 20, 20] + +Test each sorting method - each method returns the number of operations +required to sort the data, and sorts in-place (desctructively - hence the need +for multiple copies). + + >>> sorted_data = deepcopy(test_data) + >>> selection(sorted_data) + 66 + + >>> sorted_data + [7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720] + + >>> sorted_data = deepcopy(test_data) + >>> bubble(sorted_data) + 30 + + >>> sorted_data + [7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720] + + >>> sorted_data = deepcopy(test_data) + >>> merge(sorted_data) + 30 + + >>> sorted_data + [7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720] + + >>> sorted_data = deepcopy(test_data) + >>> quick(sorted_data) + 13 + + >>> sorted_data + [7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720] + +-------------------------------------------------------------------------------- +Unit tests for Wordfinder class +-------------------------------------------------------------------------------- + + >>> import random + + >>> # The following is not enough for reproducibility under Python 2/3 + >>> # (see http://bugs.python.org/issue9025) so this test is skipped. 
+ >>> random.seed(12345) + + >>> from nltk.misc import wordfinder + >>> wordfinder.word_finder() # doctest: +SKIP + Word Finder + + J V L A I R O T A T I S I V O D E R E T + H U U B E A R O E P O C S O R E T N E P + A D A U Z E E S R A P P A L L M E N T R + C X A D Q S Z T P E O R S N G P J A D E + I G Y K K T I A A R G F I D T E L C N S + R E C N B H T R L T N N B W N T A O A I + A Y I L O E I A M E I A A Y U R P L L D + G L T V S T S F E A D I P H D O O H N I + R L S E C I N I L R N N M E C G R U E A + A A Y G I C E N L L E O I G Q R T A E L + M R C E T I S T A E T L L E U A E N R L + O U O T A S E E C S O O N H Y P A T G Y + E M H O M M D R E S F P U L T H C F N V + L A C A I M A M A N L B R U T E D O M I + O R I L N E E E E E U A R S C R Y L I P + H T R K E S N N M S I L A S R E V I N U + T X T A A O U T K S E T A R R E S I B J + A E D L E L J I F O O R P E L K N I R W + K H A I D E Q O P R I C K T I M B E R P + Z K D O O H G N I H T U R V E Y D R O P + + 1: INTERCHANGER + 2: TEARLESSNESS + 3: UNIVERSALISM + 4: DESENSITIZER + 5: INTERMENTION + 6: TRICHOCYSTIC + 7: EXTRAMURALLY + 8: VEGETOALKALI + 9: PALMELLACEAE + 10: AESTHETICISM + 11: PETROGRAPHER + 12: VISITATORIAL + 13: OLEOMARGARIC + 14: WRINKLEPROOF + 15: PRICKTIMBER + 16: PRESIDIALLY + 17: SCITAMINEAE + 18: ENTEROSCOPE + 19: APPALLMENT + 20: TURVEYDROP + 21: THINGHOOD + 22: BISERRATE + 23: GREENLAND + 24: BRUTEDOM + 25: POLONIAN + 26: ACOLHUAN + 27: LAPORTEA + 28: TENDING + 29: TEREDO + 30: MESOLE + 31: UNLIMP + 32: OSTARA + 33: PILY + 34: DUNT + 35: ONYX + 36: KATH + 37: JUNE diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/nonmonotonic.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/nonmonotonic.doctest new file mode 100644 index 0000000..be761b3 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/nonmonotonic.doctest @@ -0,0 +1,286 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +====================== +Nonmonotonic Reasoning +====================== + + >>> from nltk import * + >>> from nltk.inference.nonmonotonic import * + >>> from nltk.sem import logic + >>> logic._counter._value = 0 + >>> read_expr = logic.Expression.fromstring + +------------------------ +Closed Domain Assumption +------------------------ + +The only entities in the domain are those found in the assumptions or goal. +If the domain only contains "A" and "B", then the expression "exists x.P(x)" can +be replaced with "P(A) | P(B)" and an expression "all x.P(x)" can be replaced +with "P(A) & P(B)". 
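+The substitution can be spelled out directly with ``nltk.sem.logic``
+expressions. A small hand-rolled sketch for a two-element domain, using a
+hypothetical predicate ``P`` and constants ``A`` and ``B``
+(``ClosedDomainProver`` below performs the equivalent rewriting on the
+command's assumptions and goal)::
+
+    from nltk.sem.logic import Expression
+
+    read_expr = Expression.fromstring
+    domain = ['A', 'B']
+
+    # "exists x.P(x)" becomes a disjunction over the closed domain
+    existential = read_expr(' | '.join('P(%s)' % e for e in domain))
+    # "all x.P(x)" becomes a conjunction over the closed domain
+    universal = read_expr(' & '.join('P(%s)' % e for e in domain))
+
+    print(existential)   # (P(A) | P(B))
+    print(universal)     # (P(A) & P(B))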
+ + >>> p1 = read_expr(r'all x.(man(x) -> mortal(x))') + >>> p2 = read_expr(r'man(Socrates)') + >>> c = read_expr(r'mortal(Socrates)') + >>> prover = Prover9Command(c, [p1,p2]) + >>> prover.prove() + True + >>> cdp = ClosedDomainProver(prover) + >>> for a in cdp.assumptions(): print(a) # doctest: +SKIP + (man(Socrates) -> mortal(Socrates)) + man(Socrates) + >>> cdp.prove() + True + + >>> p1 = read_expr(r'exists x.walk(x)') + >>> p2 = read_expr(r'man(Socrates)') + >>> c = read_expr(r'walk(Socrates)') + >>> prover = Prover9Command(c, [p1,p2]) + >>> prover.prove() + False + >>> cdp = ClosedDomainProver(prover) + >>> for a in cdp.assumptions(): print(a) # doctest: +SKIP + walk(Socrates) + man(Socrates) + >>> cdp.prove() + True + + >>> p1 = read_expr(r'exists x.walk(x)') + >>> p2 = read_expr(r'man(Socrates)') + >>> p3 = read_expr(r'-walk(Bill)') + >>> c = read_expr(r'walk(Socrates)') + >>> prover = Prover9Command(c, [p1,p2,p3]) + >>> prover.prove() + False + >>> cdp = ClosedDomainProver(prover) + >>> for a in cdp.assumptions(): print(a) # doctest: +SKIP + (walk(Socrates) | walk(Bill)) + man(Socrates) + -walk(Bill) + >>> cdp.prove() + True + + >>> p1 = read_expr(r'walk(Socrates)') + >>> p2 = read_expr(r'walk(Bill)') + >>> c = read_expr(r'all x.walk(x)') + >>> prover = Prover9Command(c, [p1,p2]) + >>> prover.prove() + False + >>> cdp = ClosedDomainProver(prover) + >>> for a in cdp.assumptions(): print(a) # doctest: +SKIP + walk(Socrates) + walk(Bill) + >>> print(cdp.goal()) # doctest: +SKIP + (walk(Socrates) & walk(Bill)) + >>> cdp.prove() + True + + >>> p1 = read_expr(r'girl(mary)') + >>> p2 = read_expr(r'dog(rover)') + >>> p3 = read_expr(r'all x.(girl(x) -> -dog(x))') + >>> p4 = read_expr(r'all x.(dog(x) -> -girl(x))') + >>> p5 = read_expr(r'chase(mary, rover)') + >>> c = read_expr(r'exists y.(dog(y) & all x.(girl(x) -> chase(x,y)))') + >>> prover = Prover9Command(c, [p1,p2,p3,p4,p5]) + >>> print(prover.prove()) + False + >>> cdp = ClosedDomainProver(prover) + >>> for a in cdp.assumptions(): print(a) # doctest: +SKIP + girl(mary) + dog(rover) + ((girl(rover) -> -dog(rover)) & (girl(mary) -> -dog(mary))) + ((dog(rover) -> -girl(rover)) & (dog(mary) -> -girl(mary))) + chase(mary,rover) + >>> print(cdp.goal()) # doctest: +SKIP + ((dog(rover) & (girl(rover) -> chase(rover,rover)) & (girl(mary) -> chase(mary,rover))) | (dog(mary) & (girl(rover) -> chase(rover,mary)) & (girl(mary) -> chase(mary,mary)))) + >>> print(cdp.prove()) + True + +----------------------- +Unique Names Assumption +----------------------- + +No two entities in the domain represent the same entity unless it can be +explicitly proven that they do. Therefore, if the domain contains "A" and "B", +then add the assumption "-(A = B)" if it is not the case that +" \|- (A = B)". 
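+Concretely, for every pair of distinct constants an assumption of the form
+``-(A = B)`` is added, unless the equality is already derivable from the
+other assumptions. A hand-rolled sketch of generating those extra
+assumptions (illustrative only; ``UniqueNamesProver`` below also performs
+the derivability check before adding each inequality)::
+
+    from itertools import combinations
+    from nltk.sem.logic import Expression
+
+    read_expr = Expression.fromstring
+    constants = ['Socrates', 'Bill', 'William']   # hypothetical domain
+
+    for a, b in combinations(constants, 2):
+        print(read_expr('-(%s = %s)' % (a, b)))
+    # -(Socrates = Bill)
+    # -(Socrates = William)
+    # -(Bill = William)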
+ + >>> p1 = read_expr(r'man(Socrates)') + >>> p2 = read_expr(r'man(Bill)') + >>> c = read_expr(r'exists x.exists y.-(x = y)') + >>> prover = Prover9Command(c, [p1,p2]) + >>> prover.prove() + False + >>> unp = UniqueNamesProver(prover) + >>> for a in unp.assumptions(): print(a) # doctest: +SKIP + man(Socrates) + man(Bill) + -(Socrates = Bill) + >>> unp.prove() + True + + >>> p1 = read_expr(r'all x.(walk(x) -> (x = Socrates))') + >>> p2 = read_expr(r'Bill = William') + >>> p3 = read_expr(r'Bill = Billy') + >>> c = read_expr(r'-walk(William)') + >>> prover = Prover9Command(c, [p1,p2,p3]) + >>> prover.prove() + False + >>> unp = UniqueNamesProver(prover) + >>> for a in unp.assumptions(): print(a) # doctest: +SKIP + all x.(walk(x) -> (x = Socrates)) + (Bill = William) + (Bill = Billy) + -(William = Socrates) + -(Billy = Socrates) + -(Socrates = Bill) + >>> unp.prove() + True + +----------------------- +Closed World Assumption +----------------------- + +The only entities that have certain properties are those that is it stated +have the properties. We accomplish this assumption by "completing" predicates. + +If the assumptions contain "P(A)", then "all x.(P(x) -> (x=A))" is the completion +of "P". If the assumptions contain "all x.(ostrich(x) -> bird(x))", then +"all x.(bird(x) -> ostrich(x))" is the completion of "bird". If the +assumptions don't contain anything that are "P", then "all x.-P(x)" is the +completion of "P". + + >>> p1 = read_expr(r'walk(Socrates)') + >>> p2 = read_expr(r'-(Socrates = Bill)') + >>> c = read_expr(r'-walk(Bill)') + >>> prover = Prover9Command(c, [p1,p2]) + >>> prover.prove() + False + >>> cwp = ClosedWorldProver(prover) + >>> for a in cwp.assumptions(): print(a) # doctest: +SKIP + walk(Socrates) + -(Socrates = Bill) + all z1.(walk(z1) -> (z1 = Socrates)) + >>> cwp.prove() + True + + >>> p1 = read_expr(r'see(Socrates, John)') + >>> p2 = read_expr(r'see(John, Mary)') + >>> p3 = read_expr(r'-(Socrates = John)') + >>> p4 = read_expr(r'-(John = Mary)') + >>> c = read_expr(r'-see(Socrates, Mary)') + >>> prover = Prover9Command(c, [p1,p2,p3,p4]) + >>> prover.prove() + False + >>> cwp = ClosedWorldProver(prover) + >>> for a in cwp.assumptions(): print(a) # doctest: +SKIP + see(Socrates,John) + see(John,Mary) + -(Socrates = John) + -(John = Mary) + all z3 z4.(see(z3,z4) -> (((z3 = Socrates) & (z4 = John)) | ((z3 = John) & (z4 = Mary)))) + >>> cwp.prove() + True + + >>> p1 = read_expr(r'all x.(ostrich(x) -> bird(x))') + >>> p2 = read_expr(r'bird(Tweety)') + >>> p3 = read_expr(r'-ostrich(Sam)') + >>> p4 = read_expr(r'Sam != Tweety') + >>> c = read_expr(r'-bird(Sam)') + >>> prover = Prover9Command(c, [p1,p2,p3,p4]) + >>> prover.prove() + False + >>> cwp = ClosedWorldProver(prover) + >>> for a in cwp.assumptions(): print(a) # doctest: +SKIP + all x.(ostrich(x) -> bird(x)) + bird(Tweety) + -ostrich(Sam) + -(Sam = Tweety) + all z7.-ostrich(z7) + all z8.(bird(z8) -> ((z8 = Tweety) | ostrich(z8))) + >>> print(cwp.prove()) + True + +----------------------- +Multi-Decorator Example +----------------------- + +Decorators can be nested to utilize multiple assumptions. 
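+Each decorator wraps a prover command, rewrites the assumptions it exposes,
+and delegates the rest to the wrapped object, so nested decorators compose
+their rewrites from the inside out. A stripped-down sketch of that pattern
+in plain Python (not the actual NLTK class hierarchy)::
+
+    class AddAssumption:
+        """Toy decorator: pass assumptions through and append one more."""
+        def __init__(self, inner, extra):
+            self._inner = inner
+            self._extra = extra
+        def assumptions(self):
+            return self._inner.assumptions() + [self._extra]
+
+    class BaseCommand:
+        def assumptions(self):
+            return ['p1']
+
+    nested = AddAssumption(AddAssumption(BaseCommand(), 'p2'), 'p3')
+    print(nested.assumptions())   # ['p1', 'p2', 'p3']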
+ + >>> p1 = read_expr(r'see(Socrates, John)') + >>> p2 = read_expr(r'see(John, Mary)') + >>> c = read_expr(r'-see(Socrates, Mary)') + >>> prover = Prover9Command(c, [p1,p2]) + >>> print(prover.prove()) + False + >>> cmd = ClosedDomainProver(UniqueNamesProver(ClosedWorldProver(prover))) + >>> print(cmd.prove()) + True + +----------------- +Default Reasoning +----------------- + >>> logic._counter._value = 0 + >>> premises = [] + +define the taxonomy + >>> premises.append(read_expr(r'all x.(elephant(x) -> animal(x))')) + >>> premises.append(read_expr(r'all x.(bird(x) -> animal(x))')) + >>> premises.append(read_expr(r'all x.(dove(x) -> bird(x))')) + >>> premises.append(read_expr(r'all x.(ostrich(x) -> bird(x))')) + >>> premises.append(read_expr(r'all x.(flying_ostrich(x) -> ostrich(x))')) + +default the properties using abnormalities + >>> premises.append(read_expr(r'all x.((animal(x) & -Ab1(x)) -> -fly(x))')) #normal animals don't fly + >>> premises.append(read_expr(r'all x.((bird(x) & -Ab2(x)) -> fly(x))')) #normal birds fly + >>> premises.append(read_expr(r'all x.((ostrich(x) & -Ab3(x)) -> -fly(x))')) #normal ostriches don't fly + +specify abnormal entities + >>> premises.append(read_expr(r'all x.(bird(x) -> Ab1(x))')) #flight + >>> premises.append(read_expr(r'all x.(ostrich(x) -> Ab2(x))')) #non-flying bird + >>> premises.append(read_expr(r'all x.(flying_ostrich(x) -> Ab3(x))')) #flying ostrich + +define entities + >>> premises.append(read_expr(r'elephant(el)')) + >>> premises.append(read_expr(r'dove(do)')) + >>> premises.append(read_expr(r'ostrich(os)')) + +print the augmented assumptions list + >>> prover = Prover9Command(None, premises) + >>> command = UniqueNamesProver(ClosedWorldProver(prover)) + >>> for a in command.assumptions(): print(a) # doctest: +SKIP + all x.(elephant(x) -> animal(x)) + all x.(bird(x) -> animal(x)) + all x.(dove(x) -> bird(x)) + all x.(ostrich(x) -> bird(x)) + all x.(flying_ostrich(x) -> ostrich(x)) + all x.((animal(x) & -Ab1(x)) -> -fly(x)) + all x.((bird(x) & -Ab2(x)) -> fly(x)) + all x.((ostrich(x) & -Ab3(x)) -> -fly(x)) + all x.(bird(x) -> Ab1(x)) + all x.(ostrich(x) -> Ab2(x)) + all x.(flying_ostrich(x) -> Ab3(x)) + elephant(el) + dove(do) + ostrich(os) + all z1.(animal(z1) -> (elephant(z1) | bird(z1))) + all z2.(Ab1(z2) -> bird(z2)) + all z3.(bird(z3) -> (dove(z3) | ostrich(z3))) + all z4.(dove(z4) -> (z4 = do)) + all z5.(Ab2(z5) -> ostrich(z5)) + all z6.(Ab3(z6) -> flying_ostrich(z6)) + all z7.(ostrich(z7) -> ((z7 = os) | flying_ostrich(z7))) + all z8.-flying_ostrich(z8) + all z9.(elephant(z9) -> (z9 = el)) + -(el = os) + -(el = do) + -(os = do) + + >>> UniqueNamesProver(ClosedWorldProver(Prover9Command(read_expr('-fly(el)'), premises))).prove() + True + >>> UniqueNamesProver(ClosedWorldProver(Prover9Command(read_expr('fly(do)'), premises))).prove() + True + >>> UniqueNamesProver(ClosedWorldProver(Prover9Command(read_expr('-fly(os)'), premises))).prove() + True + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/nonmonotonic_fixt.py b/venv.bak/lib/python3.7/site-packages/nltk/test/nonmonotonic_fixt.py new file mode 100644 index 0000000..0c38381 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/nonmonotonic_fixt.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + + +def setup_module(module): + from nose import SkipTest + from nltk.inference.mace import Mace + + try: + m = Mace() + m._find_binary('mace4') + except LookupError: + raise SkipTest( + "Mace4/Prover9 is not available so 
nonmonotonic.doctest was skipped" + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/paice.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/paice.doctest new file mode 100644 index 0000000..1e3a65c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/paice.doctest @@ -0,0 +1,35 @@ + +===================================================== +PAICE's evaluation statistics for stemming algorithms +===================================================== + +Given a list of words with their real lemmas and stems according to stemming algorithm under evaluation, +counts Understemming Index (UI), Overstemming Index (OI), Stemming Weight (SW) and Error-rate relative to truncation (ERRT). + + >>> from nltk.metrics import Paice + + +------------------------------------- +Understemming and Overstemming values +------------------------------------- + + >>> lemmas = {'kneel': ['kneel', 'knelt'], + ... 'range': ['range', 'ranged'], + ... 'ring': ['ring', 'rang', 'rung']} + >>> stems = {'kneel': ['kneel'], + ... 'knelt': ['knelt'], + ... 'rang': ['rang', 'range', 'ranged'], + ... 'ring': ['ring'], + ... 'rung': ['rung']} + >>> p = Paice(lemmas, stems) + >>> p.gumt, p.gdmt, p.gwmt, p.gdnt + (4.0, 5.0, 2.0, 16.0) + + >>> p.ui, p.oi, p.sw + (0.8..., 0.125..., 0.15625...) + + >>> p.errt + 1.0 + + >>> [('{0:.3f}'.format(a), '{0:.3f}'.format(b)) for a, b in p.coords] + [('0.000', '1.000'), ('0.000', '0.375'), ('0.600', '0.125'), ('0.800', '0.125')] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/parse.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/parse.doctest new file mode 100644 index 0000000..b7c0ee1 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/parse.doctest @@ -0,0 +1,884 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +========= + Parsing +========= + +Unit tests for the Context Free Grammar class +--------------------------------------------- + + >>> from nltk import Nonterminal, nonterminals, Production, CFG + + >>> nt1 = Nonterminal('NP') + >>> nt2 = Nonterminal('VP') + + >>> nt1.symbol() + 'NP' + + >>> nt1 == Nonterminal('NP') + True + + >>> nt1 == nt2 + False + + >>> S, NP, VP, PP = nonterminals('S, NP, VP, PP') + >>> N, V, P, DT = nonterminals('N, V, P, DT') + + >>> prod1 = Production(S, [NP, VP]) + >>> prod2 = Production(NP, [DT, NP]) + + >>> prod1.lhs() + S + + >>> prod1.rhs() + (NP, VP) + + >>> prod1 == Production(S, [NP, VP]) + True + + >>> prod1 == prod2 + False + + >>> grammar = CFG.fromstring(""" + ... S -> NP VP + ... PP -> P NP + ... NP -> 'the' N | N PP | 'the' N PP + ... VP -> V NP | V PP | V NP PP + ... N -> 'cat' + ... N -> 'dog' + ... N -> 'rug' + ... V -> 'chased' + ... V -> 'sat' + ... P -> 'in' + ... P -> 'on' + ... """) + +Unit tests for the rd (Recursive Descent Parser) class +------------------------------------------------------ + +Create and run a recursive descent parser over both a syntactically ambiguous +and unambiguous sentence. + + >>> from nltk.parse import RecursiveDescentParser + >>> rd = RecursiveDescentParser(grammar) + + >>> sentence1 = 'the cat chased the dog'.split() + >>> sentence2 = 'the cat chased the dog on the rug'.split() + + >>> for t in rd.parse(sentence1): + ... print(t) + (S (NP the (N cat)) (VP (V chased) (NP the (N dog)))) + + >>> for t in rd.parse(sentence2): + ... 
print(t) + (S + (NP the (N cat)) + (VP (V chased) (NP the (N dog) (PP (P on) (NP the (N rug)))))) + (S + (NP the (N cat)) + (VP (V chased) (NP the (N dog)) (PP (P on) (NP the (N rug))))) + + +(dolist (expr doctest-font-lock-keywords) + (add-to-list 'font-lock-keywords expr)) + + font-lock-keywords +(add-to-list 'font-lock-keywords + (car doctest-font-lock-keywords)) + + +Unit tests for the sr (Shift Reduce Parser) class +------------------------------------------------- + +Create and run a shift reduce parser over both a syntactically ambiguous +and unambiguous sentence. Note that unlike the recursive descent parser, one +and only one parse is ever returned. + + >>> from nltk.parse import ShiftReduceParser + >>> sr = ShiftReduceParser(grammar) + + >>> sentence1 = 'the cat chased the dog'.split() + >>> sentence2 = 'the cat chased the dog on the rug'.split() + + >>> for t in sr.parse(sentence1): + ... print(t) + (S (NP the (N cat)) (VP (V chased) (NP the (N dog)))) + + +The shift reduce parser uses heuristics to decide what to do when there are +multiple possible shift or reduce operations available - for the supplied +grammar clearly the wrong operation is selected. + + >>> for t in sr.parse(sentence2): + ... print(t) + + +Unit tests for the Chart Parser class +------------------------------------- + +We use the demo() function for testing. +We must turn off showing of times. + + >>> import nltk + +First we test tracing with a short sentence + + >>> nltk.parse.chart.demo(2, print_times=False, trace=1, + ... sent='I saw a dog', numparses=1) + * Sentence: + I saw a dog + ['I', 'saw', 'a', 'dog'] + + * Strategy: Bottom-up + + |. I . saw . a . dog .| + |[---------] . . .| [0:1] 'I' + |. [---------] . .| [1:2] 'saw' + |. . [---------] .| [2:3] 'a' + |. . . [---------]| [3:4] 'dog' + |> . . . .| [0:0] NP -> * 'I' + |[---------] . . .| [0:1] NP -> 'I' * + |> . . . .| [0:0] S -> * NP VP + |> . . . .| [0:0] NP -> * NP PP + |[---------> . . .| [0:1] S -> NP * VP + |[---------> . . .| [0:1] NP -> NP * PP + |. > . . .| [1:1] Verb -> * 'saw' + |. [---------] . .| [1:2] Verb -> 'saw' * + |. > . . .| [1:1] VP -> * Verb NP + |. > . . .| [1:1] VP -> * Verb + |. [---------> . .| [1:2] VP -> Verb * NP + |. [---------] . .| [1:2] VP -> Verb * + |. > . . .| [1:1] VP -> * VP PP + |[-------------------] . .| [0:2] S -> NP VP * + |. [---------> . .| [1:2] VP -> VP * PP + |. . > . .| [2:2] Det -> * 'a' + |. . [---------] .| [2:3] Det -> 'a' * + |. . > . .| [2:2] NP -> * Det Noun + |. . [---------> .| [2:3] NP -> Det * Noun + |. . . > .| [3:3] Noun -> * 'dog' + |. . . [---------]| [3:4] Noun -> 'dog' * + |. . [-------------------]| [2:4] NP -> Det Noun * + |. . > . .| [2:2] S -> * NP VP + |. . > . .| [2:2] NP -> * NP PP + |. [-----------------------------]| [1:4] VP -> Verb NP * + |. . [------------------->| [2:4] S -> NP * VP + |. . [------------------->| [2:4] NP -> NP * PP + |[=======================================]| [0:4] S -> NP VP * + |. [----------------------------->| [1:4] VP -> VP * PP + Nr edges in chart: 33 + (S (NP I) (VP (Verb saw) (NP (Det a) (Noun dog)))) + + +Then we test the different parsing Strategies. +Note that the number of edges differ between the strategies. + +Top-down + + >>> nltk.parse.chart.demo(1, print_times=False, trace=0, + ... 
sent='I saw John with a dog', numparses=2) + * Sentence: + I saw John with a dog + ['I', 'saw', 'John', 'with', 'a', 'dog'] + + * Strategy: Top-down + + Nr edges in chart: 48 + (S + (NP I) + (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog)))))) + (S + (NP I) + (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog))))) + + +Bottom-up + + >>> nltk.parse.chart.demo(2, print_times=False, trace=0, + ... sent='I saw John with a dog', numparses=2) + * Sentence: + I saw John with a dog + ['I', 'saw', 'John', 'with', 'a', 'dog'] + + * Strategy: Bottom-up + + Nr edges in chart: 53 + (S + (NP I) + (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog))))) + (S + (NP I) + (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog)))))) + + +Bottom-up Left-Corner + + >>> nltk.parse.chart.demo(3, print_times=False, trace=0, + ... sent='I saw John with a dog', numparses=2) + * Sentence: + I saw John with a dog + ['I', 'saw', 'John', 'with', 'a', 'dog'] + + * Strategy: Bottom-up left-corner + + Nr edges in chart: 36 + (S + (NP I) + (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog))))) + (S + (NP I) + (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog)))))) + + +Left-Corner with Bottom-Up Filter + + >>> nltk.parse.chart.demo(4, print_times=False, trace=0, + ... sent='I saw John with a dog', numparses=2) + * Sentence: + I saw John with a dog + ['I', 'saw', 'John', 'with', 'a', 'dog'] + + * Strategy: Filtered left-corner + + Nr edges in chart: 28 + (S + (NP I) + (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog))))) + (S + (NP I) + (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog)))))) + + +The stepping chart parser + + >>> nltk.parse.chart.demo(5, print_times=False, trace=1, + ... sent='I saw John with a dog', numparses=2) + * Sentence: + I saw John with a dog + ['I', 'saw', 'John', 'with', 'a', 'dog'] + + * Strategy: Stepping (top-down vs bottom-up) + + *** SWITCH TO TOP DOWN + |[------] . . . . .| [0:1] 'I' + |. [------] . . . .| [1:2] 'saw' + |. . [------] . . .| [2:3] 'John' + |. . . [------] . .| [3:4] 'with' + |. . . . [------] .| [4:5] 'a' + |. . . . . [------]| [5:6] 'dog' + |> . . . . . .| [0:0] S -> * NP VP + |> . . . . . .| [0:0] NP -> * NP PP + |> . . . . . .| [0:0] NP -> * Det Noun + |> . . . . . .| [0:0] NP -> * 'I' + |[------] . . . . .| [0:1] NP -> 'I' * + |[------> . . . . .| [0:1] S -> NP * VP + |[------> . . . . .| [0:1] NP -> NP * PP + |. > . . . . .| [1:1] VP -> * VP PP + |. > . . . . .| [1:1] VP -> * Verb NP + |. > . . . . .| [1:1] VP -> * Verb + |. > . . . . .| [1:1] Verb -> * 'saw' + |. [------] . . . .| [1:2] Verb -> 'saw' * + |. [------> . . . .| [1:2] VP -> Verb * NP + |. [------] . . . .| [1:2] VP -> Verb * + |[-------------] . . . .| [0:2] S -> NP VP * + |. [------> . . . .| [1:2] VP -> VP * PP + *** SWITCH TO BOTTOM UP + |. . > . . . .| [2:2] NP -> * 'John' + |. . . > . . .| [3:3] PP -> * 'with' NP + |. . . > . . .| [3:3] Prep -> * 'with' + |. . . . > . .| [4:4] Det -> * 'a' + |. . . . . > .| [5:5] Noun -> * 'dog' + |. . [------] . . .| [2:3] NP -> 'John' * + |. . . [------> . .| [3:4] PP -> 'with' * NP + |. . . [------] . .| [3:4] Prep -> 'with' * + |. . . . [------] .| [4:5] Det -> 'a' * + |. . . . . [------]| [5:6] Noun -> 'dog' * + |. [-------------] . . .| [1:3] VP -> Verb NP * + |[--------------------] . . .| [0:3] S -> NP VP * + |. [-------------> . . .| [1:3] VP -> VP * PP + |. . > . . . .| [2:2] S -> * NP VP + |. . > . . . .| [2:2] NP -> * NP PP + |. . . . > . .| [4:4] NP -> * Det Noun + |. . 
[------> . . .| [2:3] S -> NP * VP + |. . [------> . . .| [2:3] NP -> NP * PP + |. . . . [------> .| [4:5] NP -> Det * Noun + |. . . . [-------------]| [4:6] NP -> Det Noun * + |. . . [--------------------]| [3:6] PP -> 'with' NP * + |. [----------------------------------]| [1:6] VP -> VP PP * + *** SWITCH TO TOP DOWN + |. . > . . . .| [2:2] NP -> * Det Noun + |. . . . > . .| [4:4] NP -> * NP PP + |. . . > . . .| [3:3] VP -> * VP PP + |. . . > . . .| [3:3] VP -> * Verb NP + |. . . > . . .| [3:3] VP -> * Verb + |[=========================================]| [0:6] S -> NP VP * + |. [---------------------------------->| [1:6] VP -> VP * PP + |. . [---------------------------]| [2:6] NP -> NP PP * + |. . . . [------------->| [4:6] NP -> NP * PP + |. [----------------------------------]| [1:6] VP -> Verb NP * + |. . [--------------------------->| [2:6] S -> NP * VP + |. . [--------------------------->| [2:6] NP -> NP * PP + |[=========================================]| [0:6] S -> NP VP * + |. [---------------------------------->| [1:6] VP -> VP * PP + |. . . . . . >| [6:6] VP -> * VP PP + |. . . . . . >| [6:6] VP -> * Verb NP + |. . . . . . >| [6:6] VP -> * Verb + *** SWITCH TO BOTTOM UP + |. . . . > . .| [4:4] S -> * NP VP + |. . . . [------------->| [4:6] S -> NP * VP + *** SWITCH TO TOP DOWN + *** SWITCH TO BOTTOM UP + *** SWITCH TO TOP DOWN + *** SWITCH TO BOTTOM UP + *** SWITCH TO TOP DOWN + *** SWITCH TO BOTTOM UP + Nr edges in chart: 61 + (S + (NP I) + (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog))))) + (S + (NP I) + (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog)))))) + + + +Unit tests for the Incremental Chart Parser class +------------------------------------------------- + +The incremental chart parsers are defined in earleychart.py. +We use the demo() function for testing. We must turn off showing of times. + + >>> import nltk + +Earley Chart Parser + + >>> nltk.parse.earleychart.demo(print_times=False, trace=1, + ... sent='I saw John with a dog', numparses=2) + * Sentence: + I saw John with a dog + ['I', 'saw', 'John', 'with', 'a', 'dog'] + + |. I . saw . John . with . a . dog .| + |[------] . . . . .| [0:1] 'I' + |. [------] . . . .| [1:2] 'saw' + |. . [------] . . .| [2:3] 'John' + |. . . [------] . .| [3:4] 'with' + |. . . . [------] .| [4:5] 'a' + |. . . . . [------]| [5:6] 'dog' + |> . . . . . .| [0:0] S -> * NP VP + |> . . . . . .| [0:0] NP -> * NP PP + |> . . . . . .| [0:0] NP -> * Det Noun + |> . . . . . .| [0:0] NP -> * 'I' + |[------] . . . . .| [0:1] NP -> 'I' * + |[------> . . . . .| [0:1] S -> NP * VP + |[------> . . . . .| [0:1] NP -> NP * PP + |. > . . . . .| [1:1] VP -> * VP PP + |. > . . . . .| [1:1] VP -> * Verb NP + |. > . . . . .| [1:1] VP -> * Verb + |. > . . . . .| [1:1] Verb -> * 'saw' + |. [------] . . . .| [1:2] Verb -> 'saw' * + |. [------> . . . .| [1:2] VP -> Verb * NP + |. [------] . . . .| [1:2] VP -> Verb * + |[-------------] . . . .| [0:2] S -> NP VP * + |. [------> . . . .| [1:2] VP -> VP * PP + |. . > . . . .| [2:2] NP -> * NP PP + |. . > . . . .| [2:2] NP -> * Det Noun + |. . > . . . .| [2:2] NP -> * 'John' + |. . [------] . . .| [2:3] NP -> 'John' * + |. [-------------] . . .| [1:3] VP -> Verb NP * + |. . [------> . . .| [2:3] NP -> NP * PP + |. . . > . . .| [3:3] PP -> * 'with' NP + |[--------------------] . . .| [0:3] S -> NP VP * + |. [-------------> . . .| [1:3] VP -> VP * PP + |. . . [------> . .| [3:4] PP -> 'with' * NP + |. . . . > . .| [4:4] NP -> * NP PP + |. . . . > . .| [4:4] NP -> * Det Noun + |. . . . > . 
.| [4:4] Det -> * 'a' + |. . . . [------] .| [4:5] Det -> 'a' * + |. . . . [------> .| [4:5] NP -> Det * Noun + |. . . . . > .| [5:5] Noun -> * 'dog' + |. . . . . [------]| [5:6] Noun -> 'dog' * + |. . . . [-------------]| [4:6] NP -> Det Noun * + |. . . [--------------------]| [3:6] PP -> 'with' NP * + |. . . . [------------->| [4:6] NP -> NP * PP + |. . [---------------------------]| [2:6] NP -> NP PP * + |. [----------------------------------]| [1:6] VP -> VP PP * + |[=========================================]| [0:6] S -> NP VP * + |. [---------------------------------->| [1:6] VP -> VP * PP + |. [----------------------------------]| [1:6] VP -> Verb NP * + |. . [--------------------------->| [2:6] NP -> NP * PP + |[=========================================]| [0:6] S -> NP VP * + |. [---------------------------------->| [1:6] VP -> VP * PP + (S + (NP I) + (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog))))) + (S + (NP I) + (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog)))))) + + +Unit tests for LARGE context-free grammars +------------------------------------------ + +Reading the ATIS grammar. + + >>> grammar = nltk.data.load('grammars/large_grammars/atis.cfg') + >>> grammar + + +Reading the test sentences. + + >>> sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt') + >>> sentences = nltk.parse.util.extract_test_sentences(sentences) + >>> len(sentences) + 98 + >>> testsentence = sentences[22] + >>> testsentence[0] + ['show', 'me', 'northwest', 'flights', 'to', 'detroit', '.'] + >>> testsentence[1] + 17 + >>> sentence = testsentence[0] + +Now we test all different parsing strategies. +Note that the number of edges differ between the strategies. + +Bottom-up parsing. + + >>> parser = nltk.parse.BottomUpChartParser(grammar) + >>> chart = parser.chart_parse(sentence) + >>> print((chart.num_edges())) + 7661 + >>> print((len(list(chart.parses(grammar.start()))))) + 17 + +Bottom-up Left-corner parsing. + + >>> parser = nltk.parse.BottomUpLeftCornerChartParser(grammar) + >>> chart = parser.chart_parse(sentence) + >>> print((chart.num_edges())) + 4986 + >>> print((len(list(chart.parses(grammar.start()))))) + 17 + +Left-corner parsing with bottom-up filter. + + >>> parser = nltk.parse.LeftCornerChartParser(grammar) + >>> chart = parser.chart_parse(sentence) + >>> print((chart.num_edges())) + 1342 + >>> print((len(list(chart.parses(grammar.start()))))) + 17 + +Top-down parsing. + + >>> parser = nltk.parse.TopDownChartParser(grammar) + >>> chart = parser.chart_parse(sentence) + >>> print((chart.num_edges())) + 28352 + >>> print((len(list(chart.parses(grammar.start()))))) + 17 + +Incremental Bottom-up parsing. + + >>> parser = nltk.parse.IncrementalBottomUpChartParser(grammar) + >>> chart = parser.chart_parse(sentence) + >>> print((chart.num_edges())) + 7661 + >>> print((len(list(chart.parses(grammar.start()))))) + 17 + +Incremental Bottom-up Left-corner parsing. + + >>> parser = nltk.parse.IncrementalBottomUpLeftCornerChartParser(grammar) + >>> chart = parser.chart_parse(sentence) + >>> print((chart.num_edges())) + 4986 + >>> print((len(list(chart.parses(grammar.start()))))) + 17 + +Incremental Left-corner parsing with bottom-up filter. + + >>> parser = nltk.parse.IncrementalLeftCornerChartParser(grammar) + >>> chart = parser.chart_parse(sentence) + >>> print((chart.num_edges())) + 1342 + >>> print((len(list(chart.parses(grammar.start()))))) + 17 + +Incremental Top-down parsing. 
+ + >>> parser = nltk.parse.IncrementalTopDownChartParser(grammar) + >>> chart = parser.chart_parse(sentence) + >>> print((chart.num_edges())) + 28352 + >>> print((len(list(chart.parses(grammar.start()))))) + 17 + +Earley parsing. This is similar to the incremental top-down algorithm. + + >>> parser = nltk.parse.EarleyChartParser(grammar) + >>> chart = parser.chart_parse(sentence) + >>> print((chart.num_edges())) + 28352 + >>> print((len(list(chart.parses(grammar.start()))))) + 17 + + +Unit tests for the Probabilistic CFG class +------------------------------------------ + + >>> from nltk.corpus import treebank + >>> from itertools import islice + >>> from nltk.grammar import PCFG, induce_pcfg, toy_pcfg1, toy_pcfg2 + +Create a set of PCFG productions. + + >>> grammar = PCFG.fromstring(""" + ... A -> B B [.3] | C B C [.7] + ... B -> B D [.5] | C [.5] + ... C -> 'a' [.1] | 'b' [0.9] + ... D -> 'b' [1.0] + ... """) + >>> prod = grammar.productions()[0] + >>> prod + A -> B B [0.3] + + >>> prod.lhs() + A + + >>> prod.rhs() + (B, B) + + >>> print((prod.prob())) + 0.3 + + >>> grammar.start() + A + + >>> grammar.productions() + [A -> B B [0.3], A -> C B C [0.7], B -> B D [0.5], B -> C [0.5], C -> 'a' [0.1], C -> 'b' [0.9], D -> 'b' [1.0]] + +Induce some productions using parsed Treebank data. + + >>> productions = [] + >>> for fileid in treebank.fileids()[:2]: + ... for t in treebank.parsed_sents(fileid): + ... productions += t.productions() + + >>> grammar = induce_pcfg(S, productions) + >>> grammar + + + >>> sorted(grammar.productions(lhs=Nonterminal('PP')))[:2] + [PP -> IN NP [1.0]] + >>> sorted(grammar.productions(lhs=Nonterminal('NNP')))[:2] + [NNP -> 'Agnew' [0.0714286], NNP -> 'Consolidated' [0.0714286]] + >>> sorted(grammar.productions(lhs=Nonterminal('JJ')))[:2] + [JJ -> 'British' [0.142857], JJ -> 'former' [0.142857]] + >>> sorted(grammar.productions(lhs=Nonterminal('NP')))[:2] + [NP -> CD NNS [0.133333], NP -> DT JJ JJ NN [0.0666667]] + +Unit tests for the Probabilistic Chart Parse classes +---------------------------------------------------- + + >>> tokens = "Jack saw Bob with my cookie".split() + >>> grammar = toy_pcfg2 + >>> print(grammar) + Grammar with 23 productions (start state = S) + S -> NP VP [1.0] + VP -> V NP [0.59] + VP -> V [0.4] + VP -> VP PP [0.01] + NP -> Det N [0.41] + NP -> Name [0.28] + NP -> NP PP [0.31] + PP -> P NP [1.0] + V -> 'saw' [0.21] + V -> 'ate' [0.51] + V -> 'ran' [0.28] + N -> 'boy' [0.11] + N -> 'cookie' [0.12] + N -> 'table' [0.13] + N -> 'telescope' [0.14] + N -> 'hill' [0.5] + Name -> 'Jack' [0.52] + Name -> 'Bob' [0.48] + P -> 'with' [0.61] + P -> 'under' [0.39] + Det -> 'the' [0.41] + Det -> 'a' [0.31] + Det -> 'my' [0.28] + +Create several parsers using different queuing strategies and show the +resulting parses. + + >>> from nltk.parse import pchart + + >>> parser = pchart.InsideChartParser(grammar) + >>> for t in parser.parse(tokens): + ... print(t) + (S + (NP (Name Jack)) + (VP + (V saw) + (NP + (NP (Name Bob)) + (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06) + (S + (NP (Name Jack)) + (VP + (VP (V saw) (NP (Name Bob))) + (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07) + + >>> parser = pchart.RandomChartParser(grammar) + >>> for t in parser.parse(tokens): + ... 
print(t) + (S + (NP (Name Jack)) + (VP + (V saw) + (NP + (NP (Name Bob)) + (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06) + (S + (NP (Name Jack)) + (VP + (VP (V saw) (NP (Name Bob))) + (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07) + + >>> parser = pchart.UnsortedChartParser(grammar) + >>> for t in parser.parse(tokens): + ... print(t) + (S + (NP (Name Jack)) + (VP + (V saw) + (NP + (NP (Name Bob)) + (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06) + (S + (NP (Name Jack)) + (VP + (VP (V saw) (NP (Name Bob))) + (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07) + + >>> parser = pchart.LongestChartParser(grammar) + >>> for t in parser.parse(tokens): + ... print(t) + (S + (NP (Name Jack)) + (VP + (V saw) + (NP + (NP (Name Bob)) + (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06) + (S + (NP (Name Jack)) + (VP + (VP (V saw) (NP (Name Bob))) + (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07) + + >>> parser = pchart.InsideChartParser(grammar, beam_size = len(tokens)+1) + >>> for t in parser.parse(tokens): + ... print(t) + + +Unit tests for the Viterbi Parse classes +---------------------------------------- + + >>> from nltk.parse import ViterbiParser + >>> tokens = "Jack saw Bob with my cookie".split() + >>> grammar = toy_pcfg2 + +Parse the tokenized sentence. + + >>> parser = ViterbiParser(grammar) + >>> for t in parser.parse(tokens): + ... print(t) + (S + (NP (Name Jack)) + (VP + (V saw) + (NP + (NP (Name Bob)) + (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06) + + +Unit tests for the FeatStructNonterminal class +---------------------------------------------- + + >>> from nltk.grammar import FeatStructNonterminal + >>> FeatStructNonterminal( + ... pos='n', agr=FeatStructNonterminal(number='pl', gender='f')) + [agr=[gender='f', number='pl'], pos='n'] + + >>> FeatStructNonterminal('VP[+fin]/NP[+pl]') + VP[+fin]/NP[+pl] + + +Tracing the Feature Chart Parser +-------------------------------- + +We use the featurechart.demo() function for tracing the Feature Chart Parser. + + >>> nltk.parse.featurechart.demo(print_times=False, + ... print_grammar=True, + ... parser=nltk.parse.featurechart.FeatureChartParser, + ... sent='I saw John with a dog') + + Grammar with 18 productions (start state = S[]) + S[] -> NP[] VP[] + PP[] -> Prep[] NP[] + NP[] -> NP[] PP[] + VP[] -> VP[] PP[] + VP[] -> Verb[] NP[] + VP[] -> Verb[] + NP[] -> Det[pl=?x] Noun[pl=?x] + NP[] -> 'John' + NP[] -> 'I' + Det[] -> 'the' + Det[] -> 'my' + Det[-pl] -> 'a' + Noun[-pl] -> 'dog' + Noun[-pl] -> 'cookie' + Verb[] -> 'ate' + Verb[] -> 'saw' + Prep[] -> 'with' + Prep[] -> 'under' + + * FeatureChartParser + Sentence: I saw John with a dog + |.I.s.J.w.a.d.| + |[-] . . . . .| [0:1] 'I' + |. [-] . . . .| [1:2] 'saw' + |. . [-] . . .| [2:3] 'John' + |. . . [-] . .| [3:4] 'with' + |. . . . [-] .| [4:5] 'a' + |. . . . . [-]| [5:6] 'dog' + |[-] . . . . .| [0:1] NP[] -> 'I' * + |[-> . . . . .| [0:1] S[] -> NP[] * VP[] {} + |[-> . . . . .| [0:1] NP[] -> NP[] * PP[] {} + |. [-] . . . .| [1:2] Verb[] -> 'saw' * + |. [-> . . . .| [1:2] VP[] -> Verb[] * NP[] {} + |. [-] . . . .| [1:2] VP[] -> Verb[] * + |. [-> . . . .| [1:2] VP[] -> VP[] * PP[] {} + |[---] . . . .| [0:2] S[] -> NP[] VP[] * + |. . [-] . . .| [2:3] NP[] -> 'John' * + |. . [-> . . .| [2:3] S[] -> NP[] * VP[] {} + |. . [-> . . .| [2:3] NP[] -> NP[] * PP[] {} + |. [---] . . .| [1:3] VP[] -> Verb[] NP[] * + |. [---> . . .| [1:3] VP[] -> VP[] * PP[] {} + |[-----] . . .| [0:3] S[] -> NP[] VP[] * + |. . . [-] . 
.| [3:4] Prep[] -> 'with' * + |. . . [-> . .| [3:4] PP[] -> Prep[] * NP[] {} + |. . . . [-] .| [4:5] Det[-pl] -> 'a' * + |. . . . [-> .| [4:5] NP[] -> Det[pl=?x] * Noun[pl=?x] {?x: False} + |. . . . . [-]| [5:6] Noun[-pl] -> 'dog' * + |. . . . [---]| [4:6] NP[] -> Det[-pl] Noun[-pl] * + |. . . . [--->| [4:6] S[] -> NP[] * VP[] {} + |. . . . [--->| [4:6] NP[] -> NP[] * PP[] {} + |. . . [-----]| [3:6] PP[] -> Prep[] NP[] * + |. . [-------]| [2:6] NP[] -> NP[] PP[] * + |. [---------]| [1:6] VP[] -> VP[] PP[] * + |. [--------->| [1:6] VP[] -> VP[] * PP[] {} + |[===========]| [0:6] S[] -> NP[] VP[] * + |. . [------->| [2:6] S[] -> NP[] * VP[] {} + |. . [------->| [2:6] NP[] -> NP[] * PP[] {} + |. [---------]| [1:6] VP[] -> Verb[] NP[] * + |. [--------->| [1:6] VP[] -> VP[] * PP[] {} + |[===========]| [0:6] S[] -> NP[] VP[] * + (S[] + (NP[] I) + (VP[] + (VP[] (Verb[] saw) (NP[] John)) + (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog))))) + (S[] + (NP[] I) + (VP[] + (Verb[] saw) + (NP[] + (NP[] John) + (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog)))))) + + +Unit tests for the Feature Chart Parser classes +----------------------------------------------- + +The list of parsers we want to test. + + >>> parsers = [nltk.parse.featurechart.FeatureChartParser, + ... nltk.parse.featurechart.FeatureTopDownChartParser, + ... nltk.parse.featurechart.FeatureBottomUpChartParser, + ... nltk.parse.featurechart.FeatureBottomUpLeftCornerChartParser, + ... nltk.parse.earleychart.FeatureIncrementalChartParser, + ... nltk.parse.earleychart.FeatureEarleyChartParser, + ... nltk.parse.earleychart.FeatureIncrementalTopDownChartParser, + ... nltk.parse.earleychart.FeatureIncrementalBottomUpChartParser, + ... nltk.parse.earleychart.FeatureIncrementalBottomUpLeftCornerChartParser, + ... ] + +A helper function that tests each parser on the given grammar and sentence. +We check that the number of trees are correct, and that all parsers +return the same trees. Otherwise an error is printed. + + >>> def unittest(grammar, sentence, nr_trees): + ... sentence = sentence.split() + ... trees = None + ... for P in parsers: + ... result = P(grammar).parse(sentence) + ... result = set(tree.freeze() for tree in result) + ... if len(result) != nr_trees: + ... print("Wrong nr of trees:", len(result)) + ... elif trees is None: + ... trees = result + ... elif result != trees: + ... print("Trees differ for parser:", P.__name__) + +The demo grammar from before, with an ambiguous sentence. + + >>> isawjohn = nltk.parse.featurechart.demo_grammar() + >>> unittest(isawjohn, "I saw John with a dog with my cookie", 5) + +This grammar tests that variables in different grammar rules are renamed +before unification. (The problematic variable is in this case ?X). + + >>> whatwasthat = nltk.grammar.FeatureGrammar.fromstring(''' + ... S[] -> NP[num=?N] VP[num=?N, slash=?X] + ... NP[num=?X] -> "what" + ... NP[num=?X] -> "that" + ... VP[num=?P, slash=none] -> V[num=?P] NP[] + ... V[num=sg] -> "was" + ... ''') + >>> unittest(whatwasthat, "what was that", 1) + +This grammar tests that the same rule can be used in different places +in another rule, and that the variables are properly renamed. + + >>> thislovesthat = nltk.grammar.FeatureGrammar.fromstring(''' + ... S[] -> NP[case=nom] V[] NP[case=acc] + ... NP[case=?X] -> Pron[case=?X] + ... Pron[] -> "this" + ... Pron[] -> "that" + ... V[] -> "loves" + ... 
''') + >>> unittest(thislovesthat, "this loves that", 1) + + +Tests for loading feature grammar files +--------------------------------------- + +Alternative 1: first load the grammar, then create the parser. + + >>> fcfg = nltk.data.load('grammars/book_grammars/feat0.fcfg') + >>> fcp1 = nltk.parse.FeatureChartParser(fcfg) + >>> print((type(fcp1))) + + +Alternative 2: directly load the parser. + + >>> fcp2 = nltk.parse.load_parser('grammars/book_grammars/feat0.fcfg') + >>> print((type(fcp2))) + + + + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/portuguese_en.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/portuguese_en.doctest new file mode 100644 index 0000000..87051c9 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/portuguese_en.doctest @@ -0,0 +1,565 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +================================== +Examples for Portuguese Processing +================================== + +This HOWTO contains a variety of examples relating to the Portuguese language. +It is intended to be read in conjunction with the NLTK book +(``http://nltk.org/book``). For instructions on running the Python +interpreter, please see the section *Getting Started with Python*, in Chapter 1. + +-------------------------------------------- +Python Programming, with Portuguese Examples +-------------------------------------------- + +Chapter 1 of the NLTK book contains many elementary programming examples, all +with English texts. In this section, we'll see some corresponding examples +using Portuguese. Please refer to the chapter for full discussion. *Vamos!* + + >>> from nltk.examples.pt import * + *** Introductory Examples for the NLTK Book *** + Loading ptext1, ... and psent1, ... + Type the name of the text or sentence to view it. + Type: 'texts()' or 'sents()' to list the materials. + ptext1: Memórias Póstumas de Brás Cubas (1881) + ptext2: Dom Casmurro (1899) + ptext3: Gênesis + ptext4: Folha de Sao Paulo (1994) + + +Any time we want to find out about these texts, we just have +to enter their names at the Python prompt: + + >>> ptext2 + + +Searching Text +-------------- + +A concordance permits us to see words in context. + + >>> ptext1.concordance('olhos') + Building index... + Displaying 25 of 138 matches: + De pé , à cabeceira da cama , com os olhos estúpidos , a boca entreaberta , a t + orelhas . Pela minha parte fechei os olhos e deixei - me ir à ventura . Já agor + xões de cérebro enfermo . Como ia de olhos fechados , não via o caminho ; lembr + gelos eternos . Com efeito , abri os olhos e vi que o meu animal galopava numa + me apareceu então , fitando - me uns olhos rutilantes como o sol . Tudo nessa f + mim mesmo . Então , encarei - a com olhos súplices , e pedi mais alguns anos . + ... + +For a given word, we can find words with a similar text distribution: + + >>> ptext1.similar('chegar') + Building word-context index... + acabada acudir aludir avistar bramanismo casamento cheguei com contar + contrário corpo dali deixei desferirem dizer fazer filhos já leitor lhe + >>> ptext3.similar('chegar') + Building word-context index... 
+ achar alumiar arrombar destruir governar guardar ir lavrar passar que + toda tomar ver vir + +We can search for the statistically significant collocations in a text: + + >>> ptext1.collocations() + Building collocations list + Quincas Borba; Lobo Neves; alguma coisa; Brás Cubas; meu pai; dia + seguinte; não sei; Meu pai; alguns instantes; outra vez; outra coisa; + por exemplo; mim mesmo; coisa nenhuma; mesma coisa; não era; dias + depois; Passeio Público; olhar para; das coisas + +We can search for words in context, with the help of *regular expressions*, e.g.: + + >>> ptext1.findall(" (<.*>)") + estúpidos; e; fechados; rutilantes; súplices; a; do; babavam; + na; moles; se; da; umas; espraiavam; chamejantes; espetados; + ... + +We can automatically generate random text based on a given text, e.g.: + + >>> ptext3.generate() # doctest: +SKIP + No princípio , criou Deus os abençoou , dizendo : Onde { estão } e até + à ave dos céus , { que } será . Disse mais Abrão : Dá - me a mulher + que tomaste ; porque daquele poço Eseque , { tinha .} E disse : Não + poderemos descer ; mas , do campo ainda não estava na casa do teu + pescoço . E viveu Serugue , depois Simeão e Levi { são } estes ? E o + varão , porque habitava na terra de Node , da mão de Esaú : Jeús , + Jalão e Corá + +Texts as List of Words +---------------------- + +A few sentences have been defined for you. + + >>> psent1 + ['o', 'amor', 'da', 'gl\xf3ria', 'era', 'a', 'coisa', 'mais', + 'verdadeiramente', 'humana', 'que', 'h\xe1', 'no', 'homem', ',', + 'e', ',', 'conseq\xfcentemente', ',', 'a', 'sua', 'mais', + 'genu\xedna', 'fei\xe7\xe3o', '.'] + >>> + +Notice that the sentence has been *tokenized*. Each token is +represented as a string, represented using quotes, e.g. ``'coisa'``. +Some strings contain special characters, e.g. ``\xf3``, +the internal representation for ó. +The tokens are combined in the form of a *list*. How long is this list? + + >>> len(psent1) + 25 + >>> + +What is the vocabulary of this sentence? + + >>> sorted(set(psent1)) + [',', '.', 'a', 'amor', 'coisa', 'conseqüentemente', 'da', 'e', 'era', + 'feição', 'genuína', 'glória', 'homem', 'humana', 'há', 'mais', 'no', + 'o', 'que', 'sua', 'verdadeiramente'] + >>> + +Let's iterate over each item in ``psent2``, and print information for each: + + >>> for w in psent2: + ... print(w, len(w), w[-1]) + ... + Não 3 o + consultes 9 s + dicionários 11 s + . 1 . + +Observe how we make a human-readable version of a string, using ``decode()``. +Also notice that we accessed the last character of a string ``w`` using ``w[-1]``. + +We just saw a ``for`` loop above. Another useful control structure is a +*list comprehension*. 
+ + >>> [w.upper() for w in psent2] + ['N\xc3O', 'CONSULTES', 'DICION\xc1RIOS', '.'] + >>> [w for w in psent1 if w.endswith('a')] + ['da', 'gl\xf3ria', 'era', 'a', 'coisa', 'humana', 'a', 'sua', 'genu\xedna'] + >>> [w for w in ptext4 if len(w) > 15] + [u'norte-irlandeses', u'pan-nacionalismo', u'predominatemente', u'primeiro-ministro', + u'primeiro-ministro', u'irlandesa-americana', u'responsabilidades', u'significativamente'] + +We can examine the relative frequency of words in a text, using ``FreqDist``: + + >>> fd1 = FreqDist(ptext1) + >>> fd1 + + >>> fd1['olhos'] + 137 + >>> fd1.max() + u',' + >>> fd1.samples()[:100] + [u',', u'.', u'a', u'que', u'de', u'e', u'-', u'o', u';', u'me', u'um', u'n\xe3o', + u'\x97', u'se', u'do', u'da', u'uma', u'com', u'os', u'\xe9', u'era', u'as', u'eu', + u'lhe', u'ao', u'em', u'para', u'mas', u'...', u'!', u'\xe0', u'na', u'mais', u'?', + u'no', u'como', u'por', u'N\xe3o', u'dos', u'ou', u'ele', u':', u'Virg\xedlia', + u'meu', u'disse', u'minha', u'das', u'O', u'/', u'A', u'CAP\xcdTULO', u'muito', + u'depois', u'coisa', u'foi', u'sem', u'olhos', u'ela', u'nos', u'tinha', u'nem', + u'E', u'outro', u'vida', u'nada', u'tempo', u'menos', u'outra', u'casa', u'homem', + u'porque', u'quando', u'mim', u'mesmo', u'ser', u'pouco', u'estava', u'dia', + u't\xe3o', u'tudo', u'Mas', u'at\xe9', u'D', u'ainda', u's\xf3', u'alguma', + u'la', u'vez', u'anos', u'h\xe1', u'Era', u'pai', u'esse', u'lo', u'dizer', u'assim', + u'ent\xe3o', u'dizia', u'aos', u'Borba'] + +--------------- +Reading Corpora +--------------- + +Accessing the Machado Text Corpus +--------------------------------- + +NLTK includes the complete works of Machado de Assis. + + >>> from nltk.corpus import machado + >>> machado.fileids() + ['contos/macn001.txt', 'contos/macn002.txt', 'contos/macn003.txt', ...] + +Each file corresponds to one of the works of Machado de Assis. To see a complete +list of works, you can look at the corpus README file: ``print machado.readme()``. +Let's access the text of the *Posthumous Memories of Brás Cubas*. + +We can access the text as a list of characters, and access 200 characters starting +from position 10,000. + + >>> raw_text = machado.raw('romance/marm05.txt') + >>> raw_text[10000:10200] + u', primou no\nEstado, e foi um dos amigos particulares do vice-rei Conde + da Cunha.\n\nComo este apelido de Cubas lhe\ncheirasse excessivamente a + tanoaria, alegava meu pai, bisneto de Dami\xe3o, que o\ndito ape' + +However, this is not a very useful way to work with a text. We generally think +of a text as a sequence of words and punctuation, not characters: + + >>> text1 = machado.words('romance/marm05.txt') + >>> text1 + ['Romance', ',', 'Mem\xf3rias', 'P\xf3stumas', 'de', ...] + >>> len(text1) + 77098 + >>> len(set(text1)) + 10848 + +Here's a program that finds the most common ngrams that contain a +particular target word. + + >>> from nltk import ngrams, FreqDist + >>> target_word = 'olhos' + >>> fd = FreqDist(ng + ... for ng in ngrams(text1, 5) + ... if target_word in ng) + >>> for hit in fd.samples(): + ... print(' '.join(hit)) + ... 
+ , com os olhos no + com os olhos no ar + com os olhos no chão + e todos com os olhos + me estar com os olhos + os olhos estúpidos , a + os olhos na costura , + os olhos no ar , + , com os olhos espetados + , com os olhos estúpidos + , com os olhos fitos + , com os olhos naquele + , com os olhos para + + +Accessing the MacMorpho Tagged Corpus +------------------------------------- + +NLTK includes the MAC-MORPHO Brazilian Portuguese POS-tagged news text, +with over a million words of +journalistic texts extracted from ten sections of +the daily newspaper *Folha de Sao Paulo*, 1994. + +We can access this corpus as a sequence of words or tagged words as follows: + >>> import nltk.corpus + >>> nltk.corpus.mac_morpho.words() + ['Jersei', 'atinge', 'm\xe9dia', 'de', 'Cr$', '1,4', ...] + >>> nltk.corpus.mac_morpho.sents() # doctest: +NORMALIZE_WHITESPACE + [['Jersei', 'atinge', 'm\xe9dia', 'de', 'Cr$', '1,4', 'milh\xe3o', + 'em', 'a', 'venda', 'de', 'a', 'Pinhal', 'em', 'S\xe3o', 'Paulo'], + ['Programe', 'sua', 'viagem', 'a', 'a', 'Exposi\xe7\xe3o', 'Nacional', + 'do', 'Zebu', ',', 'que', 'come\xe7a', 'dia', '25'], ...] + >>> nltk.corpus.mac_morpho.tagged_words() + [('Jersei', 'N'), ('atinge', 'V'), ('m\xe9dia', 'N'), ...] + +We can also access it in sentence chunks. + + >>> nltk.corpus.mac_morpho.tagged_sents() # doctest: +NORMALIZE_WHITESPACE + [[('Jersei', 'N'), ('atinge', 'V'), ('m\xe9dia', 'N'), ('de', 'PREP'), + ('Cr$', 'CUR'), ('1,4', 'NUM'), ('milh\xe3o', 'N'), ('em', 'PREP|+'), + ('a', 'ART'), ('venda', 'N'), ('de', 'PREP|+'), ('a', 'ART'), + ('Pinhal', 'NPROP'), ('em', 'PREP'), ('S\xe3o', 'NPROP'), + ('Paulo', 'NPROP')], + [('Programe', 'V'), ('sua', 'PROADJ'), ('viagem', 'N'), ('a', 'PREP|+'), + ('a', 'ART'), ('Exposi\xe7\xe3o', 'NPROP'), ('Nacional', 'NPROP'), + ('do', 'NPROP'), ('Zebu', 'NPROP'), (',', ','), ('que', 'PRO-KS-REL'), + ('come\xe7a', 'V'), ('dia', 'N'), ('25', 'N|AP')], ...] + +This data can be used to train taggers (examples below for the Floresta treebank). + +Accessing the Floresta Portuguese Treebank +------------------------------------------ + +The NLTK data distribution includes the +"Floresta Sinta(c)tica Corpus" version 7.4, available from +``http://www.linguateca.pt/Floresta/``. + +We can access this corpus as a sequence of words or tagged words as follows: + + >>> from nltk.corpus import floresta + >>> floresta.words() + ['Um', 'revivalismo', 'refrescante', 'O', '7_e_Meio', ...] + >>> floresta.tagged_words() + [('Um', '>N+art'), ('revivalismo', 'H+n'), ...] + +The tags consist of some syntactic information, followed by a plus sign, +followed by a conventional part-of-speech tag. Let's strip off the material before +the plus sign: + + >>> def simplify_tag(t): + ... if "+" in t: + ... return t[t.index("+")+1:] + ... else: + ... 
return t + >>> twords = floresta.tagged_words() + >>> twords = [(w.lower(), simplify_tag(t)) for (w,t) in twords] + >>> twords[:10] + [('um', 'art'), ('revivalismo', 'n'), ('refrescante', 'adj'), ('o', 'art'), ('7_e_meio', 'prop'), + ('\xe9', 'v-fin'), ('um', 'art'), ('ex-libris', 'n'), ('de', 'prp'), ('a', 'art')] + +Pretty printing the tagged words: + + >>> print(' '.join(word + '/' + tag for (word, tag) in twords[:10])) + um/art revivalismo/n refrescante/adj o/art 7_e_meio/prop é/v-fin um/art ex-libris/n de/prp a/art + +Count the word tokens and types, and determine the most common word: + + >>> words = floresta.words() + >>> len(words) + 211852 + >>> fd = nltk.FreqDist(words) + >>> len(fd) + 29421 + >>> fd.max() + 'de' + +List the 20 most frequent tags, in order of decreasing frequency: + + >>> tags = [simplify_tag(tag) for (word,tag) in floresta.tagged_words()] + >>> fd = nltk.FreqDist(tags) + >>> fd.keys()[:20] # doctest: +NORMALIZE_WHITESPACE + ['n', 'prp', 'art', 'v-fin', ',', 'prop', 'adj', 'adv', '.', + 'conj-c', 'v-inf', 'pron-det', 'v-pcp', 'num', 'pron-indp', + 'pron-pers', '\xab', '\xbb', 'conj-s', '}'] + +We can also access the corpus grouped by sentence: + + >>> floresta.sents() # doctest: +NORMALIZE_WHITESPACE + [['Um', 'revivalismo', 'refrescante'], + ['O', '7_e_Meio', '\xe9', 'um', 'ex-libris', 'de', 'a', 'noite', + 'algarvia', '.'], ...] + >>> floresta.tagged_sents() # doctest: +NORMALIZE_WHITESPACE + [[('Um', '>N+art'), ('revivalismo', 'H+n'), ('refrescante', 'N<+adj')], + [('O', '>N+art'), ('7_e_Meio', 'H+prop'), ('\xe9', 'P+v-fin'), + ('um', '>N+art'), ('ex-libris', 'H+n'), ('de', 'H+prp'), + ('a', '>N+art'), ('noite', 'H+n'), ('algarvia', 'N<+adj'), ('.', '.')], + ...] + >>> floresta.parsed_sents() # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS + [Tree('UTT+np', [Tree('>N+art', ['Um']), Tree('H+n', ['revivalismo']), + Tree('N<+adj', ['refrescante'])]), + Tree('STA+fcl', + [Tree('SUBJ+np', [Tree('>N+art', ['O']), + Tree('H+prop', ['7_e_Meio'])]), + Tree('P+v-fin', ['\xe9']), + Tree('SC+np', + [Tree('>N+art', ['um']), + Tree('H+n', ['ex-libris']), + Tree('N<+pp', [Tree('H+prp', ['de']), + Tree('P<+np', [Tree('>N+art', ['a']), + Tree('H+n', ['noite']), + Tree('N<+adj', ['algarvia'])])])]), + Tree('.', ['.'])]), ...] + +To view a parse tree, use the ``draw()`` method, e.g.: + + >>> psents = floresta.parsed_sents() + >>> psents[5].draw() # doctest: +SKIP + +Character Encodings +------------------- + +Python understands the common character encoding used for Portuguese, ISO 8859-1 (ISO Latin 1). + + >>> import os, nltk.test + >>> testdir = os.path.split(nltk.test.__file__)[0] + >>> text = open(os.path.join(testdir, 'floresta.txt'), 'rb').read().decode('ISO 8859-1') + >>> text[:60] + 'O 7 e Meio \xe9 um ex-libris da noite algarvia.\n\xc9 uma das mais ' + >>> print(text[:60]) + O 7 e Meio é um ex-libris da noite algarvia. + É uma das mais + +For more information about character encodings and Python, please see section 3.3 of the book. + +---------------- +Processing Tasks +---------------- + + +Simple Concordancing +-------------------- + +Here's a function that takes a word and a specified amount of context (measured +in characters), and generates a concordance for that word. + + >>> def concordance(word, context=30): + ... for sent in floresta.sents(): + ... if word in sent: + ... pos = sent.index(word) + ... left = ' '.join(sent[:pos]) + ... right = ' '.join(sent[pos+1:]) + ... print('%*s %s %-*s' % + ... 
(context, left[-context:], word, context, right[:context])) + + >>> concordance("dar") # doctest: +SKIP + anduru , foi o suficiente para dar a volta a o resultado . + 1. O P?BLICO veio dar a a imprensa di?ria portuguesa + A fartura de pensamento pode dar maus resultados e n?s n?o quer + Come?a a dar resultados a pol?tica de a Uni + ial come?ar a incorporar- lo e dar forma a um ' site ' que tem se + r com Constantino para ele lhe dar tamb?m os pap?is assinados . + va a brincar , pois n?o lhe ia dar procura??o nenhuma enquanto n? + ?rica como o ant?doto capaz de dar sentido a o seu enorme poder . + . . . + >>> concordance("vender") # doctest: +SKIP + er recebido uma encomenda para vender 4000 blindados a o Iraque . + m?rico_Amorim caso conseguisse vender o lote de ac??es de o empres?r + mpre ter jovens simp?ticos a ? vender ? chega ! } + Disse que o governo vai vender ? desde autom?vel at? particip + ndiciou ontem duas pessoas por vender carro com ?gio . + A inten??o de Fleury ? vender as a??es para equilibrar as fi + +Part-of-Speech Tagging +---------------------- + +Let's begin by getting the tagged sentence data, and simplifying the tags +as described earlier. + + >>> from nltk.corpus import floresta + >>> tsents = floresta.tagged_sents() + >>> tsents = [[(w.lower(),simplify_tag(t)) for (w,t) in sent] for sent in tsents if sent] + >>> train = tsents[100:] + >>> test = tsents[:100] + +We already know that ``n`` is the most common tag, so we can set up a +default tagger that tags every word as a noun, and see how well it does: + + >>> tagger0 = nltk.DefaultTagger('n') + >>> nltk.tag.accuracy(tagger0, test) + 0.17697228144989338 + +Evidently, about one in every six words is a noun. Let's improve on this by +training a unigram tagger: + + >>> tagger1 = nltk.UnigramTagger(train, backoff=tagger0) + >>> nltk.tag.accuracy(tagger1, test) + 0.87029140014214645 + +Next a bigram tagger: + + >>> tagger2 = nltk.BigramTagger(train, backoff=tagger1) + >>> nltk.tag.accuracy(tagger2, test) + 0.89019189765458417 + + +Sentence Segmentation +--------------------- + +Punkt is a language-neutral sentence segmentation tool. We + + >>> sent_tokenizer=nltk.data.load('tokenizers/punkt/portuguese.pickle') + >>> raw_text = machado.raw('romance/marm05.txt') + >>> sentences = sent_tokenizer.tokenize(raw_text) + >>> for sent in sentences[1000:1005]: + ... print("<<", sent, ">>") + ... + << Em verdade, parecia ainda mais mulher do que era; + seria criança nos seus folgares de moça; mas assim quieta, impassível, tinha a + compostura da mulher casada. >> + << Talvez essa circunstância lhe diminuía um pouco da + graça virginal. >> + << Depressa nos familiarizamos; a mãe fazia-lhe grandes elogios, eu + escutava-os de boa sombra, e ela sorria com os olhos fúlgidos, como se lá dentro + do cérebro lhe estivesse a voar uma borboletinha de asas de ouro e olhos de + diamante... >> + << Digo lá dentro, porque cá fora o + que esvoaçou foi uma borboleta preta, que subitamente penetrou na varanda, e + começou a bater as asas em derredor de D. Eusébia. >> + << D. Eusébia deu um grito, + levantou-se, praguejou umas palavras soltas: - T'esconjuro!... >> + +The sentence tokenizer can be trained and evaluated on other text. +The source text (from the Floresta Portuguese Treebank) contains one sentence per line. +We read the text, split it into its lines, and then join these lines together using +spaces. Now the information about sentence breaks has been discarded. 
We split this +material into training and testing data: + + >>> import os, nltk.test + >>> testdir = os.path.split(nltk.test.__file__)[0] + >>> text = open(os.path.join(testdir, 'floresta.txt'), 'rb').read().decode('ISO-8859-1') + >>> lines = text.split('\n') + >>> train = ' '.join(lines[10:]) + >>> test = ' '.join(lines[:10]) + +Now we train the sentence segmenter (or sentence tokenizer) and use it on our test sentences: + + >>> stok = nltk.PunktSentenceTokenizer(train) + >>> print(stok.tokenize(test)) + ['O 7 e Meio \xe9 um ex-libris da noite algarvia.', + '\xc9 uma das mais antigas discotecas do Algarve, situada em Albufeira, + que continua a manter os tra\xe7os decorativos e as clientelas de sempre.', + '\xc9 um pouco a vers\xe3o de uma esp\xe9cie de \xaboutro lado\xbb da noite, + a meio caminho entre os devaneios de uma fauna perif\xe9rica, seja de Lisboa, + Londres, Dublin ou Faro e Portim\xe3o, e a postura circunspecta dos fi\xe9is da casa, + que dela esperam a m\xfasica \xabgeracionista\xbb dos 60 ou dos 70.', + 'N\xe3o deixa de ser, nos tempos que correm, um certo \xabvery typical\xbb algarvio, + cabe\xe7a de cartaz para os que querem fugir a algumas movimenta\xe7\xf5es nocturnas + j\xe1 a caminho da ritualiza\xe7\xe3o de massas, do g\xe9nero \xabvamos todos ao + Calypso e encontramo-nos na Locomia\xbb.', + 'E assim, aos 2,5 milh\xf5es que o Minist\xe9rio do Planeamento e Administra\xe7\xe3o + do Territ\xf3rio j\xe1 gasta no pagamento do pessoal afecto a estes organismos, + v\xeam juntar-se os montantes das obras propriamente ditas, que os munic\xedpios, + j\xe1 com projectos na m\xe3o, v\xeam reivindicar junto do Executivo, como salienta + aquele membro do Governo.', + 'E o dinheiro \xabn\xe3o falta s\xf3 \xe0s c\xe2maras\xbb, lembra o secret\xe1rio de Estado, + que considera que a solu\xe7\xe3o para as autarquias \xe9 \xabespecializarem-se em + fundos comunit\xe1rios\xbb.', + 'Mas como, se muitas n\xe3o disp\xf5em, nos seus quadros, dos t\xe9cnicos necess\xe1rios?', + '\xabEncomendem-nos a projectistas de fora\xbb porque, se as obras vierem a ser financiadas, + eles at\xe9 saem de gra\xe7a, j\xe1 que, nesse caso, \xabos fundos comunit\xe1rios pagam + os projectos, o mesmo n\xe3o acontecendo quando eles s\xe3o feitos pelos GAT\xbb, + dado serem organismos do Estado.', + 'Essa poder\xe1 vir a ser uma hip\xf3tese, at\xe9 porque, no terreno, a capacidade dos GAT + est\xe1 cada vez mais enfraquecida.', + 'Alguns at\xe9 j\xe1 desapareceram, como o de Castro Verde, e outros t\xeam vindo a perder quadros.'] + +NLTK's data collection includes a trained model for Portuguese sentence +segmentation, which can be loaded as follows. It is faster to load a trained model than +to retrain it. + + >>> stok = nltk.data.load('tokenizers/punkt/portuguese.pickle') + +Stemming +-------- + +NLTK includes the RSLP Portuguese stemmer. Here we use it to stem some Portuguese text: + + >>> stemmer = nltk.stem.RSLPStemmer() + >>> stemmer.stem("copiar") + 'copi' + >>> stemmer.stem("paisagem") + 'pais' + + +Stopwords +--------- + +NLTK includes Portuguese stopwords: + + >>> stopwords = nltk.corpus.stopwords.words('portuguese') + >>> stopwords[:10] + ['a', 'ao', 'aos', 'aquela', 'aquelas', 'aquele', 'aqueles', 'aquilo', 'as', 'at\xe9'] + +Now we can use these to filter text. Let's find the most frequent words (other than stopwords) +and print them in descending order of frequency: + + >>> fd = nltk.FreqDist(w.lower() for w in floresta.words() if w not in stopwords) + >>> for word in list(fd.keys())[:20]: + ... 
print(word, fd[word]) + , 13444 + . 7725 + « 2369 + » 2310 + é 1305 + o 1086 + } 1047 + { 1044 + a 897 + ; 633 + em 516 + ser 466 + sobre 349 + os 313 + anos 301 + ontem 292 + ainda 279 + segundo 256 + ter 249 + dois 231 + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/portuguese_en_fixt.py b/venv.bak/lib/python3.7/site-packages/nltk/test/portuguese_en_fixt.py new file mode 100644 index 0000000..afbd59e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/portuguese_en_fixt.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import +from nltk.compat import PY3 + +from nltk.corpus import teardown_module + + +def setup_module(module): + from nose import SkipTest + + raise SkipTest( + "portuguese_en.doctest imports nltk.examples.pt which doesn't exist!" + ) + + if not PY3: + raise SkipTest( + "portuguese_en.doctest was skipped because non-ascii doctests are not supported under Python 2.x" + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/probability.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/probability.doctest new file mode 100644 index 0000000..3cb582e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/probability.doctest @@ -0,0 +1,304 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +=========== +Probability +=========== + + >>> import nltk + >>> from nltk.probability import * + +FreqDist +-------- + + >>> text1 = ['no', 'good', 'fish', 'goes', 'anywhere', 'without', 'a', 'porpoise', '!'] + >>> text2 = ['no', 'good', 'porpoise', 'likes', 'to', 'fish', 'fish', 'anywhere', '.'] + + >>> fd1 = nltk.FreqDist(text1) + >>> fd1 == nltk.FreqDist(text1) + True + +Note that items are sorted in order of decreasing frequency; two items of the same frequency appear in indeterminate order. 
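+
+For instance, every token in ``text1`` occurs exactly once, so asking for the
+top of ``fd1`` may return the tied items in any order; the output below is
+only illustrative and is not checked:
+
+    >>> fd1.most_common(3)   # doctest: +SKIP
+    [('no', 1), ('good', 1), ('fish', 1)]
+
+One way to obtain a deterministic view is to sort within each frequency band,
+as the next example does for ``text1 + text2``: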
+ + >>> import itertools + >>> both = nltk.FreqDist(text1 + text2) + >>> both_most_common = both.most_common() + >>> list(itertools.chain(*(sorted(ys) for k, ys in itertools.groupby(both_most_common, key=lambda t: t[1])))) + [('fish', 3), ('anywhere', 2), ('good', 2), ('no', 2), ('porpoise', 2), ('!', 1), ('.', 1), ('a', 1), ('goes', 1), ('likes', 1), ('to', 1), ('without', 1)] + + >>> both == fd1 + nltk.FreqDist(text2) + True + >>> fd1 == nltk.FreqDist(text1) # But fd1 is unchanged + True + + >>> fd2 = nltk.FreqDist(text2) + >>> fd1.update(fd2) + >>> fd1 == both + True + + >>> fd1 = nltk.FreqDist(text1) + >>> fd1.update(text2) + >>> fd1 == both + True + + >>> fd1 = nltk.FreqDist(text1) + >>> fd2 = nltk.FreqDist(fd1) + >>> fd2 == fd1 + True + +``nltk.FreqDist`` can be pickled: + + >>> import pickle + >>> fd1 = nltk.FreqDist(text1) + >>> pickled = pickle.dumps(fd1) + >>> fd1 == pickle.loads(pickled) + True + +Mathematical operations: + + >>> FreqDist('abbb') + FreqDist('bcc') + FreqDist({'b': 4, 'c': 2, 'a': 1}) + >>> FreqDist('abbbc') - FreqDist('bccd') + FreqDist({'b': 2, 'a': 1}) + >>> FreqDist('abbb') | FreqDist('bcc') + FreqDist({'b': 3, 'c': 2, 'a': 1}) + >>> FreqDist('abbb') & FreqDist('bcc') + FreqDist({'b': 1}) + +ConditionalFreqDist +------------------- + + >>> cfd1 = ConditionalFreqDist() + >>> cfd1[1] = FreqDist('abbbb') + >>> cfd1[2] = FreqDist('xxxxyy') + >>> cfd1 + + + >>> cfd2 = ConditionalFreqDist() + >>> cfd2[1] = FreqDist('bbccc') + >>> cfd2[2] = FreqDist('xxxyyyzz') + >>> cfd2[3] = FreqDist('m') + >>> cfd2 + + + >>> r = cfd1 + cfd2 + >>> [(i,r[i]) for i in r.conditions()] + [(1, FreqDist({'b': 6, 'c': 3, 'a': 1})), (2, FreqDist({'x': 7, 'y': 5, 'z': 2})), (3, FreqDist({'m': 1}))] + + >>> r = cfd1 - cfd2 + >>> [(i,r[i]) for i in r.conditions()] + [(1, FreqDist({'b': 2, 'a': 1})), (2, FreqDist({'x': 1}))] + + >>> r = cfd1 | cfd2 + >>> [(i,r[i]) for i in r.conditions()] + [(1, FreqDist({'b': 4, 'c': 3, 'a': 1})), (2, FreqDist({'x': 4, 'y': 3, 'z': 2})), (3, FreqDist({'m': 1}))] + + >>> r = cfd1 & cfd2 + >>> [(i,r[i]) for i in r.conditions()] + [(1, FreqDist({'b': 2})), (2, FreqDist({'x': 3, 'y': 2}))] + +Testing some HMM estimators +--------------------------- + +We extract a small part (500 sentences) of the Brown corpus + + >>> corpus = nltk.corpus.brown.tagged_sents(categories='adventure')[:500] + >>> print(len(corpus)) + 500 + +We create a HMM trainer - note that we need the tags and symbols +from the whole corpus, not just the training corpus + + >>> from nltk.util import unique_list + >>> tag_set = unique_list(tag for sent in corpus for (word,tag) in sent) + >>> print(len(tag_set)) + 92 + >>> symbols = unique_list(word for sent in corpus for (word,tag) in sent) + >>> print(len(symbols)) + 1464 + >>> trainer = nltk.tag.HiddenMarkovModelTrainer(tag_set, symbols) + +We divide the corpus into 90% training and 10% testing + + >>> train_corpus = [] + >>> test_corpus = [] + >>> for i in range(len(corpus)): + ... if i % 10: + ... train_corpus += [corpus[i]] + ... else: + ... test_corpus += [corpus[i]] + >>> print(len(train_corpus)) + 450 + >>> print(len(test_corpus)) + 50 + +And now we can test the estimators + + >>> def train_and_test(est): + ... hmm = trainer.train_supervised(train_corpus, estimator=est) + ... 
print('%.2f%%' % (100 * hmm.evaluate(test_corpus))) + +Maximum Likelihood Estimation +----------------------------- +- this resulted in an initialization error before r7209 + + >>> mle = lambda fd, bins: MLEProbDist(fd) + >>> train_and_test(mle) + 22.75% + +Laplace (= Lidstone with gamma==1) + + >>> train_and_test(LaplaceProbDist) + 66.04% + +Expected Likelihood Estimation (= Lidstone with gamma==0.5) + + >>> train_and_test(ELEProbDist) + 73.01% + +Lidstone Estimation, for gamma==0.1, 0.5 and 1 +(the later two should be exactly equal to MLE and ELE above) + + >>> def lidstone(gamma): + ... return lambda fd, bins: LidstoneProbDist(fd, gamma, bins) + >>> train_and_test(lidstone(0.1)) + 82.51% + >>> train_and_test(lidstone(0.5)) + 73.01% + >>> train_and_test(lidstone(1.0)) + 66.04% + +Witten Bell Estimation +---------------------- +- This resulted in ZeroDivisionError before r7209 + + >>> train_and_test(WittenBellProbDist) + 88.12% + +Good Turing Estimation + + >>> gt = lambda fd, bins: SimpleGoodTuringProbDist(fd, bins=1e5) + >>> train_and_test(gt) + 86.93% + +Kneser Ney Estimation +--------------------- +Since the Kneser-Ney distribution is best suited for trigrams, we must adjust +our testing accordingly. + + >>> corpus = [[((x[0],y[0],z[0]),(x[1],y[1],z[1])) + ... for x, y, z in nltk.trigrams(sent)] + ... for sent in corpus[:100]] + +We will then need to redefine the rest of the training/testing variables + >>> tag_set = unique_list(tag for sent in corpus for (word,tag) in sent) + >>> len(tag_set) + 906 + + >>> symbols = unique_list(word for sent in corpus for (word,tag) in sent) + >>> len(symbols) + 1341 + + >>> trainer = nltk.tag.HiddenMarkovModelTrainer(tag_set, symbols) + >>> train_corpus = [] + >>> test_corpus = [] + + >>> for i in range(len(corpus)): + ... if i % 10: + ... train_corpus += [corpus[i]] + ... else: + ... test_corpus += [corpus[i]] + + >>> len(train_corpus) + 90 + >>> len(test_corpus) + 10 + + >>> kn = lambda fd, bins: KneserNeyProbDist(fd) + >>> train_and_test(kn) + 0.86% + +Remains to be added: +- Tests for HeldoutProbDist, CrossValidationProbDist and MutableProbDist + +Squashed bugs +------------- + +Issue 511: override pop and popitem to invalidate the cache + + >>> fd = nltk.FreqDist('a') + >>> list(fd.keys()) + ['a'] + >>> fd.pop('a') + 1 + >>> list(fd.keys()) + [] + +Issue 533: access cumulative frequencies with no arguments + + >>> fd = nltk.FreqDist('aab') + >>> list(fd._cumulative_frequencies(['a'])) + [2.0] + >>> list(fd._cumulative_frequencies(['a', 'b'])) + [2.0, 3.0] + +Issue 579: override clear to reset some variables + + >>> fd = FreqDist('aab') + >>> fd.clear() + >>> fd.N() + 0 + +Issue 351: fix fileids method of CategorizedCorpusReader to inadvertently +add errant categories + + >>> from nltk.corpus import brown + >>> brown.fileids('blah') + Traceback (most recent call last): + ... 
+ ValueError: Category blah not found + >>> brown.categories() + ['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'humor', 'learned', 'lore', 'mystery', 'news', 'religion', 'reviews', 'romance', 'science_fiction'] + +Issue 175: add the unseen bin to SimpleGoodTuringProbDist by default +otherwise any unseen events get a probability of zero, i.e., +they don't get smoothed + + >>> from nltk import SimpleGoodTuringProbDist, FreqDist + >>> fd = FreqDist({'a':1, 'b':1, 'c': 2, 'd': 3, 'e': 4, 'f': 4, 'g': 4, 'h': 5, 'i': 5, 'j': 6, 'k': 6, 'l': 6, 'm': 7, 'n': 7, 'o': 8, 'p': 9, 'q': 10}) + >>> p = SimpleGoodTuringProbDist(fd) + >>> p.prob('a') + 0.017649766667026317... + >>> p.prob('o') + 0.08433050215340411... + >>> p.prob('z') + 0.022727272727272728... + >>> p.prob('foobar') + 0.022727272727272728... + +``MLEProbDist``, ``ConditionalProbDist'', ``DictionaryConditionalProbDist`` and +``ConditionalFreqDist`` can be pickled: + + >>> import pickle + >>> pd = MLEProbDist(fd) + >>> sorted(pd.samples()) == sorted(pickle.loads(pickle.dumps(pd)).samples()) + True + >>> dpd = DictionaryConditionalProbDist({'x': pd}) + >>> unpickled = pickle.loads(pickle.dumps(dpd)) + >>> dpd['x'].prob('a') + 0.011363636... + >>> dpd['x'].prob('a') == unpickled['x'].prob('a') + True + >>> cfd = nltk.probability.ConditionalFreqDist() + >>> cfd['foo']['hello'] += 1 + >>> cfd['foo']['hello'] += 1 + >>> cfd['bar']['hello'] += 1 + >>> cfd2 = pickle.loads(pickle.dumps(cfd)) + >>> cfd2 == cfd + True + >>> cpd = ConditionalProbDist(cfd, SimpleGoodTuringProbDist) + >>> cpd2 = pickle.loads(pickle.dumps(cpd)) + >>> cpd['foo'].prob('hello') == cpd2['foo'].prob('hello') + True + + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/probability_fixt.py b/venv.bak/lib/python3.7/site-packages/nltk/test/probability_fixt.py new file mode 100644 index 0000000..680dab6 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/probability_fixt.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + + +# probability.doctest uses HMM which requires numpy; +# skip probability.doctest if numpy is not available + + +def setup_module(module): + from nose import SkipTest + + try: + import numpy + except ImportError: + raise SkipTest("probability.doctest requires numpy") diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/propbank.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/propbank.doctest new file mode 100644 index 0000000..9bec607 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/propbank.doctest @@ -0,0 +1,176 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +======== +PropBank +======== + +The PropBank Corpus provides predicate-argument annotation for the +entire Penn Treebank. Each verb in the treebank is annotated by a single +instance in PropBank, containing information about the location of +the verb, and the location and identity of its arguments: + + >>> from nltk.corpus import propbank + >>> pb_instances = propbank.instances() + >>> print(pb_instances) # doctest: +NORMALIZE_WHITESPACE + [, + , ...] 
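+
+As a quick illustration (an added sketch, not part of the original test
+suite), the instance list can be fed straight into a ``FreqDist`` to tally
+the most frequent rolesets.  The example is marked ``+SKIP`` because its
+output depends on the installed corpus:
+
+    >>> from nltk import FreqDist  # doctest: +SKIP
+    >>> roleset_counts = FreqDist(inst.roleset for inst in pb_instances)  # doctest: +SKIP
+    >>> roleset_counts.most_common(3)  # doctest: +SKIP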
+ +Each propbank instance defines the following member variables: + + - Location information: `fileid`, `sentnum`, `wordnum` + - Annotator information: `tagger` + - Inflection information: `inflection` + - Roleset identifier: `roleset` + - Verb (aka predicate) location: `predicate` + - Argument locations and types: `arguments` + +The following examples show the types of these arguments: + + >>> inst = pb_instances[103] + >>> (inst.fileid, inst.sentnum, inst.wordnum) + ('wsj_0004.mrg', 8, 16) + >>> inst.tagger + 'gold' + >>> inst.inflection + + >>> infl = inst.inflection + >>> infl.form, infl.tense, infl.aspect, infl.person, infl.voice + ('v', 'p', '-', '-', 'a') + >>> inst.roleset + 'rise.01' + >>> inst.predicate + PropbankTreePointer(16, 0) + >>> inst.arguments # doctest: +NORMALIZE_WHITESPACE + ((PropbankTreePointer(0, 2), 'ARG1'), + (PropbankTreePointer(13, 1), 'ARGM-DIS'), + (PropbankTreePointer(17, 1), 'ARG4-to'), + (PropbankTreePointer(20, 1), 'ARG3-from')) + +The location of the predicate and of the arguments are encoded using +`PropbankTreePointer` objects, as well as `PropbankChainTreePointer` +objects and `PropbankSplitTreePointer` objects. A +`PropbankTreePointer` consists of a `wordnum` and a `height`: + + >>> print(inst.predicate.wordnum, inst.predicate.height) + 16 0 + +This identifies the tree constituent that is headed by the word that +is the `wordnum`\ 'th token in the sentence, and whose span is found +by going `height` nodes up in the tree. This type of pointer is only +useful if we also have the corresponding tree structure, since it +includes empty elements such as traces in the word number count. The +trees for 10% of the standard PropBank Corpus are contained in the +`treebank` corpus: + + >>> tree = inst.tree + + >>> from nltk.corpus import treebank + >>> assert tree == treebank.parsed_sents(inst.fileid)[inst.sentnum] + + >>> inst.predicate.select(tree) + Tree('VBD', ['rose']) + >>> for (argloc, argid) in inst.arguments: + ... print('%-10s %s' % (argid, argloc.select(tree).pformat(500)[:50])) + ARG1 (NP-SBJ (NP (DT The) (NN yield)) (PP (IN on) (NP ( + ARGM-DIS (PP (IN for) (NP (NN example))) + ARG4-to (PP-DIR (TO to) (NP (CD 8.04) (NN %))) + ARG3-from (PP-DIR (IN from) (NP (CD 7.90) (NN %))) + +Propbank tree pointers can be converted to standard tree locations, +which are usually easier to work with, using the `treepos()` method: + + >>> treepos = inst.predicate.treepos(tree) + >>> print (treepos, tree[treepos]) + (4, 0) (VBD rose) + +In some cases, argument locations will be encoded using +`PropbankChainTreePointer`\ s (for trace chains) or +`PropbankSplitTreePointer`\ s (for discontinuous constituents). Both +of these objects contain a single member variable, `pieces`, +containing a list of the constituent pieces. They also define the +method `select()`, which will return a tree containing all the +elements of the argument. (A new head node is created, labeled +"*CHAIN*" or "*SPLIT*", since the argument is not a single constituent +in the original tree). Sentence #6 contains an example of an argument +that is both discontinuous and contains a chain: + + >>> inst = pb_instances[6] + >>> inst.roleset + 'expose.01' + >>> argloc, argid = inst.arguments[2] + >>> argloc + + >>> argloc.pieces + [, PropbankTreePointer(27, 0)] + >>> argloc.pieces[0].pieces + ... 
# doctest: +NORMALIZE_WHITESPACE + [PropbankTreePointer(22, 1), PropbankTreePointer(24, 0), + PropbankTreePointer(25, 1)] + >>> print(argloc.select(inst.tree)) + (*CHAIN* + (*SPLIT* (NP (DT a) (NN group)) (IN of) (NP (NNS workers))) + (-NONE- *)) + +The PropBank Corpus also provides access to the frameset files, which +define the argument labels used by the annotations, on a per-verb +basis. Each frameset file contains one or more predicates, such as +'turn' or 'turn_on', each of which is divided into coarse-grained word +senses called rolesets. For each roleset, the frameset file provides +descriptions of the argument roles, along with examples. + + >>> expose_01 = propbank.roleset('expose.01') + >>> turn_01 = propbank.roleset('turn.01') + >>> print(turn_01) # doctest: +ELLIPSIS + + >>> for role in turn_01.findall("roles/role"): + ... print(role.attrib['n'], role.attrib['descr']) + 0 turner + 1 thing turning + m direction, location + + >>> from xml.etree import ElementTree + >>> print(ElementTree.tostring(turn_01.find('example')).decode('utf8').strip()) + + + John turned the key in the lock. + + John + turned + the key + in the lock + + +Note that the standard corpus distribution only contains 10% of the +treebank, so the parse trees are not available for instances starting +at 9353: + + >>> inst = pb_instances[9352] + >>> inst.fileid + 'wsj_0199.mrg' + >>> print(inst.tree) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS + (S (NP-SBJ (NNP Trinity)) (VP (VBD said) (SBAR (-NONE- 0) ...)) + >>> print(inst.predicate.select(inst.tree)) + (VB begin) + + >>> inst = pb_instances[9353] + >>> inst.fileid + 'wsj_0200.mrg' + >>> print(inst.tree) + None + >>> print(inst.predicate.select(inst.tree)) + Traceback (most recent call last): + . . . + ValueError: Parse tree not avaialable + +However, if you supply your own version of the treebank corpus (by +putting it before the nltk-provided version on `nltk.data.path`, or +by creating a `ptb` directory as described above and using the +`propbank_ptb` module), then you can access the trees for all +instances. + +A list of the verb lemmas contained in PropBank is returned by the +`propbank.verbs()` method: + + >>> propbank.verbs() + ['abandon', 'abate', 'abdicate', 'abet', 'abide', ...] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/relextract.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/relextract.doctest new file mode 100644 index 0000000..085fa90 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/relextract.doctest @@ -0,0 +1,263 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +====================== +Information Extraction +====================== + +Information Extraction standardly consists of three subtasks: + +#. Named Entity Recognition + +#. Relation Extraction + +#. Template Filling + +Named Entities +~~~~~~~~~~~~~~ + +The IEER corpus is marked up for a variety of Named Entities. A `Named +Entity`:dt: (more strictly, a Named Entity mention) is a name of an +entity belonging to a specified class. For example, the Named Entity +classes in IEER include PERSON, LOCATION, ORGANIZATION, DATE and so +on. Within NLTK, Named Entities are represented as subtrees within a +chunk structure: the class name is treated as node label, while the +entity mention itself appears as the leaves of the subtree. 
This is +illustrated below, where we have show an extract of the chunk +representation of document NYT_19980315.064: + + >>> from nltk.corpus import ieer + >>> docs = ieer.parsed_docs('NYT_19980315') + >>> tree = docs[1].text + >>> print(tree) # doctest: +ELLIPSIS + (DOCUMENT + ... + ``It's + a + chance + to + think + about + first-level + questions,'' + said + Ms. + (PERSON Cohn) + , + a + partner + in + the + (ORGANIZATION McGlashan & Sarrail) + firm + in + (LOCATION San Mateo) + , + (LOCATION Calif.) + ...) + +Thus, the Named Entity mentions in this example are *Cohn*, *McGlashan & +Sarrail*, *San Mateo* and *Calif.*. + +The CoNLL2002 Dutch and Spanish data is treated similarly, although in +this case, the strings are also POS tagged. + + >>> from nltk.corpus import conll2002 + >>> for doc in conll2002.chunked_sents('ned.train')[27]: + ... print(doc) + (u'Het', u'Art') + (ORG Hof/N van/Prep Cassatie/N) + (u'verbrak', u'V') + (u'het', u'Art') + (u'arrest', u'N') + (u'zodat', u'Conj') + (u'het', u'Pron') + (u'moest', u'V') + (u'worden', u'V') + (u'overgedaan', u'V') + (u'door', u'Prep') + (u'het', u'Art') + (u'hof', u'N') + (u'van', u'Prep') + (u'beroep', u'N') + (u'van', u'Prep') + (LOC Antwerpen/N) + (u'.', u'Punc') + +Relation Extraction +~~~~~~~~~~~~~~~~~~~ + +Relation Extraction standardly consists of identifying specified +relations between Named Entities. For example, assuming that we can +recognize ORGANIZATIONs and LOCATIONs in text, we might want to also +recognize pairs *(o, l)* of these kinds of entities such that *o* is +located in *l*. + +The `sem.relextract` module provides some tools to help carry out a +simple version of this task. The `tree2semi_rel()` function splits a chunk +document into a list of two-member lists, each of which consists of a +(possibly empty) string followed by a `Tree` (i.e., a Named Entity): + + >>> from nltk.sem import relextract + >>> pairs = relextract.tree2semi_rel(tree) + >>> for s, tree in pairs[18:22]: + ... print('("...%s", %s)' % (" ".join(s[-5:]),tree)) + ("...about first-level questions,'' said Ms.", (PERSON Cohn)) + ("..., a partner in the", (ORGANIZATION McGlashan & Sarrail)) + ("...firm in", (LOCATION San Mateo)) + ("...,", (LOCATION Calif.)) + +The function `semi_rel2reldict()` processes triples of these pairs, i.e., +pairs of the form ``((string1, Tree1), (string2, Tree2), (string3, +Tree3))`` and outputs a dictionary (a `reldict`) in which ``Tree1`` is +the subject of the relation, ``string2`` is the filler +and ``Tree3`` is the object of the relation. ``string1`` and ``string3`` are +stored as left and right context respectively. + + >>> reldicts = relextract.semi_rel2reldict(pairs) + >>> for k, v in sorted(reldicts[0].items()): + ... print(k, '=>', v) # doctest: +ELLIPSIS + filler => of messages to their own ``Cyberia'' ... + lcon => transactions.'' Each week, they post + objclass => ORGANIZATION + objsym => white_house + objtext => White House + rcon => for access to its planned + subjclass => CARDINAL + subjsym => hundreds + subjtext => hundreds + untagged_filler => of messages to their own ``Cyberia'' ... + +The next example shows some of the values for two `reldict`\ s +corresponding to the ``'NYT_19980315'`` text extract shown earlier. + + >>> for r in reldicts[18:20]: + ... print('=' * 20) + ... print(r['subjtext']) + ... print(r['filler']) + ... 
print(r['objtext']) + ==================== + Cohn + , a partner in the + McGlashan & Sarrail + ==================== + McGlashan & Sarrail + firm in + San Mateo + +The function `relextract()` allows us to filter the `reldict`\ s +according to the classes of the subject and object named entities. In +addition, we can specify that the filler text has to match a given +regular expression, as illustrated in the next example. Here, we are +looking for pairs of entities in the IN relation, where IN has +signature . + + >>> import re + >>> IN = re.compile(r'.*\bin\b(?!\b.+ing\b)') + >>> for fileid in ieer.fileids(): + ... for doc in ieer.parsed_docs(fileid): + ... for rel in relextract.extract_rels('ORG', 'LOC', doc, corpus='ieer', pattern = IN): + ... print(relextract.rtuple(rel)) # doctest: +ELLIPSIS + [ORG: 'Christian Democrats'] ', the leading political forces in' [LOC: 'Italy'] + [ORG: 'AP'] ') _ Lebanese guerrillas attacked Israeli forces in southern' [LOC: 'Lebanon'] + [ORG: 'Security Council'] 'adopted Resolution 425. Huge yellow banners hung across intersections in' [LOC: 'Beirut'] + [ORG: 'U.N.'] 'failures in' [LOC: 'Africa'] + [ORG: 'U.N.'] 'peacekeeping operation in' [LOC: 'Somalia'] + [ORG: 'U.N.'] 'partners on a more effective role in' [LOC: 'Africa'] + [ORG: 'AP'] ') _ A bomb exploded in a mosque in central' [LOC: 'San`a'] + [ORG: 'Krasnoye Sormovo'] 'shipyard in the Soviet city of' [LOC: 'Gorky'] + [ORG: 'Kelab Golf Darul Ridzuan'] 'in' [LOC: 'Perak'] + [ORG: 'U.N.'] 'peacekeeping operation in' [LOC: 'Somalia'] + [ORG: 'WHYY'] 'in' [LOC: 'Philadelphia'] + [ORG: 'McGlashan & Sarrail'] 'firm in' [LOC: 'San Mateo'] + [ORG: 'Freedom Forum'] 'in' [LOC: 'Arlington'] + [ORG: 'Brookings Institution'] ', the research group in' [LOC: 'Washington'] + [ORG: 'Idealab'] ', a self-described business incubator based in' [LOC: 'Los Angeles'] + [ORG: 'Open Text'] ', based in' [LOC: 'Waterloo'] + ... + +The next example illustrates a case where the patter is a disjunction +of roles that a PERSON can occupy in an ORGANIZATION. + + >>> roles = """ + ... (.*( + ... analyst| + ... chair(wo)?man| + ... commissioner| + ... counsel| + ... director| + ... economist| + ... editor| + ... executive| + ... foreman| + ... governor| + ... head| + ... lawyer| + ... leader| + ... librarian).*)| + ... manager| + ... partner| + ... president| + ... producer| + ... professor| + ... researcher| + ... spokes(wo)?man| + ... writer| + ... ,\sof\sthe?\s* # "X, of (the) Y" + ... """ + >>> ROLES = re.compile(roles, re.VERBOSE) + >>> for fileid in ieer.fileids(): + ... for doc in ieer.parsed_docs(fileid): + ... for rel in relextract.extract_rels('PER', 'ORG', doc, corpus='ieer', pattern=ROLES): + ... print(relextract.rtuple(rel)) # doctest: +ELLIPSIS + [PER: 'Kivutha Kibwana'] ', of the' [ORG: 'National Convention Assembly'] + [PER: 'Boban Boskovic'] ', chief executive of the' [ORG: 'Plastika'] + [PER: 'Annan'] ', the first sub-Saharan African to head the' [ORG: 'United Nations'] + [PER: 'Kiriyenko'] 'became a foreman at the' [ORG: 'Krasnoye Sormovo'] + [PER: 'Annan'] ', the first sub-Saharan African to head the' [ORG: 'United Nations'] + [PER: 'Mike Godwin'] ', chief counsel for the' [ORG: 'Electronic Frontier Foundation'] + ... + +In the case of the CoNLL2002 data, we can include POS tags in the +query pattern. This example also illustrates how the output can be +presented as something that looks more like a clause in a logical language. + + >>> de = """ + ... .* + ... ( + ... de/SP| + ... del/SP + ... ) + ... 
""" + >>> DE = re.compile(de, re.VERBOSE) + >>> rels = [rel for doc in conll2002.chunked_sents('esp.train') + ... for rel in relextract.extract_rels('ORG', 'LOC', doc, corpus='conll2002', pattern = DE)] + >>> for r in rels[:10]: + ... print(relextract.clause(r, relsym='DE')) # doctest: +NORMALIZE_WHITESPACE + DE(u'tribunal_supremo', u'victoria') + DE(u'museo_de_arte', u'alcorc\xf3n') + DE(u'museo_de_bellas_artes', u'a_coru\xf1a') + DE(u'siria', u'l\xedbano') + DE(u'uni\xf3n_europea', u'pek\xedn') + DE(u'ej\xe9rcito', u'rogberi') + DE(u'juzgado_de_instrucci\xf3n_n\xfamero_1', u'san_sebasti\xe1n') + DE(u'psoe', u'villanueva_de_la_serena') + DE(u'ej\xe9rcito', u'l\xedbano') + DE(u'juzgado_de_lo_penal_n\xfamero_2', u'ceuta') + >>> vnv = """ + ... ( + ... is/V| + ... was/V| + ... werd/V| + ... wordt/V + ... ) + ... .* + ... van/Prep + ... """ + >>> VAN = re.compile(vnv, re.VERBOSE) + >>> for doc in conll2002.chunked_sents('ned.train'): + ... for r in relextract.extract_rels('PER', 'ORG', doc, corpus='conll2002', pattern=VAN): + ... print(relextract.clause(r, relsym="VAN")) + VAN(u"cornet_d'elzius", u'buitenlandse_handel') + VAN(u'johan_rottiers', u'kardinaal_van_roey_instituut') + VAN(u'annie_lennox', u'eurythmics') diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/resolution.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/resolution.doctest new file mode 100644 index 0000000..318efcd --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/resolution.doctest @@ -0,0 +1,221 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +========================= +Resolution Theorem Prover +========================= + + >>> from nltk.inference.resolution import * + >>> from nltk.sem import logic + >>> from nltk.sem.logic import * + >>> logic._counter._value = 0 + >>> read_expr = logic.Expression.fromstring + + >>> P = read_expr('P') + >>> Q = read_expr('Q') + >>> R = read_expr('R') + >>> A = read_expr('A') + >>> B = read_expr('B') + >>> x = read_expr('x') + >>> y = read_expr('y') + >>> z = read_expr('z') + +------------------------------- +Test most_general_unification() +------------------------------- + >>> print(most_general_unification(x, x)) + {} + >>> print(most_general_unification(A, A)) + {} + >>> print(most_general_unification(A, x)) + {x: A} + >>> print(most_general_unification(x, A)) + {x: A} + >>> print(most_general_unification(x, y)) + {x: y} + >>> print(most_general_unification(P(x), P(A))) + {x: A} + >>> print(most_general_unification(P(x,B), P(A,y))) + {x: A, y: B} + >>> print(most_general_unification(P(x,B), P(B,x))) + {x: B} + >>> print(most_general_unification(P(x,y), P(A,x))) + {x: A, y: x} + >>> print(most_general_unification(P(Q(x)), P(y))) + {y: Q(x)} + +------------ +Test unify() +------------ + >>> print(Clause([]).unify(Clause([]))) + [] + >>> print(Clause([P(x)]).unify(Clause([-P(A)]))) + [{}] + >>> print(Clause([P(A), Q(x)]).unify(Clause([-P(x), R(x)]))) + [{R(A), Q(A)}] + >>> print(Clause([P(A), Q(x), R(x,y)]).unify(Clause([-P(x), Q(y)]))) + [{Q(y), Q(A), R(A,y)}] + >>> print(Clause([P(A), -Q(y)]).unify(Clause([-P(x), Q(B)]))) + [{}] + >>> print(Clause([P(x), Q(x)]).unify(Clause([-P(A), -Q(B)]))) + [{-Q(B), Q(A)}, {-P(A), P(B)}] + >>> print(Clause([P(x,x), Q(x), R(x)]).unify(Clause([-P(A,z), -Q(B)]))) + [{-Q(B), Q(A), R(A)}, {-P(A,z), R(B), P(B,B)}] + + >>> a = clausify(read_expr('P(A)')) + >>> b = clausify(read_expr('A=B')) + >>> print(a[0].unify(b[0])) + [{P(B)}] + +------------------------- +Test 
is_tautology() +------------------------- + >>> print(Clause([P(A), -P(A)]).is_tautology()) + True + >>> print(Clause([-P(A), P(A)]).is_tautology()) + True + >>> print(Clause([P(x), -P(A)]).is_tautology()) + False + >>> print(Clause([Q(B), -P(A), P(A)]).is_tautology()) + True + >>> print(Clause([-Q(A), P(R(A)), -P(R(A)), Q(x), -R(y)]).is_tautology()) + True + >>> print(Clause([P(x), -Q(A)]).is_tautology()) + False + +------------------------- +Test subsumes() +------------------------- + >>> print(Clause([P(A), Q(B)]).subsumes(Clause([P(A), Q(B)]))) + True + >>> print(Clause([-P(A)]).subsumes(Clause([P(A)]))) + False + >>> print(Clause([P(A), Q(B)]).subsumes(Clause([Q(B), P(A)]))) + True + >>> print(Clause([P(A), Q(B)]).subsumes(Clause([Q(B), R(A), P(A)]))) + True + >>> print(Clause([P(A), R(A), Q(B)]).subsumes(Clause([Q(B), P(A)]))) + False + >>> print(Clause([P(x)]).subsumes(Clause([P(A)]))) + True + >>> print(Clause([P(A)]).subsumes(Clause([P(x)]))) + True + +------------ +Test prove() +------------ + >>> print(ResolutionProverCommand(read_expr('man(x)')).prove()) + False + >>> print(ResolutionProverCommand(read_expr('(man(x) -> man(x))')).prove()) + True + >>> print(ResolutionProverCommand(read_expr('(man(x) -> --man(x))')).prove()) + True + >>> print(ResolutionProverCommand(read_expr('-(man(x) & -man(x))')).prove()) + True + >>> print(ResolutionProverCommand(read_expr('(man(x) | -man(x))')).prove()) + True + >>> print(ResolutionProverCommand(read_expr('(man(x) -> man(x))')).prove()) + True + >>> print(ResolutionProverCommand(read_expr('-(man(x) & -man(x))')).prove()) + True + >>> print(ResolutionProverCommand(read_expr('(man(x) | -man(x))')).prove()) + True + >>> print(ResolutionProverCommand(read_expr('(man(x) -> man(x))')).prove()) + True + >>> print(ResolutionProverCommand(read_expr('(man(x) <-> man(x))')).prove()) + True + >>> print(ResolutionProverCommand(read_expr('-(man(x) <-> -man(x))')).prove()) + True + >>> print(ResolutionProverCommand(read_expr('all x.man(x)')).prove()) + False + >>> print(ResolutionProverCommand(read_expr('-all x.some y.F(x,y) & some x.all y.(-F(x,y))')).prove()) + False + >>> print(ResolutionProverCommand(read_expr('some x.all y.sees(x,y)')).prove()) + False + + >>> p1 = read_expr('all x.(man(x) -> mortal(x))') + >>> p2 = read_expr('man(Socrates)') + >>> c = read_expr('mortal(Socrates)') + >>> ResolutionProverCommand(c, [p1,p2]).prove() + True + + >>> p1 = read_expr('all x.(man(x) -> walks(x))') + >>> p2 = read_expr('man(John)') + >>> c = read_expr('some y.walks(y)') + >>> ResolutionProverCommand(c, [p1,p2]).prove() + True + + >>> p = read_expr('some e1.some e2.(believe(e1,john,e2) & walk(e2,mary))') + >>> c = read_expr('some e0.walk(e0,mary)') + >>> ResolutionProverCommand(c, [p]).prove() + True + +------------ +Test proof() +------------ + >>> p1 = read_expr('all x.(man(x) -> mortal(x))') + >>> p2 = read_expr('man(Socrates)') + >>> c = read_expr('mortal(Socrates)') + >>> logic._counter._value = 0 + >>> tp = ResolutionProverCommand(c, [p1,p2]) + >>> tp.prove() + True + >>> print(tp.proof()) + [1] {-mortal(Socrates)} A + [2] {-man(z2), mortal(z2)} A + [3] {man(Socrates)} A + [4] {-man(Socrates)} (1, 2) + [5] {mortal(Socrates)} (2, 3) + [6] {} (1, 5) + + +------------------ +Question Answering +------------------ +One answer + >>> p1 = read_expr('father_of(art,john)') + >>> p2 = read_expr('father_of(bob,kim)') + >>> p3 = read_expr('all x.all y.(father_of(x,y) -> parent_of(x,y))') + >>> c = read_expr('all x.(parent_of(x,john) -> ANSWER(x))') + >>> 
logic._counter._value = 0 + >>> tp = ResolutionProverCommand(None, [p1,p2,p3,c]) + >>> sorted(tp.find_answers()) + [] + >>> print(tp.proof()) # doctest: +SKIP + [1] {father_of(art,john)} A + [2] {father_of(bob,kim)} A + [3] {-father_of(z3,z4), parent_of(z3,z4)} A + [4] {-parent_of(z6,john), ANSWER(z6)} A + [5] {parent_of(art,john)} (1, 3) + [6] {parent_of(bob,kim)} (2, 3) + [7] {ANSWER(z6), -father_of(z6,john)} (3, 4) + [8] {ANSWER(art)} (1, 7) + [9] {ANSWER(art)} (4, 5) + + +Multiple answers + >>> p1 = read_expr('father_of(art,john)') + >>> p2 = read_expr('mother_of(ann,john)') + >>> p3 = read_expr('all x.all y.(father_of(x,y) -> parent_of(x,y))') + >>> p4 = read_expr('all x.all y.(mother_of(x,y) -> parent_of(x,y))') + >>> c = read_expr('all x.(parent_of(x,john) -> ANSWER(x))') + >>> logic._counter._value = 0 + >>> tp = ResolutionProverCommand(None, [p1,p2,p3,p4,c]) + >>> sorted(tp.find_answers()) + [, ] + >>> print(tp.proof()) # doctest: +SKIP + [ 1] {father_of(art,john)} A + [ 2] {mother_of(ann,john)} A + [ 3] {-father_of(z3,z4), parent_of(z3,z4)} A + [ 4] {-mother_of(z7,z8), parent_of(z7,z8)} A + [ 5] {-parent_of(z10,john), ANSWER(z10)} A + [ 6] {parent_of(art,john)} (1, 3) + [ 7] {parent_of(ann,john)} (2, 4) + [ 8] {ANSWER(z10), -father_of(z10,john)} (3, 5) + [ 9] {ANSWER(art)} (1, 8) + [10] {ANSWER(z10), -mother_of(z10,john)} (4, 5) + [11] {ANSWER(ann)} (2, 10) + [12] {ANSWER(art)} (5, 6) + [13] {ANSWER(ann)} (5, 7) + + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/runtests.py b/venv.bak/lib/python3.7/site-packages/nltk/test/runtests.py new file mode 100644 index 0000000..8f40cc6 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/runtests.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +from __future__ import absolute_import, print_function +import sys +import os +import nose +from nose.plugins.manager import PluginManager +from nose.plugins.doctests import Doctest +from nose.plugins import builtin + +NLTK_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')) +sys.path.insert(0, NLTK_ROOT) + +NLTK_TEST_DIR = os.path.join(NLTK_ROOT, 'nltk') + +if __name__ == '__main__': + # there shouldn't be import from NLTK for coverage to work properly + from doctest_nose_plugin import DoctestFix + + try: + # Import RedNose plugin for colored test output + from rednose import RedNose + + rednose_available = True + except ImportError: + rednose_available = False + + class NltkPluginManager(PluginManager): + """ + Nose plugin manager that replaces standard doctest plugin + with a patched version and adds RedNose plugin for colored test output. + """ + + def loadPlugins(self): + for plug in builtin.plugins: + if plug != Doctest: + self.addPlugin(plug()) + self.addPlugin(DoctestFix()) + if rednose_available: + self.addPlugin(RedNose()) + + super(NltkPluginManager, self).loadPlugins() + + manager = NltkPluginManager() + manager.loadPlugins() + + # allow passing extra options and running individual tests + # Examples: + # + # python runtests.py semantics.doctest + # python runtests.py --with-id -v + # python runtests.py --with-id -v nltk.featstruct + + args = sys.argv[1:] + if not args: + args = [NLTK_TEST_DIR] + + if all(arg.startswith('-') for arg in args): + # only extra options were passed + args += [NLTK_TEST_DIR] + + # Activate RedNose and hide skipped test messages from output + if rednose_available: + args += ['--rednose', '--hide-skips'] + + arguments = [ + '--exclude=', # why is this needed? 
+ # '--with-xunit', + # '--xunit-file=$WORKSPACE/nosetests.xml', + # '--nocapture', + '--with-doctest', + # '--doctest-tests', + # '--debug=nose,nose.importer,nose.inspector,nose.plugins,nose.result,nose.selector', + '--doctest-extension=.doctest', + '--doctest-fixtures=_fixt', + '--doctest-options=+ELLIPSIS,+NORMALIZE_WHITESPACE,+IGNORE_EXCEPTION_DETAIL,+ALLOW_UNICODE,' + 'doctestencoding=utf-8', + # '--verbosity=3', + ] + args + + nose.main(argv=arguments, plugins=manager.plugins) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/segmentation_fixt.py b/venv.bak/lib/python3.7/site-packages/nltk/test/segmentation_fixt.py new file mode 100644 index 0000000..bb8a7cf --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/segmentation_fixt.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + + +# skip segmentation.doctest if numpy is not available +def setup_module(module): + from nose import SkipTest + + try: + import numpy + except ImportError: + raise SkipTest("segmentation.doctest requires numpy") diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/semantics.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/semantics.doctest new file mode 100644 index 0000000..f1a1f3c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/semantics.doctest @@ -0,0 +1,665 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +========= +Semantics +========= + + >>> import nltk + >>> from nltk.sem import Valuation, Model + >>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'), + ... ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])), + ... ('dog', set(['d1'])), + ... ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))] + >>> val = Valuation(v) + >>> dom = val.domain + >>> m = Model(dom, val) + +Evaluation +---------- + +The top-level method of a ``Model`` instance is ``evaluate()``, which +assigns a semantic value to expressions of the ``logic`` module, under +an assignment ``g``: + + >>> dom = val.domain + >>> g = nltk.sem.Assignment(dom) + >>> m.evaluate('all x.(boy(x) -> - girl(x))', g) + True + + +``evaluate()`` calls a recursive function ``satisfy()``, which in turn +calls a function ``i()`` to interpret non-logical constants and +individual variables. ``i()`` delegates the interpretation of these to +the the model's ``Valuation`` and the variable assignment ``g`` +respectively. Any atomic expression which cannot be assigned a value +by ``i`` raises an ``Undefined`` exception; this is caught by +``evaluate``, which returns the string ``'Undefined'``. + + >>> m.evaluate('walk(adam)', g, trace=2) + + 'walk(adam)' is undefined under M, g + 'Undefined' + +Batch Processing +---------------- + +The utility functions ``interpret_sents()`` and ``evaluate_sents()`` are intended to +help with processing multiple sentences. Here's an example of the first of these: + + >>> sents = ['Mary walks'] + >>> results = nltk.sem.util.interpret_sents(sents, 'grammars/sample_grammars/sem2.fcfg') + >>> for result in results: + ... for (synrep, semrep) in result: + ... 
print(synrep) + (S[SEM=] + (NP[-LOC, NUM='sg', SEM=<\P.P(mary)>] + (PropN[-LOC, NUM='sg', SEM=<\P.P(mary)>] Mary)) + (VP[NUM='sg', SEM=<\x.walk(x)>] + (IV[NUM='sg', SEM=<\x.walk(x)>, TNS='pres'] walks))) + +In order to provide backwards compatibility with 'legacy' grammars where the semantics value +is specified with a lowercase +``sem`` feature, the relevant feature name can be passed to the function using the +``semkey`` parameter, as shown here: + + >>> sents = ['raining'] + >>> g = nltk.grammar.FeatureGrammar.fromstring(""" + ... % start S + ... S[sem=] -> 'raining' + ... """) + >>> results = nltk.sem.util.interpret_sents(sents, g, semkey='sem') + >>> for result in results: + ... for (synrep, semrep) in result: + ... print(semrep) + raining + +The function ``evaluate_sents()`` works in a similar manner, but also needs to be +passed a ``Model`` against which the semantic representations are evaluated. + +Unit Tests +========== + + +Unit tests for relations and valuations +--------------------------------------- + + >>> from nltk.sem import * + +Relations are sets of tuples, all of the same length. + + >>> s1 = set([('d1', 'd2'), ('d1', 'd1'), ('d2', 'd1')]) + >>> is_rel(s1) + True + >>> s2 = set([('d1', 'd2'), ('d1', 'd2'), ('d1',)]) + >>> is_rel(s2) + Traceback (most recent call last): + . . . + ValueError: Set set([('d1', 'd2'), ('d1',)]) contains sequences of different lengths + >>> s3 = set(['d1', 'd2']) + >>> is_rel(s3) + Traceback (most recent call last): + . . . + ValueError: Set set(['d2', 'd1']) contains sequences of different lengths + >>> s4 = set2rel(s3) + >>> is_rel(s4) + True + >>> is_rel(set()) + True + >>> null_binary_rel = set([(None, None)]) + >>> is_rel(null_binary_rel) + True + +Sets of entities are converted into sets of singleton tuples +(containing strings). + + >>> sorted(set2rel(s3)) + [('d1',), ('d2',)] + >>> sorted(set2rel(set([1,3,5,]))) + ['1', '3', '5'] + >>> set2rel(set()) == set() + True + >>> set2rel(set2rel(s3)) == set2rel(s3) + True + +Predication is evaluated by set membership. + + >>> ('d1', 'd2') in s1 + True + >>> ('d2', 'd2') in s1 + False + >>> ('d1',) in s1 + False + >>> 'd2' in s1 + False + >>> ('d1',) in s4 + True + >>> ('d1',) in set() + False + >>> 'd1' in null_binary_rel + False + + + >>> val = Valuation([('Fido', 'd1'), ('dog', set(['d1', 'd2'])), ('walk', set())]) + >>> sorted(val['dog']) + [('d1',), ('d2',)] + >>> val.domain == set(['d1', 'd2']) + True + >>> print(val.symbols) + ['Fido', 'dog', 'walk'] + + +Parse a valuation from a string. + + >>> v = """ + ... john => b1 + ... mary => g1 + ... suzie => g2 + ... fido => d1 + ... tess => d2 + ... noosa => n + ... girl => {g1, g2} + ... boy => {b1, b2} + ... dog => {d1, d2} + ... bark => {d1, d2} + ... walk => {b1, g2, d1} + ... chase => {(b1, g1), (b2, g1), (g1, d1), (g2, d2)} + ... see => {(b1, g1), (b2, d2), (g1, b1),(d2, b1), (g2, n)} + ... in => {(b1, n), (b2, n), (d2, n)} + ... with => {(b1, g1), (g1, b1), (d1, b1), (b1, d1)} + ... 
""" + >>> val = Valuation.fromstring(v) + + >>> print(val) # doctest: +SKIP + {'bark': set([('d1',), ('d2',)]), + 'boy': set([('b1',), ('b2',)]), + 'chase': set([('b1', 'g1'), ('g2', 'd2'), ('g1', 'd1'), ('b2', 'g1')]), + 'dog': set([('d1',), ('d2',)]), + 'fido': 'd1', + 'girl': set([('g2',), ('g1',)]), + 'in': set([('d2', 'n'), ('b1', 'n'), ('b2', 'n')]), + 'john': 'b1', + 'mary': 'g1', + 'noosa': 'n', + 'see': set([('b1', 'g1'), ('b2', 'd2'), ('d2', 'b1'), ('g2', 'n'), ('g1', 'b1')]), + 'suzie': 'g2', + 'tess': 'd2', + 'walk': set([('d1',), ('b1',), ('g2',)]), + 'with': set([('b1', 'g1'), ('d1', 'b1'), ('b1', 'd1'), ('g1', 'b1')])} + + +Unit tests for function argument application in a Model +------------------------------------------------------- + + >>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),\ + ... ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])), ('dog', set(['d1'])), + ... ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')])), + ... ('kiss', null_binary_rel)] + >>> val = Valuation(v) + >>> dom = val.domain + >>> m = Model(dom, val) + >>> g = Assignment(dom) + >>> sorted(val['boy']) + [('b1',), ('b2',)] + >>> ('b1',) in val['boy'] + True + >>> ('g1',) in val['boy'] + False + >>> ('foo',) in val['boy'] + False + >>> ('b1', 'g1') in val['love'] + True + >>> ('b1', 'b1') in val['kiss'] + False + >>> sorted(val.domain) + ['b1', 'b2', 'd1', 'g1', 'g2'] + + +Model Tests +=========== + +Extension of Lambda expressions + + >>> v0 = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),\ + ... ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])), + ... ('dog', set(['d1'])), + ... ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))] + + >>> val0 = Valuation(v0) + >>> dom0 = val0.domain + >>> m0 = Model(dom0, val0) + >>> g0 = Assignment(dom0) + + >>> print(m0.evaluate(r'\x. \y. love(x, y)', g0) == {'g2': {'g2': False, 'b2': False, 'b1': True, 'g1': False, 'd1': False}, 'b2': {'g2': True, 'b2': False, 'b1': False, 'g1': False, 'd1': False}, 'b1': {'g2': False, 'b2': False, 'b1': False, 'g1': True, 'd1': False}, 'g1': {'g2': False, 'b2': False, 'b1': True, 'g1': False, 'd1': False}, 'd1': {'g2': False, 'b2': False, 'b1': False, 'g1': False, 'd1': False}}) + True + >>> print(m0.evaluate(r'\x. dog(x) (adam)', g0)) + False + >>> print(m0.evaluate(r'\x. (dog(x) | boy(x)) (adam)', g0)) + True + >>> print(m0.evaluate(r'\x. \y. love(x, y)(fido)', g0) == {'g2': False, 'b2': False, 'b1': False, 'g1': False, 'd1': False}) + True + >>> print(m0.evaluate(r'\x. \y. love(x, y)(adam)', g0) == {'g2': False, 'b2': False, 'b1': False, 'g1': True, 'd1': False}) + True + >>> print(m0.evaluate(r'\x. \y. love(x, y)(betty)', g0) == {'g2': False, 'b2': False, 'b1': True, 'g1': False, 'd1': False}) + True + >>> print(m0.evaluate(r'\x. \y. love(x, y)(betty)(adam)', g0)) + True + >>> print(m0.evaluate(r'\x. \y. love(x, y)(betty, adam)', g0)) + True + >>> print(m0.evaluate(r'\y. \x. love(x, y)(fido)(adam)', g0)) + False + >>> print(m0.evaluate(r'\y. \x. love(x, y)(betty, adam)', g0)) + True + >>> print(m0.evaluate(r'\x. exists y. love(x, y)', g0) == {'g2': True, 'b2': True, 'b1': True, 'g1': True, 'd1': False}) + True + >>> print(m0.evaluate(r'\z. adam', g0) == {'g2': 'b1', 'b2': 'b1', 'b1': 'b1', 'g1': 'b1', 'd1': 'b1'}) + True + >>> print(m0.evaluate(r'\z. love(x, y)', g0) == {'g2': False, 'b2': False, 'b1': False, 'g1': False, 'd1': False}) + True + + +Propositional Model Test +------------------------ + + >>> tests = [ + ... ('P & Q', True), + ... 
('P & R', False), + ... ('- P', False), + ... ('- R', True), + ... ('- - P', True), + ... ('- (P & R)', True), + ... ('P | R', True), + ... ('R | P', True), + ... ('R | R', False), + ... ('- P | R', False), + ... ('P | - P', True), + ... ('P -> Q', True), + ... ('P -> R', False), + ... ('R -> P', True), + ... ('P <-> P', True), + ... ('R <-> R', True), + ... ('P <-> R', False), + ... ] + >>> val1 = Valuation([('P', True), ('Q', True), ('R', False)]) + >>> dom = set([]) + >>> m = Model(dom, val1) + >>> g = Assignment(dom) + >>> for (sent, testvalue) in tests: + ... semvalue = m.evaluate(sent, g) + ... if semvalue == testvalue: + ... print('*', end=' ') + * * * * * * * * * * * * * * * * * + + +Test of i Function +------------------ + + >>> from nltk.sem import Expression + >>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'), + ... ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])), ('dog', set(['d1'])), + ... ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))] + >>> val = Valuation(v) + >>> dom = val.domain + >>> m = Model(dom, val) + >>> g = Assignment(dom, [('x', 'b1'), ('y', 'g2')]) + >>> exprs = ['adam', 'girl', 'love', 'walks', 'x', 'y', 'z'] + >>> parsed_exprs = [Expression.fromstring(e) for e in exprs] + >>> sorted_set = lambda x: sorted(x) if isinstance(x, set) else x + >>> for parsed in parsed_exprs: + ... try: + ... print("'%s' gets value %s" % (parsed, sorted_set(m.i(parsed, g)))) + ... except Undefined: + ... print("'%s' is Undefined" % parsed) + 'adam' gets value b1 + 'girl' gets value [('g1',), ('g2',)] + 'love' gets value [('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')] + 'walks' is Undefined + 'x' gets value b1 + 'y' gets value g2 + 'z' is Undefined + +Test for formulas in Model +-------------------------- + + >>> tests = [ + ... ('love(adam, betty)', True), + ... ('love(adam, sue)', 'Undefined'), + ... ('dog(fido)', True), + ... ('- dog(fido)', False), + ... ('- - dog(fido)', True), + ... ('- dog(sue)', 'Undefined'), + ... ('dog(fido) & boy(adam)', True), + ... ('- (dog(fido) & boy(adam))', False), + ... ('- dog(fido) & boy(adam)', False), + ... ('dog(fido) | boy(adam)', True), + ... ('- (dog(fido) | boy(adam))', False), + ... ('- dog(fido) | boy(adam)', True), + ... ('- dog(fido) | - boy(adam)', False), + ... ('dog(fido) -> boy(adam)', True), + ... ('- (dog(fido) -> boy(adam))', False), + ... ('- dog(fido) -> boy(adam)', True), + ... ('exists x . love(adam, x)', True), + ... ('all x . love(adam, x)', False), + ... ('fido = fido', True), + ... ('exists x . all y. love(x, y)', False), + ... ('exists x . (x = fido)', True), + ... ('all x . (dog(x) | - dog(x))', True), + ... ('adam = mia', 'Undefined'), + ... ('\\x. (boy(x) | girl(x))', {'g2': True, 'b2': True, 'b1': True, 'g1': True, 'd1': False}), + ... ('\\x. exists y. (boy(x) & love(x, y))', {'g2': False, 'b2': True, 'b1': True, 'g1': False, 'd1': False}), + ... ('exists z1. boy(z1)', True), + ... ('exists x. (boy(x) & - (x = adam))', True), + ... ('exists x. (boy(x) & all y. love(y, x))', False), + ... ('all x. (boy(x) | girl(x))', False), + ... ('all x. (girl(x) -> exists y. boy(y) & love(x, y))', False), + ... ('exists x. (boy(x) & all y. (girl(y) -> love(y, x)))', True), + ... ('exists x. (boy(x) & all y. (girl(y) -> love(x, y)))', False), + ... ('all x. (dog(x) -> - girl(x))', True), + ... ('exists x. exists y. (love(x, y) & love(x, y))', True), + ... ] + >>> for (sent, testvalue) in tests: + ... semvalue = m.evaluate(sent, g) + ... if semvalue == testvalue: + ... 
print('*', end=' ') + ... else: + ... print(sent, semvalue) + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + + + +Satisfier Tests +--------------- + + >>> formulas = [ + ... 'boy(x)', + ... '(x = x)', + ... '(boy(x) | girl(x))', + ... '(boy(x) & girl(x))', + ... 'love(adam, x)', + ... 'love(x, adam)', + ... '- (x = adam)', + ... 'exists z22. love(x, z22)', + ... 'exists y. love(y, x)', + ... 'all y. (girl(y) -> love(x, y))', + ... 'all y. (girl(y) -> love(y, x))', + ... 'all y. (girl(y) -> (boy(x) & love(y, x)))', + ... 'boy(x) & all y. (girl(y) -> love(x, y))', + ... 'boy(x) & all y. (girl(y) -> love(y, x))', + ... 'boy(x) & exists y. (girl(y) & love(y, x))', + ... 'girl(x) -> dog(x)', + ... 'all y. (dog(y) -> (x = y))', + ... '- exists y. love(y, x)', + ... 'exists y. (love(adam, y) & love(y, x))' + ... ] + >>> g.purge() + >>> g.add('x', 'b1') + {'x': 'b1'} + >>> for f in formulas: # doctest: +NORMALIZE_WHITESPACE + ... try: + ... print("'%s' gets value: %s" % (f, m.evaluate(f, g))) + ... except Undefined: + ... print("'%s' is Undefined" % f) + 'boy(x)' gets value: True + '(x = x)' gets value: True + '(boy(x) | girl(x))' gets value: True + '(boy(x) & girl(x))' gets value: False + 'love(adam, x)' gets value: False + 'love(x, adam)' gets value: False + '- (x = adam)' gets value: False + 'exists z22. love(x, z22)' gets value: True + 'exists y. love(y, x)' gets value: True + 'all y. (girl(y) -> love(x, y))' gets value: False + 'all y. (girl(y) -> love(y, x))' gets value: True + 'all y. (girl(y) -> (boy(x) & love(y, x)))' gets value: True + 'boy(x) & all y. (girl(y) -> love(x, y))' gets value: False + 'boy(x) & all y. (girl(y) -> love(y, x))' gets value: True + 'boy(x) & exists y. (girl(y) & love(y, x))' gets value: True + 'girl(x) -> dog(x)' gets value: True + 'all y. (dog(y) -> (x = y))' gets value: False + '- exists y. love(y, x)' gets value: False + 'exists y. (love(adam, y) & love(y, x))' gets value: True + + >>> from nltk.sem import Expression + >>> for fmla in formulas: # doctest: +NORMALIZE_WHITESPACE + ... p = Expression.fromstring(fmla) + ... g.purge() + ... print("Satisfiers of '%s':\n\t%s" % (p, sorted(m.satisfiers(p, 'x', g)))) + Satisfiers of 'boy(x)': + ['b1', 'b2'] + Satisfiers of '(x = x)': + ['b1', 'b2', 'd1', 'g1', 'g2'] + Satisfiers of '(boy(x) | girl(x))': + ['b1', 'b2', 'g1', 'g2'] + Satisfiers of '(boy(x) & girl(x))': + [] + Satisfiers of 'love(adam,x)': + ['g1'] + Satisfiers of 'love(x,adam)': + ['g1', 'g2'] + Satisfiers of '-(x = adam)': + ['b2', 'd1', 'g1', 'g2'] + Satisfiers of 'exists z22.love(x,z22)': + ['b1', 'b2', 'g1', 'g2'] + Satisfiers of 'exists y.love(y,x)': + ['b1', 'g1', 'g2'] + Satisfiers of 'all y.(girl(y) -> love(x,y))': + [] + Satisfiers of 'all y.(girl(y) -> love(y,x))': + ['b1'] + Satisfiers of 'all y.(girl(y) -> (boy(x) & love(y,x)))': + ['b1'] + Satisfiers of '(boy(x) & all y.(girl(y) -> love(x,y)))': + [] + Satisfiers of '(boy(x) & all y.(girl(y) -> love(y,x)))': + ['b1'] + Satisfiers of '(boy(x) & exists y.(girl(y) & love(y,x)))': + ['b1'] + Satisfiers of '(girl(x) -> dog(x))': + ['b1', 'b2', 'd1'] + Satisfiers of 'all y.(dog(y) -> (x = y))': + ['d1'] + Satisfiers of '-exists y.love(y,x)': + ['b2', 'd1'] + Satisfiers of 'exists y.(love(adam,y) & love(y,x))': + ['b1'] + + +Tests based on the Blackburn & Bos testsuite +-------------------------------------------- + + >>> v1 = [('jules', 'd1'), ('vincent', 'd2'), ('pumpkin', 'd3'), + ... ('honey_bunny', 'd4'), ('yolanda', 'd5'), + ... ('customer', set(['d1', 'd2'])), + ... 
('robber', set(['d3', 'd4'])), + ... ('love', set([('d3', 'd4')]))] + >>> val1 = Valuation(v1) + >>> dom1 = val1.domain + >>> m1 = Model(dom1, val1) + >>> g1 = Assignment(dom1) + + >>> v2 = [('jules', 'd1'), ('vincent', 'd2'), ('pumpkin', 'd3'), + ... ('honey_bunny', 'd4'), ('yolanda', 'd4'), + ... ('customer', set(['d1', 'd2', 'd5', 'd6'])), + ... ('robber', set(['d3', 'd4'])), + ... ('love', set([(None, None)]))] + >>> val2 = Valuation(v2) + >>> dom2 = set(['d1', 'd2', 'd3', 'd4', 'd5', 'd6']) + >>> m2 = Model(dom2, val2) + >>> g2 = Assignment(dom2) + >>> g21 = Assignment(dom2) + >>> g21.add('y', 'd3') + {'y': 'd3'} + + >>> v3 = [('mia', 'd1'), ('jody', 'd2'), ('jules', 'd3'), + ... ('vincent', 'd4'), + ... ('woman', set(['d1', 'd2'])), ('man', set(['d3', 'd4'])), + ... ('joke', set(['d5', 'd6'])), ('episode', set(['d7', 'd8'])), + ... ('in', set([('d5', 'd7'), ('d5', 'd8')])), + ... ('tell', set([('d1', 'd5'), ('d2', 'd6')]))] + >>> val3 = Valuation(v3) + >>> dom3 = set(['d1', 'd2', 'd3', 'd4', 'd5', 'd6', 'd7', 'd8']) + >>> m3 = Model(dom3, val3) + >>> g3 = Assignment(dom3) + + >>> tests = [ + ... ('exists x. robber(x)', m1, g1, True), + ... ('exists x. exists y. love(y, x)', m1, g1, True), + ... ('exists x0. exists x1. love(x1, x0)', m2, g2, False), + ... ('all x. all y. love(y, x)', m2, g2, False), + ... ('- (all x. all y. love(y, x))', m2, g2, True), + ... ('all x. all y. - love(y, x)', m2, g2, True), + ... ('yolanda = honey_bunny', m2, g2, True), + ... ('mia = honey_bunny', m2, g2, 'Undefined'), + ... ('- (yolanda = honey_bunny)', m2, g2, False), + ... ('- (mia = honey_bunny)', m2, g2, 'Undefined'), + ... ('all x. (robber(x) | customer(x))', m2, g2, True), + ... ('- (all x. (robber(x) | customer(x)))', m2, g2, False), + ... ('(robber(x) | customer(x))', m2, g2, 'Undefined'), + ... ('(robber(y) | customer(y))', m2, g21, True), + ... ('exists x. (man(x) & exists x. woman(x))', m3, g3, True), + ... ('exists x. (man(x) & exists x. woman(x))', m3, g3, True), + ... ('- exists x. woman(x)', m3, g3, False), + ... ('exists x. (tasty(x) & burger(x))', m3, g3, 'Undefined'), + ... ('- exists x. (tasty(x) & burger(x))', m3, g3, 'Undefined'), + ... ('exists x. (man(x) & - exists y. woman(y))', m3, g3, False), + ... ('exists x. (man(x) & - exists x. woman(x))', m3, g3, False), + ... ('exists x. (woman(x) & - exists x. customer(x))', m2, g2, 'Undefined'), + ... ] + + >>> for item in tests: + ... sentence, model, g, testvalue = item + ... semvalue = model.evaluate(sentence, g) + ... if semvalue == testvalue: + ... print('*', end=' ') + ... g.purge() + * * * * * * * * * * * * * * * * * * * * * * + + +Tests for mapping from syntax to semantics +------------------------------------------ + +Load a valuation from a file. + + >>> import nltk.data + >>> from nltk.sem.util import parse_sents + >>> val = nltk.data.load('grammars/sample_grammars/valuation1.val') + >>> dom = val.domain + >>> m = Model(dom, val) + >>> g = Assignment(dom) + >>> gramfile = 'grammars/sample_grammars/sem2.fcfg' + >>> inputs = ['John sees a girl', 'every dog barks'] + >>> parses = parse_sents(inputs, gramfile) + >>> for sent, trees in zip(inputs, parses): + ... print() + ... print("Sentence: %s" % sent) + ... for tree in trees: + ... print("Parse:\n %s" %tree) + ... 
print("Semantics: %s" % root_semrep(tree)) + + Sentence: John sees a girl + Parse: + (S[SEM=] + (NP[-LOC, NUM='sg', SEM=<\P.P(john)>] + (PropN[-LOC, NUM='sg', SEM=<\P.P(john)>] John)) + (VP[NUM='sg', SEM=<\y.exists x.(girl(x) & see(y,x))>] + (TV[NUM='sg', SEM=<\X y.X(\x.see(y,x))>, TNS='pres'] sees) + (NP[NUM='sg', SEM=<\Q.exists x.(girl(x) & Q(x))>] + (Det[NUM='sg', SEM=<\P Q.exists x.(P(x) & Q(x))>] a) + (Nom[NUM='sg', SEM=<\x.girl(x)>] + (N[NUM='sg', SEM=<\x.girl(x)>] girl))))) + Semantics: exists x.(girl(x) & see(john,x)) + + Sentence: every dog barks + Parse: + (S[SEM= bark(x))>] + (NP[NUM='sg', SEM=<\Q.all x.(dog(x) -> Q(x))>] + (Det[NUM='sg', SEM=<\P Q.all x.(P(x) -> Q(x))>] every) + (Nom[NUM='sg', SEM=<\x.dog(x)>] + (N[NUM='sg', SEM=<\x.dog(x)>] dog))) + (VP[NUM='sg', SEM=<\x.bark(x)>] + (IV[NUM='sg', SEM=<\x.bark(x)>, TNS='pres'] barks))) + Semantics: all x.(dog(x) -> bark(x)) + + >>> sent = "every dog barks" + >>> result = nltk.sem.util.interpret_sents([sent], gramfile)[0] + >>> for (syntree, semrep) in result: + ... print(syntree) + ... print() + ... print(semrep) + (S[SEM= bark(x))>] + (NP[NUM='sg', SEM=<\Q.all x.(dog(x) -> Q(x))>] + (Det[NUM='sg', SEM=<\P Q.all x.(P(x) -> Q(x))>] every) + (Nom[NUM='sg', SEM=<\x.dog(x)>] + (N[NUM='sg', SEM=<\x.dog(x)>] dog))) + (VP[NUM='sg', SEM=<\x.bark(x)>] + (IV[NUM='sg', SEM=<\x.bark(x)>, TNS='pres'] barks))) + + all x.(dog(x) -> bark(x)) + + >>> result = nltk.sem.util.evaluate_sents([sent], gramfile, m, g)[0] + >>> for (syntree, semrel, value) in result: + ... print(syntree) + ... print() + ... print(semrep) + ... print() + ... print(value) + (S[SEM= bark(x))>] + (NP[NUM='sg', SEM=<\Q.all x.(dog(x) -> Q(x))>] + (Det[NUM='sg', SEM=<\P Q.all x.(P(x) -> Q(x))>] every) + (Nom[NUM='sg', SEM=<\x.dog(x)>] + (N[NUM='sg', SEM=<\x.dog(x)>] dog))) + (VP[NUM='sg', SEM=<\x.bark(x)>] + (IV[NUM='sg', SEM=<\x.bark(x)>, TNS='pres'] barks))) + + all x.(dog(x) -> bark(x)) + + True + + >>> sents = ['Mary walks', 'John sees a dog'] + >>> results = nltk.sem.util.interpret_sents(sents, 'grammars/sample_grammars/sem2.fcfg') + >>> for result in results: + ... for (synrep, semrep) in result: + ... print(synrep) + (S[SEM=] + (NP[-LOC, NUM='sg', SEM=<\P.P(mary)>] + (PropN[-LOC, NUM='sg', SEM=<\P.P(mary)>] Mary)) + (VP[NUM='sg', SEM=<\x.walk(x)>] + (IV[NUM='sg', SEM=<\x.walk(x)>, TNS='pres'] walks))) + (S[SEM=] + (NP[-LOC, NUM='sg', SEM=<\P.P(john)>] + (PropN[-LOC, NUM='sg', SEM=<\P.P(john)>] John)) + (VP[NUM='sg', SEM=<\y.exists x.(dog(x) & see(y,x))>] + (TV[NUM='sg', SEM=<\X y.X(\x.see(y,x))>, TNS='pres'] sees) + (NP[NUM='sg', SEM=<\Q.exists x.(dog(x) & Q(x))>] + (Det[NUM='sg', SEM=<\P Q.exists x.(P(x) & Q(x))>] a) + (Nom[NUM='sg', SEM=<\x.dog(x)>] + (N[NUM='sg', SEM=<\x.dog(x)>] dog))))) + +Cooper Storage +-------------- + + >>> from nltk.sem import cooper_storage as cs + >>> sentence = 'every girl chases a dog' + >>> trees = cs.parse_with_bindops(sentence, grammar='grammars/book_grammars/storage.fcfg') + >>> semrep = trees[0].label()['SEM'] + >>> cs_semrep = cs.CooperStore(semrep) + >>> print(cs_semrep.core) + chase(z2,z4) + >>> for bo in cs_semrep.store: + ... 
print(bo) + bo(\P.all x.(girl(x) -> P(x)),z2) + bo(\P.exists x.(dog(x) & P(x)),z4) + >>> cs_semrep.s_retrieve(trace=True) + Permutation 1 + (\P.all x.(girl(x) -> P(x)))(\z2.chase(z2,z4)) + (\P.exists x.(dog(x) & P(x)))(\z4.all x.(girl(x) -> chase(x,z4))) + Permutation 2 + (\P.exists x.(dog(x) & P(x)))(\z4.chase(z2,z4)) + (\P.all x.(girl(x) -> P(x)))(\z2.exists x.(dog(x) & chase(z2,x))) + + >>> for reading in cs_semrep.readings: + ... print(reading) + exists x.(dog(x) & all z3.(girl(z3) -> chase(z3,x))) + all x.(girl(x) -> exists z4.(dog(z4) & chase(x,z4))) + + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/semantics_fixt.py b/venv.bak/lib/python3.7/site-packages/nltk/test/semantics_fixt.py new file mode 100644 index 0000000..135180d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/semantics_fixt.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +# reset the variables counter before running tests +def setup_module(module): + from nltk.sem import logic + + logic._counter._value = 0 diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/sentiment.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/sentiment.doctest new file mode 100644 index 0000000..359e165 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/sentiment.doctest @@ -0,0 +1,233 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +=================== +Sentiment Analysis +=================== + + >>> from nltk.classify import NaiveBayesClassifier + >>> from nltk.corpus import subjectivity + >>> from nltk.sentiment import SentimentAnalyzer + >>> from nltk.sentiment.util import * + + >>> n_instances = 100 + >>> subj_docs = [(sent, 'subj') for sent in subjectivity.sents(categories='subj')[:n_instances]] + >>> obj_docs = [(sent, 'obj') for sent in subjectivity.sents(categories='obj')[:n_instances]] + >>> len(subj_docs), len(obj_docs) + (100, 100) + +Each document is represented by a tuple (sentence, label). The sentence is tokenized, +so it is represented by a list of strings: + + >>> subj_docs[0] + (['smart', 'and', 'alert', ',', 'thirteen', 'conversations', 'about', 'one', + 'thing', 'is', 'a', 'small', 'gem', '.'], 'subj') + +We separately split subjective and objective instances to keep a balanced uniform +class distribution in both train and test sets. 
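+
+The explicit 80/20 slices below could equally be wrapped in a small helper;
+the sketch here uses a hypothetical ``split_docs`` function, is illustrative
+only, and is marked ``+SKIP`` so it is not executed:
+
+    >>> def split_docs(docs, train_ratio=0.8):  # doctest: +SKIP
+    ...     # cut each class list at the requested ratio
+    ...     cut = int(len(docs) * train_ratio)
+    ...     return docs[:cut], docs[cut:]
+    >>> train_part, test_part = split_docs(subj_docs)  # doctest: +SKIP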
+ + >>> train_subj_docs = subj_docs[:80] + >>> test_subj_docs = subj_docs[80:100] + >>> train_obj_docs = obj_docs[:80] + >>> test_obj_docs = obj_docs[80:100] + >>> training_docs = train_subj_docs+train_obj_docs + >>> testing_docs = test_subj_docs+test_obj_docs + + >>> sentim_analyzer = SentimentAnalyzer() + >>> all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_docs]) + +We use simple unigram word features, handling negation: + + >>> unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4) + >>> len(unigram_feats) + 83 + >>> sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats) + +We apply features to obtain a feature-value representation of our datasets: + + >>> training_set = sentim_analyzer.apply_features(training_docs) + >>> test_set = sentim_analyzer.apply_features(testing_docs) + +We can now train our classifier on the training set, and subsequently output the +evaluation results: + + >>> trainer = NaiveBayesClassifier.train + >>> classifier = sentim_analyzer.train(trainer, training_set) + Training classifier + >>> for key,value in sorted(sentim_analyzer.evaluate(test_set).items()): + ... print('{0}: {1}'.format(key, value)) + Evaluating NaiveBayesClassifier results... + Accuracy: 0.8 + F-measure [obj]: 0.8 + F-measure [subj]: 0.8 + Precision [obj]: 0.8 + Precision [subj]: 0.8 + Recall [obj]: 0.8 + Recall [subj]: 0.8 + + +Vader +------ + + >>> from nltk.sentiment.vader import SentimentIntensityAnalyzer + >>> sentences = ["VADER is smart, handsome, and funny.", # positive sentence example + ... "VADER is smart, handsome, and funny!", # punctuation emphasis handled correctly (sentiment intensity adjusted) + ... "VADER is very smart, handsome, and funny.", # booster words handled correctly (sentiment intensity adjusted) + ... "VADER is VERY SMART, handsome, and FUNNY.", # emphasis for ALLCAPS handled + ... "VADER is VERY SMART, handsome, and FUNNY!!!",# combination of signals - VADER appropriately adjusts intensity + ... "VADER is VERY SMART, really handsome, and INCREDIBLY FUNNY!!!",# booster words & punctuation make this close to ceiling for score + ... "The book was good.", # positive sentence + ... "The book was kind of good.", # qualified positive sentence is handled correctly (intensity adjusted) + ... "The plot was good, but the characters are uncompelling and the dialog is not great.", # mixed negation sentence + ... "A really bad, horrible book.", # negative sentence with booster words + ... "At least it isn't a horrible book.", # negated negative sentence with contraction + ... ":) and :D", # emoticons handled + ... "", # an empty string is correctly handled + ... "Today sux", # negative slang handled + ... "Today sux!", # negative slang with punctuation emphasis handled + ... "Today SUX!", # negative slang with capitalization emphasis + ... "Today kinda sux! But I'll get by, lol" # mixed sentiment example with slang and constrastive conjunction "but" + ... ] + >>> paragraph = "It was one of the worst movies I've seen, despite good reviews. \ + ... Unbelievably bad acting!! Poor direction. VERY poor production. \ + ... The movie was bad. Very bad movie. VERY bad movie. VERY BAD movie. VERY BAD movie!" + + >>> from nltk import tokenize + >>> lines_list = tokenize.sent_tokenize(paragraph) + >>> sentences.extend(lines_list) + + >>> tricky_sentences = [ + ... "Most automated sentiment analysis tools are shit.", + ... "VADER sentiment analysis is the shit.", + ... 
"Sentiment analysis has never been good.", + ... "Sentiment analysis with VADER has never been this good.", + ... "Warren Beatty has never been so entertaining.", + ... "I won't say that the movie is astounding and I wouldn't claim that \ + ... the movie is too banal either.", + ... "I like to hate Michael Bay films, but I couldn't fault this one", + ... "It's one thing to watch an Uwe Boll film, but another thing entirely \ + ... to pay for it", + ... "The movie was too good", + ... "This movie was actually neither that funny, nor super witty.", + ... "This movie doesn't care about cleverness, wit or any other kind of \ + ... intelligent humor.", + ... "Those who find ugly meanings in beautiful things are corrupt without \ + ... being charming.", + ... "There are slow and repetitive parts, BUT it has just enough spice to \ + ... keep it interesting.", + ... "The script is not fantastic, but the acting is decent and the cinematography \ + ... is EXCELLENT!", + ... "Roger Dodger is one of the most compelling variations on this theme.", + ... "Roger Dodger is one of the least compelling variations on this theme.", + ... "Roger Dodger is at least compelling as a variation on the theme.", + ... "they fall in love with the product", + ... "but then it breaks", + ... "usually around the time the 90 day warranty expires", + ... "the twin towers collapsed today", + ... "However, Mr. Carter solemnly argues, his client carried out the kidnapping \ + ... under orders and in the ''least offensive way possible.''" + ... ] + >>> sentences.extend(tricky_sentences) + >>> for sentence in sentences: + ... sid = SentimentIntensityAnalyzer() + ... print(sentence) + ... ss = sid.polarity_scores(sentence) + ... for k in sorted(ss): + ... print('{0}: {1}, '.format(k, ss[k]), end='') + ... print() + VADER is smart, handsome, and funny. + compound: 0.8316, neg: 0.0, neu: 0.254, pos: 0.746, + VADER is smart, handsome, and funny! + compound: 0.8439, neg: 0.0, neu: 0.248, pos: 0.752, + VADER is very smart, handsome, and funny. + compound: 0.8545, neg: 0.0, neu: 0.299, pos: 0.701, + VADER is VERY SMART, handsome, and FUNNY. + compound: 0.9227, neg: 0.0, neu: 0.246, pos: 0.754, + VADER is VERY SMART, handsome, and FUNNY!!! + compound: 0.9342, neg: 0.0, neu: 0.233, pos: 0.767, + VADER is VERY SMART, really handsome, and INCREDIBLY FUNNY!!! + compound: 0.9469, neg: 0.0, neu: 0.294, pos: 0.706, + The book was good. + compound: 0.4404, neg: 0.0, neu: 0.508, pos: 0.492, + The book was kind of good. + compound: 0.3832, neg: 0.0, neu: 0.657, pos: 0.343, + The plot was good, but the characters are uncompelling and the dialog is not great. + compound: -0.7042, neg: 0.327, neu: 0.579, pos: 0.094, + A really bad, horrible book. + compound: -0.8211, neg: 0.791, neu: 0.209, pos: 0.0, + At least it isn't a horrible book. + compound: 0.431, neg: 0.0, neu: 0.637, pos: 0.363, + :) and :D + compound: 0.7925, neg: 0.0, neu: 0.124, pos: 0.876, + + compound: 0.0, neg: 0.0, neu: 0.0, pos: 0.0, + Today sux + compound: -0.3612, neg: 0.714, neu: 0.286, pos: 0.0, + Today sux! + compound: -0.4199, neg: 0.736, neu: 0.264, pos: 0.0, + Today SUX! + compound: -0.5461, neg: 0.779, neu: 0.221, pos: 0.0, + Today kinda sux! But I'll get by, lol + compound: 0.2228, neg: 0.195, neu: 0.531, pos: 0.274, + It was one of the worst movies I've seen, despite good reviews. + compound: -0.7584, neg: 0.394, neu: 0.606, pos: 0.0, + Unbelievably bad acting!! + compound: -0.6572, neg: 0.686, neu: 0.314, pos: 0.0, + Poor direction. 
+ compound: -0.4767, neg: 0.756, neu: 0.244, pos: 0.0, + VERY poor production. + compound: -0.6281, neg: 0.674, neu: 0.326, pos: 0.0, + The movie was bad. + compound: -0.5423, neg: 0.538, neu: 0.462, pos: 0.0, + Very bad movie. + compound: -0.5849, neg: 0.655, neu: 0.345, pos: 0.0, + VERY bad movie. + compound: -0.6732, neg: 0.694, neu: 0.306, pos: 0.0, + VERY BAD movie. + compound: -0.7398, neg: 0.724, neu: 0.276, pos: 0.0, + VERY BAD movie! + compound: -0.7616, neg: 0.735, neu: 0.265, pos: 0.0, + Most automated sentiment analysis tools are shit. + compound: -0.5574, neg: 0.375, neu: 0.625, pos: 0.0, + VADER sentiment analysis is the shit. + compound: 0.6124, neg: 0.0, neu: 0.556, pos: 0.444, + Sentiment analysis has never been good. + compound: -0.3412, neg: 0.325, neu: 0.675, pos: 0.0, + Sentiment analysis with VADER has never been this good. + compound: 0.5228, neg: 0.0, neu: 0.703, pos: 0.297, + Warren Beatty has never been so entertaining. + compound: 0.5777, neg: 0.0, neu: 0.616, pos: 0.384, + I won't say that the movie is astounding and I wouldn't claim that the movie is too banal either. + compound: 0.4215, neg: 0.0, neu: 0.851, pos: 0.149, + I like to hate Michael Bay films, but I couldn't fault this one + compound: 0.3153, neg: 0.157, neu: 0.534, pos: 0.309, + It's one thing to watch an Uwe Boll film, but another thing entirely to pay for it + compound: -0.2541, neg: 0.112, neu: 0.888, pos: 0.0, + The movie was too good + compound: 0.4404, neg: 0.0, neu: 0.58, pos: 0.42, + This movie was actually neither that funny, nor super witty. + compound: -0.6759, neg: 0.41, neu: 0.59, pos: 0.0, + This movie doesn't care about cleverness, wit or any other kind of intelligent humor. + compound: -0.1338, neg: 0.265, neu: 0.497, pos: 0.239, + Those who find ugly meanings in beautiful things are corrupt without being charming. + compound: -0.3553, neg: 0.314, neu: 0.493, pos: 0.192, + There are slow and repetitive parts, BUT it has just enough spice to keep it interesting. + compound: 0.4678, neg: 0.079, neu: 0.735, pos: 0.186, + The script is not fantastic, but the acting is decent and the cinematography is EXCELLENT! + compound: 0.7565, neg: 0.092, neu: 0.607, pos: 0.301, + Roger Dodger is one of the most compelling variations on this theme. + compound: 0.2944, neg: 0.0, neu: 0.834, pos: 0.166, + Roger Dodger is one of the least compelling variations on this theme. + compound: -0.1695, neg: 0.132, neu: 0.868, pos: 0.0, + Roger Dodger is at least compelling as a variation on the theme. + compound: 0.2263, neg: 0.0, neu: 0.84, pos: 0.16, + they fall in love with the product + compound: 0.6369, neg: 0.0, neu: 0.588, pos: 0.412, + but then it breaks + compound: 0.0, neg: 0.0, neu: 1.0, pos: 0.0, + usually around the time the 90 day warranty expires + compound: 0.0, neg: 0.0, neu: 1.0, pos: 0.0, + the twin towers collapsed today + compound: -0.2732, neg: 0.344, neu: 0.656, pos: 0.0, + However, Mr. Carter solemnly argues, his client carried out the kidnapping under orders and in the ''least offensive way possible.'' + compound: -0.5859, neg: 0.23, neu: 0.697, pos: 0.074, diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/sentiwordnet.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/sentiwordnet.doctest new file mode 100644 index 0000000..46126bb --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/sentiwordnet.doctest @@ -0,0 +1,41 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. 
For license information, see LICENSE.TXT + +====================== +SentiWordNet Interface +====================== + +SentiWordNet can be imported like this: + + >>> from nltk.corpus import sentiwordnet as swn + +------------ +SentiSynsets +------------ + + >>> breakdown = swn.senti_synset('breakdown.n.03') + >>> print(breakdown) + + >>> breakdown.pos_score() + 0.0 + >>> breakdown.neg_score() + 0.25 + >>> breakdown.obj_score() + 0.75 + + +------ +Lookup +------ + + >>> list(swn.senti_synsets('slow')) # doctest: +NORMALIZE_WHITESPACE + [SentiSynset('decelerate.v.01'), SentiSynset('slow.v.02'), + SentiSynset('slow.v.03'), SentiSynset('slow.a.01'), + SentiSynset('slow.a.02'), SentiSynset('dense.s.04'), + SentiSynset('slow.a.04'), SentiSynset('boring.s.01'), + SentiSynset('dull.s.08'), SentiSynset('slowly.r.01'), + SentiSynset('behind.r.03')] + + >>> happy = swn.senti_synsets('happy', 'a') + + >>> all = swn.all_senti_synsets() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/simple.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/simple.doctest new file mode 100644 index 0000000..5636163 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/simple.doctest @@ -0,0 +1,85 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +================= +EasyInstall Tests +================= + +This file contains some simple tests that will be run by EasyInstall in +order to test the installation when NLTK-Data is absent. + + >>> from __future__ import print_function + +------------ +Tokenization +------------ + + >>> from nltk.tokenize import wordpunct_tokenize + >>> s = ("Good muffins cost $3.88\nin New York. Please buy me\n" + ... "two of them.\n\nThanks.") + >>> wordpunct_tokenize(s) # doctest: +NORMALIZE_WHITESPACE + ['Good', 'muffins', 'cost', '$', '3', '.', '88', 'in', 'New', 'York', '.', + 'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.'] + +------- +Metrics +------- + + >>> from nltk.metrics import precision, recall, f_measure + >>> reference = 'DET NN VB DET JJ NN NN IN DET NN'.split() + >>> test = 'DET VB VB DET NN NN NN IN DET NN'.split() + >>> reference_set = set(reference) + >>> test_set = set(test) + >>> precision(reference_set, test_set) + 1.0 + >>> print(recall(reference_set, test_set)) + 0.8 + >>> print(f_measure(reference_set, test_set)) + 0.88888888888... + +------------------ +Feature Structures +------------------ + + >>> from nltk import FeatStruct + >>> fs1 = FeatStruct(PER=3, NUM='pl', GND='fem') + >>> fs2 = FeatStruct(POS='N', AGR=fs1) + >>> print(fs2) + [ [ GND = 'fem' ] ] + [ AGR = [ NUM = 'pl' ] ] + [ [ PER = 3 ] ] + [ ] + [ POS = 'N' ] + >>> print(fs2['AGR']) + [ GND = 'fem' ] + [ NUM = 'pl' ] + [ PER = 3 ] + >>> print(fs2['AGR']['PER']) + 3 + +------- +Parsing +------- + + >>> from nltk.parse.recursivedescent import RecursiveDescentParser + >>> from nltk.grammar import CFG + >>> grammar = CFG.fromstring(""" + ... S -> NP VP + ... PP -> P NP + ... NP -> 'the' N | N PP | 'the' N PP + ... VP -> V NP | V PP | V NP PP + ... N -> 'cat' | 'dog' | 'rug' + ... V -> 'chased' + ... P -> 'on' + ... """) + >>> rd = RecursiveDescentParser(grammar) + >>> sent = 'the cat chased the dog on the rug'.split() + >>> for t in rd.parse(sent): + ... 
print(t) + (S + (NP the (N cat)) + (VP (V chased) (NP the (N dog) (PP (P on) (NP the (N rug)))))) + (S + (NP the (N cat)) + (VP (V chased) (NP the (N dog)) (PP (P on) (NP the (N rug))))) + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/stem.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/stem.doctest new file mode 100644 index 0000000..2cf9857 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/stem.doctest @@ -0,0 +1,78 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +========== + Stemmers +========== + +Overview +~~~~~~~~ + +Stemmers remove morphological affixes from words, leaving only the +word stem. + + >>> from __future__ import print_function + >>> from nltk.stem import * + +Unit tests for the Porter stemmer +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + >>> from nltk.stem.porter import * + +Create a new Porter stemmer. + + >>> stemmer = PorterStemmer() + +Test the stemmer on various pluralised words. + + >>> plurals = ['caresses', 'flies', 'dies', 'mules', 'denied', + ... 'died', 'agreed', 'owned', 'humbled', 'sized', + ... 'meeting', 'stating', 'siezing', 'itemization', + ... 'sensational', 'traditional', 'reference', 'colonizer', + ... 'plotted'] + + >>> singles = [stemmer.stem(plural) for plural in plurals] + + >>> print(' '.join(singles)) # doctest: +NORMALIZE_WHITESPACE + caress fli die mule deni die agre own humbl size meet + state siez item sensat tradit refer colon plot + + +Unit tests for Snowball stemmer +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + >>> from nltk.stem.snowball import SnowballStemmer + +See which languages are supported. + + >>> print(" ".join(SnowballStemmer.languages)) + arabic danish dutch english finnish french german hungarian italian + norwegian porter portuguese romanian russian spanish swedish + +Create a new instance of a language specific subclass. + + >>> stemmer = SnowballStemmer("english") + +Stem a word. + + >>> print(stemmer.stem("running")) + run + +Decide not to stem stopwords. + + >>> stemmer2 = SnowballStemmer("english", ignore_stopwords=True) + >>> print(stemmer.stem("having")) + have + >>> print(stemmer2.stem("having")) + having + +The 'english' stemmer is better than the original 'porter' stemmer. + + >>> print(SnowballStemmer("english").stem("generously")) + generous + >>> print(SnowballStemmer("porter").stem("generously")) + gener + +.. note:: + + Extra stemmer tests can be found in `nltk.test.unit.test_stem`. diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/tag.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/tag.doctest new file mode 100644 index 0000000..2248cba --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/tag.doctest @@ -0,0 +1,33 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +Regression Tests +~~~~~~~~~~~~~~~~ + +Sequential Taggers +------------------ + +Add tests for: + - make sure backoff is being done correctly. + - make sure ngram taggers don't use previous sentences for context. + - make sure ngram taggers see 'beginning of the sentence' as a + unique context + - make sure regexp tagger's regexps are tried in order + - train on some simple examples, & make sure that the size & the + generated models are correct. + - make sure cutoff works as intended + - make sure that ngram models only exclude contexts covered by the + backoff tagger if the backoff tagger gets that context correct at + *all* locations. 
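
The backoff behaviour that several of the items above describe can be exercised
with a short chain of taggers. The sketch below is an editor's illustration
rather than part of the doctest; it uses the public ``nltk.tag`` API and assumes
the ``treebank`` corpus has been downloaded::

    from nltk.corpus import treebank
    from nltk.tag import BigramTagger, DefaultTagger, UnigramTagger

    train_sents = treebank.tagged_sents()[:3000]

    # Build a backoff chain: bigram -> unigram -> default.
    t0 = DefaultTagger('NN')                     # last resort: tag everything NN
    t1 = UnigramTagger(train_sents, backoff=t0)  # most frequent tag per word
    t2 = BigramTagger(train_sents, backoff=t1)   # previous tag as extra context

    print(t2.tag('the cat chased the dog on the rug'.split()))
    print(t2.backoff is t1, t1.backoff is t0)    # the chain is wired as expected

Assertions along these lines would cover the backoff and ngram-context items
listed above.
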
+ + +Regression Testing for issue #1025 +================================== + +We want to ensure that a RegexpTagger can be created with more than 100 patterns +and does not fail with: + "AssertionError: sorry, but this version only supports 100 named groups" + + >>> from nltk.tag import RegexpTagger + >>> patterns = [(str(i), 'NNP',) for i in range(200)] + >>> tagger = RegexpTagger(patterns) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/tokenize.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/tokenize.doctest new file mode 100644 index 0000000..f99e22a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/tokenize.doctest @@ -0,0 +1,290 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + + >>> from __future__ import print_function + >>> from nltk.tokenize import * + +Regression Tests: Treebank Tokenizer +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some test strings. + + >>> s1 = "On a $50,000 mortgage of 30 years at 8 percent, the monthly payment would be $366.88." + >>> word_tokenize(s1) + ['On', 'a', '$', '50,000', 'mortgage', 'of', '30', 'years', 'at', '8', 'percent', ',', 'the', 'monthly', 'payment', 'would', 'be', '$', '366.88', '.'] + >>> s2 = "\"We beat some pretty good teams to get here,\" Slocum said." + >>> word_tokenize(s2) + ['``', 'We', 'beat', 'some', 'pretty', 'good', 'teams', 'to', 'get', 'here', ',', "''", 'Slocum', 'said', '.'] + >>> s3 = "Well, we couldn't have this predictable, cliche-ridden, \"Touched by an Angel\" (a show creator John Masius worked on) wanna-be if she didn't." + >>> word_tokenize(s3) + ['Well', ',', 'we', 'could', "n't", 'have', 'this', 'predictable', ',', 'cliche-ridden', ',', '``', 'Touched', 'by', 'an', 'Angel', "''", '(', 'a', 'show', 'creator', 'John', 'Masius', 'worked', 'on', ')', 'wanna-be', 'if', 'she', 'did', "n't", '.'] + >>> s4 = "I cannot cannot work under these conditions!" + >>> word_tokenize(s4) + ['I', 'can', 'not', 'can', 'not', 'work', 'under', 'these', 'conditions', '!'] + >>> s5 = "The company spent $30,000,000 last year." + >>> word_tokenize(s5) + ['The', 'company', 'spent', '$', '30,000,000', 'last', 'year', '.'] + >>> s6 = "The company spent 40.75% of its income last year." + >>> word_tokenize(s6) + ['The', 'company', 'spent', '40.75', '%', 'of', 'its', 'income', 'last', 'year', '.'] + >>> s7 = "He arrived at 3:00 pm." + >>> word_tokenize(s7) + ['He', 'arrived', 'at', '3:00', 'pm', '.'] + >>> s8 = "I bought these items: books, pencils, and pens." + >>> word_tokenize(s8) + ['I', 'bought', 'these', 'items', ':', 'books', ',', 'pencils', ',', 'and', 'pens', '.'] + >>> s9 = "Though there were 150, 100 of them were old." + >>> word_tokenize(s9) + ['Though', 'there', 'were', '150', ',', '100', 'of', 'them', 'were', 'old', '.'] + >>> s10 = "There were 300,000, but that wasn't enough." + >>> word_tokenize(s10) + ['There', 'were', '300,000', ',', 'but', 'that', 'was', "n't", 'enough', '.'] + + +Testing improvement made to the TreebankWordTokenizer + + >>> sx1 = u'\xabNow that I can do.\xbb' + >>> expected = [u'\xab', u'Now', u'that', u'I', u'can', u'do', u'.', u'\xbb'] + >>> word_tokenize(sx1) == expected + True + >>> sx2 = u'The unicode 201C and 201D \u201cLEFT(RIGHT) DOUBLE QUOTATION MARK\u201d is also OPEN_PUNCT and CLOSE_PUNCT.' 
+ >>> expected = [u'The', u'unicode', u'201C', u'and', u'201D', u'\u201c', u'LEFT', u'(', u'RIGHT', u')', u'DOUBLE', u'QUOTATION', u'MARK', u'\u201d', u'is', u'also', u'OPEN_PUNCT', u'and', u'CLOSE_PUNCT', u'.'] + >>> word_tokenize(sx2) == expected + True + + +Sentence tokenization in word_tokenize: + + >>> s11 = "I called Dr. Jones. I called Dr. Jones." + >>> word_tokenize(s11) + ['I', 'called', 'Dr.', 'Jones', '.', 'I', 'called', 'Dr.', 'Jones', '.'] + >>> s12 = ("Ich muss unbedingt daran denken, Mehl, usw. fur einen " + ... "Kuchen einzukaufen. Ich muss.") + >>> word_tokenize(s12) + ['Ich', 'muss', 'unbedingt', 'daran', 'denken', ',', 'Mehl', ',', 'usw', + '.', 'fur', 'einen', 'Kuchen', 'einzukaufen', '.', 'Ich', 'muss', '.'] + >>> word_tokenize(s12, 'german') + ['Ich', 'muss', 'unbedingt', 'daran', 'denken', ',', 'Mehl', ',', 'usw.', + 'fur', 'einen', 'Kuchen', 'einzukaufen', '.', 'Ich', 'muss', '.'] + + +Regression Tests: Regexp Tokenizer +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some additional test strings. + + >>> s = ("Good muffins cost $3.88\nin New York. Please buy me\n" + ... "two of them.\n\nThanks.") + >>> s2 = ("Alas, it has not rained today. When, do you think, " + ... "will it rain again?") + >>> s3 = ("

<p>Although this is <b>not</b> the case here, we must "
    ...       "not relax our vigilance!</p>")

    >>> regexp_tokenize(s2, r'[,\.\?!"]\s*', gaps=False)
    [', ', '. ', ', ', ', ', '?']
    >>> regexp_tokenize(s2, r'[,\.\?!"]\s*', gaps=True)
    ['Alas', 'it has not rained today', 'When', 'do you think',
    'will it rain again']

Take care to avoid using capturing groups:

    >>> regexp_tokenize(s3, r'</?[bp]>', gaps=False)
    ['<p>', '<b>', '</b>', '</p>']
    >>> regexp_tokenize(s3, r'</?(?:b|p)>', gaps=False)
    ['<p>', '<b>', '</b>', '</p>
    '] + >>> regexp_tokenize(s3, r'', gaps=True) + ['Although this is ', 'not', + ' the case here, we must not relax our vigilance!'] + +Named groups are capturing groups, and confuse the tokenizer: + + >>> regexp_tokenize(s3, r'b|p)>', gaps=False) + ['p', 'b', 'b', 'p'] + >>> regexp_tokenize(s3, r'b|p)>', gaps=True) + ['p', 'Although this is ', 'b', 'not', 'b', + ' the case here, we must not relax our vigilance!', 'p'] + +Make sure that nested groups don't confuse the tokenizer: + + >>> regexp_tokenize(s2, r'(?:h|r|l)a(?:s|(?:i|n0))', gaps=False) + ['las', 'has', 'rai', 'rai'] + >>> regexp_tokenize(s2, r'(?:h|r|l)a(?:s|(?:i|n0))', gaps=True) + ['A', ', it ', ' not ', 'ned today. When, do you think, will it ', + 'n again?'] + +Back-references require capturing groups, and these are not supported: + + >>> regexp_tokenize("aabbbcccc", r'(.)\1') + ['a', 'b', 'c', 'c'] + +A simple sentence tokenizer '\.(\s+|$)' + + >>> regexp_tokenize(s, pattern=r'\.(?:\s+|$)', gaps=True) + ['Good muffins cost $3.88\nin New York', + 'Please buy me\ntwo of them', 'Thanks'] + + +Regression Tests: TweetTokenizer +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +TweetTokenizer is a tokenizer specifically designed for micro-blogging tokenization tasks. + + >>> from nltk.tokenize import TweetTokenizer + >>> tknzr = TweetTokenizer() + >>> s0 = "This is a cooool #dummysmiley: :-) :-P <3 and some arrows < > -> <--" + >>> tknzr.tokenize(s0) + ['This', 'is', 'a', 'cooool', '#dummysmiley', ':', ':-)', ':-P', '<3', 'and', 'some', 'arrows', '<', '>', '->', '<--'] + >>> s1 = "@Joyster2012 @CathStaincliffe Good for you, girl!! Best wishes :-)" + >>> tknzr.tokenize(s1) + ['@Joyster2012', '@CathStaincliffe', 'Good', 'for', 'you', ',', 'girl', '!', '!', 'Best', 'wishes', ':-)'] + >>> s2 = "3Points for #DreamTeam Gooo BAILEY! :) #PBB737Gold @PBBabscbn" + >>> tknzr.tokenize(s2) + ['3Points', 'for', '#DreamTeam', 'Gooo', 'BAILEY', '!', ':)', '#PBB737Gold', '@PBBabscbn'] + >>> s3 = "@Insanomania They do... Their mentality doesn't :(" + >>> tknzr.tokenize(s3) + ['@Insanomania', 'They', 'do', '...', 'Their', 'mentality', "doesn't", ':('] + >>> s4 = "RT @facugambande: Ya por arrancar a grabar !!! #TirenTirenTiren vamoo !!" + >>> tknzr.tokenize(s4) + ['RT', '@facugambande', ':', 'Ya', 'por', 'arrancar', 'a', 'grabar', '!', '!', '!', '#TirenTirenTiren', 'vamoo', '!', '!'] + >>> tknzr = TweetTokenizer(reduce_len=True) + >>> s5 = "@crushinghes the summer holidays are great but I'm so bored already :(" + >>> tknzr.tokenize(s5) + ['@crushinghes', 'the', 'summer', 'holidays', 'are', 'great', 'but', "I'm", 'so', 'bored', 'already', ':('] + +It is possible to specify `strip_handles` and `reduce_len` parameters for a TweetTokenizer instance. Setting `strip_handles` to True, the tokenizer will remove Twitter handles (e.g. usernames). Setting `reduce_len` to True, repeated character sequences of length 3 or greater will be replaced with sequences of length 3. + + >>> tknzr = TweetTokenizer(strip_handles=True, reduce_len=True) + >>> s6 = '@remy: This is waaaaayyyy too much for you!!!!!!' + >>> tknzr.tokenize(s6) + [':', 'This', 'is', 'waaayyy', 'too', 'much', 'for', 'you', '!', '!', '!'] + >>> s7 = '@_willy65: No place for @chuck tonight. Sorry.' + >>> tknzr.tokenize(s7) + [':', 'No', 'place', 'for', 'tonight', '.', 'Sorry', '.'] + >>> s8 = '@mar_tin is a great developer. Contact him at mar_tin@email.com.' 
+ >>> tknzr.tokenize(s8) + ['is', 'a', 'great', 'developer', '.', 'Contact', 'him', 'at', 'mar_tin@email.com', '.'] + +The `preserve_case` parameter (default: True) allows to convert uppercase tokens to lowercase tokens. Emoticons are not affected: + + >>> tknzr = TweetTokenizer(preserve_case=False) + >>> s9 = "@jrmy: I'm REALLY HAPPYYY about that! NICEEEE :D :P" + >>> tknzr.tokenize(s9) + ['@jrmy', ':', "i'm", 'really', 'happyyy', 'about', 'that', '!', 'niceeee', ':D', ':P'] + +It should not hang on long sequences of the same punctuation character. + + >>> tknzr = TweetTokenizer() + >>> s10 = "Photo: Aujourd'hui sur http://t.co/0gebOFDUzn Projet... http://t.co/bKfIUbydz2.............................. http://fb.me/3b6uXpz0L" + >>> tknzr.tokenize(s10) + [u'Photo', u':', u"Aujourd'hui", u'sur', u'http://t.co/0gebOFDUzn', u'Projet', u'...', u'http://t.co/bKfIUbydz2', u'...', u'http://fb.me/3b6uXpz0L'] + + +Regression Tests: PunktSentenceTokenizer +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The sentence splitter should remove whitespace following the sentence boundary. + + >>> pst = PunktSentenceTokenizer() + >>> pst.tokenize('See Section 3). Or Section 2). ') + ['See Section 3).', 'Or Section 2).'] + >>> pst.tokenize('See Section 3.) Or Section 2.) ') + ['See Section 3.)', 'Or Section 2.)'] + >>> pst.tokenize('See Section 3.) Or Section 2.) ', realign_boundaries=False) + ['See Section 3.', ') Or Section 2.', ')'] + + +Two instances of PunktSentenceTokenizer should not share PunktParameters. + + >>> pst = PunktSentenceTokenizer() + >>> pst2 = PunktSentenceTokenizer() + >>> pst._params is pst2._params + False + +Testing mutable default arguments for https://github.com/nltk/nltk/pull/2067 + + >>> from nltk.tokenize.punkt import PunktBaseClass, PunktTrainer, PunktSentenceTokenizer + >>> from nltk.tokenize.punkt import PunktLanguageVars, PunktParameters + >>> pbc = PunktBaseClass(lang_vars=None, params=None) + >>> type(pbc._params) + + >>> type(pbc._lang_vars) + + >>> pt = PunktTrainer(lang_vars=None) + >>> type(pt._lang_vars) + + >>> pst = PunktSentenceTokenizer(lang_vars=None) + >>> type(pst._lang_vars) + + + +Regression Tests: align_tokens +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Post-hoc alignment of tokens with a source string + + >>> from nltk.tokenize.util import align_tokens + >>> list(align_tokens([''], "")) + [(0, 0)] + >>> list(align_tokens([''], " ")) + [(0, 0)] + >>> list(align_tokens([], "")) + [] + >>> list(align_tokens([], " ")) + [] + >>> list(align_tokens(['a'], "a")) + [(0, 1)] + >>> list(align_tokens(['abc', 'def'], "abcdef")) + [(0, 3), (3, 6)] + >>> list(align_tokens(['abc', 'def'], "abc def")) + [(0, 3), (4, 7)] + >>> list(align_tokens(['ab', 'cd'], "ab cd ef")) + [(0, 2), (3, 5)] + >>> list(align_tokens(['ab', 'cd', 'ef'], "ab cd ef")) + [(0, 2), (3, 5), (6, 8)] + >>> list(align_tokens(['ab', 'cd', 'efg'], "ab cd ef")) + Traceback (most recent call last): + .... + ValueError: substring "efg" not found in "ab cd ef" + >>> list(align_tokens(['ab', 'cd', 'ef', 'gh'], "ab cd ef")) + Traceback (most recent call last): + .... 
+ ValueError: substring "gh" not found in "ab cd ef" + >>> list(align_tokens(['The', 'plane', ',', 'bound', 'for', 'St', 'Petersburg', ',', 'crashed', 'in', 'Egypt', "'s", 'Sinai', 'desert', 'just', '23', 'minutes', 'after', 'take-off', 'from', 'Sharm', 'el-Sheikh', 'on', 'Saturday', '.'], "The plane, bound for St Petersburg, crashed in Egypt's Sinai desert just 23 minutes after take-off from Sharm el-Sheikh on Saturday.")) + [(0, 3), (4, 9), (9, 10), (11, 16), (17, 20), (21, 23), (24, 34), (34, 35), (36, 43), (44, 46), (47, 52), (52, 54), (55, 60), (61, 67), (68, 72), (73, 75), (76, 83), (84, 89), (90, 98), (99, 103), (104, 109), (110, 119), (120, 122), (123, 131), (131, 132)] + + +Regression Tests: MWETokenizer +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Pickle an MWETokenizer + + >>> from nltk.tokenize import MWETokenizer + >>> import pickle + + >>> tokenizer = MWETokenizer([('hors', "d'oeuvre")], separator='+') + >>> p = pickle.dumps(tokenizer) + >>> unpickeled = pickle.loads(p) + >>> unpickeled.tokenize("An hors d'oeuvre tonight, sir?".split()) + ['An', "hors+d'oeuvre", 'tonight,', 'sir?'] + + +Regression Tests: TextTilingTokenizer +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +TextTilingTokneizer tokenizes text into coherent subtopic chunks based upon Hearst's TextTiling algorithm. + + >>> from nltk.tokenize import TextTilingTokenizer + >>> from nltk.corpus import brown + >>> tt = TextTilingTokenizer() + >>> tt.tokenize(brown.raw()[0:1000]) + ["\n\n\tThe/at Fulton/np-tl County/nn-tl Grand/jj-tl Jury/nn-tl said/vbd Friday/nr an/at investigation/nn of/in Atlanta's/np$ recent/jj primary/nn election/nn produced/vbd ``/`` no/at evidence/nn ''/'' that/cs any/dti irregularities/nns took/vbd place/nn ./.\n\n\n\tThe/at jury/nn further/rbr said/vbd in/in term-end/nn presentments/nns that/cs the/at City/nn-tl Executive/jj-tl Committee/nn-tl ,/, which/wdt had/hvd over-all/jj charge/nn of/in the/at election/nn ,/, ``/`` deserves/vbz the/at praise/nn and/cc thanks/nns of/in the/at City/nn-tl of/in-tl Atlanta/np-tl ''/'' for/in the/at manner/nn in/in which/wdt the/at election/nn was/bedz conducted/vbn ./.\n\n\n\tThe/at September-October/np term/nn jury/nn had/hvd been/ben charged/vbn by/in Fulton/np-tl Superior/jj-tl Court/nn-tl Judge/nn-tl Durwood/np Pye/np to/to investigate/vb reports/nns of/in possible/jj ``/`` irregularities/nns ''/'' in/in the/at hard-fought/jj primary/nn which/wdt was/bedz won/vbn by/in Mayor-nominate/nn-tl Ivan/np Allen/np Jr./"] + +Test that `ValueError` exceptions are raised when illegal arguments are used. + + >>> TextTilingTokenizer(similarity_method='foo').tokenize(brown.raw()[0:1000]) + Traceback (most recent call last): + ... + ValueError: Similarity method foo not recognized + >>> TextTilingTokenizer(smoothing_method='bar').tokenize(brown.raw()[0:1000]) + Traceback (most recent call last): + ... + ValueError: Smoothing method bar not recognized + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/toolbox.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/toolbox.doctest new file mode 100644 index 0000000..1abf684 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/toolbox.doctest @@ -0,0 +1,307 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. 
For license information, see LICENSE.TXT + +=============================== +Unit test cases for ``toolbox`` +=============================== + + >>> from nltk import toolbox + +-------------------------- +``toolbox.StandardFormat`` +-------------------------- + + >>> f = toolbox.StandardFormat() + +``toolbox.StandardFormat.open()`` +--------------------------------- + >>> import os, tempfile + >>> (fd, fname) = tempfile.mkstemp() + >>> tf = os.fdopen(fd, "w") + >>> _ = tf.write('\\lx a value\n\\lx another value\n') + >>> tf.close() + >>> f = toolbox.StandardFormat() + >>> f.open(fname) + >>> list(f.fields()) + [('lx', 'a value'), ('lx', 'another value')] + >>> f.close() + >>> os.unlink(fname) + +``toolbox.StandardFormat.open_string()`` +---------------------------------------- + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\lx a value\n\\lx another value\n') + >>> list(f.fields()) + [('lx', 'a value'), ('lx', 'another value')] + >>> f.close() + +``toolbox.StandardFormat.close()`` +---------------------------------- + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\lx a value\n\\lx another value\n') + >>> list(f.fields()) + [('lx', 'a value'), ('lx', 'another value')] + >>> f.close() + +``toolbox.StandardFormat.line_num`` +--------------------------------------- + +``StandardFormat.line_num`` contains the line number of the last line returned: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\lx a value\n\\lx another value\n\\lx a third value\n') + >>> line_nums = [] + >>> for l in f.raw_fields(): + ... line_nums.append(f.line_num) + >>> line_nums + [1, 2, 3] + +``StandardFormat.line_num`` contains the line number of the last line returned: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\lx two\nlines\n\\lx three\nlines\n\n\\lx two\nlines\n') + >>> line_nums = [] + >>> for l in f.raw_fields(): + ... line_nums.append(f.line_num) + >>> line_nums + [2, 5, 7] + +``StandardFormat.line_num`` doesn't exist before openning or after closing +a file or string: + + >>> f = toolbox.StandardFormat() + >>> f.line_num + Traceback (most recent call last): + ... + AttributeError: 'StandardFormat' object has no attribute 'line_num' + >>> f.open_string('\\lx two\nlines\n\\lx three\nlines\n\n\\lx two\nlines\n') + >>> line_nums = [] + >>> for l in f.raw_fields(): + ... line_nums.append(f.line_num) + >>> line_nums + [2, 5, 7] + >>> f.close() + >>> f.line_num + Traceback (most recent call last): + ... + AttributeError: 'StandardFormat' object has no attribute 'line_num' + +``toolbox.StandardFormat.raw_fields()`` +--------------------------------------- +``raw_fields()`` returns an iterator over tuples of two strings representing the +marker and its value. 
The marker is given without the backslash and the value +without its trailing newline: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\lx a value\n\\lx another value\n') + >>> list(f.raw_fields()) + [('lx', 'a value'), ('lx', 'another value')] + +an empty file returns nothing: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('') + >>> list(f.raw_fields()) + [] + +file with only a newline returns WHAT SHOULD IT RETURN???: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\n') + >>> list(f.raw_fields()) + [(None, '')] + +file with only one field should be parsed ok: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\lx one value\n') + >>> list(f.raw_fields()) + [('lx', 'one value')] + +file without a trailing newline should be parsed ok: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\lx a value\n\\lx another value') + >>> list(f.raw_fields()) + [('lx', 'a value'), ('lx', 'another value')] + +trailing white space is preserved except for the final newline: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\lx trailing space \n\\lx trailing tab\t\n\\lx extra newline\n\n') + >>> list(f.raw_fields()) + [('lx', 'trailing space '), ('lx', 'trailing tab\t'), ('lx', 'extra newline\n')] + +line wrapping is preserved: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\lx a value\nmore of the value\nand still more\n\\lc another val\n') + >>> list(f.raw_fields()) + [('lx', 'a value\nmore of the value\nand still more'), ('lc', 'another val')] + +file beginning with a multiline record should be parsed ok: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\lx a value\nmore of the value\nand still more\n\\lc another val\n') + >>> list(f.raw_fields()) + [('lx', 'a value\nmore of the value\nand still more'), ('lc', 'another val')] + +file ending with a multiline record should be parsed ok: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\lc a value\n\\lx another value\nmore of the value\nand still more\n') + >>> list(f.raw_fields()) + [('lc', 'a value'), ('lx', 'another value\nmore of the value\nand still more')] + +file beginning with a BOM should be parsed ok: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\xef\xbb\xbf\\lx a value\n\\lx another value\n') + >>> list(f.raw_fields()) + [('lx', 'a value'), ('lx', 'another value')] + +file beginning with two BOMs should ignore only the first one: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\xef\xbb\xbf\xef\xbb\xbf\\lx a value\n\\lx another value\n') + >>> list(f.raw_fields()) + [(None, '\xef\xbb\xbf\\lx a value'), ('lx', 'another value')] + +should not ignore a BOM not at the beginning of the file: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\lx a value\n\xef\xbb\xbf\\lx another value\n') + >>> list(f.raw_fields()) + [('lx', 'a value\n\xef\xbb\xbf\\lx another value')] + +``toolbox.StandardFormat.fields()`` +----------------------------------- +trailing white space is not preserved: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\lx trailing space \n\\lx trailing tab\t\n\\lx extra newline\n\n') + >>> list(f.fields()) + [('lx', 'trailing space'), ('lx', 'trailing tab'), ('lx', 'extra newline')] + +multiline fields are unwrapped: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\lx a value\nmore of the value\nand still more\n\\lc another val\n') + >>> list(f.fields()) + [('lx', 'a value more of the value and still more'), ('lc', 'another val')] + +markers +------- +A backslash in the first position on a new line 
indicates the start of a +marker. The backslash is not part of the marker: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\mk a value\n') + >>> list(f.fields()) + [('mk', 'a value')] + +If the backslash occurs later in the line it does not indicate the start +of a marker: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\mk a value\n \\mk another one\n') + >>> list(f.raw_fields()) + [('mk', 'a value\n \\mk another one')] + +There is no specific limit to the length of a marker: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\this_is_an_extremely_long_marker value\n') + >>> list(f.fields()) + [('this_is_an_extremely_long_marker', 'value')] + +A marker can contain any non white space character: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\`~!@#$%^&*()_-=+[{]}\|,<.>/?;:"0123456789 value\n') + >>> list(f.fields()) + [('`~!@#$%^&*()_-=+[{]}\\|,<.>/?;:"0123456789', 'value')] + +A marker is terminated by any white space character: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\mk a value\n\\mk\tanother one\n\\mk\rthird one\n\\mk\ffourth one') + >>> list(f.fields()) + [('mk', 'a value'), ('mk', 'another one'), ('mk', 'third one'), ('mk', 'fourth one')] + +Consecutive whitespace characters (except newline) are treated the same as one: + + >>> f = toolbox.StandardFormat() + >>> f.open_string('\\mk \t\r\fa value\n') + >>> list(f.fields()) + [('mk', 'a value')] + +----------------------- +``toolbox.ToolboxData`` +----------------------- + + >>> db = toolbox.ToolboxData() + +``toolbox.ToolboxData.parse()`` +------------------------------- +check that normal parsing works: + + >>> from xml.etree import ElementTree + >>> td = toolbox.ToolboxData() + >>> s = """\\_sh v3.0 400 Rotokas Dictionary + ... \\_DateStampHasFourDigitYear + ... + ... \\lx kaa + ... \\ps V.A + ... \\ge gag + ... \\gp nek i pas + ... + ... \\lx kaa + ... \\ps V.B + ... \\ge strangle + ... \\gp pasim nek + ... """ + >>> td.open_string(s) + >>> tree = td.parse(key='lx') + >>> tree.tag + 'toolbox_data' + >>> ElementTree.tostring(list(tree)[0]).decode('utf8') + '
    <header><_sh>v3.0 400 Rotokas Dictionary</_sh><_DateStampHasFourDigitYear /></header>
    ' + >>> ElementTree.tostring(list(tree)[1]).decode('utf8') + 'kaaV.Agagnek i pas' + >>> ElementTree.tostring(list(tree)[2]).decode('utf8') + 'kaaV.Bstranglepasim nek' + +check that guessing the key marker works: + + >>> from xml.etree import ElementTree + >>> td = toolbox.ToolboxData() + >>> s = """\\_sh v3.0 400 Rotokas Dictionary + ... \\_DateStampHasFourDigitYear + ... + ... \\lx kaa + ... \\ps V.A + ... \\ge gag + ... \\gp nek i pas + ... + ... \\lx kaa + ... \\ps V.B + ... \\ge strangle + ... \\gp pasim nek + ... """ + >>> td.open_string(s) + >>> tree = td.parse() + >>> ElementTree.tostring(list(tree)[0]).decode('utf8') + '
    <header><_sh>v3.0 400 Rotokas Dictionary</_sh><_DateStampHasFourDigitYear /></header>
    ' + >>> ElementTree.tostring(list(tree)[1]).decode('utf8') + 'kaaV.Agagnek i pas' + >>> ElementTree.tostring(list(tree)[2]).decode('utf8') + 'kaaV.Bstranglepasim nek' + +----------------------- +``toolbox`` functions +----------------------- + +``toolbox.to_sfm_string()`` +------------------------------- + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/translate.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/translate.doctest new file mode 100644 index 0000000..6a1bb70 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/translate.doctest @@ -0,0 +1,242 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +.. -*- coding: utf-8 -*- + +========= +Alignment +========= + +Corpus Reader +------------- + + >>> from nltk.corpus import comtrans + >>> words = comtrans.words('alignment-en-fr.txt') + >>> for word in words[:6]: + ... print(word) + Resumption + of + the + session + I + declare + >>> als = comtrans.aligned_sents('alignment-en-fr.txt')[0] + >>> als # doctest: +NORMALIZE_WHITESPACE + AlignedSent(['Resumption', 'of', 'the', 'session'], + ['Reprise', 'de', 'la', 'session'], + Alignment([(0, 0), (1, 1), (2, 2), (3, 3)])) + + +Alignment Objects +----------------- + +Aligned sentences are simply a mapping between words in a sentence: + + >>> print(" ".join(als.words)) + Resumption of the session + >>> print(" ".join(als.mots)) + Reprise de la session + >>> als.alignment + Alignment([(0, 0), (1, 1), (2, 2), (3, 3)]) + + +Usually we look at them from the perspective of a source to a target language, +but they are easily inverted: + + >>> als.invert() # doctest: +NORMALIZE_WHITESPACE + AlignedSent(['Reprise', 'de', 'la', 'session'], + ['Resumption', 'of', 'the', 'session'], + Alignment([(0, 0), (1, 1), (2, 2), (3, 3)])) + + +We can create new alignments, but these need to be in the correct range of +the corresponding sentences: + + >>> from nltk.translate import Alignment, AlignedSent + >>> als = AlignedSent(['Reprise', 'de', 'la', 'session'], + ... ['Resumption', 'of', 'the', 'session'], + ... Alignment([(0, 0), (1, 4), (2, 1), (3, 3)])) + Traceback (most recent call last): + ... + IndexError: Alignment is outside boundary of mots + + +You can set alignments with any sequence of tuples, so long as the first two +indexes of the tuple are the alignment indices: + + >>> als.alignment = Alignment([(0, 0), (1, 1), (2, 2, "boat"), (3, 3, False, (1,2))]) + + >>> Alignment([(0, 0), (1, 1), (2, 2, "boat"), (3, 3, False, (1,2))]) + Alignment([(0, 0), (1, 1), (2, 2, 'boat'), (3, 3, False, (1, 2))]) + + +Alignment Algorithms +-------------------- + +EM for IBM Model 1 +~~~~~~~~~~~~~~~~~~ + +Here is an example from Koehn, 2010: + + >>> from nltk.translate import IBMModel1 + >>> corpus = [AlignedSent(['the', 'house'], ['das', 'Haus']), + ... AlignedSent(['the', 'book'], ['das', 'Buch']), + ... 
AlignedSent(['a', 'book'], ['ein', 'Buch'])] + >>> em_ibm1 = IBMModel1(corpus, 20) + >>> print(round(em_ibm1.translation_table['the']['das'], 1)) + 1.0 + >>> print(round(em_ibm1.translation_table['book']['das'], 1)) + 0.0 + >>> print(round(em_ibm1.translation_table['house']['das'], 1)) + 0.0 + >>> print(round(em_ibm1.translation_table['the']['Buch'], 1)) + 0.0 + >>> print(round(em_ibm1.translation_table['book']['Buch'], 1)) + 1.0 + >>> print(round(em_ibm1.translation_table['a']['Buch'], 1)) + 0.0 + >>> print(round(em_ibm1.translation_table['book']['ein'], 1)) + 0.0 + >>> print(round(em_ibm1.translation_table['a']['ein'], 1)) + 1.0 + >>> print(round(em_ibm1.translation_table['the']['Haus'], 1)) + 0.0 + >>> print(round(em_ibm1.translation_table['house']['Haus'], 1)) + 1.0 + >>> print(round(em_ibm1.translation_table['book'][None], 1)) + 0.5 + +And using an NLTK corpus. We train on only 10 sentences, since it is so slow: + + >>> from nltk.corpus import comtrans + >>> com_ibm1 = IBMModel1(comtrans.aligned_sents()[:10], 20) + >>> print(round(com_ibm1.translation_table['bitte']['Please'], 1)) + 0.2 + >>> print(round(com_ibm1.translation_table['Sitzungsperiode']['session'], 1)) + 1.0 + + +Evaluation +---------- +The evaluation metrics for alignments are usually not interested in the +contents of alignments but more often the comparison to a "gold standard" +alignment that has been been constructed by human experts. For this reason we +often want to work just with raw set operations against the alignment points. +This then gives us a very clean form for defining our evaluation metrics. + +.. Note:: + The AlignedSent class has no distinction of "possible" or "sure" + alignments. Thus all alignments are treated as "sure". + +Consider the following aligned sentence for evaluation: + + >>> my_als = AlignedSent(['Resumption', 'of', 'the', 'session'], + ... ['Reprise', 'de', 'la', 'session'], + ... Alignment([(0, 0), (3, 3), (1, 2), (1, 1), (1, 3)])) + +Precision +~~~~~~~~~ +``precision = |A∩P| / |A|`` + +**Precision** is probably the most well known evaluation metric and it is implemented +in `nltk.metrics.scores.precision`_. Since precision is simply interested in the +proportion of correct alignments, we calculate the ratio of the number of our +test alignments (*A*) that match a possible alignment (*P*), over the number of +test alignments provided. There is no penalty for missing a possible alignment +in our test alignments. An easy way to game this metric is to provide just one +test alignment that is in *P* [OCH2000]_. + +Here are some examples: + + >>> from nltk.metrics import precision + >>> als.alignment = Alignment([(0,0), (1,1), (2,2), (3,3)]) + >>> precision(Alignment([]), als.alignment) + 0.0 + >>> precision(Alignment([(0,0), (1,1), (2,2), (3,3)]), als.alignment) + 1.0 + >>> precision(Alignment([(0,0), (3,3)]), als.alignment) + 0.5 + >>> precision(Alignment.fromstring('0-0 3-3'), als.alignment) + 0.5 + >>> precision(Alignment([(0,0), (1,1), (2,2), (3,3), (1,2), (2,1)]), als.alignment) + 1.0 + >>> precision(als.alignment, my_als.alignment) + 0.6 + + +.. _nltk.metrics.scores.precision: + http://www.nltk.org/api/nltk.metrics.html#nltk.metrics.scores.precision + + +Recall +~~~~~~ +``recall = |A∩S| / |S|`` + +**Recall** is another well known evaluation metric that has a set based +implementation in NLTK as `nltk.metrics.scores.recall`_. 
Since recall is +simply interested in the proportion of found alignments, we calculate the +ratio of the number of our test alignments (*A*) that match a sure alignment +(*S*) over the number of sure alignments. There is no penalty for producing +a lot of test alignments. An easy way to game this metric is to include every +possible alignment in our test alignments, regardless if they are correct or +not [OCH2000]_. + +Here are some examples: + + >>> from nltk.metrics import recall + >>> print(recall(Alignment([]), als.alignment)) + None + >>> recall(Alignment([(0,0), (1,1), (2,2), (3,3)]), als.alignment) + 1.0 + >>> recall(Alignment.fromstring('0-0 3-3'), als.alignment) + 1.0 + >>> recall(Alignment([(0,0), (3,3)]), als.alignment) + 1.0 + >>> recall(Alignment([(0,0), (1,1), (2,2), (3,3), (1,2), (2,1)]), als.alignment) + 0.66666... + >>> recall(als.alignment, my_als.alignment) + 0.75 + + +.. _nltk.metrics.scores.recall: + http://www.nltk.org/api/nltk.metrics.html#nltk.metrics.scores.recall + + +Alignment Error Rate (AER) +~~~~~~~~~~~~~~~~~~~~~~~~~~ +``AER = 1 - (|A∩S| + |A∩P|) / (|A| + |S|)`` + +**Alignment Error Rate** is commonly used metric for assessing sentence +alignments. It combines precision and recall metrics together such that a +perfect alignment must have all of the sure alignments and may have some +possible alignments [MIHALCEA2003]_ [KOEHN2010]_. + +.. Note:: + [KOEHN2010]_ defines the AER as ``AER = (|A∩S| + |A∩P|) / (|A| + |S|)`` + in his book, but corrects it to the above in his online errata. This is + in line with [MIHALCEA2003]_. + +Here are some examples: + + >>> from nltk.translate import alignment_error_rate + >>> alignment_error_rate(Alignment([]), als.alignment) + 1.0 + >>> alignment_error_rate(Alignment([(0,0), (1,1), (2,2), (3,3)]), als.alignment) + 0.0 + >>> alignment_error_rate(als.alignment, my_als.alignment) + 0.333333... + >>> alignment_error_rate(als.alignment, my_als.alignment, + ... als.alignment | Alignment([(1,2), (2,1)])) + 0.222222... + + +.. [OCH2000] Och, F. and Ney, H. (2000) + *Statistical Machine Translation*, EAMT Workshop + +.. [MIHALCEA2003] Mihalcea, R. and Pedersen, T. (2003) + *An evaluation exercise for word alignment*, HLT-NAACL 2003 + +.. [KOEHN2010] Koehn, P. (2010) + *Statistical Machine Translation*, Cambridge University Press + + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/translate_fixt.py b/venv.bak/lib/python3.7/site-packages/nltk/test/translate_fixt.py new file mode 100644 index 0000000..ce0cd83 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/translate_fixt.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from nltk.corpus import teardown_module diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/tree.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/tree.doctest new file mode 100644 index 0000000..a4b93ed --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/tree.doctest @@ -0,0 +1,1101 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. 
For license information, see LICENSE.TXT + +=============================== + Unit tests for nltk.tree.Tree +=============================== + + >>> from nltk.tree import * + +Some trees to run tests on: + + >>> dp1 = Tree('dp', [Tree('d', ['the']), Tree('np', ['dog'])]) + >>> dp2 = Tree('dp', [Tree('d', ['the']), Tree('np', ['cat'])]) + >>> vp = Tree('vp', [Tree('v', ['chased']), dp2]) + >>> tree = Tree('s', [dp1, vp]) + >>> print(tree) + (s (dp (d the) (np dog)) (vp (v chased) (dp (d the) (np cat)))) + +The node label is accessed using the `label()` method: + + >>> dp1.label(), dp2.label(), vp.label(), tree.label() + ('dp', 'dp', 'vp', 's') + + >>> print(tree[1,1,1,0]) + cat + +The `treepositions` method returns a list of the tree positions of +subtrees and leaves in a tree. By default, it gives the position of +every tree, subtree, and leaf, in prefix order: + + >>> print(tree.treepositions()) + [(), (0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1,), (1, 0), (1, 0, 0), (1, 1), (1, 1, 0), (1, 1, 0, 0), (1, 1, 1), (1, 1, 1, 0)] + +In addition to `str` and `repr`, several methods exist to convert a +tree object to one of several standard tree encodings: + + >>> print(tree.pformat_latex_qtree()) + \Tree [.s + [.dp [.d the ] [.np dog ] ] + [.vp [.v chased ] [.dp [.d the ] [.np cat ] ] ] ] + +There is also a fancy ASCII art representation: + + >>> tree.pretty_print() + s + ________|_____ + | vp + | _____|___ + dp | dp + ___|___ | ___|___ + d np v d np + | | | | | + the dog chased the cat + + >>> tree.pretty_print(unicodelines=True, nodedist=4) + s + ┌──────────────┴────────┐ + │ vp + │ ┌────────┴──────┐ + dp │ dp + ┌──────┴──────┐ │ ┌──────┴──────┐ + d np v d np + │ │ │ │ │ + the dog chased the cat + +Trees can be initialized from treebank strings: + + >>> tree2 = Tree.fromstring('(S (NP I) (VP (V enjoyed) (NP my cookie)))') + >>> print(tree2) + (S (NP I) (VP (V enjoyed) (NP my cookie))) + +Trees can be compared for equality: + + >>> tree == Tree.fromstring(str(tree)) + True + >>> tree2 == Tree.fromstring(str(tree2)) + True + >>> tree == tree2 + False + >>> tree == Tree.fromstring(str(tree2)) + False + >>> tree2 == Tree.fromstring(str(tree)) + False + + >>> tree != Tree.fromstring(str(tree)) + False + >>> tree2 != Tree.fromstring(str(tree2)) + False + >>> tree != tree2 + True + >>> tree != Tree.fromstring(str(tree2)) + True + >>> tree2 != Tree.fromstring(str(tree)) + True + + >>> tree < tree2 or tree > tree2 + True + +Tree Parsing +============ + +The class method `Tree.fromstring()` can be used to parse trees, and it +provides some additional options. + + >>> tree = Tree.fromstring('(S (NP I) (VP (V enjoyed) (NP my cookie)))') + >>> print(tree) + (S (NP I) (VP (V enjoyed) (NP my cookie))) + +When called on a subclass of `Tree`, it will create trees of that +type: + + >>> tree = ImmutableTree.fromstring('(VP (V enjoyed) (NP my cookie))') + >>> print(tree) + (VP (V enjoyed) (NP my cookie)) + >>> print(type(tree)) + + >>> tree[1] = 'x' + Traceback (most recent call last): + . . . + ValueError: ImmutableTree may not be modified + >>> del tree[0] + Traceback (most recent call last): + . . . + ValueError: ImmutableTree may not be modified + +The ``brackets`` parameter can be used to specify two characters that +should be used as brackets: + + >>> print(Tree.fromstring('[S [NP I] [VP [V enjoyed] [NP my cookie]]]', + ... brackets='[]')) + (S (NP I) (VP (V enjoyed) (NP my cookie))) + >>> print(Tree.fromstring(' >>', + ... 
brackets='<>')) + (S (NP I) (VP (V enjoyed) (NP my cookie))) + +If ``brackets`` is not a string, or is not exactly two characters, +then `Tree.fromstring` raises an exception: + + >>> Tree.fromstring(' >', brackets='') + Traceback (most recent call last): + . . . + TypeError: brackets must be a length-2 string + >>> Tree.fromstring(' >', brackets='<<>>') + Traceback (most recent call last): + . . . + TypeError: brackets must be a length-2 string + >>> Tree.fromstring(' >', brackets=12) + Traceback (most recent call last): + . . . + TypeError: brackets must be a length-2 string + >>> Tree.fromstring('<>', brackets=('<<','>>')) + Traceback (most recent call last): + . . . + TypeError: brackets must be a length-2 string + +(We may add support for multi-character brackets in the future, in +which case the ``brackets=('<<','>>')`` example would start working.) + +Whitespace brackets are not permitted: + + >>> Tree.fromstring('(NP my cookie\n', brackets='(\n') + Traceback (most recent call last): + . . . + TypeError: whitespace brackets not allowed + +If an invalid tree is given to Tree.fromstring, then it raises a +ValueError, with a description of the problem: + + >>> Tree.fromstring('(NP my cookie) (NP my milk)') + Traceback (most recent call last): + . . . + ValueError: Tree.fromstring(): expected 'end-of-string' but got '(NP' + at index 15. + "...y cookie) (NP my mil..." + ^ + >>> Tree.fromstring(')NP my cookie(') + Traceback (most recent call last): + . . . + ValueError: Tree.fromstring(): expected '(' but got ')' + at index 0. + ")NP my coo..." + ^ + >>> Tree.fromstring('(NP my cookie))') + Traceback (most recent call last): + . . . + ValueError: Tree.fromstring(): expected 'end-of-string' but got ')' + at index 14. + "...my cookie))" + ^ + >>> Tree.fromstring('my cookie)') + Traceback (most recent call last): + . . . + ValueError: Tree.fromstring(): expected '(' but got 'my' + at index 0. + "my cookie)" + ^ + >>> Tree.fromstring('(NP my cookie') + Traceback (most recent call last): + . . . + ValueError: Tree.fromstring(): expected ')' but got 'end-of-string' + at index 13. + "... my cookie" + ^ + >>> Tree.fromstring('') + Traceback (most recent call last): + . . . + ValueError: Tree.fromstring(): expected '(' but got 'end-of-string' + at index 0. + "" + ^ + +Trees with no children are supported: + + >>> print(Tree.fromstring('(S)')) + (S ) + >>> print(Tree.fromstring('(X (Y) (Z))')) + (X (Y ) (Z )) + +Trees with an empty node label and no children are supported: + + >>> print(Tree.fromstring('()')) + ( ) + >>> print(Tree.fromstring('(X () ())')) + (X ( ) ( )) + +Trees with an empty node label and children are supported, but only if the +first child is not a leaf (otherwise, it will be treated as the node label). + + >>> print(Tree.fromstring('((A) (B) (C))')) + ( (A ) (B ) (C )) + >>> print(Tree.fromstring('((A) leaf)')) + ( (A ) leaf) + >>> print(Tree.fromstring('(((())))')) + ( ( ( ( )))) + +The optional arguments `read_node` and `read_leaf` may be used to +transform the string values of nodes or leaves. + + >>> print(Tree.fromstring('(A b (C d e) (F (G h i)))', + ... read_node=lambda s: '<%s>' % s, + ... read_leaf=lambda s: '"%s"' % s)) + (
    "b" ( "d" "e") ( ( "h" "i"))) + +These transformation functions are typically used when the node or +leaf labels should be parsed to a non-string value (such as a feature +structure). If node and leaf labels need to be able to include +whitespace, then you must also use the optional `node_pattern` and +`leaf_pattern` arguments. + + >>> from nltk.featstruct import FeatStruct + >>> tree = Tree.fromstring('([cat=NP] [lex=the] [lex=dog])', + ... read_node=FeatStruct, read_leaf=FeatStruct) + >>> tree.set_label(tree.label().unify(FeatStruct('[num=singular]'))) + >>> print(tree) + ([cat='NP', num='singular'] [lex='the'] [lex='dog']) + +The optional argument ``remove_empty_top_bracketing`` can be used to +remove any top-level empty bracketing that occurs. + + >>> print(Tree.fromstring('((S (NP I) (VP (V enjoyed) (NP my cookie))))', + ... remove_empty_top_bracketing=True)) + (S (NP I) (VP (V enjoyed) (NP my cookie))) + +It will not remove a top-level empty bracketing with multiple children: + + >>> print(Tree.fromstring('((A a) (B b))')) + ( (A a) (B b)) + +Parented Trees +============== +`ParentedTree` is a subclass of `Tree` that automatically maintains +parent pointers for single-parented trees. Parented trees can be +created directly from a node label and a list of children: + + >>> ptree = ( + ... ParentedTree('VP', [ + ... ParentedTree('VERB', ['saw']), + ... ParentedTree('NP', [ + ... ParentedTree('DET', ['the']), + ... ParentedTree('NOUN', ['dog'])])])) + >>> print(ptree) + (VP (VERB saw) (NP (DET the) (NOUN dog))) + +Parented trees can be created from strings using the classmethod +`ParentedTree.fromstring`: + + >>> ptree = ParentedTree.fromstring('(VP (VERB saw) (NP (DET the) (NOUN dog)))') + >>> print(ptree) + (VP (VERB saw) (NP (DET the) (NOUN dog))) + >>> print(type(ptree)) + + +Parented trees can also be created by using the classmethod +`ParentedTree.convert` to convert another type of tree to a parented +tree: + + >>> tree = Tree.fromstring('(VP (VERB saw) (NP (DET the) (NOUN dog)))') + >>> ptree = ParentedTree.convert(tree) + >>> print(ptree) + (VP (VERB saw) (NP (DET the) (NOUN dog))) + >>> print(type(ptree)) + + +.. clean-up: + + >>> del tree + +`ParentedTree`\ s should never be used in the same tree as `Tree`\ s +or `MultiParentedTree`\ s. Mixing tree implementations may result in +incorrect parent pointers and in `TypeError` exceptions: + + >>> # Inserting a Tree in a ParentedTree gives an exception: + >>> ParentedTree('NP', [ + ... Tree('DET', ['the']), Tree('NOUN', ['dog'])]) + Traceback (most recent call last): + . . . + TypeError: Can not insert a non-ParentedTree into a ParentedTree + + >>> # inserting a ParentedTree in a Tree gives incorrect parent pointers: + >>> broken_tree = Tree('NP', [ + ... ParentedTree('DET', ['the']), ParentedTree('NOUN', ['dog'])]) + >>> print(broken_tree[0].parent()) + None + +Parented Tree Methods +------------------------ +In addition to all the methods defined by the `Tree` class, the +`ParentedTree` class adds six new methods whose values are +automatically updated whenver a parented tree is modified: `parent()`, +`parent_index()`, `left_sibling()`, `right_sibling()`, `root()`, and +`treeposition()`. + +The `parent()` method contains a `ParentedTree`\ 's parent, if it has +one; and ``None`` otherwise. `ParentedTree`\ s that do not have +parents are known as "root trees." + + >>> for subtree in ptree.subtrees(): + ... print(subtree) + ... 
print(' Parent = %s' % subtree.parent()) + (VP (VERB saw) (NP (DET the) (NOUN dog))) + Parent = None + (VERB saw) + Parent = (VP (VERB saw) (NP (DET the) (NOUN dog))) + (NP (DET the) (NOUN dog)) + Parent = (VP (VERB saw) (NP (DET the) (NOUN dog))) + (DET the) + Parent = (NP (DET the) (NOUN dog)) + (NOUN dog) + Parent = (NP (DET the) (NOUN dog)) + +The `parent_index()` method stores the index of a tree in its parent's +child list. If a tree does not have a parent, then its `parent_index` +is ``None``. + + >>> for subtree in ptree.subtrees(): + ... print(subtree) + ... print(' Parent Index = %s' % subtree.parent_index()) + ... assert (subtree.parent() is None or + ... subtree.parent()[subtree.parent_index()] is subtree) + (VP (VERB saw) (NP (DET the) (NOUN dog))) + Parent Index = None + (VERB saw) + Parent Index = 0 + (NP (DET the) (NOUN dog)) + Parent Index = 1 + (DET the) + Parent Index = 0 + (NOUN dog) + Parent Index = 1 + +Note that ``ptree.parent().index(ptree)`` is *not* equivalent to +``ptree.parent_index()``. In particular, ``ptree.parent().index(ptree)`` +will return the index of the first child of ``ptree.parent()`` that is +equal to ``ptree`` (using ``==``); and that child may not be +``ptree``: + + >>> on_and_on = ParentedTree('CONJP', [ + ... ParentedTree('PREP', ['on']), + ... ParentedTree('COJN', ['and']), + ... ParentedTree('PREP', ['on'])]) + >>> second_on = on_and_on[2] + >>> print(second_on.parent_index()) + 2 + >>> print(second_on.parent().index(second_on)) + 0 + +The methods `left_sibling()` and `right_sibling()` can be used to get a +parented tree's siblings. If a tree does not have a left or right +sibling, then the corresponding method's value is ``None``: + + >>> for subtree in ptree.subtrees(): + ... print(subtree) + ... print(' Left Sibling = %s' % subtree.left_sibling()) + ... print(' Right Sibling = %s' % subtree.right_sibling()) + (VP (VERB saw) (NP (DET the) (NOUN dog))) + Left Sibling = None + Right Sibling = None + (VERB saw) + Left Sibling = None + Right Sibling = (NP (DET the) (NOUN dog)) + (NP (DET the) (NOUN dog)) + Left Sibling = (VERB saw) + Right Sibling = None + (DET the) + Left Sibling = None + Right Sibling = (NOUN dog) + (NOUN dog) + Left Sibling = (DET the) + Right Sibling = None + +A parented tree's root tree can be accessed using the `root()` +method. This method follows the tree's parent pointers until it +finds a tree without a parent. If a tree does not have a parent, then +it is its own root: + + >>> for subtree in ptree.subtrees(): + ... print(subtree) + ... print(' Root = %s' % subtree.root()) + (VP (VERB saw) (NP (DET the) (NOUN dog))) + Root = (VP (VERB saw) (NP (DET the) (NOUN dog))) + (VERB saw) + Root = (VP (VERB saw) (NP (DET the) (NOUN dog))) + (NP (DET the) (NOUN dog)) + Root = (VP (VERB saw) (NP (DET the) (NOUN dog))) + (DET the) + Root = (VP (VERB saw) (NP (DET the) (NOUN dog))) + (NOUN dog) + Root = (VP (VERB saw) (NP (DET the) (NOUN dog))) + +The `treeposition()` method can be used to find a tree's treeposition +relative to its root: + + >>> for subtree in ptree.subtrees(): + ... print(subtree) + ... print(' Tree Position = %s' % (subtree.treeposition(),)) + ... 
assert subtree.root()[subtree.treeposition()] is subtree + (VP (VERB saw) (NP (DET the) (NOUN dog))) + Tree Position = () + (VERB saw) + Tree Position = (0,) + (NP (DET the) (NOUN dog)) + Tree Position = (1,) + (DET the) + Tree Position = (1, 0) + (NOUN dog) + Tree Position = (1, 1) + +Whenever a parented tree is modified, all of the methods described +above (`parent()`, `parent_index()`, `left_sibling()`, `right_sibling()`, +`root()`, and `treeposition()`) are automatically updated. For example, +if we replace ``ptree``\ 's subtree for the word "dog" with a new +subtree for "cat," the method values for both the "dog" subtree and the +"cat" subtree get automatically updated: + + >>> # Replace the dog with a cat + >>> dog = ptree[1,1] + >>> cat = ParentedTree('NOUN', ['cat']) + >>> ptree[1,1] = cat + + >>> # the noun phrase is no longer the dog's parent: + >>> print(dog.parent(), dog.parent_index(), dog.left_sibling()) + None None None + >>> # dog is now its own root. + >>> print(dog.root()) + (NOUN dog) + >>> print(dog.treeposition()) + () + + >>> # the cat's parent is now the noun phrase: + >>> print(cat.parent()) + (NP (DET the) (NOUN cat)) + >>> print(cat.parent_index()) + 1 + >>> print(cat.left_sibling()) + (DET the) + >>> print(cat.root()) + (VP (VERB saw) (NP (DET the) (NOUN cat))) + >>> print(cat.treeposition()) + (1, 1) + +ParentedTree Regression Tests +----------------------------- +Keep track of all trees that we create (including subtrees) using this +variable: + + >>> all_ptrees = [] + +Define a helper funciton to create new parented trees: + + >>> def make_ptree(s): + ... ptree = ParentedTree.convert(Tree.fromstring(s)) + ... all_ptrees.extend(t for t in ptree.subtrees() + ... if isinstance(t, Tree)) + ... return ptree + +Define a test function that examines every subtree in all_ptrees; and +checks that all six of its methods are defined correctly. If any +ptrees are passed as arguments, then they are printed. + + >>> def pcheck(*print_ptrees): + ... for ptree in all_ptrees: + ... # Check ptree's methods. + ... if ptree.parent() is not None: + ... i = ptree.parent_index() + ... assert ptree.parent()[i] is ptree + ... if i > 0: + ... assert ptree.left_sibling() is ptree.parent()[i-1] + ... if i < (len(ptree.parent())-1): + ... assert ptree.right_sibling() is ptree.parent()[i+1] + ... assert len(ptree.treeposition()) > 0 + ... assert (ptree.treeposition() == + ... ptree.parent().treeposition() + (ptree.parent_index(),)) + ... assert ptree.root() is not ptree + ... assert ptree.root() is not None + ... assert ptree.root() is ptree.parent().root() + ... assert ptree.root()[ptree.treeposition()] is ptree + ... else: + ... assert ptree.parent_index() is None + ... assert ptree.left_sibling() is None + ... assert ptree.right_sibling() is None + ... assert ptree.root() is ptree + ... assert ptree.treeposition() == () + ... # Check ptree's children's methods: + ... for i, child in enumerate(ptree): + ... if isinstance(child, Tree): + ... # pcheck parent() & parent_index() methods + ... assert child.parent() is ptree + ... assert child.parent_index() == i + ... # pcheck sibling methods + ... if i == 0: + ... assert child.left_sibling() is None + ... else: + ... assert child.left_sibling() is ptree[i-1] + ... if i == len(ptree)-1: + ... assert child.right_sibling() is None + ... else: + ... assert child.right_sibling() is ptree[i+1] + ... if print_ptrees: + ... print('ok!', end=' ') + ... for ptree in print_ptrees: print(ptree) + ... else: + ... 
print('ok!') + +Run our test function on a variety of newly-created trees: + + >>> pcheck(make_ptree('(A)')) + ok! (A ) + >>> pcheck(make_ptree('(A (B (C (D) (E f)) g) h)')) + ok! (A (B (C (D ) (E f)) g) h) + >>> pcheck(make_ptree('(A (B) (C c) (D d d) (E e e e))')) + ok! (A (B ) (C c) (D d d) (E e e e)) + >>> pcheck(make_ptree('(A (B) (C (c)) (D (d) (d)) (E (e) (e) (e)))')) + ok! (A (B ) (C (c )) (D (d ) (d )) (E (e ) (e ) (e ))) + +Run our test function after performing various tree-modification +operations: + +**__delitem__()** + + >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)') + >>> e = ptree[0,0,1] + >>> del ptree[0,0,1]; pcheck(ptree); pcheck(e) + ok! (A (B (C (D ) (Q p)) g) h) + ok! (E f) + >>> del ptree[0,0,0]; pcheck(ptree) + ok! (A (B (C (Q p)) g) h) + >>> del ptree[0,1]; pcheck(ptree) + ok! (A (B (C (Q p))) h) + >>> del ptree[-1]; pcheck(ptree) + ok! (A (B (C (Q p)))) + >>> del ptree[-100] + Traceback (most recent call last): + . . . + IndexError: index out of range + >>> del ptree[()] + Traceback (most recent call last): + . . . + IndexError: The tree position () may not be deleted. + + >>> # With slices: + >>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))') + >>> b = ptree[0] + >>> del ptree[0:0]; pcheck(ptree) + ok! (A (B c) (D e) f g (H i) j (K l)) + >>> del ptree[:1]; pcheck(ptree); pcheck(b) + ok! (A (D e) f g (H i) j (K l)) + ok! (B c) + >>> del ptree[-2:]; pcheck(ptree) + ok! (A (D e) f g (H i)) + >>> del ptree[1:3]; pcheck(ptree) + ok! (A (D e) (H i)) + >>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))') + >>> del ptree[5:1000]; pcheck(ptree) + ok! (A (B c) (D e) f g (H i)) + >>> del ptree[-2:1000]; pcheck(ptree) + ok! (A (B c) (D e) f) + >>> del ptree[-100:1]; pcheck(ptree) + ok! (A (D e) f) + >>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))') + >>> del ptree[1:-2:2]; pcheck(ptree) + ok! (A (B c) f (H i) j (K l)) + +**__setitem__()** + + >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)') + >>> d, e, q = ptree[0,0] + >>> ptree[0,0,0] = 'x'; pcheck(ptree); pcheck(d) + ok! (A (B (C x (E f) (Q p)) g) h) + ok! (D ) + >>> ptree[0,0,1] = make_ptree('(X (Y z))'); pcheck(ptree); pcheck(e) + ok! (A (B (C x (X (Y z)) (Q p)) g) h) + ok! (E f) + >>> ptree[1] = d; pcheck(ptree) + ok! (A (B (C x (X (Y z)) (Q p)) g) (D )) + >>> ptree[-1] = 'x'; pcheck(ptree) + ok! (A (B (C x (X (Y z)) (Q p)) g) x) + >>> ptree[-100] = 'y' + Traceback (most recent call last): + . . . + IndexError: index out of range + >>> ptree[()] = make_ptree('(X y)') + Traceback (most recent call last): + . . . + IndexError: The tree position () may not be assigned to. + + >>> # With slices: + >>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))') + >>> b = ptree[0] + >>> ptree[0:0] = ('x', make_ptree('(Y)')); pcheck(ptree) + ok! (A x (Y ) (B c) (D e) f g (H i) j (K l)) + >>> ptree[2:6] = (); pcheck(ptree); pcheck(b) + ok! (A x (Y ) (H i) j (K l)) + ok! (B c) + >>> ptree[-2:] = ('z', 'p'); pcheck(ptree) + ok! (A x (Y ) (H i) z p) + >>> ptree[1:3] = [make_ptree('(X)') for x in range(10)]; pcheck(ptree) + ok! (A x (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) z p) + >>> ptree[5:1000] = []; pcheck(ptree) + ok! (A x (X ) (X ) (X ) (X )) + >>> ptree[-2:1000] = ['n']; pcheck(ptree) + ok! (A x (X ) (X ) n) + >>> ptree[-100:1] = [make_ptree('(U v)')]; pcheck(ptree) + ok! (A (U v) (X ) (X ) n) + >>> ptree[-1:] = (make_ptree('(X)') for x in range(3)); pcheck(ptree) + ok! (A (U v) (X ) (X ) (X ) (X ) (X )) + >>> ptree[1:-2:2] = ['x', 'y']; pcheck(ptree) + ok! 
(A (U v) x (X ) y (X ) (X )) + +**append()** + + >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)') + >>> ptree.append('x'); pcheck(ptree) + ok! (A (B (C (D ) (E f) (Q p)) g) h x) + >>> ptree.append(make_ptree('(X (Y z))')); pcheck(ptree) + ok! (A (B (C (D ) (E f) (Q p)) g) h x (X (Y z))) + +**extend()** + + >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)') + >>> ptree.extend(['x', 'y', make_ptree('(X (Y z))')]); pcheck(ptree) + ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z))) + >>> ptree.extend([]); pcheck(ptree) + ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z))) + >>> ptree.extend(make_ptree('(X)') for x in range(3)); pcheck(ptree) + ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)) (X ) (X ) (X )) + +**insert()** + + >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)') + >>> ptree.insert(0, make_ptree('(X (Y z))')); pcheck(ptree) + ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) h) + >>> ptree.insert(-1, make_ptree('(X (Y z))')); pcheck(ptree) + ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h) + >>> ptree.insert(-4, make_ptree('(X (Y z))')); pcheck(ptree) + ok! (A (X (Y z)) (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h) + >>> # Note: as with ``list``, inserting at a negative index that + >>> # gives a position before the start of the list does *not* + >>> # raise an IndexError exception; it just inserts at 0. + >>> ptree.insert(-400, make_ptree('(X (Y z))')); pcheck(ptree) + ok! (A + (X (Y z)) + (X (Y z)) + (X (Y z)) + (B (C (D ) (E f) (Q p)) g) + (X (Y z)) + h) + +**pop()** + + >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)') + >>> ptree[0,0].pop(1); pcheck(ptree) + ParentedTree('E', ['f']) + ok! (A (B (C (D ) (Q p)) g) h) + >>> ptree[0].pop(-1); pcheck(ptree) + 'g' + ok! (A (B (C (D ) (Q p))) h) + >>> ptree.pop(); pcheck(ptree) + 'h' + ok! (A (B (C (D ) (Q p)))) + >>> ptree.pop(-100) + Traceback (most recent call last): + . . . + IndexError: index out of range + +**remove()** + + >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)') + >>> e = ptree[0,0,1] + >>> ptree[0,0].remove(ptree[0,0,1]); pcheck(ptree); pcheck(e) + ok! (A (B (C (D ) (Q p)) g) h) + ok! (E f) + >>> ptree[0,0].remove(make_ptree('(Q p)')); pcheck(ptree) + ok! (A (B (C (D )) g) h) + >>> ptree[0,0].remove(make_ptree('(Q p)')) + Traceback (most recent call last): + . . . + ValueError: ParentedTree('Q', ['p']) is not in list + >>> ptree.remove('h'); pcheck(ptree) + ok! (A (B (C (D )) g)) + >>> ptree.remove('h'); + Traceback (most recent call last): + . . . + ValueError: 'h' is not in list + >>> # remove() removes the first subtree that is equal (==) to the + >>> # given tree, which may not be the identical tree we give it: + >>> ptree = make_ptree('(A (X x) (Y y) (X x))') + >>> x1, y, x2 = ptree + >>> ptree.remove(ptree[-1]); pcheck(ptree) + ok! (A (Y y) (X x)) + >>> print(x1.parent()); pcheck(x1) + None + ok! (X x) + >>> print(x2.parent()) + (A (Y y) (X x)) + +Test that a tree can not be given multiple parents: + + >>> ptree = make_ptree('(A (X x) (Y y) (Z z))') + >>> ptree[0] = ptree[1] + Traceback (most recent call last): + . . . + ValueError: Can not insert a subtree that already has a parent. + >>> pcheck() + ok! + +[more to be written] + + +ImmutableParentedTree Regression Tests +-------------------------------------- + + >>> iptree = ImmutableParentedTree.convert(ptree) + >>> type(iptree) + + >>> del iptree[0] + Traceback (most recent call last): + . . . 
+ ValueError: ImmutableParentedTree may not be modified + >>> iptree.set_label('newnode') + Traceback (most recent call last): + . . . + ValueError: ImmutableParentedTree may not be modified + + +MultiParentedTree Regression Tests +---------------------------------- +Keep track of all trees that we create (including subtrees) using this +variable: + + >>> all_mptrees = [] + +Define a helper funciton to create new parented trees: + + >>> def make_mptree(s): + ... mptree = MultiParentedTree.convert(Tree.fromstring(s)) + ... all_mptrees.extend(t for t in mptree.subtrees() + ... if isinstance(t, Tree)) + ... return mptree + +Define a test function that examines every subtree in all_mptrees; and +checks that all six of its methods are defined correctly. If any +mptrees are passed as arguments, then they are printed. + + >>> def mpcheck(*print_mptrees): + ... def has(seq, val): # uses identity comparison + ... for item in seq: + ... if item is val: return True + ... return False + ... for mptree in all_mptrees: + ... # Check mptree's methods. + ... if len(mptree.parents()) == 0: + ... assert len(mptree.left_siblings()) == 0 + ... assert len(mptree.right_siblings()) == 0 + ... assert len(mptree.roots()) == 1 + ... assert mptree.roots()[0] is mptree + ... assert mptree.treepositions(mptree) == [()] + ... left_siblings = right_siblings = () + ... roots = {id(mptree): 1} + ... else: + ... roots = dict((id(r), 0) for r in mptree.roots()) + ... left_siblings = mptree.left_siblings() + ... right_siblings = mptree.right_siblings() + ... for parent in mptree.parents(): + ... for i in mptree.parent_indices(parent): + ... assert parent[i] is mptree + ... # check left siblings + ... if i > 0: + ... for j in range(len(left_siblings)): + ... if left_siblings[j] is parent[i-1]: + ... del left_siblings[j] + ... break + ... else: + ... assert 0, 'sibling not found!' + ... # check ight siblings + ... if i < (len(parent)-1): + ... for j in range(len(right_siblings)): + ... if right_siblings[j] is parent[i+1]: + ... del right_siblings[j] + ... break + ... else: + ... assert 0, 'sibling not found!' + ... # check roots + ... for root in parent.roots(): + ... assert id(root) in roots, 'missing root' + ... roots[id(root)] += 1 + ... # check that we don't have any unexplained values + ... assert len(left_siblings)==0, 'unexpected sibling' + ... assert len(right_siblings)==0, 'unexpected sibling' + ... for v in roots.values(): assert v>0, roots #'unexpected root' + ... # check treepositions + ... for root in mptree.roots(): + ... for treepos in mptree.treepositions(root): + ... assert root[treepos] is mptree + ... # Check mptree's children's methods: + ... for i, child in enumerate(mptree): + ... if isinstance(child, Tree): + ... # mpcheck parent() & parent_index() methods + ... assert has(child.parents(), mptree) + ... assert i in child.parent_indices(mptree) + ... # mpcheck sibling methods + ... if i > 0: + ... assert has(child.left_siblings(), mptree[i-1]) + ... if i < len(mptree)-1: + ... assert has(child.right_siblings(), mptree[i+1]) + ... if print_mptrees: + ... print('ok!', end=' ') + ... for mptree in print_mptrees: print(mptree) + ... else: + ... print('ok!') + +Run our test function on a variety of newly-created trees: + + >>> mpcheck(make_mptree('(A)')) + ok! (A ) + >>> mpcheck(make_mptree('(A (B (C (D) (E f)) g) h)')) + ok! (A (B (C (D ) (E f)) g) h) + >>> mpcheck(make_mptree('(A (B) (C c) (D d d) (E e e e))')) + ok! 
(A (B ) (C c) (D d d) (E e e e)) + >>> mpcheck(make_mptree('(A (B) (C (c)) (D (d) (d)) (E (e) (e) (e)))')) + ok! (A (B ) (C (c )) (D (d ) (d )) (E (e ) (e ) (e ))) + >>> subtree = make_mptree('(A (B (C (D) (E f)) g) h)') + +Including some trees that contain multiple parents: + + >>> mpcheck(MultiParentedTree('Z', [subtree, subtree])) + ok! (Z (A (B (C (D ) (E f)) g) h) (A (B (C (D ) (E f)) g) h)) + +Run our test function after performing various tree-modification +operations (n.b., these are the same tests that we ran for +`ParentedTree`, above; thus, none of these trees actually *uses* +multiple parents.) + +**__delitem__()** + + >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)') + >>> e = mptree[0,0,1] + >>> del mptree[0,0,1]; mpcheck(mptree); mpcheck(e) + ok! (A (B (C (D ) (Q p)) g) h) + ok! (E f) + >>> del mptree[0,0,0]; mpcheck(mptree) + ok! (A (B (C (Q p)) g) h) + >>> del mptree[0,1]; mpcheck(mptree) + ok! (A (B (C (Q p))) h) + >>> del mptree[-1]; mpcheck(mptree) + ok! (A (B (C (Q p)))) + >>> del mptree[-100] + Traceback (most recent call last): + . . . + IndexError: index out of range + >>> del mptree[()] + Traceback (most recent call last): + . . . + IndexError: The tree position () may not be deleted. + + >>> # With slices: + >>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))') + >>> b = mptree[0] + >>> del mptree[0:0]; mpcheck(mptree) + ok! (A (B c) (D e) f g (H i) j (K l)) + >>> del mptree[:1]; mpcheck(mptree); mpcheck(b) + ok! (A (D e) f g (H i) j (K l)) + ok! (B c) + >>> del mptree[-2:]; mpcheck(mptree) + ok! (A (D e) f g (H i)) + >>> del mptree[1:3]; mpcheck(mptree) + ok! (A (D e) (H i)) + >>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))') + >>> del mptree[5:1000]; mpcheck(mptree) + ok! (A (B c) (D e) f g (H i)) + >>> del mptree[-2:1000]; mpcheck(mptree) + ok! (A (B c) (D e) f) + >>> del mptree[-100:1]; mpcheck(mptree) + ok! (A (D e) f) + >>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))') + >>> del mptree[1:-2:2]; mpcheck(mptree) + ok! (A (B c) f (H i) j (K l)) + +**__setitem__()** + + >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)') + >>> d, e, q = mptree[0,0] + >>> mptree[0,0,0] = 'x'; mpcheck(mptree); mpcheck(d) + ok! (A (B (C x (E f) (Q p)) g) h) + ok! (D ) + >>> mptree[0,0,1] = make_mptree('(X (Y z))'); mpcheck(mptree); mpcheck(e) + ok! (A (B (C x (X (Y z)) (Q p)) g) h) + ok! (E f) + >>> mptree[1] = d; mpcheck(mptree) + ok! (A (B (C x (X (Y z)) (Q p)) g) (D )) + >>> mptree[-1] = 'x'; mpcheck(mptree) + ok! (A (B (C x (X (Y z)) (Q p)) g) x) + >>> mptree[-100] = 'y' + Traceback (most recent call last): + . . . + IndexError: index out of range + >>> mptree[()] = make_mptree('(X y)') + Traceback (most recent call last): + . . . + IndexError: The tree position () may not be assigned to. + + >>> # With slices: + >>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))') + >>> b = mptree[0] + >>> mptree[0:0] = ('x', make_mptree('(Y)')); mpcheck(mptree) + ok! (A x (Y ) (B c) (D e) f g (H i) j (K l)) + >>> mptree[2:6] = (); mpcheck(mptree); mpcheck(b) + ok! (A x (Y ) (H i) j (K l)) + ok! (B c) + >>> mptree[-2:] = ('z', 'p'); mpcheck(mptree) + ok! (A x (Y ) (H i) z p) + >>> mptree[1:3] = [make_mptree('(X)') for x in range(10)]; mpcheck(mptree) + ok! (A x (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) z p) + >>> mptree[5:1000] = []; mpcheck(mptree) + ok! (A x (X ) (X ) (X ) (X )) + >>> mptree[-2:1000] = ['n']; mpcheck(mptree) + ok! (A x (X ) (X ) n) + >>> mptree[-100:1] = [make_mptree('(U v)')]; mpcheck(mptree) + ok! 
(A (U v) (X ) (X ) n) + >>> mptree[-1:] = (make_mptree('(X)') for x in range(3)); mpcheck(mptree) + ok! (A (U v) (X ) (X ) (X ) (X ) (X )) + >>> mptree[1:-2:2] = ['x', 'y']; mpcheck(mptree) + ok! (A (U v) x (X ) y (X ) (X )) + +**append()** + + >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)') + >>> mptree.append('x'); mpcheck(mptree) + ok! (A (B (C (D ) (E f) (Q p)) g) h x) + >>> mptree.append(make_mptree('(X (Y z))')); mpcheck(mptree) + ok! (A (B (C (D ) (E f) (Q p)) g) h x (X (Y z))) + +**extend()** + + >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)') + >>> mptree.extend(['x', 'y', make_mptree('(X (Y z))')]); mpcheck(mptree) + ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z))) + >>> mptree.extend([]); mpcheck(mptree) + ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z))) + >>> mptree.extend(make_mptree('(X)') for x in range(3)); mpcheck(mptree) + ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)) (X ) (X ) (X )) + +**insert()** + + >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)') + >>> mptree.insert(0, make_mptree('(X (Y z))')); mpcheck(mptree) + ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) h) + >>> mptree.insert(-1, make_mptree('(X (Y z))')); mpcheck(mptree) + ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h) + >>> mptree.insert(-4, make_mptree('(X (Y z))')); mpcheck(mptree) + ok! (A (X (Y z)) (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h) + >>> # Note: as with ``list``, inserting at a negative index that + >>> # gives a position before the start of the list does *not* + >>> # raise an IndexError exception; it just inserts at 0. + >>> mptree.insert(-400, make_mptree('(X (Y z))')); mpcheck(mptree) + ok! (A + (X (Y z)) + (X (Y z)) + (X (Y z)) + (B (C (D ) (E f) (Q p)) g) + (X (Y z)) + h) + +**pop()** + + >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)') + >>> mptree[0,0].pop(1); mpcheck(mptree) + MultiParentedTree('E', ['f']) + ok! (A (B (C (D ) (Q p)) g) h) + >>> mptree[0].pop(-1); mpcheck(mptree) + 'g' + ok! (A (B (C (D ) (Q p))) h) + >>> mptree.pop(); mpcheck(mptree) + 'h' + ok! (A (B (C (D ) (Q p)))) + >>> mptree.pop(-100) + Traceback (most recent call last): + . . . + IndexError: index out of range + +**remove()** + + >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)') + >>> e = mptree[0,0,1] + >>> mptree[0,0].remove(mptree[0,0,1]); mpcheck(mptree); mpcheck(e) + ok! (A (B (C (D ) (Q p)) g) h) + ok! (E f) + >>> mptree[0,0].remove(make_mptree('(Q p)')); mpcheck(mptree) + ok! (A (B (C (D )) g) h) + >>> mptree[0,0].remove(make_mptree('(Q p)')) + Traceback (most recent call last): + . . . + ValueError: MultiParentedTree('Q', ['p']) is not in list + >>> mptree.remove('h'); mpcheck(mptree) + ok! (A (B (C (D )) g)) + >>> mptree.remove('h'); + Traceback (most recent call last): + . . . + ValueError: 'h' is not in list + >>> # remove() removes the first subtree that is equal (==) to the + >>> # given tree, which may not be the identical tree we give it: + >>> mptree = make_mptree('(A (X x) (Y y) (X x))') + >>> x1, y, x2 = mptree + >>> mptree.remove(mptree[-1]); mpcheck(mptree) + ok! (A (Y y) (X x)) + >>> print([str(p) for p in x1.parents()]) + [] + >>> print([str(p) for p in x2.parents()]) + ['(A (Y y) (X x))'] + + +ImmutableMultiParentedTree Regression Tests +------------------------------------------- + + >>> imptree = ImmutableMultiParentedTree.convert(mptree) + >>> type(imptree) + + >>> del imptree[0] + Traceback (most recent call last): + . . . 
+ ValueError: ImmutableMultiParentedTree may not be modified + >>> imptree.set_label('newnode') + Traceback (most recent call last): + . . . + ValueError: ImmutableMultiParentedTree may not be modified + + +ProbabilisticTree Regression Tests +---------------------------------- + + >>> prtree = ProbabilisticTree("S", [ProbabilisticTree("NP", ["N"], prob=0.3)], prob=0.6) + >>> print(prtree) + (S (NP N)) (p=0.6) + >>> import copy + >>> prtree == copy.deepcopy(prtree) == prtree.copy(deep=True) == prtree.copy() + True + >>> prtree[0] is prtree.copy()[0] + True + >>> prtree[0] is prtree.copy(deep=True)[0] + False + + >>> imprtree = ImmutableProbabilisticTree.convert(prtree) + >>> type(imprtree) + + >>> del imprtree[0] + Traceback (most recent call last): + . . . + ValueError: ImmutableProbabilisticTree may not be modified + >>> imprtree.set_label('newnode') + Traceback (most recent call last): + . . . + ValueError: ImmutableProbabilisticTree may not be modified + + +Squashed Bugs +============= + +This used to discard the ``(B b)`` subtree (fixed in svn 6270): + + >>> print(Tree.fromstring('((A a) (B b))')) + ( (A a) (B b)) + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/treeprettyprinter.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/treeprettyprinter.doctest new file mode 100644 index 0000000..8302c2c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/treeprettyprinter.doctest @@ -0,0 +1,127 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +======================================================== + Unit tests for nltk.treeprettyprinter.TreePrettyPrinter +======================================================== + + >>> from nltk.tree import Tree + >>> from nltk.treeprettyprinter import TreePrettyPrinter + +Tree nr 2170 from nltk.corpus.treebank: + + >>> tree = Tree.fromstring( + ... '(S (NP-SBJ (PRP I)) (VP (VBP feel) (ADJP-PRD (RB pretty) ' + ... '(JJ good)) (PP-CLR (IN about) (NP (PRP it)))) (. .))') + >>> tpp = TreePrettyPrinter(tree) + >>> print(tpp.text()) + S + __________________________|_____________________ + | VP | + | ____________________|___________ | + | | | PP-CLR | + | | | _____|_____ | + NP-SBJ | ADJP-PRD | NP | + | | _______|______ | | | + PRP VBP RB JJ IN PRP . + | | | | | | | + I feel pretty good about it . + + >>> print(tpp.text(unicodelines=True)) + S + ┌──────────────────────────┼─────────────────────┐ + │ VP │ + │ ┌─────────────┬──────┴───────────┐ │ + │ │ │ PP-CLR │ + │ │ │ ┌─────┴─────┐ │ + NP-SBJ │ ADJP-PRD │ NP │ + │ │ ┌───────┴──────┐ │ │ │ + PRP VBP RB JJ IN PRP . + │ │ │ │ │ │ │ + I feel pretty good about it . + +A tree with long labels: + + >>> tree = Tree.fromstring( + ... '(sentence (plural-noun-phrase (plural-noun Superconductors)) ' + ... '(verb-phrase (plural-verb conduct) ' + ... '(noun-phrase (singular-noun electricity))))') + >>> tpp = TreePrettyPrinter(tree) + >>> print(tpp.text(abbreviate=8, nodedist=2)) + sentence + __________|__________ + | verb-phr. + | __________|__________ + plural-n. | noun-phr. + | | | + plural-n. plural-v. singular. + | | | + Supercon. conduct electric. + + >>> print(tpp.text(maxwidth=8, nodedist=2)) + sentence + _________|________ + | verb- + | phrase + | ________|_________ + plural- | noun- + noun- | phrase + phrase | | + | | | + plural- plural- singular- + noun verb noun + | | | + Supercon conduct electric + ductors ity + +A discontinuous tree: + + >>> tree = Tree.fromstring( + ... 
'(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) ' + ... '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) ' + ... '(vg 10) (inf (verb 11)))))) (punct 12))', read_leaf=int) + >>> sentence = ('Ze had met haar moeder kunnen gaan winkelen ,' + ... ' zwemmen of terrassen .'.split()) + >>> tpp = TreePrettyPrinter(tree, sentence) + >>> print(tpp.text()) + top + _____|______________________________________________ + smain | | + _______________________________|_____ | | + | | inf | | + | | _____|____ | | + | | | inf | | + | | | ____|_____ | | + | | | | conj | | + | | _____ | ___ | _________|______ | __________________ | + | | inf | | | | | | | + | | _________|_____ | ___ | _________ | | | | | + | | pp | | | | | | | | + | | ____|____ | | | | | | | | + | | | np | | | | inf | inf | + | | | ____|____ | | | | | | | | + noun verb prep det noun verb verb verb punct verb vg verb punct + | | | | | | | | | | | | | + Ze had met haar moeder kunnen gaan winkelen , zwemmen of terrassen . + + >>> print(tpp.text(unicodelines=True)) + top + ┌─────┴──────────────────┬───────────────────────────┐ + smain │ │ + ┌────┬──────────────────────────┴─────┐ │ │ + │ │ inf │ │ + │ │ ┌─────┴────┐ │ │ + │ │ │ inf │ │ + │ │ │ ┌────┴─────┐ │ │ + │ │ │ │ conj │ │ + │ │ ┌───── │ ─── │ ─────────┴────── │ ─────┬─────┬──────┐ │ + │ │ inf │ │ │ │ │ │ │ + │ │ ┌─────────┴───── │ ─── │ ─────────┐ │ │ │ │ │ + │ │ pp │ │ │ │ │ │ │ │ + │ │ ┌────┴────┐ │ │ │ │ │ │ │ │ + │ │ │ np │ │ │ │ inf │ inf │ + │ │ │ ┌────┴────┐ │ │ │ │ │ │ │ │ + noun verb prep det noun verb verb verb punct verb vg verb punct + │ │ │ │ │ │ │ │ │ │ │ │ │ + Ze had met haar moeder kunnen gaan winkelen , zwemmen of terrassen . + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/treetransforms.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/treetransforms.doctest new file mode 100644 index 0000000..e44e504 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/treetransforms.doctest @@ -0,0 +1,156 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +------------------------------------------- +Unit tests for the TreeTransformation class +------------------------------------------- + + >>> from copy import deepcopy + >>> from nltk.tree import * + >>> from nltk.treetransforms import * + + >>> tree_string = "(TOP (S (S (VP (VBN Turned) (ADVP (RB loose)) (PP (IN in) (NP (NP (NNP Shane) (NNP Longman) (POS 's)) (NN trading) (NN room))))) (, ,) (NP (DT the) (NN yuppie) (NNS dealers)) (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))) (. .)))" + + >>> tree = Tree.fromstring(tree_string) + >>> print(tree) + (TOP + (S + (S + (VP + (VBN Turned) + (ADVP (RB loose)) + (PP + (IN in) + (NP + (NP (NNP Shane) (NNP Longman) (POS 's)) + (NN trading) + (NN room))))) + (, ,) + (NP (DT the) (NN yuppie) (NNS dealers)) + (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))) + (. .))) + +Make a copy of the original tree and collapse the subtrees with only one child + + >>> collapsedTree = deepcopy(tree) + >>> collapse_unary(collapsedTree) + >>> print(collapsedTree) + (TOP + (S + (S+VP + (VBN Turned) + (ADVP (RB loose)) + (PP + (IN in) + (NP + (NP (NNP Shane) (NNP Longman) (POS 's)) + (NN trading) + (NN room)))) + (, ,) + (NP (DT the) (NN yuppie) (NNS dealers)) + (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))) + (. 
.))) + + >>> collapsedTree2 = deepcopy(tree) + >>> collapse_unary(collapsedTree2, collapsePOS=True, collapseRoot=True) + >>> print(collapsedTree2) + (TOP+S + (S+VP + (VBN Turned) + (ADVP+RB loose) + (PP + (IN in) + (NP + (NP (NNP Shane) (NNP Longman) (POS 's)) + (NN trading) + (NN room)))) + (, ,) + (NP (DT the) (NN yuppie) (NNS dealers)) + (VP (AUX do) (NP (NP+RB little) (ADJP+RB right))) + (. .)) + +Convert the tree to Chomsky Normal Form i.e. each subtree has either two +subtree children or a single leaf value. This conversion can be performed +using either left- or right-factoring. + + >>> cnfTree = deepcopy(collapsedTree) + >>> chomsky_normal_form(cnfTree, factor='left') + >>> print(cnfTree) + (TOP + (S + (S| + (S| + (S| + (S+VP + (S+VP| (VBN Turned) (ADVP (RB loose))) + (PP + (IN in) + (NP + (NP| + (NP + (NP| (NNP Shane) (NNP Longman)) + (POS 's)) + (NN trading)) + (NN room)))) + (, ,)) + (NP (NP| (DT the) (NN yuppie)) (NNS dealers))) + (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))) + (. .))) + + >>> cnfTree = deepcopy(collapsedTree) + >>> chomsky_normal_form(cnfTree, factor='right') + >>> print(cnfTree) + (TOP + (S + (S+VP + (VBN Turned) + (S+VP| + (ADVP (RB loose)) + (PP + (IN in) + (NP + (NP (NNP Shane) (NP| (NNP Longman) (POS 's))) + (NP| (NN trading) (NN room)))))) + (S|<,-NP-VP-.> + (, ,) + (S| + (NP (DT the) (NP| (NN yuppie) (NNS dealers))) + (S| + (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))) + (. .)))))) + +Employ some Markov smoothing to make the artificial node labels a bit more +readable. See the treetransforms.py documentation for more details. + + >>> markovTree = deepcopy(collapsedTree) + >>> chomsky_normal_form(markovTree, horzMarkov=2, vertMarkov=1) + >>> print(markovTree) + (TOP + (S^ + (S+VP^ + (VBN Turned) + (S+VP|^ + (ADVP^ (RB loose)) + (PP^ + (IN in) + (NP^ + (NP^ + (NNP Shane) + (NP|^ (NNP Longman) (POS 's))) + (NP|^ (NN trading) (NN room)))))) + (S|<,-NP>^ + (, ,) + (S|^ + (NP^ (DT the) (NP|^ (NN yuppie) (NNS dealers))) + (S|^ + (VP^ + (AUX do) + (NP^ (NP^ (RB little)) (ADJP^ (RB right)))) + (. 
.)))))) + +Convert the transformed tree back to its original form + + >>> un_chomsky_normal_form(markovTree) + >>> tree == markovTree + True + diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__init__.py rename to venv.bak/lib/python3.7/site-packages/nltk/test/unit/__init__.py diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..961a9e1 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_2x_compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_2x_compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_2x_compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_2x_compat.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_aline.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_aline.cpython-37.pyc new file mode 100644 index 0000000..24525ee Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_aline.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_brill.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_brill.cpython-37.pyc new file mode 100644 index 0000000..c63fc95 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_brill.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_cfd_mutation.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_cfd_mutation.cpython-37.pyc new file mode 100644 index 0000000..b15fdd5 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_cfd_mutation.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_cfg2chomsky.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_cfg2chomsky.cpython-37.pyc new file mode 100644 index 0000000..b800040 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_cfg2chomsky.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_chunk.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_chunk.cpython-37.pyc new file mode 100644 index 0000000..841cb9d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_chunk.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_classify.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_classify.cpython-37.pyc new file mode 100644 index 0000000..5e0df93 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_classify.cpython-37.pyc differ diff --git 
a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_collocations.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_collocations.cpython-37.pyc new file mode 100644 index 0000000..189635b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_collocations.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_concordance.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_concordance.cpython-37.pyc new file mode 100644 index 0000000..ba818ce Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_concordance.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_corenlp.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_corenlp.cpython-37.pyc new file mode 100644 index 0000000..957a033 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_corenlp.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_corpora.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_corpora.cpython-37.pyc new file mode 100644 index 0000000..132027f Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_corpora.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_corpus_views.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_corpus_views.cpython-37.pyc new file mode 100644 index 0000000..a48b40a Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_corpus_views.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_data.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_data.cpython-37.pyc new file mode 100644 index 0000000..1003166 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_data.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_disagreement.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_disagreement.cpython-37.pyc new file mode 100644 index 0000000..889bf92 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_disagreement.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_hmm.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_hmm.cpython-37.pyc new file mode 100644 index 0000000..7faf5ba Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_hmm.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_json2csv_corpus.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_json2csv_corpus.cpython-37.pyc new file mode 100644 index 0000000..b0b1b07 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_json2csv_corpus.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_naivebayes.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_naivebayes.cpython-37.pyc new file mode 100644 index 0000000..db1755a Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_naivebayes.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_nombank.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_nombank.cpython-37.pyc new file mode 100644 index 0000000..1200689 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_nombank.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_pos_tag.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_pos_tag.cpython-37.pyc new file mode 100644 index 0000000..e22e476 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_pos_tag.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_rte_classify.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_rte_classify.cpython-37.pyc new file mode 100644 index 0000000..9caf1f7 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_rte_classify.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_seekable_unicode_stream_reader.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_seekable_unicode_stream_reader.cpython-37.pyc new file mode 100644 index 0000000..65e0fb6 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_seekable_unicode_stream_reader.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_senna.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_senna.cpython-37.pyc new file mode 100644 index 0000000..dd0d1f3 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_senna.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_stem.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_stem.cpython-37.pyc new file mode 100644 index 0000000..fe02a52 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_stem.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_tag.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_tag.cpython-37.pyc new file mode 100644 index 0000000..f9b34be Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_tag.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_tgrep.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_tgrep.cpython-37.pyc new file mode 100644 index 0000000..d3c18ab Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_tgrep.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_tokenize.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_tokenize.cpython-37.pyc new file mode 100644 index 0000000..6450ba5 Binary files 
/dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_tokenize.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_twitter_auth.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_twitter_auth.cpython-37.pyc new file mode 100644 index 0000000..1ec6015 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_twitter_auth.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_wordnet.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_wordnet.cpython-37.pyc new file mode 100644 index 0000000..b913ab2 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/test_wordnet.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/utils.cpython-37.pyc new file mode 100644 index 0000000..597b65b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/__pycache__/utils.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/__init__.py rename to venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/__init__.py diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..3274a87 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/__pycache__/test_counter.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/__pycache__/test_counter.cpython-37.pyc new file mode 100644 index 0000000..b970b02 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/__pycache__/test_counter.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/__pycache__/test_models.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/__pycache__/test_models.cpython-37.pyc new file mode 100644 index 0000000..f61be67 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/__pycache__/test_models.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/__pycache__/test_preprocessing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/__pycache__/test_preprocessing.cpython-37.pyc new file mode 100644 index 0000000..7197906 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/__pycache__/test_preprocessing.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/__pycache__/test_vocabulary.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/__pycache__/test_vocabulary.cpython-37.pyc new file mode 100644 index 0000000..36d1497 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/__pycache__/test_vocabulary.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/test_counter.py 
b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/test_counter.py new file mode 100644 index 0000000..31fab79 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/test_counter.py @@ -0,0 +1,135 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Language Model Unit Tests +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ilia Kurenkov +# URL: +# For license information, see LICENSE.TXT + +import unittest + +import six + +from nltk import FreqDist +from nltk.lm import NgramCounter +from nltk.util import everygrams + + +class NgramCounterTests(unittest.TestCase): + """Tests for NgramCounter that only involve lookup, no modification.""" + + @classmethod + def setUpClass(cls): + + text = [list("abcd"), list("egdbe")] + cls.trigram_counter = NgramCounter( + (everygrams(sent, max_len=3) for sent in text) + ) + cls.bigram_counter = NgramCounter( + (everygrams(sent, max_len=2) for sent in text) + ) + + def test_N(self): + self.assertEqual(self.bigram_counter.N(), 16) + self.assertEqual(self.trigram_counter.N(), 21) + + def test_counter_len_changes_with_lookup(self): + self.assertEqual(len(self.bigram_counter), 2) + _ = self.bigram_counter[50] + self.assertEqual(len(self.bigram_counter), 3) + + def test_ngram_order_access_unigrams(self): + self.assertEqual(self.bigram_counter[1], self.bigram_counter.unigrams) + + def test_ngram_conditional_freqdist(self): + expected_trigram_contexts = [ + ("a", "b"), + ("b", "c"), + ("e", "g"), + ("g", "d"), + ("d", "b"), + ] + expected_bigram_contexts = [("a",), ("b",), ("d",), ("e",), ("c",), ("g",)] + + bigrams = self.trigram_counter[2] + trigrams = self.trigram_counter[3] + + six.assertCountEqual(self, expected_bigram_contexts, bigrams.conditions()) + six.assertCountEqual(self, expected_trigram_contexts, trigrams.conditions()) + + def test_bigram_counts_seen_ngrams(self): + b_given_a_count = 1 + unk_given_b_count = 1 + + self.assertEqual(b_given_a_count, self.bigram_counter[["a"]]["b"]) + self.assertEqual(unk_given_b_count, self.bigram_counter[["b"]]["c"]) + + def test_bigram_counts_unseen_ngrams(self): + z_given_b_count = 0 + + self.assertEqual(z_given_b_count, self.bigram_counter[["b"]]["z"]) + + def test_unigram_counts_seen_words(self): + expected_count_b = 2 + + self.assertEqual(expected_count_b, self.bigram_counter["b"]) + + def test_unigram_counts_completely_unseen_words(self): + unseen_count = 0 + + self.assertEqual(unseen_count, self.bigram_counter["z"]) + + +class NgramCounterTrainingTests(unittest.TestCase): + def setUp(self): + self.counter = NgramCounter() + + def test_empty_string(self): + test = NgramCounter("") + self.assertNotIn(2, test) + self.assertEqual(test[1], FreqDist()) + + def test_empty_list(self): + test = NgramCounter([]) + self.assertNotIn(2, test) + self.assertEqual(test[1], FreqDist()) + + def test_None(self): + test = NgramCounter(None) + self.assertNotIn(2, test) + self.assertEqual(test[1], FreqDist()) + + def test_train_on_unigrams(self): + words = list("abcd") + counter = NgramCounter([[(w,) for w in words]]) + + self.assertFalse(bool(counter[3])) + self.assertFalse(bool(counter[2])) + six.assertCountEqual(self, words, counter[1].keys()) + + def test_train_on_illegal_sentences(self): + str_sent = ["Check", "this", "out", "!"] + list_sent = [["Check", "this"], ["this", "out"], ["out", "!"]] + + with self.assertRaises(TypeError): + NgramCounter([str_sent]) + + with self.assertRaises(TypeError): + NgramCounter([list_sent]) + + def test_train_on_bigrams(self): + bigram_sent = [("a", "b"), ("c", 
"d")] + counter = NgramCounter([bigram_sent]) + + self.assertFalse(bool(counter[3])) + + def test_train_on_mix(self): + mixed_sent = [("a", "b"), ("c", "d"), ("e", "f", "g"), ("h",)] + counter = NgramCounter([mixed_sent]) + unigrams = ["h"] + bigram_contexts = [("a",), ("c",)] + trigram_contexts = [("e", "f")] + + six.assertCountEqual(self, unigrams, counter[1].keys()) + six.assertCountEqual(self, bigram_contexts, counter[2].keys()) + six.assertCountEqual(self, trigram_contexts, counter[3].keys()) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/test_models.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/test_models.py new file mode 100644 index 0000000..8c5ba11 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/test_models.py @@ -0,0 +1,446 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Language Model Unit Tests +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ilia Kurenkov +# URL: +# For license information, see LICENSE.TXT + +from __future__ import division + +import math +import sys +import unittest + +from six import add_metaclass + +from nltk.lm import ( + Vocabulary, + MLE, + Lidstone, + Laplace, + WittenBellInterpolated, + KneserNeyInterpolated, +) +from nltk.lm.preprocessing import padded_everygrams + + +def _prepare_test_data(ngram_order): + return ( + Vocabulary(["a", "b", "c", "d", "z", "", ""], unk_cutoff=1), + [ + list(padded_everygrams(ngram_order, sent)) + for sent in (list("abcd"), list("egadbe")) + ], + ) + + +class ParametrizeTestsMeta(type): + """Metaclass for generating parametrized tests.""" + + def __new__(cls, name, bases, dct): + contexts = ( + ("a",), + ("c",), + (u"",), + ("b",), + (u"",), + ("d",), + ("e",), + ("r",), + ("w",), + ) + for i, c in enumerate(contexts): + dct["test_sumto1_{0}".format(i)] = cls.add_sum_to_1_test(c) + scores = dct.get("score_tests", []) + for i, (word, context, expected_score) in enumerate(scores): + dct["test_score_{0}".format(i)] = cls.add_score_test( + word, context, expected_score + ) + return super(ParametrizeTestsMeta, cls).__new__(cls, name, bases, dct) + + @classmethod + def add_score_test(cls, word, context, expected_score): + if sys.version_info > (3, 5): + message = "word='{word}', context={context}" + else: + # Python 2 doesn't report the mismatched values if we pass a custom + # message, so we have to report them manually. 
+ message = ( + "{score} != {expected_score} within 4 places, " + "word='{word}', context={context}" + ) + + def test_method(self): + score = self.model.score(word, context) + self.assertAlmostEqual( + score, expected_score, msg=message.format(**locals()), places=4 + ) + + return test_method + + @classmethod + def add_sum_to_1_test(cls, context): + def test(self): + s = sum(self.model.score(w, context) for w in self.model.vocab) + self.assertAlmostEqual(s, 1.0, msg="The context is {}".format(context)) + + return test + + +@add_metaclass(ParametrizeTestsMeta) +class MleBigramTests(unittest.TestCase): + """unit tests for MLENgramModel class""" + + score_tests = [ + ("d", ["c"], 1), + # Unseen ngrams should yield 0 + ("d", ["e"], 0), + # Unigrams should also be 0 + ("z", None, 0), + # N unigrams = 14 + # count('a') = 2 + ("a", None, 2.0 / 14), + # count('y') = 3 + ("y", None, 3.0 / 14), + ] + + def setUp(self): + vocab, training_text = _prepare_test_data(2) + self.model = MLE(2, vocabulary=vocab) + self.model.fit(training_text) + + def test_logscore_zero_score(self): + # logscore of unseen ngrams should be -inf + logscore = self.model.logscore("d", ["e"]) + + self.assertTrue(math.isinf(logscore)) + + def test_entropy_perplexity_seen(self): + # ngrams seen during training + trained = [ + ("", "a"), + ("a", "b"), + ("b", ""), + ("", "a"), + ("a", "d"), + ("d", ""), + ] + # Ngram = Log score + # , a = -1 + # a, b = -1 + # b, UNK = -1 + # UNK, a = -1.585 + # a, d = -1 + # d, = -1 + # TOTAL logscores = -6.585 + # - AVG logscores = 1.0975 + H = 1.0975 + perplexity = 2.1398 + + self.assertAlmostEqual(H, self.model.entropy(trained), places=4) + self.assertAlmostEqual(perplexity, self.model.perplexity(trained), places=4) + + def test_entropy_perplexity_unseen(self): + # In MLE, even one unseen ngram should make entropy and perplexity infinite + untrained = [("", "a"), ("a", "c"), ("c", "d"), ("d", "")] + + self.assertTrue(math.isinf(self.model.entropy(untrained))) + self.assertTrue(math.isinf(self.model.perplexity(untrained))) + + def test_entropy_perplexity_unigrams(self): + # word = score, log score + # = 0.1429, -2.8074 + # a = 0.1429, -2.8074 + # c = 0.0714, -3.8073 + # UNK = 0.2143, -2.2224 + # d = 0.1429, -2.8074 + # c = 0.0714, -3.8073 + # = 0.1429, -2.8074 + # TOTAL logscores = -21.6243 + # - AVG logscores = 3.0095 + H = 3.0095 + perplexity = 8.0529 + + text = [("",), ("a",), ("c",), ("-",), ("d",), ("c",), ("",)] + + self.assertAlmostEqual(H, self.model.entropy(text), places=4) + self.assertAlmostEqual(perplexity, self.model.perplexity(text), places=4) + + +@add_metaclass(ParametrizeTestsMeta) +class MleTrigramTests(unittest.TestCase): + """MLE trigram model tests""" + + score_tests = [ + # count(d | b, c) = 1 + # count(b, c) = 1 + ("d", ("b", "c"), 1), + # count(d | c) = 1 + # count(c) = 1 + ("d", ["c"], 1), + # total number of tokens is 18, of which "a" occured 2 times + ("a", None, 2.0 / 18), + # in vocabulary but unseen + ("z", None, 0), + # out of vocabulary should use "UNK" score + ("y", None, 3.0 / 18), + ] + + def setUp(self): + vocab, training_text = _prepare_test_data(3) + self.model = MLE(3, vocabulary=vocab) + self.model.fit(training_text) + + +@add_metaclass(ParametrizeTestsMeta) +class LidstoneBigramTests(unittest.TestCase): + """unit tests for Lidstone class""" + + score_tests = [ + # count(d | c) = 1 + # *count(d | c) = 1.1 + # Count(w | c for w in vocab) = 1 + # *Count(w | c for w in vocab) = 1.8 + ("d", ["c"], 1.1 / 1.8), + # Total unigrams: 14 + # Vocab size: 8 + # 
Denominator: 14 + 0.8 = 14.8 + # count("a") = 2 + # *count("a") = 2.1 + ("a", None, 2.1 / 14.8), + # in vocabulary but unseen + # count("z") = 0 + # *count("z") = 0.1 + ("z", None, 0.1 / 14.8), + # out of vocabulary should use "UNK" score + # count("") = 3 + # *count("") = 3.1 + ("y", None, 3.1 / 14.8), + ] + + def setUp(self): + vocab, training_text = _prepare_test_data(2) + self.model = Lidstone(0.1, 2, vocabulary=vocab) + self.model.fit(training_text) + + def test_gamma(self): + self.assertEqual(0.1, self.model.gamma) + + def test_entropy_perplexity(self): + text = [ + ("", "a"), + ("a", "c"), + ("c", ""), + ("", "d"), + ("d", "c"), + ("c", ""), + ] + # Unlike MLE this should be able to handle completely novel ngrams + # Ngram = score, log score + # , a = 0.3929, -1.3479 + # a, c = 0.0357, -4.8074 + # c, UNK = 0.0(5), -4.1699 + # UNK, d = 0.0263, -5.2479 + # d, c = 0.0357, -4.8074 + # c, = 0.0(5), -4.1699 + # TOTAL logscore: −24.5504 + # - AVG logscore: 4.0917 + H = 4.0917 + perplexity = 17.0504 + self.assertAlmostEqual(H, self.model.entropy(text), places=4) + self.assertAlmostEqual(perplexity, self.model.perplexity(text), places=4) + + +@add_metaclass(ParametrizeTestsMeta) +class LidstoneTrigramTests(unittest.TestCase): + score_tests = [ + # Logic behind this is the same as for bigram model + ("d", ["c"], 1.1 / 1.8), + # if we choose a word that hasn't appeared after (b, c) + ("e", ["c"], 0.1 / 1.8), + # Trigram score now + ("d", ["b", "c"], 1.1 / 1.8), + ("e", ["b", "c"], 0.1 / 1.8), + ] + + def setUp(self): + vocab, training_text = _prepare_test_data(3) + self.model = Lidstone(0.1, 3, vocabulary=vocab) + self.model.fit(training_text) + + +@add_metaclass(ParametrizeTestsMeta) +class LaplaceBigramTests(unittest.TestCase): + """unit tests for Laplace class""" + + score_tests = [ + # basic sanity-check: + # count(d | c) = 1 + # *count(d | c) = 2 + # Count(w | c for w in vocab) = 1 + # *Count(w | c for w in vocab) = 9 + ("d", ["c"], 2.0 / 9), + # Total unigrams: 14 + # Vocab size: 8 + # Denominator: 14 + 8 = 22 + # count("a") = 2 + # *count("a") = 3 + ("a", None, 3.0 / 22), + # in vocabulary but unseen + # count("z") = 0 + # *count("z") = 1 + ("z", None, 1.0 / 22), + # out of vocabulary should use "UNK" score + # count("") = 3 + # *count("") = 4 + ("y", None, 4.0 / 22), + ] + + def setUp(self): + vocab, training_text = _prepare_test_data(2) + self.model = Laplace(2, vocabulary=vocab) + self.model.fit(training_text) + + def test_gamma(self): + # Make sure the gamma is set to 1 + self.assertEqual(1, self.model.gamma) + + def test_entropy_perplexity(self): + text = [ + ("", "a"), + ("a", "c"), + ("c", ""), + ("", "d"), + ("d", "c"), + ("c", ""), + ] + # Unlike MLE this should be able to handle completely novel ngrams + # Ngram = score, log score + # , a = 0.2, -2.3219 + # a, c = 0.1, -3.3219 + # c, UNK = 0.(1), -3.1699 + # UNK, d = 0.(09), 3.4594 + # d, c = 0.1 -3.3219 + # c, = 0.(1), -3.1699 + # Total logscores: −18.7651 + # - AVG logscores: 3.1275 + H = 3.1275 + perplexity = 8.7393 + self.assertAlmostEqual(H, self.model.entropy(text), places=4) + self.assertAlmostEqual(perplexity, self.model.perplexity(text), places=4) + + +@add_metaclass(ParametrizeTestsMeta) +class WittenBellInterpolatedTrigramTests(unittest.TestCase): + def setUp(self): + vocab, training_text = _prepare_test_data(3) + self.model = WittenBellInterpolated(3, vocabulary=vocab) + self.model.fit(training_text) + + score_tests = [ + # For unigram scores by default revert to MLE + # Total unigrams: 18 + # count('c'): 1 + ("c", 
None, 1.0 / 18), + # in vocabulary but unseen + # count("z") = 0 + ("z", None, 0.0 / 18), + # out of vocabulary should use "UNK" score + # count("") = 3 + ("y", None, 3.0 / 18), + # gamma(['b']) = 0.1111 + # mle.score('c', ['b']) = 0.5 + # (1 - gamma) * mle + gamma * mle('c') ~= 0.45 + .3 / 18 + ("c", ["b"], (1 - 0.1111) * 0.5 + 0.1111 * 1 / 18), + # building on that, let's try 'a b c' as the trigram + # gamma(['a', 'b']) = 0.0667 + # mle("c", ["a", "b"]) = 1 + ("c", ["a", "b"], (1 - 0.0667) + 0.0667 * ((1 - 0.1111) * 0.5 + 0.1111 / 18)), + ] + + +@add_metaclass(ParametrizeTestsMeta) +class KneserNeyInterpolatedTrigramTests(unittest.TestCase): + def setUp(self): + vocab, training_text = _prepare_test_data(3) + self.model = KneserNeyInterpolated(3, vocabulary=vocab) + self.model.fit(training_text) + + score_tests = [ + # For unigram scores revert to uniform + # Vocab size: 8 + # count('c'): 1 + ("c", None, 1.0 / 8), + # in vocabulary but unseen, still uses uniform + ("z", None, 1 / 8), + # out of vocabulary should use "UNK" score, i.e. again uniform + ("y", None, 1.0 / 8), + # alpha = count('bc') - discount = 1 - 0.1 = 0.9 + # gamma(['b']) = discount * number of unique words that follow ['b'] = 0.1 * 2 + # normalizer = total number of bigrams with this context = 2 + # the final should be: (alpha + gamma * unigram_score("c")) + ("c", ["b"], (0.9 + 0.2 * (1 / 8)) / 2), + # building on that, let's try 'a b c' as the trigram + # alpha = count('abc') - discount = 1 - 0.1 = 0.9 + # gamma(['a', 'b']) = 0.1 * 1 + # normalizer = total number of trigrams with prefix "ab" = 1 => we can ignore it! + ("c", ["a", "b"], 0.9 + 0.1 * ((0.9 + 0.2 * (1 / 8)) / 2)), + ] + + +class NgramModelTextGenerationTests(unittest.TestCase): + """Using MLE estimator, generate some text.""" + + def setUp(self): + vocab, training_text = _prepare_test_data(3) + self.model = MLE(3, vocabulary=vocab) + self.model.fit(training_text) + + def test_generate_one_no_context(self): + self.assertEqual(self.model.generate(random_seed=3), "") + + def test_generate_one_limiting_context(self): + # We don't need random_seed for contexts with only one continuation + self.assertEqual(self.model.generate(text_seed=["c"]), "d") + self.assertEqual(self.model.generate(text_seed=["b", "c"]), "d") + self.assertEqual(self.model.generate(text_seed=["a", "c"]), "d") + + def test_generate_one_varied_context(self): + # When context doesn't limit our options enough, seed the random choice + self.assertEqual( + self.model.generate(text_seed=("a", ""), random_seed=2), "a" + ) + + def test_generate_cycle(self): + # Add a cycle to the model: bd -> b, db -> d + more_training_text = [list(padded_everygrams(self.model.order, list("bdbdbd")))] + self.model.fit(more_training_text) + # Test that we can escape the cycle + self.assertEqual( + self.model.generate(7, text_seed=("b", "d"), random_seed=5), + ["b", "d", "b", "d", "b", "d", ""], + ) + + def test_generate_with_text_seed(self): + self.assertEqual( + self.model.generate(5, text_seed=("", "e"), random_seed=3), + ["", "a", "d", "b", ""], + ) + + def test_generate_oov_text_seed(self): + self.assertEqual( + self.model.generate(text_seed=("aliens",), random_seed=3), + self.model.generate(text_seed=("",), random_seed=3), + ) + + def test_generate_None_text_seed(self): + # should crash with type error when we try to look it up in vocabulary + with self.assertRaises(TypeError): + self.model.generate(text_seed=(None,)) + + # This will work + self.assertEqual( + self.model.generate(text_seed=None, random_seed=3), + 
self.model.generate(random_seed=3), + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/test_preprocessing.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/test_preprocessing.py new file mode 100644 index 0000000..02a8af5 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/test_preprocessing.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Language Model Unit Tests +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ilia Kurenkov +# URL: +# For license information, see LICENSE.TXT +import unittest + +from nltk.lm.preprocessing import padded_everygram_pipeline + + +class TestPreprocessing(unittest.TestCase): + def test_padded_everygram_pipeline(self): + expected_train = [ + [ + ("",), + ("a",), + ("b",), + ("c",), + ("",), + ("", "a"), + ("a", "b"), + ("b", "c"), + ("c", ""), + ] + ] + expected_vocab = ["", "a", "b", "c", ""] + train_data, vocab_data = padded_everygram_pipeline(2, [["a", "b", "c"]]) + self.assertEqual([list(sent) for sent in train_data], expected_train) + self.assertEqual(list(vocab_data), expected_vocab) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/test_vocabulary.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/test_vocabulary.py new file mode 100644 index 0000000..dd78b42 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/lm/test_vocabulary.py @@ -0,0 +1,141 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Language Model Unit Tests +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ilia Kurenkov +# URL: +# For license information, see LICENSE.TXT + +import unittest +from collections import Counter + +import six +from nltk.lm import Vocabulary + + +class NgramModelVocabularyTests(unittest.TestCase): + """tests Vocabulary Class""" + + @classmethod + def setUpClass(cls): + cls.vocab = Vocabulary( + ["z", "a", "b", "c", "f", "d", "e", "g", "a", "d", "b", "e", "w"], + unk_cutoff=2, + ) + + def test_truthiness(self): + self.assertTrue(self.vocab) + + def test_cutoff_value_set_correctly(self): + self.assertEqual(self.vocab.cutoff, 2) + + def test_unable_to_change_cutoff(self): + with self.assertRaises(AttributeError): + self.vocab.cutoff = 3 + + def test_cutoff_setter_checks_value(self): + with self.assertRaises(ValueError) as exc_info: + Vocabulary("abc", unk_cutoff=0) + expected_error_msg = "Cutoff value cannot be less than 1. Got: 0" + self.assertEqual(expected_error_msg, str(exc_info.exception)) + + def test_counts_set_correctly(self): + self.assertEqual(self.vocab.counts["a"], 2) + self.assertEqual(self.vocab.counts["b"], 2) + self.assertEqual(self.vocab.counts["c"], 1) + + def test_membership_check_respects_cutoff(self): + # a was seen 2 times, so it should be considered part of the vocabulary + self.assertTrue("a" in self.vocab) + # "c" was seen once, it shouldn't be considered part of the vocab + self.assertFalse("c" in self.vocab) + # "z" was never seen at all, also shouldn't be considered in the vocab + self.assertFalse("z" in self.vocab) + + def test_vocab_len_respects_cutoff(self): + # Vocab size is the number of unique tokens that occur at least as often + # as the cutoff value, plus 1 to account for unknown words. 
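+        # For this training data only "a", "b", "d" and "e" are seen at least
+        # twice, so the expected size is those 4 members plus the unknown label.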
+ self.assertEqual(5, len(self.vocab)) + + def test_vocab_iter_respects_cutoff(self): + vocab_counts = ["a", "b", "c", "d", "e", "f", "g", "w", "z"] + vocab_items = ["a", "b", "d", "e", ""] + + six.assertCountEqual(self, vocab_counts, list(self.vocab.counts.keys())) + six.assertCountEqual(self, vocab_items, list(self.vocab)) + + def test_update_empty_vocab(self): + empty = Vocabulary(unk_cutoff=2) + self.assertEqual(len(empty), 0) + self.assertFalse(empty) + self.assertIn(empty.unk_label, empty) + + empty.update(list("abcde")) + self.assertIn(empty.unk_label, empty) + + def test_lookup(self): + self.assertEqual(self.vocab.lookup("a"), "a") + self.assertEqual(self.vocab.lookup("c"), "") + + def test_lookup_iterables(self): + self.assertEqual(self.vocab.lookup(["a", "b"]), ("a", "b")) + self.assertEqual(self.vocab.lookup(("a", "b")), ("a", "b")) + self.assertEqual(self.vocab.lookup(("a", "c")), ("a", "")) + self.assertEqual( + self.vocab.lookup(map(str, range(3))), ("", "", "") + ) + + def test_lookup_empty_iterables(self): + self.assertEqual(self.vocab.lookup(()), ()) + self.assertEqual(self.vocab.lookup([]), ()) + self.assertEqual(self.vocab.lookup(iter([])), ()) + self.assertEqual(self.vocab.lookup(n for n in range(0, 0)), ()) + + def test_lookup_recursive(self): + self.assertEqual( + self.vocab.lookup([["a", "b"], ["a", "c"]]), (("a", "b"), ("a", "")) + ) + self.assertEqual(self.vocab.lookup([["a", "b"], "c"]), (("a", "b"), "")) + self.assertEqual(self.vocab.lookup([[[[["a", "b"]]]]]), ((((("a", "b"),),),),)) + + def test_lookup_None(self): + with self.assertRaises(TypeError): + self.vocab.lookup(None) + with self.assertRaises(TypeError): + list(self.vocab.lookup([None, None])) + + def test_lookup_int(self): + with self.assertRaises(TypeError): + self.vocab.lookup(1) + with self.assertRaises(TypeError): + list(self.vocab.lookup([1, 2])) + + def test_lookup_empty_str(self): + self.assertEqual(self.vocab.lookup(""), "") + + def test_eqality(self): + v1 = Vocabulary(["a", "b", "c"], unk_cutoff=1) + v2 = Vocabulary(["a", "b", "c"], unk_cutoff=1) + v3 = Vocabulary(["a", "b", "c"], unk_cutoff=1, unk_label="blah") + v4 = Vocabulary(["a", "b"], unk_cutoff=1) + + self.assertEqual(v1, v2) + self.assertNotEqual(v1, v3) + self.assertNotEqual(v1, v4) + + def test_str(self): + self.assertEqual( + str(self.vocab), + (""), + ) + + def test_creation_with_counter(self): + self.assertEqual( + self.vocab, + Vocabulary( + Counter( + ["z", "a", "b", "c", "f", "d", "e", "g", "a", "d", "b", "e", "w"] + ), + unk_cutoff=2, + ), + ) diff --git a/venv/lib/python3.7/site-packages/nltk/test/unit/test_2x_compat.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_2x_compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/nltk/test/unit/test_2x_compat.py rename to venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_2x_compat.py diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_aline.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_aline.py new file mode 100644 index 0000000..72b92c7 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_aline.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +""" +Unit tests for nltk.metrics.aline +""" + +from __future__ import unicode_literals + +import unittest + +from nltk.metrics import aline + + +class TestAline(unittest.TestCase): + """ + Test Aline algorithm for aligning phonetic sequences + """ + + def test_aline(self): + result = aline.align('θin', 'tenwis') + expected = [ + [('θ', 't'), ('i', 
'e'), ('n', 'n'), ('-', 'w'), ('-', 'i'), ('-', 's')] + ] + + self.assertEqual(result, expected) + + result = aline.align('jo', 'ʒə') + expected = [[('j', 'ʒ'), ('o', 'ə')]] + + self.assertEqual(result, expected) + + result = aline.align('pematesiweni', 'pematesewen') + expected = [ + [ + ('p', 'p'), + ('e', 'e'), + ('m', 'm'), + ('a', 'a'), + ('t', 't'), + ('e', 'e'), + ('s', 's'), + ('i', 'e'), + ('w', 'w'), + ('e', 'e'), + ('n', 'n'), + ('i', '-'), + ] + ] + + self.assertEqual(result, expected) + + result = aline.align('tuwθ', 'dentis') + expected = [ + [ + ('t', 'd'), + ('u', 'e'), + ('w', '-'), + ('-', 'n'), + ('-', 't'), + ('-', 'i'), + ('θ', 's'), + ] + ] + + self.assertEqual(result, expected) + + def test_aline_delta(self): + """ + Test aline for computing the difference between two segments + """ + result = aline.delta('p', 'q') + expected = 20.0 + + self.assertEqual(result, expected) + + result = aline.delta('a', 'A') + expected = 0.0 + + self.assertEqual(result, expected) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_brill.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_brill.py new file mode 100644 index 0000000..5297fe1 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_brill.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +""" +Tests for Brill tagger. +""" + +import unittest + +from nltk.tag import UnigramTagger, brill, brill_trainer +from nltk.tbl import Template +from nltk.corpus import treebank + +from nltk.tbl import demo + + +class TestBrill(unittest.TestCase): + def test_pos_template(self): + train_sents = treebank.tagged_sents()[:1000] + tagger = UnigramTagger(train_sents) + trainer = brill_trainer.BrillTaggerTrainer( + tagger, [brill.Template(brill.Pos([-1]))] + ) + brill_tagger = trainer.train(train_sents) + # Example from https://github.com/nltk/nltk/issues/769 + result = brill_tagger.tag('This is a foo bar sentence'.split()) + expected = [ + ('This', 'DT'), + ('is', 'VBZ'), + ('a', 'DT'), + ('foo', None), + ('bar', 'NN'), + ('sentence', None), + ] + self.assertEqual(result, expected) + + @unittest.skip("Should be tested in __main__ of nltk.tbl.demo") + def test_brill_demo(self): + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_cfd_mutation.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_cfd_mutation.py new file mode 100644 index 0000000..7e21d7e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_cfd_mutation.py @@ -0,0 +1,39 @@ +import unittest +from nltk import ConditionalFreqDist, tokenize + +class TestEmptyCondFreq(unittest.TestCase): + def test_tabulate(self): + empty = ConditionalFreqDist() + self.assertEqual(empty.conditions(),[]) + try: + empty.tabulate(conditions="BUG") # nonexistent keys shouldn't be added + except: + pass + self.assertEqual(empty.conditions(), []) + + + def test_plot(self): + empty = ConditionalFreqDist() + self.assertEqual(empty.conditions(),[]) + try: + empty.plot(conditions=["BUG"]) # nonexistent keys shouldn't be added + except: + pass + self.assertEqual(empty.conditions(),[]) + + def test_increment(self): + # make sure that we can still mutate cfd normally + text = "cow cat mouse cat tiger" + cfd = ConditionalFreqDist() + + # create cfd with word length as condition + for word in tokenize.word_tokenize(text): + condition = len(word) + cfd[condition][word] += 1 + + self.assertEqual(cfd.conditions(), [3,5]) + + # incrementing previously unseen key is still possible + cfd[2]['hi'] += 1 + 
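+        # Unlike the read-only tabulate()/plot() calls above, an in-place
+        # increment is expected to add the previously unseen condition 2.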
self.assertEqual(set(cfd.conditions()),set([3,5,2])) # new condition added + self.assertEqual(cfd[2]['hi'], 1) # key's frequency incremented from 0 (unseen) to 1 diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_cfg2chomsky.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_cfg2chomsky.py new file mode 100644 index 0000000..686861e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_cfg2chomsky.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +import unittest +import nltk +from nltk.grammar import CFG + + +class ChomskyNormalFormForCFGTest(unittest.TestCase): + def test_simple(self): + grammar = CFG.fromstring( + """ + S -> NP VP + PP -> P NP + NP -> Det N | NP PP P + VP -> V NP | VP PP + VP -> Det + Det -> 'a' | 'the' + N -> 'dog' | 'cat' + V -> 'chased' | 'sat' + P -> 'on' | 'in' + """ + ) + self.assertFalse(grammar.is_flexible_chomsky_normal_form()) + self.assertFalse(grammar.is_chomsky_normal_form()) + grammar = grammar.chomsky_normal_form(flexible=True) + self.assertTrue(grammar.is_flexible_chomsky_normal_form()) + self.assertFalse(grammar.is_chomsky_normal_form()) + + grammar2 = CFG.fromstring( + """ + S -> NP VP + NP -> VP N P + VP -> P + N -> 'dog' | 'cat' + P -> 'on' | 'in' + """ + ) + self.assertFalse(grammar2.is_flexible_chomsky_normal_form()) + self.assertFalse(grammar2.is_chomsky_normal_form()) + grammar2 = grammar2.chomsky_normal_form() + self.assertTrue(grammar2.is_flexible_chomsky_normal_form()) + self.assertTrue(grammar2.is_chomsky_normal_form()) + + def test_complex(self): + grammar = nltk.data.load('grammars/large_grammars/atis.cfg') + self.assertFalse(grammar.is_flexible_chomsky_normal_form()) + self.assertFalse(grammar.is_chomsky_normal_form()) + grammar = grammar.chomsky_normal_form(flexible=True) + self.assertTrue(grammar.is_flexible_chomsky_normal_form()) + self.assertFalse(grammar.is_chomsky_normal_form()) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_chunk.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_chunk.py new file mode 100644 index 0000000..8c40dfc --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_chunk.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals +import unittest + +from nltk import RegexpParser + + +class TestChunkRule(unittest.TestCase): + def test_tag_pattern2re_pattern_quantifier(self): + """Test for bug https://github.com/nltk/nltk/issues/1597 + + Ensures that curly bracket quantifiers can be used inside a chunk rule. + This type of quantifier has been used for the supplementary example + in http://www.nltk.org/book/ch07.html#exploring-text-corpora. 
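+        Here the rule is applied to a tagged Brown-corpus sentence and is
+        expected to chunk every run of four or more nominal (N*) tags.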
+ """ + sent = [ + ('The', 'AT'), + ('September-October', 'NP'), + ('term', 'NN'), + ('jury', 'NN'), + ('had', 'HVD'), + ('been', 'BEN'), + ('charged', 'VBN'), + ('by', 'IN'), + ('Fulton', 'NP-TL'), + ('Superior', 'JJ-TL'), + ('Court', 'NN-TL'), + ('Judge', 'NN-TL'), + ('Durwood', 'NP'), + ('Pye', 'NP'), + ('to', 'TO'), + ('investigate', 'VB'), + ('reports', 'NNS'), + ('of', 'IN'), + ('possible', 'JJ'), + ('``', '``'), + ('irregularities', 'NNS'), + ("''", "''"), + ('in', 'IN'), + ('the', 'AT'), + ('hard-fought', 'JJ'), + ('primary', 'NN'), + ('which', 'WDT'), + ('was', 'BEDZ'), + ('won', 'VBN'), + ('by', 'IN'), + ('Mayor-nominate', 'NN-TL'), + ('Ivan', 'NP'), + ('Allen', 'NP'), + ('Jr.', 'NP'), + ('.', '.'), + ] # source: brown corpus + cp = RegexpParser('CHUNK: {{4,}}') + tree = cp.parse(sent) + assert ( + tree.pformat() + == """(S + The/AT + September-October/NP + term/NN + jury/NN + had/HVD + been/BEN + charged/VBN + by/IN + Fulton/NP-TL + Superior/JJ-TL + (CHUNK Court/NN-TL Judge/NN-TL Durwood/NP Pye/NP) + to/TO + investigate/VB + reports/NNS + of/IN + possible/JJ + ``/`` + irregularities/NNS + ''/'' + in/IN + the/AT + hard-fought/JJ + primary/NN + which/WDT + was/BEDZ + won/VBN + by/IN + (CHUNK Mayor-nominate/NN-TL Ivan/NP Allen/NP Jr./NP) + ./.)""" + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_classify.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_classify.py new file mode 100644 index 0000000..e9128d2 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_classify.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- +""" +Unit tests for nltk.classify. See also: nltk/test/classify.doctest +""" +from __future__ import absolute_import +from nose import SkipTest +from nltk import classify + +TRAIN = [ + (dict(a=1, b=1, c=1), 'y'), + (dict(a=1, b=1, c=1), 'x'), + (dict(a=1, b=1, c=0), 'y'), + (dict(a=0, b=1, c=1), 'x'), + (dict(a=0, b=1, c=1), 'y'), + (dict(a=0, b=0, c=1), 'y'), + (dict(a=0, b=1, c=0), 'x'), + (dict(a=0, b=0, c=0), 'x'), + (dict(a=0, b=1, c=1), 'y'), +] + +TEST = [ + (dict(a=1, b=0, c=1)), # unseen + (dict(a=1, b=0, c=0)), # unseen + (dict(a=0, b=1, c=1)), # seen 3 times, labels=y,y,x + (dict(a=0, b=1, c=0)), # seen 1 time, label=x +] + +RESULTS = [(0.16, 0.84), (0.46, 0.54), (0.41, 0.59), (0.76, 0.24)] + + +def assert_classifier_correct(algorithm): + try: + classifier = classify.MaxentClassifier.train( + TRAIN, algorithm, trace=0, max_iter=1000 + ) + except (LookupError, AttributeError) as e: + raise SkipTest(str(e)) + + for (px, py), featureset in zip(RESULTS, TEST): + pdist = classifier.prob_classify(featureset) + assert abs(pdist.prob('x') - px) < 1e-2, (pdist.prob('x'), px) + assert abs(pdist.prob('y') - py) < 1e-2, (pdist.prob('y'), py) + + +def test_megam(): + assert_classifier_correct('MEGAM') + + +def test_tadm(): + assert_classifier_correct('TADM') diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_collocations.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_collocations.py new file mode 100644 index 0000000..8e3535f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_collocations.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals +import unittest + +from nltk.collocations import BigramCollocationFinder +from nltk.metrics import BigramAssocMeasures + +## Test bigram counters with discontinuous bigrams and repeated words + +_EPSILON = 1e-8 + + +def close_enough(x, y): + """Verify that two 
sequences of n-gram association values are within + _EPSILON of each other. + """ + + for (x1, y1) in zip(x, y): + if x1[0] != y1[0] or abs(x1[1] - y1[1]) > _EPSILON: + return False + return True + + +class TestBigram(unittest.TestCase): + def test_bigram2(self): + sent = 'this this is is a a test test'.split() + + b = BigramCollocationFinder.from_words(sent) + + # python 2.6 does not have assertItemsEqual or assertListEqual + self.assertEqual( + sorted(b.ngram_fd.items()), + sorted( + [ + (('a', 'a'), 1), + (('a', 'test'), 1), + (('is', 'a'), 1), + (('is', 'is'), 1), + (('test', 'test'), 1), + (('this', 'is'), 1), + (('this', 'this'), 1), + ] + ), + ) + self.assertEqual( + sorted(b.word_fd.items()), + sorted([('a', 2), ('is', 2), ('test', 2), ('this', 2)]), + ) + self.assertTrue( + len(sent) == sum(b.word_fd.values()) == sum(b.ngram_fd.values()) + 1 + ) + self.assertTrue( + close_enough( + sorted(b.score_ngrams(BigramAssocMeasures.pmi)), + sorted( + [ + (('a', 'a'), 1.0), + (('a', 'test'), 1.0), + (('is', 'a'), 1.0), + (('is', 'is'), 1.0), + (('test', 'test'), 1.0), + (('this', 'is'), 1.0), + (('this', 'this'), 1.0), + ] + ), + ) + ) + + def test_bigram3(self): + sent = 'this this is is a a test test'.split() + + b = BigramCollocationFinder.from_words(sent, window_size=3) + self.assertEqual( + sorted(b.ngram_fd.items()), + sorted( + [ + (('a', 'test'), 3), + (('is', 'a'), 3), + (('this', 'is'), 3), + (('a', 'a'), 1), + (('is', 'is'), 1), + (('test', 'test'), 1), + (('this', 'this'), 1), + ] + ), + ) + self.assertEqual( + sorted(b.word_fd.items()), + sorted([('a', 2), ('is', 2), ('test', 2), ('this', 2)]), + ) + self.assertTrue( + len(sent) + == sum(b.word_fd.values()) + == (sum(b.ngram_fd.values()) + 2 + 1) / 2.0 + ) + self.assertTrue( + close_enough( + sorted(b.score_ngrams(BigramAssocMeasures.pmi)), + sorted( + [ + (('a', 'test'), 1.584962500721156), + (('is', 'a'), 1.584962500721156), + (('this', 'is'), 1.584962500721156), + (('a', 'a'), 0.0), + (('is', 'is'), 0.0), + (('test', 'test'), 0.0), + (('this', 'this'), 0.0), + ] + ), + ) + ) + + def test_bigram5(self): + sent = 'this this is is a a test test'.split() + + b = BigramCollocationFinder.from_words(sent, window_size=5) + self.assertEqual( + sorted(b.ngram_fd.items()), + sorted( + [ + (('a', 'test'), 4), + (('is', 'a'), 4), + (('this', 'is'), 4), + (('is', 'test'), 3), + (('this', 'a'), 3), + (('a', 'a'), 1), + (('is', 'is'), 1), + (('test', 'test'), 1), + (('this', 'this'), 1), + ] + ), + ) + self.assertEqual( + sorted(b.word_fd.items()), + sorted([('a', 2), ('is', 2), ('test', 2), ('this', 2)]), + ) + self.assertTrue( + len(sent) + == sum(b.word_fd.values()) + == (sum(b.ngram_fd.values()) + 4 + 3 + 2 + 1) / 4.0 + ) + self.assertTrue( + close_enough( + sorted(b.score_ngrams(BigramAssocMeasures.pmi)), + sorted( + [ + (('a', 'test'), 1.0), + (('is', 'a'), 1.0), + (('this', 'is'), 1.0), + (('is', 'test'), 0.5849625007211562), + (('this', 'a'), 0.5849625007211562), + (('a', 'a'), -1.0), + (('is', 'is'), -1.0), + (('test', 'test'), -1.0), + (('this', 'this'), -1.0), + ] + ), + ) + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_concordance.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_concordance.py new file mode 100644 index 0000000..81ac47b --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_concordance.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals + +import unittest +import contextlib +import sys + +from nose 
import with_setup + +from nltk.corpus import gutenberg +from nltk.text import Text + +try: + from StringIO import StringIO +except ImportError as e: + from io import StringIO + + +@contextlib.contextmanager +def stdout_redirect(where): + sys.stdout = where + try: + yield where + finally: + sys.stdout = sys.__stdout__ + + +class TestConcordance(unittest.TestCase): + """Text constructed using: http://www.nltk.org/book/ch01.html""" + + @classmethod + def setup_class(cls): + cls.corpus = gutenberg.words('melville-moby_dick.txt') + + @classmethod + def teardown_class(cls): + pass + + def setUp(self): + self.text = Text(TestConcordance.corpus) + self.query = "monstrous" + self.maxDiff = None + self.list_out = [ + 'ong the former , one was of a most monstrous size . ... This came towards us , ', + 'ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r', + 'll over with a heathenish array of monstrous clubs and spears . Some were thick', + 'd as you gazed , and wondered what monstrous cannibal and savage could ever hav', + 'that has survived the flood ; most monstrous and most mountainous ! That Himmal', + 'they might scout at Moby Dick as a monstrous fable , or still worse and more de', + 'th of Radney .\'" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l', + 'ing Scenes . In connexion with the monstrous pictures of whales , I am strongly', + 'ere to enter upon those still more monstrous stories of them which are to be fo', + 'ght have been rummaged out of this monstrous cabinet there is no telling . But ', + 'of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u', + ] + + def tearDown(self): + pass + + def test_concordance_list(self): + concordance_out = self.text.concordance_list(self.query) + self.assertEqual(self.list_out, [c.line for c in concordance_out]) + + def test_concordance_width(self): + list_out = [ + "monstrous", + "monstrous", + "monstrous", + "monstrous", + "monstrous", + "monstrous", + "Monstrous", + "monstrous", + "monstrous", + "monstrous", + "monstrous", + ] + + concordance_out = self.text.concordance_list(self.query, width=0) + self.assertEqual(list_out, [c.query for c in concordance_out]) + + def test_concordance_lines(self): + concordance_out = self.text.concordance_list(self.query, lines=3) + self.assertEqual(self.list_out[:3], [c.line for c in concordance_out]) + + def test_concordance_print(self): + print_out = """Displaying 11 of 11 matches: + ong the former , one was of a most monstrous size . ... This came towards us , + ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r + ll over with a heathenish array of monstrous clubs and spears . Some were thick + d as you gazed , and wondered what monstrous cannibal and savage could ever hav + that has survived the flood ; most monstrous and most mountainous ! That Himmal + they might scout at Moby Dick as a monstrous fable , or still worse and more de + th of Radney .'" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l + ing Scenes . In connexion with the monstrous pictures of whales , I am strongly + ere to enter upon those still more monstrous stories of them which are to be fo + ght have been rummaged out of this monstrous cabinet there is no telling . 
But + of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u + """ + + with stdout_redirect(StringIO()) as stdout: + self.text.concordance(self.query) + + def strip_space(raw_str): + return raw_str.replace(" ", "") + + self.assertEqual(strip_space(print_out), strip_space(stdout.getvalue())) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_corenlp.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_corenlp.py new file mode 100644 index 0000000..bd99184 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_corenlp.py @@ -0,0 +1,1419 @@ +# -*- coding: utf-8 -*- + +""" +Mock test for Stanford CoreNLP wrappers. +""" + +import sys +from itertools import chain +from unittest import TestCase, SkipTest + +try: + from unittest.mock import MagicMock +except ImportError: + raise SkipTest('unittest.mock no supported in Python2') +from nltk.tree import Tree +from nltk.parse import corenlp + + +class TestTokenizerAPI(TestCase): + def test_tokenize(self): + corenlp_tokenizer = corenlp.CoreNLPParser() + + api_return_value = { + u'sentences': [ + { + u'index': 0, + u'tokens': [ + { + u'after': u' ', + u'before': u'', + u'characterOffsetBegin': 0, + u'characterOffsetEnd': 4, + u'index': 1, + u'originalText': u'Good', + u'word': u'Good', + }, + { + u'after': u' ', + u'before': u' ', + u'characterOffsetBegin': 5, + u'characterOffsetEnd': 12, + u'index': 2, + u'originalText': u'muffins', + u'word': u'muffins', + }, + { + u'after': u' ', + u'before': u' ', + u'characterOffsetBegin': 13, + u'characterOffsetEnd': 17, + u'index': 3, + u'originalText': u'cost', + u'word': u'cost', + }, + { + u'after': u'', + u'before': u' ', + u'characterOffsetBegin': 18, + u'characterOffsetEnd': 19, + u'index': 4, + u'originalText': u'$', + u'word': u'$', + }, + { + u'after': u'\n', + u'before': u'', + u'characterOffsetBegin': 19, + u'characterOffsetEnd': 23, + u'index': 5, + u'originalText': u'3.88', + u'word': u'3.88', + }, + { + u'after': u' ', + u'before': u'\n', + u'characterOffsetBegin': 24, + u'characterOffsetEnd': 26, + u'index': 6, + u'originalText': u'in', + u'word': u'in', + }, + { + u'after': u' ', + u'before': u' ', + u'characterOffsetBegin': 27, + u'characterOffsetEnd': 30, + u'index': 7, + u'originalText': u'New', + u'word': u'New', + }, + { + u'after': u'', + u'before': u' ', + u'characterOffsetBegin': 31, + u'characterOffsetEnd': 35, + u'index': 8, + u'originalText': u'York', + u'word': u'York', + }, + { + u'after': u' ', + u'before': u'', + u'characterOffsetBegin': 35, + u'characterOffsetEnd': 36, + u'index': 9, + u'originalText': u'.', + u'word': u'.', + }, + ], + }, + { + u'index': 1, + u'tokens': [ + { + u'after': u' ', + u'before': u' ', + u'characterOffsetBegin': 38, + u'characterOffsetEnd': 44, + u'index': 1, + u'originalText': u'Please', + u'word': u'Please', + }, + { + u'after': u' ', + u'before': u' ', + u'characterOffsetBegin': 45, + u'characterOffsetEnd': 48, + u'index': 2, + u'originalText': u'buy', + u'word': u'buy', + }, + { + u'after': u'\n', + u'before': u' ', + u'characterOffsetBegin': 49, + u'characterOffsetEnd': 51, + u'index': 3, + u'originalText': u'me', + u'word': u'me', + }, + { + u'after': u' ', + u'before': u'\n', + u'characterOffsetBegin': 52, + u'characterOffsetEnd': 55, + u'index': 4, + u'originalText': u'two', + u'word': u'two', + }, + { + u'after': u' ', + u'before': u' ', + u'characterOffsetBegin': 56, + u'characterOffsetEnd': 58, + u'index': 5, + u'originalText': u'of', + u'word': u'of', + }, + { + 
u'after': u'', + u'before': u' ', + u'characterOffsetBegin': 59, + u'characterOffsetEnd': 63, + u'index': 6, + u'originalText': u'them', + u'word': u'them', + }, + { + u'after': u'\n', + u'before': u'', + u'characterOffsetBegin': 63, + u'characterOffsetEnd': 64, + u'index': 7, + u'originalText': u'.', + u'word': u'.', + }, + ], + }, + { + u'index': 2, + u'tokens': [ + { + u'after': u'', + u'before': u'\n', + u'characterOffsetBegin': 65, + u'characterOffsetEnd': 71, + u'index': 1, + u'originalText': u'Thanks', + u'word': u'Thanks', + }, + { + u'after': u'', + u'before': u'', + u'characterOffsetBegin': 71, + u'characterOffsetEnd': 72, + u'index': 2, + u'originalText': u'.', + u'word': u'.', + }, + ], + }, + ] + } + corenlp_tokenizer.api_call = MagicMock(return_value=api_return_value) + + input_string = "Good muffins cost $3.88\nin New York. Please buy me\ntwo of them.\nThanks." + + expected_output = [ + u'Good', + u'muffins', + u'cost', + u'$', + u'3.88', + u'in', + u'New', + u'York', + u'.', + u'Please', + u'buy', + u'me', + u'two', + u'of', + u'them', + u'.', + u'Thanks', + u'.', + ] + + tokenized_output = list(corenlp_tokenizer.tokenize(input_string)) + + corenlp_tokenizer.api_call.assert_called_once_with( + 'Good muffins cost $3.88\nin New York. Please buy me\ntwo of them.\nThanks.', + properties={'annotators': 'tokenize,ssplit'}, + ) + self.assertEqual(expected_output, tokenized_output) + + +class TestTaggerAPI(TestCase): + def test_pos_tagger(self): + corenlp_tagger = corenlp.CoreNLPParser(tagtype='pos') + + api_return_value = { + u'sentences': [ + { + u'basicDependencies': [ + { + u'dep': u'ROOT', + u'dependent': 1, + u'dependentGloss': u'What', + u'governor': 0, + u'governorGloss': u'ROOT', + }, + { + u'dep': u'cop', + u'dependent': 2, + u'dependentGloss': u'is', + u'governor': 1, + u'governorGloss': u'What', + }, + { + u'dep': u'det', + u'dependent': 3, + u'dependentGloss': u'the', + u'governor': 4, + u'governorGloss': u'airspeed', + }, + { + u'dep': u'nsubj', + u'dependent': 4, + u'dependentGloss': u'airspeed', + u'governor': 1, + u'governorGloss': u'What', + }, + { + u'dep': u'case', + u'dependent': 5, + u'dependentGloss': u'of', + u'governor': 8, + u'governorGloss': u'swallow', + }, + { + u'dep': u'det', + u'dependent': 6, + u'dependentGloss': u'an', + u'governor': 8, + u'governorGloss': u'swallow', + }, + { + u'dep': u'compound', + u'dependent': 7, + u'dependentGloss': u'unladen', + u'governor': 8, + u'governorGloss': u'swallow', + }, + { + u'dep': u'nmod', + u'dependent': 8, + u'dependentGloss': u'swallow', + u'governor': 4, + u'governorGloss': u'airspeed', + }, + { + u'dep': u'punct', + u'dependent': 9, + u'dependentGloss': u'?', + u'governor': 1, + u'governorGloss': u'What', + }, + ], + u'enhancedDependencies': [ + { + u'dep': u'ROOT', + u'dependent': 1, + u'dependentGloss': u'What', + u'governor': 0, + u'governorGloss': u'ROOT', + }, + { + u'dep': u'cop', + u'dependent': 2, + u'dependentGloss': u'is', + u'governor': 1, + u'governorGloss': u'What', + }, + { + u'dep': u'det', + u'dependent': 3, + u'dependentGloss': u'the', + u'governor': 4, + u'governorGloss': u'airspeed', + }, + { + u'dep': u'nsubj', + u'dependent': 4, + u'dependentGloss': u'airspeed', + u'governor': 1, + u'governorGloss': u'What', + }, + { + u'dep': u'case', + u'dependent': 5, + u'dependentGloss': u'of', + u'governor': 8, + u'governorGloss': u'swallow', + }, + { + u'dep': u'det', + u'dependent': 6, + u'dependentGloss': u'an', + u'governor': 8, + u'governorGloss': u'swallow', + }, + { + u'dep': u'compound', + 
u'dependent': 7, + u'dependentGloss': u'unladen', + u'governor': 8, + u'governorGloss': u'swallow', + }, + { + u'dep': u'nmod:of', + u'dependent': 8, + u'dependentGloss': u'swallow', + u'governor': 4, + u'governorGloss': u'airspeed', + }, + { + u'dep': u'punct', + u'dependent': 9, + u'dependentGloss': u'?', + u'governor': 1, + u'governorGloss': u'What', + }, + ], + u'enhancedPlusPlusDependencies': [ + { + u'dep': u'ROOT', + u'dependent': 1, + u'dependentGloss': u'What', + u'governor': 0, + u'governorGloss': u'ROOT', + }, + { + u'dep': u'cop', + u'dependent': 2, + u'dependentGloss': u'is', + u'governor': 1, + u'governorGloss': u'What', + }, + { + u'dep': u'det', + u'dependent': 3, + u'dependentGloss': u'the', + u'governor': 4, + u'governorGloss': u'airspeed', + }, + { + u'dep': u'nsubj', + u'dependent': 4, + u'dependentGloss': u'airspeed', + u'governor': 1, + u'governorGloss': u'What', + }, + { + u'dep': u'case', + u'dependent': 5, + u'dependentGloss': u'of', + u'governor': 8, + u'governorGloss': u'swallow', + }, + { + u'dep': u'det', + u'dependent': 6, + u'dependentGloss': u'an', + u'governor': 8, + u'governorGloss': u'swallow', + }, + { + u'dep': u'compound', + u'dependent': 7, + u'dependentGloss': u'unladen', + u'governor': 8, + u'governorGloss': u'swallow', + }, + { + u'dep': u'nmod:of', + u'dependent': 8, + u'dependentGloss': u'swallow', + u'governor': 4, + u'governorGloss': u'airspeed', + }, + { + u'dep': u'punct', + u'dependent': 9, + u'dependentGloss': u'?', + u'governor': 1, + u'governorGloss': u'What', + }, + ], + u'index': 0, + u'parse': u'(ROOT\n (SBARQ\n (WHNP (WP What))\n (SQ (VBZ is)\n (NP\n (NP (DT the) (NN airspeed))\n (PP (IN of)\n (NP (DT an) (NN unladen) (NN swallow)))))\n (. ?)))', + u'tokens': [ + { + u'after': u' ', + u'before': u'', + u'characterOffsetBegin': 0, + u'characterOffsetEnd': 4, + u'index': 1, + u'lemma': u'what', + u'originalText': u'What', + u'pos': u'WP', + u'word': u'What', + }, + { + u'after': u' ', + u'before': u' ', + u'characterOffsetBegin': 5, + u'characterOffsetEnd': 7, + u'index': 2, + u'lemma': u'be', + u'originalText': u'is', + u'pos': u'VBZ', + u'word': u'is', + }, + { + u'after': u' ', + u'before': u' ', + u'characterOffsetBegin': 8, + u'characterOffsetEnd': 11, + u'index': 3, + u'lemma': u'the', + u'originalText': u'the', + u'pos': u'DT', + u'word': u'the', + }, + { + u'after': u' ', + u'before': u' ', + u'characterOffsetBegin': 12, + u'characterOffsetEnd': 20, + u'index': 4, + u'lemma': u'airspeed', + u'originalText': u'airspeed', + u'pos': u'NN', + u'word': u'airspeed', + }, + { + u'after': u' ', + u'before': u' ', + u'characterOffsetBegin': 21, + u'characterOffsetEnd': 23, + u'index': 5, + u'lemma': u'of', + u'originalText': u'of', + u'pos': u'IN', + u'word': u'of', + }, + { + u'after': u' ', + u'before': u' ', + u'characterOffsetBegin': 24, + u'characterOffsetEnd': 26, + u'index': 6, + u'lemma': u'a', + u'originalText': u'an', + u'pos': u'DT', + u'word': u'an', + }, + { + u'after': u' ', + u'before': u' ', + u'characterOffsetBegin': 27, + u'characterOffsetEnd': 34, + u'index': 7, + u'lemma': u'unladen', + u'originalText': u'unladen', + u'pos': u'JJ', + u'word': u'unladen', + }, + { + u'after': u' ', + u'before': u' ', + u'characterOffsetBegin': 35, + u'characterOffsetEnd': 42, + u'index': 8, + u'lemma': u'swallow', + u'originalText': u'swallow', + u'pos': u'VB', + u'word': u'swallow', + }, + { + u'after': u'', + u'before': u' ', + u'characterOffsetBegin': 43, + u'characterOffsetEnd': 44, + u'index': 9, + u'lemma': u'?', + 
u'originalText': u'?', + u'pos': u'.', + u'word': u'?', + }, + ], + } + ] + } + corenlp_tagger.api_call = MagicMock(return_value=api_return_value) + + input_tokens = 'What is the airspeed of an unladen swallow ?'.split() + expected_output = [ + ('What', 'WP'), + ('is', 'VBZ'), + ('the', 'DT'), + ('airspeed', 'NN'), + ('of', 'IN'), + ('an', 'DT'), + ('unladen', 'JJ'), + ('swallow', 'VB'), + ('?', '.'), + ] + tagged_output = corenlp_tagger.tag(input_tokens) + + corenlp_tagger.api_call.assert_called_once_with( + 'What is the airspeed of an unladen swallow ?', + properties={ + 'ssplit.isOneSentence': 'true', + 'annotators': 'tokenize,ssplit,pos', + }, + ) + self.assertEqual(expected_output, tagged_output) + + def test_ner_tagger(self): + corenlp_tagger = corenlp.CoreNLPParser(tagtype='ner') + + api_return_value = { + 'sentences': [ + { + 'index': 0, + 'tokens': [ + { + 'after': ' ', + 'before': '', + 'characterOffsetBegin': 0, + 'characterOffsetEnd': 4, + 'index': 1, + 'lemma': 'Rami', + 'ner': 'PERSON', + 'originalText': 'Rami', + 'pos': 'NNP', + 'word': 'Rami', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 5, + 'characterOffsetEnd': 8, + 'index': 2, + 'lemma': 'Eid', + 'ner': 'PERSON', + 'originalText': 'Eid', + 'pos': 'NNP', + 'word': 'Eid', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 9, + 'characterOffsetEnd': 11, + 'index': 3, + 'lemma': 'be', + 'ner': 'O', + 'originalText': 'is', + 'pos': 'VBZ', + 'word': 'is', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 12, + 'characterOffsetEnd': 20, + 'index': 4, + 'lemma': 'study', + 'ner': 'O', + 'originalText': 'studying', + 'pos': 'VBG', + 'word': 'studying', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 21, + 'characterOffsetEnd': 23, + 'index': 5, + 'lemma': 'at', + 'ner': 'O', + 'originalText': 'at', + 'pos': 'IN', + 'word': 'at', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 24, + 'characterOffsetEnd': 29, + 'index': 6, + 'lemma': 'Stony', + 'ner': 'ORGANIZATION', + 'originalText': 'Stony', + 'pos': 'NNP', + 'word': 'Stony', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 30, + 'characterOffsetEnd': 35, + 'index': 7, + 'lemma': 'Brook', + 'ner': 'ORGANIZATION', + 'originalText': 'Brook', + 'pos': 'NNP', + 'word': 'Brook', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 36, + 'characterOffsetEnd': 46, + 'index': 8, + 'lemma': 'University', + 'ner': 'ORGANIZATION', + 'originalText': 'University', + 'pos': 'NNP', + 'word': 'University', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 47, + 'characterOffsetEnd': 49, + 'index': 9, + 'lemma': 'in', + 'ner': 'O', + 'originalText': 'in', + 'pos': 'IN', + 'word': 'in', + }, + { + 'after': '', + 'before': ' ', + 'characterOffsetBegin': 50, + 'characterOffsetEnd': 52, + 'index': 10, + 'lemma': 'NY', + 'ner': 'O', + 'originalText': 'NY', + 'pos': 'NNP', + 'word': 'NY', + }, + ], + } + ] + } + + corenlp_tagger.api_call = MagicMock(return_value=api_return_value) + + input_tokens = 'Rami Eid is studying at Stony Brook University in NY'.split() + expected_output = [ + ('Rami', 'PERSON'), + ('Eid', 'PERSON'), + ('is', 'O'), + ('studying', 'O'), + ('at', 'O'), + ('Stony', 'ORGANIZATION'), + ('Brook', 'ORGANIZATION'), + ('University', 'ORGANIZATION'), + ('in', 'O'), + ('NY', 'O'), + ] + tagged_output = corenlp_tagger.tag(input_tokens) + + corenlp_tagger.api_call.assert_called_once_with( + 'Rami Eid is studying at Stony Brook University in 
NY', + properties={ + 'ssplit.isOneSentence': 'true', + 'annotators': 'tokenize,ssplit,ner', + }, + ) + self.assertEqual(expected_output, tagged_output) + + def test_unexpected_tagtype(self): + with self.assertRaises(ValueError): + corenlp_tagger = corenlp.CoreNLPParser(tagtype='test') + + +class TestParserAPI(TestCase): + def test_parse(self): + corenlp_parser = corenlp.CoreNLPParser() + + api_return_value = { + 'sentences': [ + { + 'basicDependencies': [ + { + 'dep': 'ROOT', + 'dependent': 4, + 'dependentGloss': 'fox', + 'governor': 0, + 'governorGloss': 'ROOT', + }, + { + 'dep': 'det', + 'dependent': 1, + 'dependentGloss': 'The', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'amod', + 'dependent': 2, + 'dependentGloss': 'quick', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'amod', + 'dependent': 3, + 'dependentGloss': 'brown', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'dep', + 'dependent': 5, + 'dependentGloss': 'jumps', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'case', + 'dependent': 6, + 'dependentGloss': 'over', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'det', + 'dependent': 7, + 'dependentGloss': 'the', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'amod', + 'dependent': 8, + 'dependentGloss': 'lazy', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'nmod', + 'dependent': 9, + 'dependentGloss': 'dog', + 'governor': 5, + 'governorGloss': 'jumps', + }, + ], + 'enhancedDependencies': [ + { + 'dep': 'ROOT', + 'dependent': 4, + 'dependentGloss': 'fox', + 'governor': 0, + 'governorGloss': 'ROOT', + }, + { + 'dep': 'det', + 'dependent': 1, + 'dependentGloss': 'The', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'amod', + 'dependent': 2, + 'dependentGloss': 'quick', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'amod', + 'dependent': 3, + 'dependentGloss': 'brown', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'dep', + 'dependent': 5, + 'dependentGloss': 'jumps', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'case', + 'dependent': 6, + 'dependentGloss': 'over', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'det', + 'dependent': 7, + 'dependentGloss': 'the', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'amod', + 'dependent': 8, + 'dependentGloss': 'lazy', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'nmod:over', + 'dependent': 9, + 'dependentGloss': 'dog', + 'governor': 5, + 'governorGloss': 'jumps', + }, + ], + 'enhancedPlusPlusDependencies': [ + { + 'dep': 'ROOT', + 'dependent': 4, + 'dependentGloss': 'fox', + 'governor': 0, + 'governorGloss': 'ROOT', + }, + { + 'dep': 'det', + 'dependent': 1, + 'dependentGloss': 'The', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'amod', + 'dependent': 2, + 'dependentGloss': 'quick', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'amod', + 'dependent': 3, + 'dependentGloss': 'brown', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'dep', + 'dependent': 5, + 'dependentGloss': 'jumps', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'case', + 'dependent': 6, + 'dependentGloss': 'over', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'det', + 'dependent': 7, + 'dependentGloss': 'the', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'amod', + 'dependent': 8, + 'dependentGloss': 'lazy', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'nmod:over', + 
'dependent': 9, + 'dependentGloss': 'dog', + 'governor': 5, + 'governorGloss': 'jumps', + }, + ], + 'index': 0, + 'parse': '(ROOT\n (NP\n (NP (DT The) (JJ quick) (JJ brown) (NN fox))\n (NP\n (NP (NNS jumps))\n (PP (IN over)\n (NP (DT the) (JJ lazy) (NN dog))))))', + 'tokens': [ + { + 'after': ' ', + 'before': '', + 'characterOffsetBegin': 0, + 'characterOffsetEnd': 3, + 'index': 1, + 'lemma': 'the', + 'originalText': 'The', + 'pos': 'DT', + 'word': 'The', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 4, + 'characterOffsetEnd': 9, + 'index': 2, + 'lemma': 'quick', + 'originalText': 'quick', + 'pos': 'JJ', + 'word': 'quick', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 10, + 'characterOffsetEnd': 15, + 'index': 3, + 'lemma': 'brown', + 'originalText': 'brown', + 'pos': 'JJ', + 'word': 'brown', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 16, + 'characterOffsetEnd': 19, + 'index': 4, + 'lemma': 'fox', + 'originalText': 'fox', + 'pos': 'NN', + 'word': 'fox', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 20, + 'characterOffsetEnd': 25, + 'index': 5, + 'lemma': 'jump', + 'originalText': 'jumps', + 'pos': 'VBZ', + 'word': 'jumps', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 26, + 'characterOffsetEnd': 30, + 'index': 6, + 'lemma': 'over', + 'originalText': 'over', + 'pos': 'IN', + 'word': 'over', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 31, + 'characterOffsetEnd': 34, + 'index': 7, + 'lemma': 'the', + 'originalText': 'the', + 'pos': 'DT', + 'word': 'the', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 35, + 'characterOffsetEnd': 39, + 'index': 8, + 'lemma': 'lazy', + 'originalText': 'lazy', + 'pos': 'JJ', + 'word': 'lazy', + }, + { + 'after': '', + 'before': ' ', + 'characterOffsetBegin': 40, + 'characterOffsetEnd': 43, + 'index': 9, + 'lemma': 'dog', + 'originalText': 'dog', + 'pos': 'NN', + 'word': 'dog', + }, + ], + } + ] + } + + corenlp_parser.api_call = MagicMock(return_value=api_return_value) + + input_string = "The quick brown fox jumps over the lazy dog".split() + expected_output = Tree( + 'ROOT', + [ + Tree( + 'NP', + [ + Tree( + 'NP', + [ + Tree('DT', ['The']), + Tree('JJ', ['quick']), + Tree('JJ', ['brown']), + Tree('NN', ['fox']), + ], + ), + Tree( + 'NP', + [ + Tree('NP', [Tree('NNS', ['jumps'])]), + Tree( + 'PP', + [ + Tree('IN', ['over']), + Tree( + 'NP', + [ + Tree('DT', ['the']), + Tree('JJ', ['lazy']), + Tree('NN', ['dog']), + ], + ), + ], + ), + ], + ), + ], + ) + ], + ) + + parsed_data = next(corenlp_parser.parse(input_string)) + + corenlp_parser.api_call.assert_called_once_with( + "The quick brown fox jumps over the lazy dog", + properties={'ssplit.eolonly': 'true'}, + ) + self.assertEqual(expected_output, parsed_data) + + def test_dependency_parser(self): + corenlp_parser = corenlp.CoreNLPDependencyParser() + + api_return_value = { + 'sentences': [ + { + 'basicDependencies': [ + { + 'dep': 'ROOT', + 'dependent': 5, + 'dependentGloss': 'jumps', + 'governor': 0, + 'governorGloss': 'ROOT', + }, + { + 'dep': 'det', + 'dependent': 1, + 'dependentGloss': 'The', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'amod', + 'dependent': 2, + 'dependentGloss': 'quick', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'amod', + 'dependent': 3, + 'dependentGloss': 'brown', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'nsubj', + 'dependent': 4, + 'dependentGloss': 'fox', + 'governor': 5, + 
'governorGloss': 'jumps', + }, + { + 'dep': 'case', + 'dependent': 6, + 'dependentGloss': 'over', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'det', + 'dependent': 7, + 'dependentGloss': 'the', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'amod', + 'dependent': 8, + 'dependentGloss': 'lazy', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'nmod', + 'dependent': 9, + 'dependentGloss': 'dog', + 'governor': 5, + 'governorGloss': 'jumps', + }, + ], + 'enhancedDependencies': [ + { + 'dep': 'ROOT', + 'dependent': 5, + 'dependentGloss': 'jumps', + 'governor': 0, + 'governorGloss': 'ROOT', + }, + { + 'dep': 'det', + 'dependent': 1, + 'dependentGloss': 'The', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'amod', + 'dependent': 2, + 'dependentGloss': 'quick', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'amod', + 'dependent': 3, + 'dependentGloss': 'brown', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'nsubj', + 'dependent': 4, + 'dependentGloss': 'fox', + 'governor': 5, + 'governorGloss': 'jumps', + }, + { + 'dep': 'case', + 'dependent': 6, + 'dependentGloss': 'over', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'det', + 'dependent': 7, + 'dependentGloss': 'the', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'amod', + 'dependent': 8, + 'dependentGloss': 'lazy', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'nmod:over', + 'dependent': 9, + 'dependentGloss': 'dog', + 'governor': 5, + 'governorGloss': 'jumps', + }, + ], + 'enhancedPlusPlusDependencies': [ + { + 'dep': 'ROOT', + 'dependent': 5, + 'dependentGloss': 'jumps', + 'governor': 0, + 'governorGloss': 'ROOT', + }, + { + 'dep': 'det', + 'dependent': 1, + 'dependentGloss': 'The', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'amod', + 'dependent': 2, + 'dependentGloss': 'quick', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'amod', + 'dependent': 3, + 'dependentGloss': 'brown', + 'governor': 4, + 'governorGloss': 'fox', + }, + { + 'dep': 'nsubj', + 'dependent': 4, + 'dependentGloss': 'fox', + 'governor': 5, + 'governorGloss': 'jumps', + }, + { + 'dep': 'case', + 'dependent': 6, + 'dependentGloss': 'over', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'det', + 'dependent': 7, + 'dependentGloss': 'the', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'amod', + 'dependent': 8, + 'dependentGloss': 'lazy', + 'governor': 9, + 'governorGloss': 'dog', + }, + { + 'dep': 'nmod:over', + 'dependent': 9, + 'dependentGloss': 'dog', + 'governor': 5, + 'governorGloss': 'jumps', + }, + ], + 'index': 0, + 'tokens': [ + { + 'after': ' ', + 'before': '', + 'characterOffsetBegin': 0, + 'characterOffsetEnd': 3, + 'index': 1, + 'lemma': 'the', + 'originalText': 'The', + 'pos': 'DT', + 'word': 'The', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 4, + 'characterOffsetEnd': 9, + 'index': 2, + 'lemma': 'quick', + 'originalText': 'quick', + 'pos': 'JJ', + 'word': 'quick', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 10, + 'characterOffsetEnd': 15, + 'index': 3, + 'lemma': 'brown', + 'originalText': 'brown', + 'pos': 'JJ', + 'word': 'brown', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 16, + 'characterOffsetEnd': 19, + 'index': 4, + 'lemma': 'fox', + 'originalText': 'fox', + 'pos': 'NN', + 'word': 'fox', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 20, + 'characterOffsetEnd': 25, + 'index': 5, 
+ 'lemma': 'jump', + 'originalText': 'jumps', + 'pos': 'VBZ', + 'word': 'jumps', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 26, + 'characterOffsetEnd': 30, + 'index': 6, + 'lemma': 'over', + 'originalText': 'over', + 'pos': 'IN', + 'word': 'over', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 31, + 'characterOffsetEnd': 34, + 'index': 7, + 'lemma': 'the', + 'originalText': 'the', + 'pos': 'DT', + 'word': 'the', + }, + { + 'after': ' ', + 'before': ' ', + 'characterOffsetBegin': 35, + 'characterOffsetEnd': 39, + 'index': 8, + 'lemma': 'lazy', + 'originalText': 'lazy', + 'pos': 'JJ', + 'word': 'lazy', + }, + { + 'after': '', + 'before': ' ', + 'characterOffsetBegin': 40, + 'characterOffsetEnd': 43, + 'index': 9, + 'lemma': 'dog', + 'originalText': 'dog', + 'pos': 'NN', + 'word': 'dog', + }, + ], + } + ] + } + + corenlp_parser.api_call = MagicMock(return_value=api_return_value) + + input_string = "The quick brown fox jumps over the lazy dog".split() + expected_output = Tree( + 'jumps', + [ + Tree('fox', ['The', 'quick', 'brown']), + Tree('dog', ['over', 'the', 'lazy']), + ], + ) + + parsed_data = next(corenlp_parser.parse(input_string)) + + corenlp_parser.api_call.assert_called_once_with( + "The quick brown fox jumps over the lazy dog", + properties={'ssplit.eolonly': 'true'}, + ) + self.assertEqual(expected_output, parsed_data.tree()) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_corpora.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_corpora.py new file mode 100644 index 0000000..bce083b --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_corpora.py @@ -0,0 +1,272 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals +import unittest + +from nltk.corpus import ( + sinica_treebank, + conll2007, + indian, + cess_cat, + cess_esp, + floresta, + ptb, + udhr, +) # mwa_ppdb + +from nltk.compat import python_2_unicode_compatible +from nltk.tree import Tree +from nltk.test.unit.utils import skipIf + + +class TestUdhr(unittest.TestCase): + def test_words(self): + for name in udhr.fileids(): + try: + words = list(udhr.words(name)) + except AssertionError: + print(name) + raise + self.assertTrue(words) + + def test_raw_unicode(self): + for name in udhr.fileids(): + txt = udhr.raw(name) + assert not isinstance(txt, bytes), name + + +class TestIndian(unittest.TestCase): + def test_words(self): + words = indian.words()[:3] + self.assertEqual(words, ['মহিষের', 'সন্তান', ':']) + + def test_tagged_words(self): + tagged_words = indian.tagged_words()[:3] + self.assertEqual( + tagged_words, [('মহিষের', 'NN'), ('সন্তান', 'NN'), (':', 'SYM')] + ) + + +class TestCess(unittest.TestCase): + def test_catalan(self): + words = cess_cat.words()[:15] + txt = "El Tribunal_Suprem -Fpa- TS -Fpt- ha confirmat la condemna a quatre anys d' inhabilitació especial" + self.assertEqual(words, txt.split()) + self.assertEqual(cess_cat.tagged_sents()[0][34][0], "càrrecs") + + def test_esp(self): + words = cess_esp.words()[:15] + txt = "El grupo estatal Electricité_de_France -Fpa- EDF -Fpt- anunció hoy , jueves , la compra del" + self.assertEqual(words, txt.split()) + self.assertEqual(cess_esp.words()[115], "años") + + +class TestFloresta(unittest.TestCase): + def test_words(self): + words = floresta.words()[:10] + txt = "Um revivalismo refrescante O 7_e_Meio é um ex-libris de a" + self.assertEqual(words, txt.split()) + + +class TestSinicaTreebank(unittest.TestCase): + def test_sents(self): + 
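+        # Only the first three sentences of the bundled Sinica treebank
+        # sample are compared here.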
first_3_sents = sinica_treebank.sents()[:3] + self.assertEqual( + first_3_sents, [['一'], ['友情'], ['嘉珍', '和', '我', '住在', '同一條', '巷子']] + ) + + def test_parsed_sents(self): + parsed_sents = sinica_treebank.parsed_sents()[25] + self.assertEqual( + parsed_sents, + Tree( + 'S', + [ + Tree('NP', [Tree('Nba', ['嘉珍'])]), + Tree('V‧地', [Tree('VA11', ['不停']), Tree('DE', ['的'])]), + Tree('VA4', ['哭泣']), + ], + ), + ) + + +class TestCoNLL2007(unittest.TestCase): + # Reading the CoNLL 2007 Dependency Treebanks + + def test_sents(self): + sents = conll2007.sents('esp.train')[0] + self.assertEqual( + sents[:6], ['El', 'aumento', 'del', 'índice', 'de', 'desempleo'] + ) + + def test_parsed_sents(self): + + parsed_sents = conll2007.parsed_sents('esp.train')[0] + + self.assertEqual( + parsed_sents.tree(), + Tree( + 'fortaleció', + [ + Tree( + 'aumento', + [ + 'El', + Tree( + 'del', + [ + Tree( + 'índice', + [ + Tree( + 'de', + [Tree('desempleo', ['estadounidense'])], + ) + ], + ) + ], + ), + ], + ), + 'hoy', + 'considerablemente', + Tree( + 'al', + [ + Tree( + 'euro', + [ + Tree( + 'cotizaba', + [ + ',', + 'que', + Tree('a', [Tree('15.35', ['las', 'GMT'])]), + 'se', + Tree( + 'en', + [ + Tree( + 'mercado', + [ + 'el', + Tree('de', ['divisas']), + Tree('de', ['Fráncfort']), + ], + ) + ], + ), + Tree('a', ['0,9452_dólares']), + Tree( + 'frente_a', + [ + ',', + Tree( + '0,9349_dólares', + [ + 'los', + Tree( + 'de', + [ + Tree( + 'mañana', + ['esta'], + ) + ], + ), + ], + ), + ], + ), + ], + ) + ], + ) + ], + ), + '.', + ], + ), + ) + + +@skipIf(not ptb.fileids(), "A full installation of the Penn Treebank is not available") +class TestPTB(unittest.TestCase): + def test_fileids(self): + self.assertEqual( + ptb.fileids()[:4], + [ + 'BROWN/CF/CF01.MRG', + 'BROWN/CF/CF02.MRG', + 'BROWN/CF/CF03.MRG', + 'BROWN/CF/CF04.MRG', + ], + ) + + def test_words(self): + self.assertEqual( + ptb.words('WSJ/00/WSJ_0003.MRG')[:7], + ['A', 'form', 'of', 'asbestos', 'once', 'used', '*'], + ) + + def test_tagged_words(self): + self.assertEqual( + ptb.tagged_words('WSJ/00/WSJ_0003.MRG')[:3], + [('A', 'DT'), ('form', 'NN'), ('of', 'IN')], + ) + + def test_categories(self): + self.assertEqual( + ptb.categories(), + [ + 'adventure', + 'belles_lettres', + 'fiction', + 'humor', + 'lore', + 'mystery', + 'news', + 'romance', + 'science_fiction', + ], + ) + + def test_news_fileids(self): + self.assertEqual( + ptb.fileids('news')[:3], + ['WSJ/00/WSJ_0001.MRG', 'WSJ/00/WSJ_0002.MRG', 'WSJ/00/WSJ_0003.MRG'], + ) + + def test_category_words(self): + self.assertEqual( + ptb.words(categories=['humor', 'fiction'])[:6], + ['Thirty-three', 'Scotty', 'did', 'not', 'go', 'back'], + ) + + +@unittest.skip("Skipping test for mwa_ppdb.") +class TestMWAPPDB(unittest.TestCase): + def test_fileids(self): + self.assertEqual( + mwa_ppdb.fileids(), ['ppdb-1.0-xxxl-lexical.extended.synonyms.uniquepairs'] + ) + + def test_entries(self): + self.assertEqual( + mwa_ppdb.entries()[:10], + [ + ('10/17/01', '17/10/2001'), + ('102,70', '102.70'), + ('13,53', '13.53'), + ('3.2.5.3.2.1', '3.2.5.3.2.1.'), + ('53,76', '53.76'), + ('6.9.5', '6.9.5.'), + ('7.7.6.3', '7.7.6.3.'), + ('76,20', '76.20'), + ('79,85', '79.85'), + ('93,65', '93.65'), + ], + ) + + +# unload corpora +from nltk.corpus import teardown_module diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_corpus_views.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_corpus_views.py new file mode 100644 index 0000000..222385a --- /dev/null +++ 
b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_corpus_views.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +""" +Corpus View Regression Tests +""" +from __future__ import absolute_import, unicode_literals +import unittest +import nltk.data +from nltk.corpus.reader.util import ( + StreamBackedCorpusView, + read_whitespace_block, + read_line_block, +) + + +class TestCorpusViews(unittest.TestCase): + + linetok = nltk.LineTokenizer(blanklines='keep') + names = [ + 'corpora/inaugural/README', # A very short file (160 chars) + 'corpora/inaugural/1793-Washington.txt', # A relatively short file (791 chars) + 'corpora/inaugural/1909-Taft.txt', # A longer file (32k chars) + ] + + def data(self): + for name in self.names: + f = nltk.data.find(name) + with f.open() as fp: + file_data = fp.read().decode('utf8') + yield f, file_data + + def test_correct_values(self): + # Check that corpus views produce the correct sequence of values. + + for f, file_data in self.data(): + v = StreamBackedCorpusView(f, read_whitespace_block) + self.assertEqual(list(v), file_data.split()) + + v = StreamBackedCorpusView(f, read_line_block) + self.assertEqual(list(v), self.linetok.tokenize(file_data)) + + def test_correct_length(self): + # Check that the corpus views report the correct lengths: + + for f, file_data in self.data(): + v = StreamBackedCorpusView(f, read_whitespace_block) + self.assertEqual(len(v), len(file_data.split())) + + v = StreamBackedCorpusView(f, read_line_block) + self.assertEqual(len(v), len(self.linetok.tokenize(file_data))) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_data.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_data.py new file mode 100644 index 0000000..b586155 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_data.py @@ -0,0 +1,22 @@ +import unittest +import nltk.data +from nose.tools import assert_raises + + +class TestData(unittest.TestCase): + def test_find_raises_exception(self): + + with assert_raises(LookupError) as context: + nltk.data.find('no_such_resource/foo') + + assert type(context.exception) == LookupError, 'Unexpected exception raised' + + def test_find_raises_exception_with_full_resource_name(self): + no_such_thing = 'no_such_thing/bar' + + with assert_raises(LookupError) as context: + nltk.data.find(no_such_thing) + + assert no_such_thing in str( + context.exception + ), 'Exception message does not include full resource name' diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_disagreement.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_disagreement.py new file mode 100644 index 0000000..3054868 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_disagreement.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals +import unittest + +from nltk.metrics.agreement import AnnotationTask + +class TestDisagreement(unittest.TestCase): + + ''' + Class containing unit tests for nltk.metrics.agreement.Disagreement. + ''' + + def test_easy(self): + ''' + Simple test, based on + https://github.com/foolswood/krippendorffs_alpha/raw/master/krippendorff.pdf. 
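+        The coders disagree on both items that receive more than one rating,
+        so the expected alpha is negative (about -0.33), i.e. below chance.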
+ ''' + data = [('coder1', 'dress1', 'YES'), + ('coder2', 'dress1', 'NO'), + ('coder3', 'dress1', 'NO'), + ('coder1', 'dress2', 'YES'), + ('coder2', 'dress2', 'NO'), + ('coder3', 'dress3', 'NO'), + ] + annotation_task = AnnotationTask(data) + self.assertAlmostEqual(annotation_task.alpha(), -0.3333333) + + def test_easy2(self): + ''' + Same simple test with 1 rating removed. + Removal of that rating should not matter: K-Apha ignores items with + only 1 rating. + ''' + data = [('coder1', 'dress1', 'YES'), + ('coder2', 'dress1', 'NO'), + ('coder3', 'dress1', 'NO'), + ('coder1', 'dress2', 'YES'), + ('coder2', 'dress2', 'NO'), + ] + annotation_task = AnnotationTask(data) + self.assertAlmostEqual(annotation_task.alpha(), -0.3333333) + + def test_advanced(self): + ''' + More advanced test, based on + http://www.agreestat.com/research_papers/onkrippendorffalpha.pdf + ''' + data = [('A', '1', '1'), + ('B', '1', '1'), + ('D', '1', '1'), + ('A', '2', '2'), + ('B', '2', '2'), + ('C', '2', '3'), + ('D', '2', '2'), + ('A', '3', '3'), + ('B', '3', '3'), + ('C', '3', '3'), + ('D', '3', '3'), + ('A', '4', '3'), + ('B', '4', '3'), + ('C', '4', '3'), + ('D', '4', '3'), + ('A', '5', '2'), + ('B', '5', '2'), + ('C', '5', '2'), + ('D', '5', '2'), + ('A', '6', '1'), + ('B', '6', '2'), + ('C', '6', '3'), + ('D', '6', '4'), + ('A', '7', '4'), + ('B', '7', '4'), + ('C', '7', '4'), + ('D', '7', '4'), + ('A', '8', '1'), + ('B', '8', '1'), + ('C', '8', '2'), + ('D', '8', '1'), + ('A', '9', '2'), + ('B', '9', '2'), + ('C', '9', '2'), + ('D', '9', '2'), + ('B', '10', '5'), + ('C', '10', '5'), + ('D', '10', '5'), + ('C', '11', '1'), + ('D', '11', '1'), + ('C', '12', '3'), + ] + annotation_task = AnnotationTask(data) + self.assertAlmostEqual(annotation_task.alpha(), 0.743421052632) + + def test_advanced2(self): + ''' + Same more advanced example, but with 1 rating removed. + Again, removal of that 1 rating shoudl not matter. 
+ ''' + data = [('A', '1', '1'), + ('B', '1', '1'), + ('D', '1', '1'), + ('A', '2', '2'), + ('B', '2', '2'), + ('C', '2', '3'), + ('D', '2', '2'), + ('A', '3', '3'), + ('B', '3', '3'), + ('C', '3', '3'), + ('D', '3', '3'), + ('A', '4', '3'), + ('B', '4', '3'), + ('C', '4', '3'), + ('D', '4', '3'), + ('A', '5', '2'), + ('B', '5', '2'), + ('C', '5', '2'), + ('D', '5', '2'), + ('A', '6', '1'), + ('B', '6', '2'), + ('C', '6', '3'), + ('D', '6', '4'), + ('A', '7', '4'), + ('B', '7', '4'), + ('C', '7', '4'), + ('D', '7', '4'), + ('A', '8', '1'), + ('B', '8', '1'), + ('C', '8', '2'), + ('D', '8', '1'), + ('A', '9', '2'), + ('B', '9', '2'), + ('C', '9', '2'), + ('D', '9', '2'), + ('B', '10', '5'), + ('C', '10', '5'), + ('D', '10', '5'), + ('C', '11', '1'), + ('D', '11', '1'), + ('C', '12', '3'), + ] + annotation_task = AnnotationTask(data) + self.assertAlmostEqual(annotation_task.alpha(), 0.743421052632) + diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_hmm.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_hmm.py new file mode 100644 index 0000000..d211bc2 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_hmm.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals +from nltk.tag import hmm + + +def _wikipedia_example_hmm(): + # Example from wikipedia + # (http://en.wikipedia.org/wiki/Forward%E2%80%93backward_algorithm) + + states = ['rain', 'no rain'] + symbols = ['umbrella', 'no umbrella'] + + A = [[0.7, 0.3], [0.3, 0.7]] # transition probabilities + B = [[0.9, 0.1], [0.2, 0.8]] # emission probabilities + pi = [0.5, 0.5] # initial probabilities + + seq = ['umbrella', 'umbrella', 'no umbrella', 'umbrella', 'umbrella'] + seq = list(zip(seq, [None] * len(seq))) + + model = hmm._create_hmm_tagger(states, symbols, A, B, pi) + return model, states, symbols, seq + + +def test_forward_probability(): + from numpy.testing import assert_array_almost_equal + + # example from p. 385, Huang et al + model, states, symbols = hmm._market_hmm_example() + seq = [('up', None), ('up', None)] + expected = [[0.35, 0.02, 0.09], [0.1792, 0.0085, 0.0357]] + + fp = 2 ** model._forward_probability(seq) + + assert_array_almost_equal(fp, expected) + + +def test_forward_probability2(): + from numpy.testing import assert_array_almost_equal + + model, states, symbols, seq = _wikipedia_example_hmm() + fp = 2 ** model._forward_probability(seq) + + # examples in wikipedia are normalized + fp = (fp.T / fp.sum(axis=1)).T + + wikipedia_results = [ + [0.8182, 0.1818], + [0.8834, 0.1166], + [0.1907, 0.8093], + [0.7308, 0.2692], + [0.8673, 0.1327], + ] + + assert_array_almost_equal(wikipedia_results, fp, 4) + + +def test_backward_probability(): + from numpy.testing import assert_array_almost_equal + + model, states, symbols, seq = _wikipedia_example_hmm() + + bp = 2 ** model._backward_probability(seq) + # examples in wikipedia are normalized + + bp = (bp.T / bp.sum(axis=1)).T + + wikipedia_results = [ + # Forward-backward algorithm doesn't need b0_5, + # so .backward_probability doesn't compute it. 
+ # [0.6469, 0.3531], + [0.5923, 0.4077], + [0.3763, 0.6237], + [0.6533, 0.3467], + [0.6273, 0.3727], + [0.5, 0.5], + ] + + assert_array_almost_equal(wikipedia_results, bp, 4) + + +def setup_module(module): + from nose import SkipTest + + try: + import numpy + except ImportError: + raise SkipTest("numpy is required for nltk.test.test_hmm") diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_json2csv_corpus.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_json2csv_corpus.py new file mode 100644 index 0000000..ac61a65 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_json2csv_corpus.py @@ -0,0 +1,237 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Twitter client +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Lorenzo Rubio +# URL: +# For license information, see LICENSE.TXT + +""" +Regression tests for `json2csv()` and `json2csv_entities()` in Twitter +package. + +""" + +import os +import unittest + +from six.moves import zip + +from nltk.compat import TemporaryDirectory +from nltk.corpus import twitter_samples +from nltk.twitter.common import json2csv, json2csv_entities + + +def are_files_identical(filename1, filename2, debug=False): + """ + Compare two files, ignoring carriage returns. + """ + with open(filename1, "rb") as fileA: + with open(filename2, "rb") as fileB: + result = True + for lineA, lineB in zip( + sorted(fileA.readlines()), sorted(fileB.readlines()) + ): + if lineA.strip() != lineB.strip(): + if debug: + print( + "Error while comparing files. " + + "First difference at line below." + ) + print("=> Output file line: {0}".format(lineA)) + print("=> Refer. file line: {0}".format(lineB)) + result = False + break + return result + + +class TestJSON2CSV(unittest.TestCase): + def setUp(self): + with open(twitter_samples.abspath("tweets.20150430-223406.json")) as infile: + self.infile = [next(infile) for x in range(100)] + infile.close() + self.msg = "Test and reference files are not the same" + self.subdir = os.path.join(os.path.dirname(__file__), 'files') + + def tearDown(self): + return + + def test_textoutput(self): + ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.text.csv.ref') + with TemporaryDirectory() as tempdir: + outfn = os.path.join(tempdir, 'tweets.20150430-223406.text.csv') + json2csv(self.infile, outfn, ['text'], gzip_compress=False) + self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg) + + def test_tweet_metadata(self): + ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.tweet.csv.ref') + fields = [ + 'created_at', + 'favorite_count', + 'id', + 'in_reply_to_status_id', + 'in_reply_to_user_id', + 'retweet_count', + 'retweeted', + 'text', + 'truncated', + 'user.id', + ] + + with TemporaryDirectory() as tempdir: + outfn = os.path.join(tempdir, 'tweets.20150430-223406.tweet.csv') + json2csv(self.infile, outfn, fields, gzip_compress=False) + self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg) + + def test_user_metadata(self): + ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.user.csv.ref') + fields = ['id', 'text', 'user.id', 'user.followers_count', 'user.friends_count'] + + with TemporaryDirectory() as tempdir: + outfn = os.path.join(tempdir, 'tweets.20150430-223406.user.csv') + json2csv(self.infile, outfn, fields, gzip_compress=False) + self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg) + + def test_tweet_hashtag(self): + ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.hashtag.csv.ref') + with TemporaryDirectory() 
as tempdir: + outfn = os.path.join(tempdir, 'tweets.20150430-223406.hashtag.csv') + json2csv_entities( + self.infile, + outfn, + ['id', 'text'], + 'hashtags', + ['text'], + gzip_compress=False, + ) + self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg) + + def test_tweet_usermention(self): + ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.usermention.csv.ref') + with TemporaryDirectory() as tempdir: + outfn = os.path.join(tempdir, 'tweets.20150430-223406.usermention.csv') + json2csv_entities( + self.infile, + outfn, + ['id', 'text'], + 'user_mentions', + ['id', 'screen_name'], + gzip_compress=False, + ) + self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg) + + def test_tweet_media(self): + ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.media.csv.ref') + with TemporaryDirectory() as tempdir: + outfn = os.path.join(tempdir, 'tweets.20150430-223406.media.csv') + json2csv_entities( + self.infile, + outfn, + ['id'], + 'media', + ['media_url', 'url'], + gzip_compress=False, + ) + + self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg) + + def test_tweet_url(self): + ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.url.csv.ref') + with TemporaryDirectory() as tempdir: + outfn = os.path.join(tempdir, 'tweets.20150430-223406.url.csv') + json2csv_entities( + self.infile, + outfn, + ['id'], + 'urls', + ['url', 'expanded_url'], + gzip_compress=False, + ) + + self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg) + + def test_userurl(self): + ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.userurl.csv.ref') + with TemporaryDirectory() as tempdir: + outfn = os.path.join(tempdir, 'tweets.20150430-223406.userurl.csv') + json2csv_entities( + self.infile, + outfn, + ['id', 'screen_name'], + 'user.urls', + ['url', 'expanded_url'], + gzip_compress=False, + ) + + self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg) + + def test_tweet_place(self): + ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.place.csv.ref') + with TemporaryDirectory() as tempdir: + outfn = os.path.join(tempdir, 'tweets.20150430-223406.place.csv') + json2csv_entities( + self.infile, + outfn, + ['id', 'text'], + 'place', + ['name', 'country'], + gzip_compress=False, + ) + + self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg) + + def test_tweet_place_boundingbox(self): + ref_fn = os.path.join( + self.subdir, 'tweets.20150430-223406.placeboundingbox.csv.ref' + ) + with TemporaryDirectory() as tempdir: + outfn = os.path.join(tempdir, 'tweets.20150430-223406.placeboundingbox.csv') + json2csv_entities( + self.infile, + outfn, + ['id', 'name'], + 'place.bounding_box', + ['coordinates'], + gzip_compress=False, + ) + + self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg) + + def test_retweet_original_tweet(self): + ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.retweet.csv.ref') + with TemporaryDirectory() as tempdir: + outfn = os.path.join(tempdir, 'tweets.20150430-223406.retweet.csv') + json2csv_entities( + self.infile, + outfn, + ['id'], + 'retweeted_status', + [ + 'created_at', + 'favorite_count', + 'id', + 'in_reply_to_status_id', + 'in_reply_to_user_id', + 'retweet_count', + 'text', + 'truncated', + 'user.id', + ], + gzip_compress=False, + ) + + self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg) + + def test_file_is_wrong(self): + """ + Sanity check that file comparison is not giving false positives. 
+ """ + ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.retweet.csv.ref') + with TemporaryDirectory() as tempdir: + outfn = os.path.join(tempdir, 'tweets.20150430-223406.text.csv') + json2csv(self.infile, outfn, ['text'], gzip_compress=False) + self.assertFalse(are_files_identical(outfn, ref_fn), msg=self.msg) + + +if __name__ == "__main__": + unittest.main() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_naivebayes.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_naivebayes.py new file mode 100644 index 0000000..37e4411 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_naivebayes.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +from __future__ import print_function, unicode_literals + + +import unittest +from nltk.classify.naivebayes import NaiveBayesClassifier + + +class NaiveBayesClassifierTest(unittest.TestCase): + def test_simple(self): + training_features = [ + ({'nice': True, 'good': True}, 'positive'), + ({'bad': True, 'mean': True}, 'negative'), + ] + + classifier = NaiveBayesClassifier.train(training_features) + + result = classifier.prob_classify({'nice': True}) + self.assertTrue(result.prob('positive') > result.prob('negative')) + self.assertEqual(result.max(), 'positive') + + result = classifier.prob_classify({'bad': True}) + self.assertTrue(result.prob('positive') < result.prob('negative')) + self.assertEqual(result.max(), 'negative') diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_nombank.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_nombank.py new file mode 100644 index 0000000..85e1591 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_nombank.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +""" +Unit tests for nltk.corpus.nombank +""" + +from __future__ import unicode_literals +import unittest + +from nltk.corpus import nombank +# Load the nombank once. +nombank.nouns() + +class NombankDemo(unittest.TestCase): + def test_numbers(self): + # No. of instances. + self.assertEqual(len(nombank.instances()), 114574) + # No. of rolesets + self.assertEqual(len(nombank.rolesets()), 5577) + # No. of nouns. + self.assertEqual(len(nombank.nouns()), 4704) + + + def test_instance(self): + self.assertEqual(nombank.instances()[0].roleset, 'perc-sign.01') + + def test_framefiles_fileids(self): + self.assertEqual(len(nombank.fileids()), 4705) + self.assertTrue(all(fileid.endswith('.xml') for fileid in nombank.fileids())) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_pos_tag.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_pos_tag.py new file mode 100644 index 0000000..a0aa1d0 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_pos_tag.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +""" +Tests for nltk.pos_tag +""" + +from __future__ import unicode_literals + +import unittest + +from nltk import word_tokenize, pos_tag + + +class TestPosTag(unittest.TestCase): + def test_pos_tag_eng(self): + text = "John's big idea isn't all that bad." + expected_tagged = [ + ('John', 'NNP'), + ("'s", 'POS'), + ('big', 'JJ'), + ('idea', 'NN'), + ('is', 'VBZ'), + ("n't", 'RB'), + ('all', 'PDT'), + ('that', 'DT'), + ('bad', 'JJ'), + ('.', '.'), + ] + assert pos_tag(word_tokenize(text)) == expected_tagged + + def test_pos_tag_eng_universal(self): + text = "John's big idea isn't all that bad." 
+ expected_tagged = [ + ('John', 'NOUN'), + ("'s", 'PRT'), + ('big', 'ADJ'), + ('idea', 'NOUN'), + ('is', 'VERB'), + ("n't", 'ADV'), + ('all', 'DET'), + ('that', 'DET'), + ('bad', 'ADJ'), + ('.', '.'), + ] + assert pos_tag(word_tokenize(text), tagset='universal') == expected_tagged + + def test_pos_tag_rus(self): + text = u"Илья оторопел и дважды перечитал бумажку." + expected_tagged = [ + ('Илья', 'S'), + ('оторопел', 'V'), + ('и', 'CONJ'), + ('дважды', 'ADV'), + ('перечитал', 'V'), + ('бумажку', 'S'), + ('.', 'NONLEX'), + ] + assert pos_tag(word_tokenize(text), lang='rus') == expected_tagged + + def test_pos_tag_rus_universal(self): + text = u"Илья оторопел и дважды перечитал бумажку." + expected_tagged = [ + ('Илья', 'NOUN'), + ('оторопел', 'VERB'), + ('и', 'CONJ'), + ('дважды', 'ADV'), + ('перечитал', 'VERB'), + ('бумажку', 'NOUN'), + ('.', '.'), + ] + assert ( + pos_tag(word_tokenize(text), tagset='universal', lang='rus') + == expected_tagged + ) + + def test_pos_tag_unknown_lang(self): + text = u"모르겠 습니 다" + self.assertRaises(NotImplementedError, pos_tag, word_tokenize(text), lang='kor') + # Test for default kwarg, `lang=None` + self.assertRaises(NotImplementedError, pos_tag, word_tokenize(text), lang=None) + + def test_unspecified_lang(self): + # Tries to force the lang='eng' option. + text = u"모르겠 습니 다" + expected_but_wrong = [('모르겠', 'JJ'), ('습니', 'NNP'), ('다', 'NN')] + assert pos_tag(word_tokenize(text)) == expected_but_wrong diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_rte_classify.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_rte_classify.py new file mode 100644 index 0000000..b26298c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_rte_classify.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- +from __future__ import print_function, unicode_literals + +import unittest + +from nltk.corpus import rte as rte_corpus +from nltk.classify.rte_classify import RTEFeatureExtractor, rte_features, rte_classifier + +expected_from_rte_feature_extration = """ +alwayson => True +ne_hyp_extra => 0 +ne_overlap => 1 +neg_hyp => 0 +neg_txt => 0 +word_hyp_extra => 3 +word_overlap => 3 + +alwayson => True +ne_hyp_extra => 0 +ne_overlap => 1 +neg_hyp => 0 +neg_txt => 0 +word_hyp_extra => 2 +word_overlap => 1 + +alwayson => True +ne_hyp_extra => 1 +ne_overlap => 1 +neg_hyp => 0 +neg_txt => 0 +word_hyp_extra => 1 +word_overlap => 2 + +alwayson => True +ne_hyp_extra => 1 +ne_overlap => 0 +neg_hyp => 0 +neg_txt => 0 +word_hyp_extra => 6 +word_overlap => 2 + +alwayson => True +ne_hyp_extra => 1 +ne_overlap => 0 +neg_hyp => 0 +neg_txt => 0 +word_hyp_extra => 4 +word_overlap => 0 + +alwayson => True +ne_hyp_extra => 1 +ne_overlap => 0 +neg_hyp => 0 +neg_txt => 0 +word_hyp_extra => 3 +word_overlap => 1 +""" + + +class RTEClassifierTest(unittest.TestCase): + # Test the feature extraction method. + def test_rte_feature_extraction(self): + pairs = rte_corpus.pairs(['rte1_dev.xml'])[:6] + test_output = [ + "%-15s => %s" % (key, rte_features(pair)[key]) + for pair in pairs + for key in sorted(rte_features(pair)) + ] + expected_output = expected_from_rte_feature_extration.strip().split('\n') + # Remove null strings. + expected_output = list(filter(None, expected_output)) + self.assertEqual(test_output, expected_output) + + # Test the RTEFeatureExtractor object. 
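# A short sketch of the feature extraction behind the RTE tests in this class; it
# assumes the RTE corpus is installed (e.g. via nltk.download('rte')). The method
# below then checks RTEFeatureExtractor directly.
from nltk.corpus import rte as rte_corpus
from nltk.classify.rte_classify import rte_features

pair = rte_corpus.pairs(['rte1_dev.xml'])[0]
features = rte_features(pair)
for name in sorted(features):
    # Keys correspond to the expected_from_rte_feature_extration block above
    # (alwayson, ne_overlap, word_overlap, ...).
    print('%-15s => %s' % (name, features[name]))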
+ def test_feature_extractor_object(self): + rtepair = rte_corpus.pairs(['rte3_dev.xml'])[33] + extractor = RTEFeatureExtractor(rtepair) + self.assertEqual(extractor.hyp_words, {'member', 'China', 'SCO.'}) + self.assertEqual(extractor.overlap('word'), set()) + self.assertEqual(extractor.overlap('ne'), {'China'}) + self.assertEqual(extractor.hyp_extra('word'), {'member'}) + + # Test the RTE classifier training. + def test_rte_classification_without_megam(self): + clf = rte_classifier('IIS') + clf = rte_classifier('GIS') + + @unittest.skip("Skipping tests with dependencies on MEGAM") + def test_rte_classification_with_megam(self): + nltk.config_megam('/usr/local/bin/megam') + clf = rte_classifier('megam') + clf = rte_classifier('BFGS') diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_seekable_unicode_stream_reader.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_seekable_unicode_stream_reader.py new file mode 100644 index 0000000..a54c559 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_seekable_unicode_stream_reader.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- +""" +The following test performs a random series of reads, seeks, and +tells, and checks that the results are consistent. +""" +from __future__ import absolute_import, unicode_literals +import random +import functools +from io import BytesIO +from nltk.corpus.reader import SeekableUnicodeStreamReader + + +def check_reader(unicode_string, encoding, n=1000): + bytestr = unicode_string.encode(encoding) + strlen = len(unicode_string) + stream = BytesIO(bytestr) + reader = SeekableUnicodeStreamReader(stream, encoding) + # Find all character positions + chars = [] + while True: + pos = reader.tell() + chars.append((pos, reader.read(1))) + if chars[-1][1] == '': + break + # Find all strings + strings = dict((pos, '') for (pos, c) in chars) + for pos1, char in chars: + for pos2, _ in chars: + if pos2 <= pos1: + strings[pos2] += char + while True: + op = random.choice('tsrr') + # Check our position? + if op == 't': # tell + reader.tell() + # Perform a seek? + if op == 's': # seek + new_pos = random.choice([p for (p, c) in chars]) + reader.seek(new_pos) + # Perform a read? + if op == 'r': # read + if random.random() < 0.3: + pos = reader.tell() + else: + pos = None + if random.random() < 0.2: + size = None + elif random.random() < 0.8: + size = random.randint(0, int(strlen / 6)) + else: + size = random.randint(0, strlen + 20) + if random.random() < 0.8: + s = reader.read(size) + else: + s = reader.readline(size) + # check that everything's consistent + if pos is not None: + assert pos in strings + assert strings[pos].startswith(s) + n -= 1 + if n == 0: + return 'passed' + + +# Call the randomized test function `check_reader` with a variety of +# input strings and encodings. + +ENCODINGS = ['ascii', 'latin1', 'greek', 'hebrew', 'utf-16', 'utf-8'] + +STRINGS = [ + """ + This is a test file. + It is fairly short. + """, + "This file can be encoded with latin1. \x83", + """\ + This is a test file. + Here's a blank line: + + And here's some unicode: \xee \u0123 \uffe3 + """, + """\ + This is a test file. 
+ Unicode characters: \xf3 \u2222 \u3333\u4444 \u5555 + """, +] + + +def test_reader(): + for string in STRINGS: + for encoding in ENCODINGS: + try: + # skip strings that can't be encoded with the current encoding + string.encode(encoding) + yield check_reader, string, encoding + except UnicodeEncodeError: + pass + + +# nose shows the whole string arguments in a verbose mode; this is annoying, +# so large string test is separated. + +LARGE_STRING = ( + """\ +This is a larger file. It has some lines that are longer \ +than 72 characters. It's got lots of repetition. Here's \ +some unicode chars: \xee \u0123 \uffe3 \ueeee \u2345 + +How fun! Let's repeat it twenty times. +""" + * 10 +) + + +def test_reader_on_large_string(): + for encoding in ENCODINGS: + try: + # skip strings that can't be encoded with the current encoding + LARGE_STRING.encode(encoding) + + def _check(encoding, n=1000): + check_reader(LARGE_STRING, encoding, n) + + yield _check, encoding + + except UnicodeEncodeError: + pass + + +def test_reader_stream_is_closed(): + reader = SeekableUnicodeStreamReader(BytesIO(b''), 'ascii') + assert reader.stream.closed is False + reader.__del__() + assert reader.stream.closed is True + + +def teardown_module(module=None): + import gc + + gc.collect() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_senna.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_senna.py new file mode 100644 index 0000000..8701225 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_senna.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- +""" +Unit tests for Senna +""" + +from __future__ import unicode_literals +from os import environ, path, sep + +import logging +import unittest + +from nltk.classify import Senna +from nltk.tag import SennaTagger, SennaChunkTagger, SennaNERTagger + +# Set Senna executable path for tests if it is not specified as an environment variable +if 'SENNA' in environ: + SENNA_EXECUTABLE_PATH = path.normpath(environ['SENNA']) + sep +else: + SENNA_EXECUTABLE_PATH = '/usr/share/senna-v3.0' + +senna_is_installed = path.exists(SENNA_EXECUTABLE_PATH) + + +@unittest.skipUnless(senna_is_installed, "Requires Senna executable") +class TestSennaPipeline(unittest.TestCase): + """Unittest for nltk.classify.senna""" + + def test_senna_pipeline(self): + """Senna pipeline interface""" + + pipeline = Senna(SENNA_EXECUTABLE_PATH, ['pos', 'chk', 'ner']) + sent = 'Dusseldorf is an international business center'.split() + result = [ + (token['word'], token['chk'], token['ner'], token['pos']) + for token in pipeline.tag(sent) + ] + expected = [ + ('Dusseldorf', 'B-NP', 'B-LOC', 'NNP'), + ('is', 'B-VP', 'O', 'VBZ'), + ('an', 'B-NP', 'O', 'DT'), + ('international', 'I-NP', 'O', 'JJ'), + ('business', 'I-NP', 'O', 'NN'), + ('center', 'I-NP', 'O', 'NN'), + ] + self.assertEqual(result, expected) + + +@unittest.skipUnless(senna_is_installed, "Requires Senna executable") +class TestSennaTagger(unittest.TestCase): + """Unittest for nltk.tag.senna""" + + def test_senna_tagger(self): + tagger = SennaTagger(SENNA_EXECUTABLE_PATH) + result = tagger.tag('What is the airspeed of an unladen swallow ?'.split()) + expected = [ + ('What', 'WP'), + ('is', 'VBZ'), + ('the', 'DT'), + ('airspeed', 'NN'), + ('of', 'IN'), + ('an', 'DT'), + ('unladen', 'NN'), + ('swallow', 'NN'), + ('?', '.'), + ] + self.assertEqual(result, expected) + + def test_senna_chunk_tagger(self): + chktagger = SennaChunkTagger(SENNA_EXECUTABLE_PATH) + result_1 = chktagger.tag('What is the airspeed of an 
unladen swallow ?'.split()) + expected_1 = [ + ('What', 'B-NP'), + ('is', 'B-VP'), + ('the', 'B-NP'), + ('airspeed', 'I-NP'), + ('of', 'B-PP'), + ('an', 'B-NP'), + ('unladen', 'I-NP'), + ('swallow', 'I-NP'), + ('?', 'O'), + ] + + result_2 = list(chktagger.bio_to_chunks(result_1, chunk_type='NP')) + expected_2 = [ + ('What', '0'), + ('the airspeed', '2-3'), + ('an unladen swallow', '5-6-7'), + ] + self.assertEqual(result_1, expected_1) + self.assertEqual(result_2, expected_2) + + def test_senna_ner_tagger(self): + nertagger = SennaNERTagger(SENNA_EXECUTABLE_PATH) + result_1 = nertagger.tag('Shakespeare theatre was in London .'.split()) + expected_1 = [ + ('Shakespeare', 'B-PER'), + ('theatre', 'O'), + ('was', 'O'), + ('in', 'O'), + ('London', 'B-LOC'), + ('.', 'O'), + ] + + result_2 = nertagger.tag('UN headquarters are in NY , USA .'.split()) + expected_2 = [ + ('UN', 'B-ORG'), + ('headquarters', 'O'), + ('are', 'O'), + ('in', 'O'), + ('NY', 'B-LOC'), + (',', 'O'), + ('USA', 'B-LOC'), + ('.', 'O'), + ] + self.assertEqual(result_1, expected_1) + self.assertEqual(result_2, expected_2) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_stem.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_stem.py new file mode 100644 index 0000000..8985179 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_stem.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- +from __future__ import print_function, unicode_literals + +import os +import unittest +from contextlib import closing + +from nltk import data +from nltk.stem.snowball import SnowballStemmer +from nltk.stem.porter import PorterStemmer + + +class SnowballTest(unittest.TestCase): + def test_arabic(self): + """ + this unit testing for test the snowball arabic light stemmer + this stemmer deals with prefixes and suffixes + """ + # Test where the ignore_stopwords=True. + ar_stemmer = SnowballStemmer("arabic", True) + assert ar_stemmer.stem('الْعَرَبِــــــيَّة') == "عرب" + assert ar_stemmer.stem("العربية") == "عرب" + assert ar_stemmer.stem("فقالوا") == "قال" + assert ar_stemmer.stem("الطالبات") == "طالب" + assert ar_stemmer.stem("فالطالبات") == "طالب" + assert ar_stemmer.stem("والطالبات") == "طالب" + assert ar_stemmer.stem("الطالبون") == "طالب" + assert ar_stemmer.stem("اللذان") == "اللذان" + assert ar_stemmer.stem("من") == "من" + # Test where the ignore_stopwords=False. 
+ ar_stemmer = SnowballStemmer("arabic", False) + assert ar_stemmer.stem("اللذان") == "اللذ" # this is a stop word + assert ar_stemmer.stem("الطالبات") == "طالب" + assert ar_stemmer.stem("الكلمات") == "كلم" + # test where create the arabic stemmer without given init value to ignore_stopwords + ar_stemmer = SnowballStemmer("arabic") + assert ar_stemmer.stem('الْعَرَبِــــــيَّة') == "عرب" + assert ar_stemmer.stem("العربية") == "عرب" + assert ar_stemmer.stem("فقالوا") == "قال" + assert ar_stemmer.stem("الطالبات") == "طالب" + assert ar_stemmer.stem("الكلمات") == "كلم" + + def test_russian(self): + stemmer_russian = SnowballStemmer("russian") + assert stemmer_russian.stem("авантненькая") == "авантненьк" + + def test_german(self): + stemmer_german = SnowballStemmer("german") + stemmer_german2 = SnowballStemmer("german", ignore_stopwords=True) + + assert stemmer_german.stem("Schr\xe4nke") == 'schrank' + assert stemmer_german2.stem("Schr\xe4nke") == 'schrank' + + assert stemmer_german.stem("keinen") == 'kein' + assert stemmer_german2.stem("keinen") == 'keinen' + + def test_spanish(self): + stemmer = SnowballStemmer('spanish') + + assert stemmer.stem("Visionado") == 'vision' + + # The word 'algue' was raising an IndexError + assert stemmer.stem("algue") == 'algu' + + def test_short_strings_bug(self): + stemmer = SnowballStemmer('english') + assert stemmer.stem("y's") == 'y' + + +class PorterTest(unittest.TestCase): + def _vocabulary(self): + with closing( + data.find('stemmers/porter_test/porter_vocabulary.txt').open( + encoding='utf-8' + ) + ) as fp: + return fp.read().splitlines() + + def _test_against_expected_output(self, stemmer_mode, expected_stems): + stemmer = PorterStemmer(mode=stemmer_mode) + for word, true_stem in zip(self._vocabulary(), expected_stems): + our_stem = stemmer.stem(word) + assert our_stem == true_stem, ( + "%s should stem to %s in %s mode but got %s" + % (word, true_stem, stemmer_mode, our_stem) + ) + + def test_vocabulary_martin_mode(self): + """Tests all words from the test vocabulary provided by M Porter + + The sample vocabulary and output were sourced from: + http://tartarus.org/martin/PorterStemmer/voc.txt + http://tartarus.org/martin/PorterStemmer/output.txt + and are linked to from the Porter Stemmer algorithm's homepage + at + http://tartarus.org/martin/PorterStemmer/ + """ + with closing( + data.find('stemmers/porter_test/porter_martin_output.txt').open( + encoding='utf-8' + ) + ) as fp: + self._test_against_expected_output( + PorterStemmer.MARTIN_EXTENSIONS, fp.read().splitlines() + ) + + def test_vocabulary_nltk_mode(self): + with closing( + data.find('stemmers/porter_test/porter_nltk_output.txt').open( + encoding='utf-8' + ) + ) as fp: + self._test_against_expected_output( + PorterStemmer.NLTK_EXTENSIONS, fp.read().splitlines() + ) + + def test_vocabulary_original_mode(self): + # The list of stems for this test was generated by taking the + # Martin-blessed stemmer from + # http://tartarus.org/martin/PorterStemmer/c.txt + # and removing all the --DEPARTURE-- sections from it and + # running it against Martin's test vocabulary. 
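# A minimal sketch of the three PorterStemmer modes these vocabulary tests compare;
# the sample words are illustrative and not drawn from Martin Porter's test vocabulary.
from nltk.stem.porter import PorterStemmer

for mode in (
    PorterStemmer.ORIGINAL_ALGORITHM,
    PorterStemmer.MARTIN_EXTENSIONS,
    PorterStemmer.NLTK_EXTENSIONS,
):
    stemmer = PorterStemmer(mode=mode)
    print(mode, [stemmer.stem(w) for w in ('running', 'flies', 'generously')])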
+ + with closing( + data.find('stemmers/porter_test/porter_original_output.txt').open( + encoding='utf-8' + ) + ) as fp: + self._test_against_expected_output( + PorterStemmer.ORIGINAL_ALGORITHM, fp.read().splitlines() + ) + + self._test_against_expected_output( + PorterStemmer.ORIGINAL_ALGORITHM, + data.find('stemmers/porter_test/porter_original_output.txt') + .open(encoding='utf-8') + .read() + .splitlines(), + ) + + def test_oed_bug(self): + """Test for bug https://github.com/nltk/nltk/issues/1581 + + Ensures that 'oed' can be stemmed without throwing an error. + """ + assert PorterStemmer().stem('oed') == 'o' diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_tag.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_tag.py new file mode 100644 index 0000000..c382074 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_tag.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals + + +def test_basic(): + from nltk.tag import pos_tag + from nltk.tokenize import word_tokenize + + result = pos_tag(word_tokenize("John's big idea isn't all that bad.")) + assert result == [ + ('John', 'NNP'), + ("'s", 'POS'), + ('big', 'JJ'), + ('idea', 'NN'), + ('is', 'VBZ'), + ("n't", 'RB'), + ('all', 'PDT'), + ('that', 'DT'), + ('bad', 'JJ'), + ('.', '.'), + ] + + +def setup_module(module): + from nose import SkipTest + + try: + import numpy + except ImportError: + raise SkipTest("numpy is required for nltk.test.test_tag") diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_tgrep.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_tgrep.py new file mode 100644 index 0000000..17b2c4a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_tgrep.py @@ -0,0 +1,790 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Natural Language Toolkit: TGrep search +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Will Roberts +# URL: +# For license information, see LICENSE.TXT + +''' +Unit tests for nltk.tgrep. +''' + +from __future__ import absolute_import, print_function, unicode_literals + +import unittest + +from six import b + +from nltk.tree import ParentedTree +from nltk import tgrep + + +class TestSequenceFunctions(unittest.TestCase): + + ''' + Class containing unit tests for nltk.tgrep. + ''' + + def test_tokenize_simple(self): + ''' + Simple test of tokenization. + ''' + tokens = tgrep.tgrep_tokenize('A .. (B !< C . D) | ![<< (E , F) $ G]') + self.assertEqual( + tokens, + [ + 'A', + '..', + '(', + 'B', + '!', + '<', + 'C', + '.', + 'D', + ')', + '|', + '!', + '[', + '<<', + '(', + 'E', + ',', + 'F', + ')', + '$', + 'G', + ']', + ], + ) + + def test_tokenize_encoding(self): + ''' + Test that tokenization handles bytes and strs the same way. + ''' + self.assertEqual( + tgrep.tgrep_tokenize(b('A .. (B !< C . D) | ![<< (E , F) $ G]')), + tgrep.tgrep_tokenize('A .. (B !< C . D) | ![<< (E , F) $ G]'), + ) + + def test_tokenize_link_types(self): + ''' + Test tokenization of basic link types. 
+ ''' + self.assertEqual(tgrep.tgrep_tokenize('AB'), ['A', '>', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A<3B'), ['A', '<3', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A>3B'), ['A', '>3', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A<,B'), ['A', '<,', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A>,B'), ['A', '>,', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A<-3B'), ['A', '<-3', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A>-3B'), ['A', '>-3', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A<-B'), ['A', '<-', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A>-B'), ['A', '>-', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A<\'B'), ['A', '<\'', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A>\'B'), ['A', '>\'', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A<:B'), ['A', '<:', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A>:B'), ['A', '>:', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A<>B'), ['A', '>>', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A<<,B'), ['A', '<<,', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A>>,B'), ['A', '>>,', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A<<\'B'), ['A', '<<\'', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A>>\'B'), ['A', '>>\'', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A<<:B'), ['A', '<<:', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A>>:B'), ['A', '>>:', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A.B'), ['A', '.', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A,B'), ['A', ',', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A..B'), ['A', '..', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A,,B'), ['A', ',,', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A$B'), ['A', '$', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A$.B'), ['A', '$.', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A$,B'), ['A', '$,', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A$..B'), ['A', '$..', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A$,,B'), ['A', '$,,', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!B'), ['A', '!', '>', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!<3B'), ['A', '!', '<3', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!>3B'), ['A', '!', '>3', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!<,B'), ['A', '!', '<,', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!>,B'), ['A', '!', '>,', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!<-3B'), ['A', '!', '<-3', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!>-3B'), ['A', '!', '>-3', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!<-B'), ['A', '!', '<-', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!>-B'), ['A', '!', '>-', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!<\'B'), ['A', '!', '<\'', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!>\'B'), ['A', '!', '>\'', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!<:B'), ['A', '!', '<:', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!>:B'), ['A', '!', '>:', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!<>B'), ['A', '!', '>>', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!<<,B'), ['A', '!', '<<,', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!>>,B'), ['A', '!', '>>,', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!<<\'B'), ['A', '!', '<<\'', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!>>\'B'), ['A', '!', '>>\'', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!<<:B'), ['A', '!', '<<:', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!>>:B'), ['A', '!', '>>:', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!.B'), ['A', '!', '.', 'B']) + 
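# The assertions in this method only cover tokenization of the pattern language; as a
# quick orientation, the sketch below runs one link operator end to end on a small
# tree (tree and expected positions taken from test_examples further down).
from nltk.tree import ParentedTree
from nltk import tgrep

tree = ParentedTree.fromstring('(S (NP (AP x)) (NP (PP x)))')
# 'NP < PP' matches an NP that immediately dominates a PP: here the second NP, at (1,).
print(list(tgrep.tgrep_positions('NP < PP', [tree])))  # [[(1,)]]
print(list(tgrep.tgrep_nodes('NP < PP', [tree])))      # the matching NP subtree objects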
self.assertEqual(tgrep.tgrep_tokenize('A!,B'), ['A', '!', ',', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!..B'), ['A', '!', '..', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!,,B'), ['A', '!', ',,', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!$B'), ['A', '!', '$', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!$.B'), ['A', '!', '$.', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!$,B'), ['A', '!', '$,', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!$..B'), ['A', '!', '$..', 'B']) + self.assertEqual(tgrep.tgrep_tokenize('A!$,,B'), ['A', '!', '$,,', 'B']) + + def test_tokenize_examples(self): + ''' + Test tokenization of the TGrep2 manual example patterns. + ''' + self.assertEqual(tgrep.tgrep_tokenize('NP < PP'), ['NP', '<', 'PP']) + self.assertEqual(tgrep.tgrep_tokenize('/^NP/'), ['/^NP/']) + self.assertEqual( + tgrep.tgrep_tokenize('NP << PP . VP'), ['NP', '<<', 'PP', '.', 'VP'] + ) + self.assertEqual( + tgrep.tgrep_tokenize('NP << PP | . VP'), ['NP', '<<', 'PP', '|', '.', 'VP'] + ) + self.assertEqual( + tgrep.tgrep_tokenize('NP !<< PP [> NP | >> VP]'), + ['NP', '!', '<<', 'PP', '[', '>', 'NP', '|', '>>', 'VP', ']'], + ) + self.assertEqual( + tgrep.tgrep_tokenize('NP << (PP . VP)'), + ['NP', '<<', '(', 'PP', '.', 'VP', ')'], + ) + self.assertEqual( + tgrep.tgrep_tokenize('NP <\' (PP <, (IN < on))'), + ['NP', '<\'', '(', 'PP', '<,', '(', 'IN', '<', 'on', ')', ')'], + ) + self.assertEqual( + tgrep.tgrep_tokenize('S < (A < B) < C'), + ['S', '<', '(', 'A', '<', 'B', ')', '<', 'C'], + ) + self.assertEqual( + tgrep.tgrep_tokenize('S < ((A < B) < C)'), + ['S', '<', '(', '(', 'A', '<', 'B', ')', '<', 'C', ')'], + ) + self.assertEqual( + tgrep.tgrep_tokenize('S < (A < B < C)'), + ['S', '<', '(', 'A', '<', 'B', '<', 'C', ')'], + ) + self.assertEqual(tgrep.tgrep_tokenize('A3B"3B"', '<', 'C'], + ) + + def test_tokenize_nodenames(self): + ''' + Test tokenization of node names. + ''' + self.assertEqual(tgrep.tgrep_tokenize('Robert'), ['Robert']) + self.assertEqual(tgrep.tgrep_tokenize('/^[Bb]ob/'), ['/^[Bb]ob/']) + self.assertEqual(tgrep.tgrep_tokenize('*'), ['*']) + self.assertEqual(tgrep.tgrep_tokenize('__'), ['__']) + # test tokenization of NLTK tree position syntax + self.assertEqual(tgrep.tgrep_tokenize('N()'), ['N(', ')']) + self.assertEqual(tgrep.tgrep_tokenize('N(0,)'), ['N(', '0', ',', ')']) + self.assertEqual(tgrep.tgrep_tokenize('N(0,0)'), ['N(', '0', ',', '0', ')']) + self.assertEqual( + tgrep.tgrep_tokenize('N(0,0,)'), ['N(', '0', ',', '0', ',', ')'] + ) + + def test_tokenize_macros(self): + ''' + Test tokenization of macro definitions. + ''' + self.assertEqual( + tgrep.tgrep_tokenize( + '@ NP /^NP/;\n@ NN /^NN/;\n@NP [!< NP | < @NN] !$.. @NN' + ), + [ + '@', + 'NP', + '/^NP/', + ';', + '@', + 'NN', + '/^NN/', + ';', + '@NP', + '[', + '!', + '<', + 'NP', + '|', + '<', + '@NN', + ']', + '!', + '$..', + '@NN', + ], + ) + + def test_node_simple(self): + ''' + Test a simple use of tgrep for finding nodes matching a given + pattern. 
+ ''' + tree = ParentedTree.fromstring( + '(S (NP (DT the) (JJ big) (NN dog)) ' '(VP bit) (NP (DT a) (NN cat)))' + ) + self.assertEqual(list(tgrep.tgrep_positions('NN', [tree])), [[(0, 2), (2, 1)]]) + self.assertEqual( + list(tgrep.tgrep_nodes('NN', [tree])), [[tree[0, 2], tree[2, 1]]] + ) + self.assertEqual( + list(tgrep.tgrep_positions('NN|JJ', [tree])), [[(0, 1), (0, 2), (2, 1)]] + ) + + def test_node_printing(self): + '''Test that the tgrep print operator ' is properly ignored.''' + tree = ParentedTree.fromstring('(S (n x) (N x))') + self.assertEqual( + list(tgrep.tgrep_positions('N', [tree])), + list(tgrep.tgrep_positions('\'N', [tree])), + ) + self.assertEqual( + list(tgrep.tgrep_positions('/[Nn]/', [tree])), + list(tgrep.tgrep_positions('\'/[Nn]/', [tree])), + ) + + def test_node_encoding(self): + ''' + Test that tgrep search strings handles bytes and strs the same + way. + ''' + tree = ParentedTree.fromstring( + '(S (NP (DT the) (JJ big) (NN dog)) ' '(VP bit) (NP (DT a) (NN cat)))' + ) + self.assertEqual( + list(tgrep.tgrep_positions(b('NN'), [tree])), + list(tgrep.tgrep_positions('NN', [tree])), + ) + self.assertEqual( + list(tgrep.tgrep_nodes(b('NN'), [tree])), + list(tgrep.tgrep_nodes('NN', [tree])), + ) + self.assertEqual( + list(tgrep.tgrep_positions(b('NN|JJ'), [tree])), + list(tgrep.tgrep_positions('NN|JJ', [tree])), + ) + + def test_node_nocase(self): + ''' + Test selecting nodes using case insensitive node names. + ''' + tree = ParentedTree.fromstring('(S (n x) (N x))') + self.assertEqual(list(tgrep.tgrep_positions('"N"', [tree])), [[(1,)]]) + self.assertEqual(list(tgrep.tgrep_positions('i@"N"', [tree])), [[(0,), (1,)]]) + + def test_node_quoted(self): + ''' + Test selecting nodes using quoted node names. + ''' + tree = ParentedTree.fromstring('(N ("N" x) (N" x) ("\\" x))') + self.assertEqual(list(tgrep.tgrep_positions('"N"', [tree])), [[()]]) + self.assertEqual(list(tgrep.tgrep_positions('"\\"N\\""', [tree])), [[(0,)]]) + self.assertEqual(list(tgrep.tgrep_positions('"N\\""', [tree])), [[(1,)]]) + self.assertEqual(list(tgrep.tgrep_positions('"\\"\\\\\\""', [tree])), [[(2,)]]) + + def test_node_regex(self): + ''' + Test regex matching on nodes. + ''' + tree = ParentedTree.fromstring('(S (NP-SBJ x) (NP x) (NNP x) (VP x))') + # This is a regular expression that matches any node whose + # name starts with NP, including NP-SBJ: + self.assertEqual(list(tgrep.tgrep_positions('/^NP/', [tree])), [[(0,), (1,)]]) + + def test_node_regex_2(self): + ''' + Test regex matching on nodes. + ''' + tree = ParentedTree.fromstring('(S (SBJ x) (SBJ1 x) (NP-SBJ x))') + self.assertEqual(list(tgrep.tgrep_positions('/^SBJ/', [tree])), [[(0,), (1,)]]) + # This is a regular expression that matches any node whose + # name includes SBJ, including NP-SBJ: + self.assertEqual( + list(tgrep.tgrep_positions('/SBJ/', [tree])), [[(0,), (1,), (2,)]] + ) + + def test_node_tree_position(self): + ''' + Test matching on nodes based on NLTK tree position. 
+ ''' + tree = ParentedTree.fromstring('(S (NP-SBJ x) (NP x) (NNP x) (VP x))') + # test all tree positions that are not leaves + leaf_positions = set( + tree.leaf_treeposition(x) for x in range(len(tree.leaves())) + ) + tree_positions = [x for x in tree.treepositions() if x not in leaf_positions] + for position in tree_positions: + node_id = 'N{0}'.format(position) + tgrep_positions = list(tgrep.tgrep_positions(node_id, [tree])) + self.assertEqual(len(tgrep_positions[0]), 1) + self.assertEqual(tgrep_positions[0][0], position) + + def test_node_noleaves(self): + ''' + Test node name matching with the search_leaves flag set to False. + ''' + tree = ParentedTree.fromstring('(S (A (T x)) (B (N x)))') + self.assertEqual( + list(tgrep.tgrep_positions('x', [tree])), [[(0, 0, 0), (1, 0, 0)]] + ) + self.assertEqual(list(tgrep.tgrep_positions('x', [tree], False)), [[]]) + + def tests_rel_dominance(self): + ''' + Test matching nodes based on dominance relations. + ''' + tree = ParentedTree.fromstring('(S (A (T x)) (B (N x)))') + self.assertEqual(list(tgrep.tgrep_positions('* < T', [tree])), [[(0,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* < T > S', [tree])), [[(0,)]]) + self.assertEqual( + list(tgrep.tgrep_positions('* !< T', [tree])), + [[(), (0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0)]], + ) + self.assertEqual(list(tgrep.tgrep_positions('* !< T > S', [tree])), [[(1,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* > A', [tree])), [[(0, 0)]]) + self.assertEqual(list(tgrep.tgrep_positions('* > B', [tree])), [[(1, 0)]]) + self.assertEqual( + list(tgrep.tgrep_positions('* !> B', [tree])), + [[(), (0,), (0, 0), (0, 0, 0), (1,), (1, 0, 0)]], + ) + self.assertEqual( + list(tgrep.tgrep_positions('* !> B >> S', [tree])), [[(0,), (0, 0), (1,)]] + ) + self.assertEqual( + list(tgrep.tgrep_positions('* >> S', [tree])), + [[(0,), (0, 0), (1,), (1, 0)]], + ) + self.assertEqual( + list(tgrep.tgrep_positions('* >>, S', [tree])), [[(0,), (0, 0)]] + ) + self.assertEqual( + list(tgrep.tgrep_positions('* >>\' S', [tree])), [[(1,), (1, 0)]] + ) + # Known issue: + # self.assertEqual(list(tgrep.tgrep_positions('* !>> S', [tree])), + # [[()]]) + self.assertEqual(list(tgrep.tgrep_positions('* << T', [tree])), [[(), (0,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* <<\' T', [tree])), [[(0,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* <<1 N', [tree])), [[(1,)]]) + self.assertEqual( + list(tgrep.tgrep_positions('* !<< T', [tree])), + [[(0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0)]], + ) + tree = ParentedTree.fromstring('(S (A (T x)) (B (T x) (N x )))') + self.assertEqual(list(tgrep.tgrep_positions('* <: T', [tree])), [[(0,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* < T', [tree])), [[(0,), (1,)]]) + self.assertEqual( + list(tgrep.tgrep_positions('* !<: T', [tree])), + [[(), (0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0), (1, 1), (1, 1, 0)]], + ) + self.assertEqual(list(tgrep.tgrep_positions('* !<: T > S', [tree])), [[(1,)]]) + tree = ParentedTree.fromstring('(S (T (A x) (B x)) (T (C x)))') + self.assertEqual(list(tgrep.tgrep_positions('* >: T', [tree])), [[(1, 0)]]) + self.assertEqual( + list(tgrep.tgrep_positions('* !>: T', [tree])), + [[(), (0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1,), (1, 0, 0)]], + ) + tree = ParentedTree.fromstring( + '(S (A (B (C (D (E (T x))))))' ' (A (B (C (D (E (T x))) (N x)))))' + ) + self.assertEqual( + list(tgrep.tgrep_positions('* <<: T', [tree])), + [ + [ + (0,), + (0, 0), + (0, 0, 0), + (0, 0, 0, 0), + (0, 0, 0, 0, 0), + (1, 0, 0, 0), + (1, 0, 0, 0, 0), + ] + 
], + ) + self.assertEqual( + list(tgrep.tgrep_positions('* >>: A', [tree])), + [ + [ + (0, 0), + (0, 0, 0), + (0, 0, 0, 0), + (0, 0, 0, 0, 0), + (0, 0, 0, 0, 0, 0), + (1, 0), + (1, 0, 0), + ] + ], + ) + + def test_bad_operator(self): + ''' + Test error handling of undefined tgrep operators. + ''' + tree = ParentedTree.fromstring('(S (A (T x)) (B (N x)))') + self.assertRaises( + tgrep.TgrepException, list, tgrep.tgrep_positions('* >>> S', [tree]) + ) + + def test_comments(self): + ''' + Test that comments are correctly filtered out of tgrep search + strings. + ''' + tree = ParentedTree.fromstring('(S (NN x) (NP x) (NN x))') + search1 = ''' + @ NP /^NP/; + @ NN /^NN/; + @NN + ''' + self.assertEqual(list(tgrep.tgrep_positions(search1, [tree])), [[(0,), (2,)]]) + search2 = ''' + # macros + @ NP /^NP/; + @ NN /^NN/; + + # search string + @NN + ''' + self.assertEqual(list(tgrep.tgrep_positions(search2, [tree])), [[(0,), (2,)]]) + + def test_rel_sister_nodes(self): + ''' + Test matching sister nodes in a tree. + ''' + tree = ParentedTree.fromstring('(S (A x) (B x) (C x))') + self.assertEqual(list(tgrep.tgrep_positions('* $. B', [tree])), [[(0,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* $.. B', [tree])), [[(0,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* $, B', [tree])), [[(2,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* $,, B', [tree])), [[(2,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* $ B', [tree])), [[(0,), (2,)]]) + + def tests_rel_indexed_children(self): + ''' + Test matching nodes based on their index in their parent node. + ''' + tree = ParentedTree.fromstring('(S (A x) (B x) (C x))') + self.assertEqual(list(tgrep.tgrep_positions('* >, S', [tree])), [[(0,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* >1 S', [tree])), [[(0,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* >2 S', [tree])), [[(1,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* >3 S', [tree])), [[(2,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* >\' S', [tree])), [[(2,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* >-1 S', [tree])), [[(2,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* >-2 S', [tree])), [[(1,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* >-3 S', [tree])), [[(0,)]]) + tree = ParentedTree.fromstring( + '(S (D (A x) (B x) (C x)) (E (B x) (C x) (A x)) ' '(F (C x) (A x) (B x)))' + ) + self.assertEqual(list(tgrep.tgrep_positions('* <, A', [tree])), [[(0,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* <1 A', [tree])), [[(0,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* <2 A', [tree])), [[(2,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* <3 A', [tree])), [[(1,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* <\' A', [tree])), [[(1,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* <-1 A', [tree])), [[(1,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* <-2 A', [tree])), [[(2,)]]) + self.assertEqual(list(tgrep.tgrep_positions('* <-3 A', [tree])), [[(0,)]]) + + def test_rel_precedence(self): + ''' + Test matching nodes based on precedence relations. + ''' + tree = ParentedTree.fromstring( + '(S (NP (NP (PP x)) (NP (AP x)))' + ' (VP (AP (X (PP x)) (Y (AP x))))' + ' (NP (RC (NP (AP x)))))' + ) + self.assertEqual( + list(tgrep.tgrep_positions('* . X', [tree])), [[(0,), (0, 1), (0, 1, 0)]] + ) + self.assertEqual( + list(tgrep.tgrep_positions('* . Y', [tree])), [[(1, 0, 0), (1, 0, 0, 0)]] + ) + self.assertEqual( + list(tgrep.tgrep_positions('* .. 
X', [tree])), + [[(0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0)]], + ) + self.assertEqual( + list(tgrep.tgrep_positions('* .. Y', [tree])), + [[(0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1, 0, 0), (1, 0, 0, 0)]], + ) + self.assertEqual( + list(tgrep.tgrep_positions('* , X', [tree])), [[(1, 0, 1), (1, 0, 1, 0)]] + ) + self.assertEqual( + list(tgrep.tgrep_positions('* , Y', [tree])), + [[(2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]], + ) + self.assertEqual( + list(tgrep.tgrep_positions('* ,, X', [tree])), + [[(1, 0, 1), (1, 0, 1, 0), (2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]], + ) + self.assertEqual( + list(tgrep.tgrep_positions('* ,, Y', [tree])), + [[(2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]], + ) + + def test_examples(self): + ''' + Test the Basic Examples from the TGrep2 manual. + ''' + tree = ParentedTree.fromstring('(S (NP (AP x)) (NP (PP x)))') + # This matches any NP node that immediately dominates a PP: + self.assertEqual(list(tgrep.tgrep_positions('NP < PP', [tree])), [[(1,)]]) + + tree = ParentedTree.fromstring('(S (NP x) (VP x) (NP (PP x)) (VP x))') + # This matches an NP that dominates a PP and is immediately + # followed by a VP: + self.assertEqual(list(tgrep.tgrep_positions('NP << PP . VP', [tree])), [[(2,)]]) + + tree = ParentedTree.fromstring( + '(S (NP (AP x)) (NP (PP x)) ' '(NP (DET x) (NN x)) (VP x))' + ) + # This matches an NP that dominates a PP or is immediately + # followed by a VP: + self.assertEqual( + list(tgrep.tgrep_positions('NP << PP | . VP', [tree])), [[(1,), (2,)]] + ) + + tree = ParentedTree.fromstring( + '(S (NP (NP (PP x)) (NP (AP x)))' + ' (VP (AP (NP (PP x)) (NP (AP x))))' + ' (NP (RC (NP (AP x)))))' + ) + # This matches an NP that does not dominate a PP. Also, the NP + # must either have a parent that is an NP or be dominated by a + # VP: + self.assertEqual( + list(tgrep.tgrep_positions('NP !<< PP [> NP | >> VP]', [tree])), + [[(0, 1), (1, 0, 1)]], + ) + + tree = ParentedTree.fromstring( + '(S (NP (AP (PP x) (VP x))) ' '(NP (AP (PP x) (NP x))) (NP x))' + ) + # This matches an NP that dominates a PP which itself is + # immediately followed by a VP. Note the use of parentheses to + # group ". VP" with the PP rather than with the NP: + self.assertEqual( + list(tgrep.tgrep_positions('NP << (PP . VP)', [tree])), [[(0,)]] + ) + + tree = ParentedTree.fromstring( + '(S (NP (DET a) (NN cat) (PP (IN on) (NP x)))' + ' (NP (DET a) (NN cat) (PP (IN on) (NP x)) (PP x))' + ' (NP x))' + ) + # This matches an NP whose last child is a PP that begins with + # the preposition "on": + self.assertEqual( + list(tgrep.tgrep_positions('NP <\' (PP <, (IN < on))', [tree])), [[(0,)]] + ) + + tree = ParentedTree.fromstring( + '(S (S (C x) (A (B x))) (S (C x) (A x)) ' '(S (D x) (A (B x))))' + ) + # The following pattern matches an S which has a child A and + # another child that is a C and that the A has a child B: + self.assertEqual( + list(tgrep.tgrep_positions('S < (A < B) < C', [tree])), [[(0,)]] + ) + + tree = ParentedTree.fromstring( + '(S (S (A (B x) (C x))) (S (S (C x) (A (B x)))))' + ) + # However, this pattern means that S has child A and that A + # has children B and C: + self.assertEqual( + list(tgrep.tgrep_positions('S < ((A < B) < C)', [tree])), [[(0,)]] + ) + + # It is equivalent to this: + self.assertEqual( + list(tgrep.tgrep_positions('S < (A < B < C)', [tree])), [[(0,)]] + ) + + def test_use_macros(self): + ''' + Test defining and using tgrep2 macros. 
+ ''' + tree = ParentedTree.fromstring( + '(VP (VB sold) (NP (DET the) ' + '(NN heiress)) (NP (NN deed) (PREP to) ' + '(NP (DET the) (NN school) (NN house))))' + ) + self.assertEqual( + list( + tgrep.tgrep_positions( + '@ NP /^NP/;\n@ NN /^NN/;\n@NP !< @NP !$.. @NN', [tree] + ) + ), + [[(1,), (2, 2)]], + ) + # use undefined macro @CNP + self.assertRaises( + tgrep.TgrepException, + list, + tgrep.tgrep_positions( + '@ NP /^NP/;\n@ NN /^NN/;\n@CNP !< @NP !$.. @NN', [tree] + ), + ) + + def test_tokenize_node_labels(self): + '''Test tokenization of labeled nodes.''' + self.assertEqual( + tgrep.tgrep_tokenize('S < @SBJ < (@VP < (@VB $.. @OBJ))'), + [ + 'S', + '<', + '@SBJ', + '<', + '(', + '@VP', + '<', + '(', + '@VB', + '$..', + '@OBJ', + ')', + ')', + ], + ) + self.assertEqual( + tgrep.tgrep_tokenize('S < @SBJ=s < (@VP=v < (@VB $.. @OBJ))'), + [ + 'S', + '<', + '@SBJ', + '=', + 's', + '<', + '(', + '@VP', + '=', + 'v', + '<', + '(', + '@VB', + '$..', + '@OBJ', + ')', + ')', + ], + ) + + def test_tokenize_segmented_patterns(self): + '''Test tokenization of segmented patterns.''' + self.assertEqual( + tgrep.tgrep_tokenize('S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =v'), + [ + 'S', + '<', + '@SBJ', + '=', + 's', + '<', + '(', + '@VP', + '=', + 'v', + '<', + '(', + '@VB', + '$..', + '@OBJ', + ')', + ')', + ':', + '=s', + '..', + '=v', + ], + ) + + def test_labeled_nodes(self): + ''' + Test labeled nodes. + + Test case from Emily M. Bender. + ''' + search = ''' + # macros + @ SBJ /SBJ/; + @ VP /VP/; + @ VB /VB/; + @ VPoB /V[PB]/; + @ OBJ /OBJ/; + + # 1 svo + S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =v''' + sent1 = ParentedTree.fromstring( + '(S (NP-SBJ I) (VP (VB eat) (NP-OBJ (NNS apples))))' + ) + sent2 = ParentedTree.fromstring( + '(S (VP (VB eat) (NP-OBJ (NNS apples))) (NP-SBJ I))' + ) + search_firsthalf = search.split('\n\n')[0] + 'S < @SBJ < (@VP < (@VB $.. @OBJ))' + search_rewrite = 'S < (/.*SBJ/ $.. (/VP/ < (/VB/ $.. /.*OBJ/)))' + + self.assertTrue(list(tgrep.tgrep_positions(search_firsthalf, [sent1]))[0]) + self.assertTrue(list(tgrep.tgrep_positions(search, [sent1]))[0]) + self.assertTrue(list(tgrep.tgrep_positions(search_rewrite, [sent1]))[0]) + self.assertEqual( + list(tgrep.tgrep_positions(search, [sent1])), + list(tgrep.tgrep_positions(search_rewrite, [sent1])), + ) + self.assertTrue(list(tgrep.tgrep_positions(search_firsthalf, [sent2]))[0]) + self.assertFalse(list(tgrep.tgrep_positions(search, [sent2]))[0]) + self.assertFalse(list(tgrep.tgrep_positions(search_rewrite, [sent2]))[0]) + self.assertEqual( + list(tgrep.tgrep_positions(search, [sent2])), + list(tgrep.tgrep_positions(search_rewrite, [sent2])), + ) + + def test_multiple_conjs(self): + ''' + Test that multiple (3 or more) conjunctions of node relations are + handled properly. + ''' + sent = ParentedTree.fromstring('((A (B b) (C c)) (A (B b) (C c) (D d)))') + # search = '(A < B < C < D)' + # search_tworels = '(A < B < C)' + self.assertEqual( + list(tgrep.tgrep_positions('(A < B < C < D)', [sent])), [[(1,)]] + ) + self.assertEqual( + list(tgrep.tgrep_positions('(A < B < C)', [sent])), [[(0,), (1,)]] + ) + + def test_trailing_semicolon(self): + ''' + Test that semicolons at the end of a tgrep2 search string won't + cause a parse failure. 
+ ''' + tree = ParentedTree.fromstring( + '(S (NP (DT the) (JJ big) (NN dog)) ' '(VP bit) (NP (DT a) (NN cat)))' + ) + self.assertEqual(list(tgrep.tgrep_positions('NN', [tree])), [[(0, 2), (2, 1)]]) + self.assertEqual(list(tgrep.tgrep_positions('NN;', [tree])), [[(0, 2), (2, 1)]]) + self.assertEqual( + list(tgrep.tgrep_positions('NN;;', [tree])), [[(0, 2), (2, 1)]] + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_tokenize.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_tokenize.py new file mode 100644 index 0000000..9acfb96 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_tokenize.py @@ -0,0 +1,407 @@ +# -*- coding: utf-8 -*- +""" +Unit tests for nltk.tokenize. +See also nltk/test/tokenize.doctest +""" + +from __future__ import unicode_literals + +import unittest + +from nose import SkipTest +from nose.tools import assert_equal + +from nltk.tokenize import ( + punkt, + word_tokenize, + TweetTokenizer, + StanfordSegmenter, + TreebankWordTokenizer, + SyllableTokenizer, +) + + +class TestTokenize(unittest.TestCase): + def test_tweet_tokenizer(self): + """ + Test TweetTokenizer using words with special and accented characters. + """ + + tokenizer = TweetTokenizer(strip_handles=True, reduce_len=True) + s9 = "@myke: Let's test these words: resumé España München français" + tokens = tokenizer.tokenize(s9) + expected = [ + ':', + "Let's", + 'test', + 'these', + 'words', + ':', + 'resumé', + 'España', + 'München', + 'français', + ] + self.assertEqual(tokens, expected) + + def test_sonority_sequencing_syllable_tokenizer(self): + """ + Test SyllableTokenizer tokenizer. + """ + tokenizer = SyllableTokenizer() + tokens = tokenizer.tokenize('justification') + self.assertEqual(tokens, ['jus', 'ti', 'fi', 'ca', 'tion']) + + def test_stanford_segmenter_arabic(self): + """ + Test the Stanford Word Segmenter for Arabic (default config) + """ + try: + seg = StanfordSegmenter() + seg.default_config('ar') + sent = u'يبحث علم الحاسوب استخدام الحوسبة بجميع اشكالها لحل المشكلات' + segmented_sent = seg.segment(sent.split()) + assert segmented_sent.split() == [ + 'يبحث', + 'علم', + 'الحاسوب', + 'استخدام', + 'الحوسبة', + 'ب', + 'جميع', + 'اشكال', + 'ها', + 'ل', + 'حل', + 'المشكلات', + ] + except LookupError as e: + raise SkipTest(str(e)) + + def test_stanford_segmenter_chinese(self): + """ + Test the Stanford Word Segmenter for Chinese (default config) + """ + try: + seg = StanfordSegmenter() + seg.default_config('zh') + sent = u"这是斯坦福中文分词器测试" + segmented_sent = seg.segment(sent.split()) + assert segmented_sent.split() == ['这', '是', '斯坦福', '中文', '分词器', '测试'] + except LookupError as e: + raise SkipTest(str(e)) + + def test_phone_tokenizer(self): + """ + Test a string that resembles a phone number but contains a newline + """ + + # Should be recognized as a phone number, albeit one with multiple spaces + tokenizer = TweetTokenizer() + test1 = "(393) 928 -3010" + expected = ['(393) 928 -3010'] + result = tokenizer.tokenize(test1) + self.assertEqual(result, expected) + + # Due to newline, first three elements aren't part of a phone number; + # fourth is + test2 = "(393)\n928 -3010" + expected = ['(', '393', ')', "928 -3010"] + result = tokenizer.tokenize(test2) + self.assertEqual(result, expected) + + def test_remove_handle(self): + """ + Test remove_handle() from casual.py with specially crafted edge cases + """ + + tokenizer = TweetTokenizer(strip_handles=True) + + # Simple example. 
Handles with just numbers should be allowed + test1 = "@twitter hello @twi_tter_. hi @12345 @123news" + expected = ['hello', '.', 'hi'] + result = tokenizer.tokenize(test1) + self.assertEqual(result, expected) + + # Handles are allowed to follow any of the following characters + test2 = "@n`@n~@n(@n)@n-@n=@n+@n\\@n|@n[@n]@n{@n}@n;@n:@n'@n\"@n/@n?@n.@n,@n<@n>@n @n\n@n ñ@n.ü@n.ç@n." + expected = [ + '`', + '~', + '(', + ')', + '-', + '=', + '+', + '\\', + '|', + '[', + ']', + '{', + '}', + ';', + ':', + "'", + '"', + '/', + '?', + '.', + ',', + '<', + '>', + 'ñ', + '.', + 'ü', + '.', + 'ç', + '.', + ] + result = tokenizer.tokenize(test2) + self.assertEqual(result, expected) + + # Handles are NOT allowed to follow any of the following characters + test3 = "a@n j@n z@n A@n L@n Z@n 1@n 4@n 7@n 9@n 0@n _@n !@n @@n #@n $@n %@n &@n *@n" + expected = [ + 'a', + '@n', + 'j', + '@n', + 'z', + '@n', + 'A', + '@n', + 'L', + '@n', + 'Z', + '@n', + '1', + '@n', + '4', + '@n', + '7', + '@n', + '9', + '@n', + '0', + '@n', + '_', + '@n', + '!', + '@n', + '@', + '@n', + '#', + '@n', + '$', + '@n', + '%', + '@n', + '&', + '@n', + '*', + '@n', + ] + result = tokenizer.tokenize(test3) + self.assertEqual(result, expected) + + # Handles are allowed to precede the following characters + test4 = "@n!a @n#a @n$a @n%a @n&a @n*a" + expected = ['!', 'a', '#', 'a', '$', 'a', '%', 'a', '&', 'a', '*', 'a'] + result = tokenizer.tokenize(test4) + self.assertEqual(result, expected) + + # Tests interactions with special symbols and multiple @ + test5 = "@n!@n @n#@n @n$@n @n%@n @n&@n @n*@n @n@n @@n @n@@n @n_@n @n7@n @nj@n" + expected = [ + '!', + '@n', + '#', + '@n', + '$', + '@n', + '%', + '@n', + '&', + '@n', + '*', + '@n', + '@n', + '@n', + '@', + '@n', + '@n', + '@', + '@n', + '@n_', + '@n', + '@n7', + '@n', + '@nj', + '@n', + ] + result = tokenizer.tokenize(test5) + self.assertEqual(result, expected) + + # Tests that handles can have a max length of 20 + test6 = "@abcdefghijklmnopqrstuvwxyz @abcdefghijklmnopqrst1234 @abcdefghijklmnopqrst_ @abcdefghijklmnopqrstendofhandle" + expected = ['uvwxyz', '1234', '_', 'endofhandle'] + result = tokenizer.tokenize(test6) + self.assertEqual(result, expected) + + # Edge case where an @ comes directly after a long handle + test7 = "@abcdefghijklmnopqrstu@abcde @abcdefghijklmnopqrst@abcde @abcdefghijklmnopqrst_@abcde @abcdefghijklmnopqrst5@abcde" + expected = [ + 'u', + '@abcde', + '@abcdefghijklmnopqrst', + '@abcde', + '_', + '@abcde', + '5', + '@abcde', + ] + result = tokenizer.tokenize(test7) + self.assertEqual(result, expected) + + def test_treebank_span_tokenizer(self): + """ + Test TreebankWordTokenizer.span_tokenize function + """ + + tokenizer = TreebankWordTokenizer() + + # Test case in the docstring + test1 = "Good muffins cost $3.88\nin New (York). Please (buy) me\ntwo of them.\n(Thanks)." 
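+ # span_tokenize() yields one (start, end) pair of character offsets into the original string per token.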
+ expected = [ + (0, 4), + (5, 12), + (13, 17), + (18, 19), + (19, 23), + (24, 26), + (27, 30), + (31, 32), + (32, 36), + (36, 37), + (37, 38), + (40, 46), + (47, 48), + (48, 51), + (51, 52), + (53, 55), + (56, 59), + (60, 62), + (63, 68), + (69, 70), + (70, 76), + (76, 77), + (77, 78), + ] + result = list(tokenizer.span_tokenize(test1)) + self.assertEqual(result, expected) + + # Test case with double quotation + test2 = "The DUP is similar to the \"religious right\" in the United States and takes a hardline stance on social issues" + expected = [ + (0, 3), + (4, 7), + (8, 10), + (11, 18), + (19, 21), + (22, 25), + (26, 27), + (27, 36), + (37, 42), + (42, 43), + (44, 46), + (47, 50), + (51, 57), + (58, 64), + (65, 68), + (69, 74), + (75, 76), + (77, 85), + (86, 92), + (93, 95), + (96, 102), + (103, 109), + ] + result = list(tokenizer.span_tokenize(test2)) + self.assertEqual(result, expected) + + # Test case with double qoutation as well as converted quotations + test3 = "The DUP is similar to the \"religious right\" in the United States and takes a ``hardline'' stance on social issues" + expected = [ + (0, 3), + (4, 7), + (8, 10), + (11, 18), + (19, 21), + (22, 25), + (26, 27), + (27, 36), + (37, 42), + (42, 43), + (44, 46), + (47, 50), + (51, 57), + (58, 64), + (65, 68), + (69, 74), + (75, 76), + (77, 79), + (79, 87), + (87, 89), + (90, 96), + (97, 99), + (100, 106), + (107, 113), + ] + result = list(tokenizer.span_tokenize(test3)) + self.assertEqual(result, expected) + + def test_word_tokenize(self): + """ + Test word_tokenize function + """ + + sentence = "The 'v', I've been fooled but I'll seek revenge." + expected = ['The', "'", 'v', "'", ',', 'I', "'ve", 'been', 'fooled', + 'but', 'I', "'ll", 'seek', 'revenge', '.'] + self.assertEqual(word_tokenize(sentence), expected) + + sentence = "'v' 're'" + expected = ["'", 'v', "'", "'re", "'"] + self.assertEqual(word_tokenize(sentence), expected) + + def test_punkt_pair_iter(self): + + test_cases = [ + ('12', [('1', '2'), ('2', None)]), + ('123', [('1', '2'), ('2', '3'), ('3', None)]), + ('1234', [('1', '2'), ('2', '3'), ('3', '4'), ('4', None)]), + ] + + for (test_input, expected_output) in test_cases: + actual_output = [x for x in punkt._pair_iter(test_input)] + + assert_equal(actual_output, expected_output) + + def test_punkt_pair_iter_handles_stop_iteration_exception(self): + # test input to trigger StopIteration from next() + it = iter([]) + # call method under test and produce a generator + gen = punkt._pair_iter(it) + # unpack generator, ensure that no error is raised + list(gen) + + def test_punkt_tokenize_words_handles_stop_iteration_exception(self): + obj = punkt.PunktBaseClass() + + class TestPunktTokenizeWordsMock: + def word_tokenize(self, s): + return iter([]) + + obj._lang_vars = TestPunktTokenizeWordsMock() + # unpack generator, ensure that no error is raised + list(obj._tokenize_words('test')) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_twitter_auth.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_twitter_auth.py new file mode 100644 index 0000000..e0189fb --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_twitter_auth.py @@ -0,0 +1,181 @@ +# -*- coding: utf-8 -*- +""" +Tests for static parts of Twitter package +""" + +import os +import unittest +from nose import SkipTest + +try: + import twython +except ImportError as e: + raise SkipTest("The twython library has not been installed.") + +from nltk.twitter import Authenticate + + +class 
TestCredentials(unittest.TestCase): + """ + Tests that Twitter credentials information from file is handled correctly. + """ + + def setUp(self): + self.subdir = os.path.join(os.path.dirname(__file__), 'files') + self.auth = Authenticate() + os.environ['TWITTER'] = 'twitter-files' + + def test_environment(self): + """ + Test that environment variable has been read correctly. + """ + fn = os.path.basename(self.auth.creds_subdir) + self.assertEqual(fn, os.environ['TWITTER']) + + def test_empty_subdir1(self): + """ + Setting subdir to empty path should raise an error. + """ + try: + self.auth.load_creds(subdir='') + # raises ValueError (zero length field name in format) for python 2.6 + # OSError for the rest + except OSError: + pass + except ValueError: + pass + except Exception as e: + self.fail('Unexpected exception thrown: %s' % e) + else: + self.fail('OSError exception not thrown.') + + def test_empty_subdir2(self): + """ + Setting subdir to `None` should raise an error. + """ + self.auth.creds_subdir = None + try: + self.auth.load_creds() + except ValueError: + pass + except Exception as e: + self.fail('Unexpected exception thrown: %s' % e) + else: + self.fail('ValueError exception not thrown.') + + def test_missingdir(self): + """ + Setting subdir to nonexistent directory should raise an error. + """ + try: + self.auth.load_creds(subdir='/nosuchdir') + # raises ValueError (zero length field name in format) for python 2.6 + # OSError for the rest + except OSError: + pass + except ValueError: + pass + except Exception as e: + self.fail('Unexpected exception thrown: %s' % e) + else: + self.fail('OSError exception not thrown.') + + def test_missingfile1(self): + """ + Defaults for authentication will fail since 'credentials.txt' not + present in default subdir, as read from `os.environ['TWITTER']`. + """ + try: + self.auth.load_creds() + # raises ValueError (zero length field name in format) for python 2.6 + # OSError for the rest + except OSError: + pass + except ValueError: + pass + except Exception as e: + self.fail('Unexpected exception thrown: %s' % e) + else: + self.fail('OSError exception not thrown.') + + def test_missingfile2(self): + """ + Credentials file 'foobar' cannot be found in default subdir. 
+ """ + try: + self.auth.load_creds(creds_file='foobar') + # raises ValueError (zero length field name in format) for python 2.6 + # OSError for the rest + except OSError: + pass + except ValueError: + pass + except Exception as e: + self.fail('Unexpected exception thrown: %s' % e) + else: + self.fail('OSError exception not thrown.') + + def test_incomplete_file(self): + """ + Credentials file 'bad_oauth1-1.txt' is incomplete + """ + try: + self.auth.load_creds(creds_file='bad_oauth1-1.txt', subdir=self.subdir) + except ValueError: + pass + except Exception as e: + self.fail('Unexpected exception thrown: %s' % e) + else: + self.fail('ValueError exception not thrown.') + + def test_malformed_file1(self): + """ + First key in credentials file 'bad_oauth1-2.txt' is ill-formed + """ + try: + self.auth.load_creds(creds_file='bad_oauth1-2.txt', subdir=self.subdir) + except ValueError: + pass + except Exception as e: + self.fail('Unexpected exception thrown: %s' % e) + else: + self.fail('ValueError exception not thrown.') + + def test_malformed_file2(self): + """ + First key in credentials file 'bad_oauth1-2.txt' is ill-formed + """ + try: + self.auth.load_creds(creds_file='bad_oauth1-3.txt', subdir=self.subdir) + except ValueError: + pass + except Exception as e: + self.fail('Unexpected exception thrown: %s' % e) + else: + self.fail('ValueError exception not thrown.') + + def test_correct_path(self): + """ + Path to default credentials file is well-formed, given specified + subdir. + """ + self.auth.load_creds(subdir=self.subdir) + self.auth.creds_fullpath = os.path.join(self.subdir, self.auth.creds_file) + + def test_correct_file1(self): + """ + Default credentials file is identified + """ + self.auth.load_creds(subdir=self.subdir) + self.assertEqual(self.auth.creds_file, 'credentials.txt') + + def test_correct_file2(self): + """ + Default credentials file has been read correctluy + """ + oauth = self.auth.load_creds(subdir=self.subdir) + self.assertEqual(oauth['app_key'], 'a') + + +if __name__ == '__main__': + unittest.main() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_wordnet.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_wordnet.py new file mode 100644 index 0000000..a7b26ac --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/test_wordnet.py @@ -0,0 +1,221 @@ +# -*- coding: utf-8 -*- +""" +Unit tests for nltk.corpus.wordnet +See also nltk/test/wordnet.doctest +""" + +from __future__ import unicode_literals + +import collections +import os +import unittest + +from nose import SkipTest + +from nltk.corpus.reader.wordnet import WordNetCorpusReader +from nltk.corpus import wordnet as wn +from nltk.corpus import wordnet_ic as wnic +from nltk.data import find as find_data + + +wn.ensure_loaded() +S = wn.synset +L = wn.lemma + + +class WordnNetDemo(unittest.TestCase): + def test_retrieve_synset(self): + move_synset = S('go.v.21') + self.assertEqual(move_synset.name(), "move.v.15") + self.assertEqual(move_synset.lemma_names(), ['move', 'go']) + self.assertEqual( + move_synset.definition(), "have a turn; make one's move in a game" + ) + self.assertEqual(move_synset.examples(), ['Can I go now?']) + + def test_retrieve_synsets(self): + self.assertEqual(sorted(wn.synsets('zap', pos='n')), [S('zap.n.01')]) + self.assertEqual( + sorted(wn.synsets('zap', pos='v')), + [S('microwave.v.01'), S('nuke.v.01'), S('zap.v.01'), S('zap.v.02')], + ) + + def test_hyperhyponyms(self): + # Not every synset as hypernyms() + 
self.assertEqual(S('travel.v.01').hypernyms(), []) + self.assertEqual(S('travel.v.02').hypernyms(), [S('travel.v.03')]) + self.assertEqual(S('travel.v.03').hypernyms(), []) + + # Test hyper-/hyponyms. + self.assertEqual(S('breakfast.n.1').hypernyms(), [S('meal.n.01')]) + first_five_meal_hypo = [ + S('banquet.n.02'), + S('bite.n.04'), + S('breakfast.n.01'), + S('brunch.n.01'), + S('buffet.n.02'), + ] + self.assertEqual(sorted(S('meal.n.1').hyponyms()[:5]), first_five_meal_hypo) + self.assertEqual(S('Austen.n.1').instance_hypernyms(), [S('writer.n.01')]) + first_five_composer_hypo = [ + S('ambrose.n.01'), + S('bach.n.01'), + S('barber.n.01'), + S('bartok.n.01'), + S('beethoven.n.01'), + ] + self.assertEqual( + S('composer.n.1').instance_hyponyms()[:5], first_five_composer_hypo + ) + + # Test root hyper-/hyponyms + self.assertEqual(S('person.n.01').root_hypernyms(), [S('entity.n.01')]) + self.assertEqual(S('sail.v.01').root_hypernyms(), [S('travel.v.01')]) + self.assertEqual( + S('fall.v.12').root_hypernyms(), [S('act.v.01'), S('fall.v.17')] + ) + + def test_derivationally_related_forms(self): + # Test `derivationally_related_forms()` + self.assertEqual( + L('zap.v.03.nuke').derivationally_related_forms(), + [L('atomic_warhead.n.01.nuke')], + ) + self.assertEqual( + L('zap.v.03.atomize').derivationally_related_forms(), + [L('atomization.n.02.atomization')], + ) + self.assertEqual( + L('zap.v.03.atomise').derivationally_related_forms(), + [L('atomization.n.02.atomisation')], + ) + self.assertEqual(L('zap.v.03.zap').derivationally_related_forms(), []) + + def test_meronyms_holonyms(self): + # Test meronyms, holonyms. + self.assertEqual( + S('dog.n.01').member_holonyms(), [S('canis.n.01'), S('pack.n.06')] + ) + self.assertEqual(S('dog.n.01').part_meronyms(), [S('flag.n.07')]) + + self.assertEqual(S('faculty.n.2').member_meronyms(), [S('professor.n.01')]) + self.assertEqual(S('copilot.n.1').member_holonyms(), [S('crew.n.01')]) + + self.assertEqual( + S('table.n.2').part_meronyms(), + [S('leg.n.03'), S('tabletop.n.01'), S('tableware.n.01')], + ) + self.assertEqual(S('course.n.7').part_holonyms(), [S('meal.n.01')]) + + self.assertEqual( + S('water.n.1').substance_meronyms(), [S('hydrogen.n.01'), S('oxygen.n.01')] + ) + self.assertEqual( + S('gin.n.1').substance_holonyms(), + [ + S('gin_and_it.n.01'), + S('gin_and_tonic.n.01'), + S('martini.n.01'), + S('pink_lady.n.01'), + ], + ) + + def test_antonyms(self): + # Test antonyms. + self.assertEqual( + L('leader.n.1.leader').antonyms(), [L('follower.n.01.follower')] + ) + self.assertEqual( + L('increase.v.1.increase').antonyms(), [L('decrease.v.01.decrease')] + ) + + def test_misc_relations(self): + # Test misc relations. + self.assertEqual(S('snore.v.1').entailments(), [S('sleep.v.01')]) + self.assertEqual( + S('heavy.a.1').similar_tos(), + [ + S('dense.s.03'), + S('doughy.s.01'), + S('heavier-than-air.s.01'), + S('hefty.s.02'), + S('massive.s.04'), + S('non-buoyant.s.01'), + S('ponderous.s.02'), + ], + ) + self.assertEqual(S('light.a.1').attributes(), [S('weight.n.01')]) + self.assertEqual(S('heavy.a.1').attributes(), [S('weight.n.01')]) + + # Test pertainyms. + self.assertEqual( + L('English.a.1.English').pertainyms(), [L('england.n.01.England')] + ) + + def test_lch(self): + # Test LCH. + self.assertEqual( + S('person.n.01').lowest_common_hypernyms(S('dog.n.01')), + [S('organism.n.01')], + ) + self.assertEqual( + S('woman.n.01').lowest_common_hypernyms(S('girlfriend.n.02')), + [S('woman.n.01')], + ) + + def test_domains(self): + # Test domains. 
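+ # topic_domains(), region_domains() and usage_domains() return the synsets that classify this synset by topic, geographical region and usage, respectively.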
+ self.assertEqual(S('code.n.03').topic_domains(), [S('computer_science.n.01')]) + self.assertEqual(S('pukka.a.01').region_domains(), [S('india.n.01')]) + self.assertEqual(S('freaky.a.01').usage_domains(), [S('slang.n.02')]) + + def test_in_topic_domains(self): + # Test in domains. + self.assertEqual( + S('computer_science.n.01').in_topic_domains()[0], S('access.n.05') + ) + self.assertEqual(S('germany.n.01').in_region_domains()[23], S('trillion.n.02')) + self.assertEqual(S('slang.n.02').in_usage_domains()[1], S('airhead.n.01')) + + def test_wordnet_similarities(self): + # Path based similarities. + self.assertAlmostEqual(S('cat.n.01').path_similarity(S('cat.n.01')), 1.0) + self.assertAlmostEqual(S('dog.n.01').path_similarity(S('cat.n.01')), 0.2) + self.assertAlmostEqual( + S('dog.n.01').lch_similarity(S('cat.n.01')), 2.028, places=3 + ) + self.assertAlmostEqual( + S('dog.n.01').wup_similarity(S('cat.n.01')), 0.8571, places=3 + ) + # Information Content similarities. + brown_ic = wnic.ic('ic-brown.dat') + self.assertAlmostEqual( + S('dog.n.01').jcn_similarity(S('cat.n.01'), brown_ic), 0.4497, places=3 + ) + semcor_ic = wnic.ic('ic-semcor.dat') + self.assertAlmostEqual( + S('dog.n.01').lin_similarity(S('cat.n.01'), semcor_ic), 0.8863, places=3 + ) + + def test_omw_lemma_no_trailing_underscore(self): + expected = [ + u'popolna_sprememba_v_mišljenju', + u'popoln_obrat', + u'preobrat', + u'preobrat_v_mišljenju' + ] + self.assertEqual(S('about-face.n.02').lemma_names(lang='slv'), expected) + + def test_iterable_type_for_all_lemma_names(self): + # Duck-test for iterables. + # See https://stackoverflow.com/a/36230057/610569 + cat_lemmas = wn.all_lemma_names(lang='cat') + eng_lemmas = wn.all_lemma_names(lang='eng') + + self.assertTrue(hasattr(eng_lemmas, '__iter__')) + self.assertTrue(hasattr(eng_lemmas, '__next__') or hasattr(eng_lemmas, 'next')) + self.assertTrue(eng_lemmas.__iter__() is eng_lemmas) + + self.assertTrue(hasattr(cat_lemmas, '__iter__')) + self.assertTrue(hasattr(cat_lemmas, '__next__') or hasattr(eng_lemmas, 'next')) + self.assertTrue(cat_lemmas.__iter__() is cat_lemmas) diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__init__.py rename to venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__init__.py diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..a8797b3 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_bleu.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_bleu.cpython-37.pyc new file mode 100644 index 0000000..9f16f57 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_bleu.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_gdfa.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_gdfa.cpython-37.pyc new file mode 100644 index 0000000..743f425 Binary files /dev/null and 
b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_gdfa.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm1.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm1.cpython-37.pyc new file mode 100644 index 0000000..8f2bfff Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm1.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm2.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm2.cpython-37.pyc new file mode 100644 index 0000000..19ea390 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm2.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm3.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm3.cpython-37.pyc new file mode 100644 index 0000000..5649047 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm3.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm4.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm4.cpython-37.pyc new file mode 100644 index 0000000..bab4368 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm4.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm5.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm5.cpython-37.pyc new file mode 100644 index 0000000..58c20db Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm5.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm_model.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm_model.cpython-37.pyc new file mode 100644 index 0000000..e1b65dd Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_ibm_model.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_nist.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_nist.cpython-37.pyc new file mode 100644 index 0000000..5899468 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_nist.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_stack_decoder.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_stack_decoder.cpython-37.pyc new file mode 100644 index 0000000..77c6bec Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/__pycache__/test_stack_decoder.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_bleu.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_bleu.py new file mode 100644 index 0000000..a97d4de --- /dev/null +++ 
b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_bleu.py @@ -0,0 +1,271 @@ +# -*- coding: utf-8 -*- +""" +Tests for BLEU translation evaluation metric +""" + +import functools +import io +import unittest + +from nltk.data import find +from nltk.translate.bleu_score import ( + modified_precision, + brevity_penalty, + closest_ref_length, +) +from nltk.translate.bleu_score import sentence_bleu, corpus_bleu, SmoothingFunction + + +class TestBLEU(unittest.TestCase): + def test_modified_precision(self): + """ + Examples from the original BLEU paper + http://www.aclweb.org/anthology/P02-1040.pdf + """ + # Example 1: the "the*" example. + # Reference sentences. + ref1 = 'the cat is on the mat'.split() + ref2 = 'there is a cat on the mat'.split() + # Hypothesis sentence(s). + hyp1 = 'the the the the the the the'.split() + + references = [ref1, ref2] + + # Testing modified unigram precision. + hyp1_unigram_precision = float(modified_precision(references, hyp1, n=1)) + assert round(hyp1_unigram_precision, 4) == 0.2857 + # With assertAlmostEqual at 4 place precision. + self.assertAlmostEqual(hyp1_unigram_precision, 0.28571428, places=4) + + # Testing modified bigram precision. + assert float(modified_precision(references, hyp1, n=2)) == 0.0 + + # Example 2: the "of the" example. + # Reference sentences + ref1 = str( + 'It is a guide to action that ensures that the military ' + 'will forever heed Party commands' + ).split() + ref2 = str( + 'It is the guiding principle which guarantees the military ' + 'forces always being under the command of the Party' + ).split() + ref3 = str( + 'It is the practical guide for the army always to heed ' + 'the directions of the party' + ).split() + # Hypothesis sentence(s). + hyp1 = 'of the'.split() + + references = [ref1, ref2, ref3] + # Testing modified unigram precision. + assert float(modified_precision(references, hyp1, n=1)) == 1.0 + + # Testing modified bigram precision. + assert float(modified_precision(references, hyp1, n=2)) == 1.0 + + # Example 3: Proper MT outputs. + hyp1 = str( + 'It is a guide to action which ensures that the military ' + 'always obeys the commands of the party' + ).split() + hyp2 = str( + 'It is to insure the troops forever hearing the activity ' + 'guidebook that party direct' + ).split() + + references = [ref1, ref2, ref3] + + # Unigram precision. + hyp1_unigram_precision = float(modified_precision(references, hyp1, n=1)) + hyp2_unigram_precision = float(modified_precision(references, hyp2, n=1)) + # Test unigram precision with assertAlmostEqual at 4 place precision. + self.assertAlmostEqual(hyp1_unigram_precision, 0.94444444, places=4) + self.assertAlmostEqual(hyp2_unigram_precision, 0.57142857, places=4) + # Test unigram precision with rounding. + assert round(hyp1_unigram_precision, 4) == 0.9444 + assert round(hyp2_unigram_precision, 4) == 0.5714 + + # Bigram precision + hyp1_bigram_precision = float(modified_precision(references, hyp1, n=2)) + hyp2_bigram_precision = float(modified_precision(references, hyp2, n=2)) + # Test bigram precision with assertAlmostEqual at 4 place precision. + self.assertAlmostEqual(hyp1_bigram_precision, 0.58823529, places=4) + self.assertAlmostEqual(hyp2_bigram_precision, 0.07692307, places=4) + # Test bigram precision with rounding. + assert round(hyp1_bigram_precision, 4) == 0.5882 + assert round(hyp2_bigram_precision, 4) == 0.0769 + + def test_brevity_penalty(self): + # Test case from brevity_penalty_closest function in mteval-v13a.pl. 
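+ # Brevity penalty: BP = 1 if the hypothesis is at least as long as the closest reference, otherwise exp(1 - r/c), where r is the closest reference length and c the hypothesis length.
+ # First case below: r = 8 (closest to c = 7), so BP = exp(1 - 8/7) ≈ 0.8669.
+ # Second case: a reference of length 7 matches c exactly, so BP = 1.0.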
+ # Same test cases as in the doctest in nltk.translate.bleu_score.py + references = [['a'] * 11, ['a'] * 8] + hypothesis = ['a'] * 7 + hyp_len = len(hypothesis) + closest_ref_len = closest_ref_length(references, hyp_len) + self.assertAlmostEqual( + brevity_penalty(closest_ref_len, hyp_len), 0.8669, places=4 + ) + + references = [['a'] * 11, ['a'] * 8, ['a'] * 6, ['a'] * 7] + hypothesis = ['a'] * 7 + hyp_len = len(hypothesis) + closest_ref_len = closest_ref_length(references, hyp_len) + assert brevity_penalty(closest_ref_len, hyp_len) == 1.0 + + def test_zero_matches(self): + # Test case where there's 0 matches + references = ['The candidate has no alignment to any of the references'.split()] + hypothesis = 'John loves Mary'.split() + + # Test BLEU to nth order of n-grams, where n is len(hypothesis). + for n in range(1, len(hypothesis)): + weights = [1.0 / n] * n # Uniform weights. + assert sentence_bleu(references, hypothesis, weights) == 0 + + def test_full_matches(self): + # Test case where there's 100% matches + references = ['John loves Mary'.split()] + hypothesis = 'John loves Mary'.split() + + # Test BLEU to nth order of n-grams, where n is len(hypothesis). + for n in range(1, len(hypothesis)): + weights = [1.0 / n] * n # Uniform weights. + assert sentence_bleu(references, hypothesis, weights) == 1.0 + + def test_partial_matches_hypothesis_longer_than_reference(self): + references = ['John loves Mary'.split()] + hypothesis = 'John loves Mary who loves Mike'.split() + # Since no 4-grams matches were found the result should be zero + # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0 + self.assertAlmostEqual(sentence_bleu(references, hypothesis), 0.0, places=4) + # Checks that the warning has been raised because len(reference) < 4. + try: + self.assertWarns(UserWarning, sentence_bleu, references, hypothesis) + except AttributeError: + pass # unittest.TestCase.assertWarns is only supported in Python >= 3.2. + + +# @unittest.skip("Skipping fringe cases for BLEU.") +class TestBLEUFringeCases(unittest.TestCase): + def test_case_where_n_is_bigger_than_hypothesis_length(self): + # Test BLEU to nth order of n-grams, where n > len(hypothesis). + references = ['John loves Mary ?'.split()] + hypothesis = 'John loves Mary'.split() + n = len(hypothesis) + 1 # + weights = [1.0 / n] * n # Uniform weights. + # Since no n-grams matches were found the result should be zero + # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0 + self.assertAlmostEqual( + sentence_bleu(references, hypothesis, weights), 0.0, places=4 + ) + # Checks that the warning has been raised because len(hypothesis) < 4. + try: + self.assertWarns(UserWarning, sentence_bleu, references, hypothesis) + except AttributeError: + pass # unittest.TestCase.assertWarns is only supported in Python >= 3.2. + + # Test case where n > len(hypothesis) but so is n > len(reference), and + # it's a special case where reference == hypothesis. + references = ['John loves Mary'.split()] + hypothesis = 'John loves Mary'.split() + # Since no 4-grams matches were found the result should be zero + # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0 + self.assertAlmostEqual( + sentence_bleu(references, hypothesis, weights), 0.0, places=4 + ) + + def test_empty_hypothesis(self): + # Test case where there's hypothesis is empty. 
+ references = ['The candidate has no alignment to any of the references'.split()] + hypothesis = [] + assert sentence_bleu(references, hypothesis) == 0 + + def test_empty_references(self): + # Test case where there's reference is empty. + references = [[]] + hypothesis = 'John loves Mary'.split() + assert sentence_bleu(references, hypothesis) == 0 + + def test_empty_references_and_hypothesis(self): + # Test case where both references and hypothesis is empty. + references = [[]] + hypothesis = [] + assert sentence_bleu(references, hypothesis) == 0 + + def test_reference_or_hypothesis_shorter_than_fourgrams(self): + # Tese case where the length of reference or hypothesis + # is shorter than 4. + references = ['let it go'.split()] + hypothesis = 'let go it'.split() + # Checks that the value the hypothesis and reference returns is 0.0 + # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0 + self.assertAlmostEqual(sentence_bleu(references, hypothesis), 0.0, places=4) + # Checks that the warning has been raised. + try: + self.assertWarns(UserWarning, sentence_bleu, references, hypothesis) + except AttributeError: + pass # unittest.TestCase.assertWarns is only supported in Python >= 3.2. + + +class TestBLEUvsMteval13a(unittest.TestCase): + def test_corpus_bleu(self): + ref_file = find('models/wmt15_eval/ref.ru') + hyp_file = find('models/wmt15_eval/google.ru') + mteval_output_file = find('models/wmt15_eval/mteval-13a.output') + + # Reads the BLEU scores from the `mteval-13a.output` file. + # The order of the list corresponds to the order of the ngrams. + with open(mteval_output_file, 'r') as mteval_fin: + # The numbers are located in the last 2nd line of the file. + # The first and 2nd item in the list are the score and system names. + mteval_bleu_scores = map(float, mteval_fin.readlines()[-2].split()[1:-1]) + + with io.open(ref_file, 'r', encoding='utf8') as ref_fin: + with io.open(hyp_file, 'r', encoding='utf8') as hyp_fin: + # Whitespace tokenize the file. + # Note: split() automatically strip(). + hypothesis = list(map(lambda x: x.split(), hyp_fin)) + # Note that the corpus_bleu input is list of list of references. + references = list(map(lambda x: [x.split()], ref_fin)) + # Without smoothing. + for i, mteval_bleu in zip(range(1, 10), mteval_bleu_scores): + nltk_bleu = corpus_bleu( + references, hypothesis, weights=(1.0 / i,) * i + ) + # Check that the BLEU scores difference is less than 0.005 . + # Note: This is an approximate comparison; as much as + # +/- 0.01 BLEU might be "statistically significant", + # the actual translation quality might not be. + assert abs(mteval_bleu - nltk_bleu) < 0.005 + + # With the same smoothing method used in mteval-v13a.pl + chencherry = SmoothingFunction() + for i, mteval_bleu in zip(range(1, 10), mteval_bleu_scores): + nltk_bleu = corpus_bleu( + references, + hypothesis, + weights=(1.0 / i,) * i, + smoothing_function=chencherry.method3, + ) + assert abs(mteval_bleu - nltk_bleu) < 0.005 + + +class TestBLEUWithBadSentence(unittest.TestCase): + def test_corpus_bleu_with_bad_sentence(self): + hyp = "Teo S yb , oe uNb , R , T t , , t Tue Ar saln S , , 5istsi l , 5oe R ulO sae oR R" + ref = str( + "Their tasks include changing a pump on the faulty stokehold ." + "Likewise , two species that are very similar in morphology " + "were distinguished using genetics ." + ) + references = [[ref.split()]] + hypotheses = [hyp.split()] + try: # Check that the warning is raised since no. of 2-grams < 0. 
+ with self.assertWarns(UserWarning): + # Verify that the BLEU output is undesired since no. of 2-grams < 0. + self.assertAlmostEqual( + corpus_bleu(references, hypotheses), 0.0, places=4 + ) + except AttributeError: # unittest.TestCase.assertWarns is only supported in Python >= 3.2. + self.assertAlmostEqual(corpus_bleu(references, hypotheses), 0.0, places=4) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_gdfa.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_gdfa.py new file mode 100644 index 0000000..58db482 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_gdfa.py @@ -0,0 +1,157 @@ +# -*- coding: utf-8 -*- +""" +Tests GDFA alignments +""" + +import functools +import io +import unittest + +from nltk.translate.gdfa import grow_diag_final_and + + +class TestGDFA(unittest.TestCase): + def test_from_eflomal_outputs(self): + """ + Testing GDFA with first 10 eflomal outputs from issue #1829 + https://github.com/nltk/nltk/issues/1829 + """ + # Input. + forwards = [ + '0-0 1-2', + '0-0 1-1', + '0-0 2-1 3-2 4-3 5-4 6-5 7-6 8-7 7-8 9-9 10-10 9-11 11-12 12-13 13-14', + '0-0 1-1 1-2 2-3 3-4 4-5 4-6 5-7 6-8 8-9 9-10', + '0-0 14-1 15-2 16-3 20-5 21-6 22-7 5-8 6-9 7-10 8-11 9-12 10-13 11-14 12-15 13-16 14-17 17-18 18-19 19-20 20-21 23-22 24-23 25-24 26-25 27-27 28-28 29-29 30-30 31-31', + '0-0 1-1 0-2 2-3', + '0-0 2-2 4-4', + '0-0 1-1 2-3 3-4 5-5 7-6 8-7 9-8 10-9 11-10 12-11 13-12 14-13 15-14 16-16 17-17 18-18 19-19 20-20', + '3-0 4-1 6-2 5-3 6-4 7-5 8-6 9-7 10-8 11-9 16-10 9-12 10-13 12-14', + '1-0', + ] + backwards = [ + '0-0 1-2', + '0-0 1-1', + '0-0 2-1 3-2 4-3 5-4 6-5 7-6 8-7 9-8 10-10 11-12 12-11 13-13', + '0-0 1-2 2-3 3-4 4-6 6-8 7-5 8-7 9-8', + '0-0 1-8 2-9 3-10 4-11 5-12 6-11 8-13 9-14 10-15 11-16 12-17 13-18 14-19 15-20 16-21 17-22 18-23 19-24 20-29 21-30 22-31 23-2 24-3 25-4 26-5 27-5 28-6 29-7 30-28 31-31', + '0-0 1-1 2-3', + '0-0 1-1 2-3 4-4', + '0-0 1-1 2-3 3-4 5-5 7-6 8-7 9-8 10-9 11-10 12-11 13-12 14-13 15-14 16-16 17-17 18-18 19-19 20-16 21-18', + '0-0 1-1 3-2 4-1 5-3 6-4 7-5 8-6 9-7 10-8 11-9 12-8 13-9 14-8 15-9 16-10', + '1-0', + ] + source_lens = [2, 3, 3, 15, 11, 33, 4, 6, 23, 18] + target_lens = [2, 4, 3, 16, 12, 33, 5, 6, 22, 16] + # Expected Output. + expected = [ + [(0, 0), (1, 2)], + [(0, 0), (1, 1)], + [ + (0, 0), + (2, 1), + (3, 2), + (4, 3), + (5, 4), + (6, 5), + (7, 6), + (8, 7), + (10, 10), + (11, 12), + ], + [ + (0, 0), + (1, 1), + (1, 2), + (2, 3), + (3, 4), + (4, 5), + (4, 6), + (5, 7), + (6, 8), + (7, 5), + (8, 7), + (8, 9), + (9, 8), + (9, 10), + ], + [ + (0, 0), + (1, 8), + (2, 9), + (3, 10), + (4, 11), + (5, 8), + (6, 9), + (6, 11), + (7, 10), + (8, 11), + (31, 31), + ], + [(0, 0), (0, 2), (1, 1), (2, 3)], + [(0, 0), (1, 1), (2, 2), (2, 3), (4, 4)], + [ + (0, 0), + (1, 1), + (2, 3), + (3, 4), + (5, 5), + (7, 6), + (8, 7), + (9, 8), + (10, 9), + (11, 10), + (12, 11), + (13, 12), + (14, 13), + (15, 14), + (16, 16), + (17, 17), + (18, 18), + (19, 19), + ], + [ + (0, 0), + (1, 1), + (3, 0), + (3, 2), + (4, 1), + (5, 3), + (6, 2), + (6, 4), + (7, 5), + (8, 6), + (9, 7), + (9, 12), + (10, 8), + (10, 13), + (11, 9), + (12, 8), + (12, 14), + (13, 9), + (14, 8), + (15, 9), + (16, 10), + ], + [(1, 0)], + [ + (0, 0), + (1, 1), + (3, 2), + (4, 3), + (5, 4), + (6, 5), + (7, 6), + (9, 10), + (10, 12), + (11, 13), + (12, 14), + (13, 15), + ], + ] + + # Iterate through all 10 examples and check for expected outputs. 
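+ # grow_diag_final_and() symmetrizes the forward (src->trg) and backward (trg->src) alignments: roughly, it starts from their intersection, grows it with neighbouring points drawn from the union, and finally adds remaining union points for words that are still unaligned.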
+ for fw, bw, src_len, trg_len, expect in zip( + forwards, backwards, source_lens, target_lens, expected + ): + self.assertListEqual(expect, grow_diag_final_and(src_len, trg_len, fw, bw)) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm1.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm1.py new file mode 100644 index 0000000..ae8c941 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm1.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- +""" +Tests for IBM Model 1 training methods +""" + +import unittest + +from collections import defaultdict +from nltk.translate import AlignedSent +from nltk.translate import IBMModel +from nltk.translate import IBMModel1 +from nltk.translate.ibm_model import AlignmentInfo + + +class TestIBMModel1(unittest.TestCase): + def test_set_uniform_translation_probabilities(self): + # arrange + corpus = [ + AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']), + AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']), + ] + model1 = IBMModel1(corpus, 0) + + # act + model1.set_uniform_probabilities(corpus) + + # assert + # expected_prob = 1.0 / (target vocab size + 1) + self.assertEqual(model1.translation_table['ham']['eier'], 1.0 / 3) + self.assertEqual(model1.translation_table['eggs'][None], 1.0 / 3) + + def test_set_uniform_translation_probabilities_of_non_domain_values(self): + # arrange + corpus = [ + AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']), + AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']), + ] + model1 = IBMModel1(corpus, 0) + + # act + model1.set_uniform_probabilities(corpus) + + # assert + # examine target words that are not in the training data domain + self.assertEqual(model1.translation_table['parrot']['eier'], IBMModel.MIN_PROB) + + def test_prob_t_a_given_s(self): + # arrange + src_sentence = ["ich", 'esse', 'ja', 'gern', 'räucherschinken'] + trg_sentence = ['i', 'love', 'to', 'eat', 'smoked', 'ham'] + corpus = [AlignedSent(trg_sentence, src_sentence)] + alignment_info = AlignmentInfo( + (0, 1, 4, 0, 2, 5, 5), + [None] + src_sentence, + ['UNUSED'] + trg_sentence, + None, + ) + + translation_table = defaultdict(lambda: defaultdict(float)) + translation_table['i']['ich'] = 0.98 + translation_table['love']['gern'] = 0.98 + translation_table['to'][None] = 0.98 + translation_table['eat']['esse'] = 0.98 + translation_table['smoked']['räucherschinken'] = 0.98 + translation_table['ham']['räucherschinken'] = 0.98 + + model1 = IBMModel1(corpus, 0) + model1.translation_table = translation_table + + # act + probability = model1.prob_t_a_given_s(alignment_info) + + # assert + lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98 + expected_probability = lexical_translation + self.assertEqual(round(probability, 4), round(expected_probability, 4)) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm2.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm2.py new file mode 100644 index 0000000..1d0579b --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm2.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +""" +Tests for IBM Model 2 training methods +""" + +import unittest + +from collections import defaultdict +from nltk.translate import AlignedSent +from nltk.translate import IBMModel +from nltk.translate import IBMModel2 +from nltk.translate.ibm_model import AlignmentInfo + + +class TestIBMModel2(unittest.TestCase): + def 
test_set_uniform_alignment_probabilities(self): + # arrange + corpus = [ + AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']), + AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']), + ] + model2 = IBMModel2(corpus, 0) + + # act + model2.set_uniform_probabilities(corpus) + + # assert + # expected_prob = 1.0 / (length of source sentence + 1) + self.assertEqual(model2.alignment_table[0][1][3][2], 1.0 / 4) + self.assertEqual(model2.alignment_table[2][4][2][4], 1.0 / 3) + + def test_set_uniform_alignment_probabilities_of_non_domain_values(self): + # arrange + corpus = [ + AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']), + AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']), + ] + model2 = IBMModel2(corpus, 0) + + # act + model2.set_uniform_probabilities(corpus) + + # assert + # examine i and j values that are not in the training data domain + self.assertEqual(model2.alignment_table[99][1][3][2], IBMModel.MIN_PROB) + self.assertEqual(model2.alignment_table[2][99][2][4], IBMModel.MIN_PROB) + + def test_prob_t_a_given_s(self): + # arrange + src_sentence = ["ich", 'esse', 'ja', 'gern', 'räucherschinken'] + trg_sentence = ['i', 'love', 'to', 'eat', 'smoked', 'ham'] + corpus = [AlignedSent(trg_sentence, src_sentence)] + alignment_info = AlignmentInfo( + (0, 1, 4, 0, 2, 5, 5), + [None] + src_sentence, + ['UNUSED'] + trg_sentence, + None, + ) + + translation_table = defaultdict(lambda: defaultdict(float)) + translation_table['i']['ich'] = 0.98 + translation_table['love']['gern'] = 0.98 + translation_table['to'][None] = 0.98 + translation_table['eat']['esse'] = 0.98 + translation_table['smoked']['räucherschinken'] = 0.98 + translation_table['ham']['räucherschinken'] = 0.98 + + alignment_table = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float))) + ) + alignment_table[0][3][5][6] = 0.97 # None -> to + alignment_table[1][1][5][6] = 0.97 # ich -> i + alignment_table[2][4][5][6] = 0.97 # esse -> eat + alignment_table[4][2][5][6] = 0.97 # gern -> love + alignment_table[5][5][5][6] = 0.96 # räucherschinken -> smoked + alignment_table[5][6][5][6] = 0.96 # räucherschinken -> ham + + model2 = IBMModel2(corpus, 0) + model2.translation_table = translation_table + model2.alignment_table = alignment_table + + # act + probability = model2.prob_t_a_given_s(alignment_info) + + # assert + lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98 + alignment = 0.97 * 0.97 * 0.97 * 0.97 * 0.96 * 0.96 + expected_probability = lexical_translation * alignment + self.assertEqual(round(probability, 4), round(expected_probability, 4)) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm3.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm3.py new file mode 100644 index 0000000..7c42404 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm3.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +""" +Tests for IBM Model 3 training methods +""" + +import unittest + +from collections import defaultdict +from nltk.translate import AlignedSent +from nltk.translate import IBMModel +from nltk.translate import IBMModel3 +from nltk.translate.ibm_model import AlignmentInfo + + +class TestIBMModel3(unittest.TestCase): + def test_set_uniform_distortion_probabilities(self): + # arrange + corpus = [ + AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']), + AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']), + ] + model3 = IBMModel3(corpus, 0) + + # act 
+ model3.set_uniform_probabilities(corpus) + + # assert + # expected_prob = 1.0 / length of target sentence + self.assertEqual(model3.distortion_table[1][0][3][2], 1.0 / 2) + self.assertEqual(model3.distortion_table[4][2][2][4], 1.0 / 4) + + def test_set_uniform_distortion_probabilities_of_non_domain_values(self): + # arrange + corpus = [ + AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']), + AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']), + ] + model3 = IBMModel3(corpus, 0) + + # act + model3.set_uniform_probabilities(corpus) + + # assert + # examine i and j values that are not in the training data domain + self.assertEqual(model3.distortion_table[0][0][3][2], IBMModel.MIN_PROB) + self.assertEqual(model3.distortion_table[9][2][2][4], IBMModel.MIN_PROB) + self.assertEqual(model3.distortion_table[2][9][2][4], IBMModel.MIN_PROB) + + def test_prob_t_a_given_s(self): + # arrange + src_sentence = ["ich", 'esse', 'ja', 'gern', 'räucherschinken'] + trg_sentence = ['i', 'love', 'to', 'eat', 'smoked', 'ham'] + corpus = [AlignedSent(trg_sentence, src_sentence)] + alignment_info = AlignmentInfo( + (0, 1, 4, 0, 2, 5, 5), + [None] + src_sentence, + ['UNUSED'] + trg_sentence, + [[3], [1], [4], [], [2], [5, 6]], + ) + + distortion_table = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float))) + ) + distortion_table[1][1][5][6] = 0.97 # i -> ich + distortion_table[2][4][5][6] = 0.97 # love -> gern + distortion_table[3][0][5][6] = 0.97 # to -> NULL + distortion_table[4][2][5][6] = 0.97 # eat -> esse + distortion_table[5][5][5][6] = 0.97 # smoked -> räucherschinken + distortion_table[6][5][5][6] = 0.97 # ham -> räucherschinken + + translation_table = defaultdict(lambda: defaultdict(float)) + translation_table['i']['ich'] = 0.98 + translation_table['love']['gern'] = 0.98 + translation_table['to'][None] = 0.98 + translation_table['eat']['esse'] = 0.98 + translation_table['smoked']['räucherschinken'] = 0.98 + translation_table['ham']['räucherschinken'] = 0.98 + + fertility_table = defaultdict(lambda: defaultdict(float)) + fertility_table[1]['ich'] = 0.99 + fertility_table[1]['esse'] = 0.99 + fertility_table[0]['ja'] = 0.99 + fertility_table[1]['gern'] = 0.99 + fertility_table[2]['räucherschinken'] = 0.999 + fertility_table[1][None] = 0.99 + + probabilities = { + 'p1': 0.167, + 'translation_table': translation_table, + 'distortion_table': distortion_table, + 'fertility_table': fertility_table, + 'alignment_table': None, + } + + model3 = IBMModel3(corpus, 0, probabilities) + + # act + probability = model3.prob_t_a_given_s(alignment_info) + + # assert + null_generation = 5 * pow(0.167, 1) * pow(0.833, 4) + fertility = 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 2 * 0.999 + lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98 + distortion = 0.97 * 0.97 * 0.97 * 0.97 * 0.97 * 0.97 + expected_probability = ( + null_generation * fertility * lexical_translation * distortion + ) + self.assertEqual(round(probability, 4), round(expected_probability, 4)) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm4.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm4.py new file mode 100644 index 0000000..c6e5398 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm4.py @@ -0,0 +1,123 @@ +# -*- coding: utf-8 -*- +""" +Tests for IBM Model 4 training methods +""" + +import unittest + +from collections import defaultdict +from nltk.translate import AlignedSent +from 
nltk.translate import IBMModel +from nltk.translate import IBMModel4 +from nltk.translate.ibm_model import AlignmentInfo + + +class TestIBMModel4(unittest.TestCase): + def test_set_uniform_distortion_probabilities_of_max_displacements(self): + # arrange + src_classes = {'schinken': 0, 'eier': 0, 'spam': 1} + trg_classes = {'ham': 0, 'eggs': 1, 'spam': 2} + corpus = [ + AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']), + AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']), + ] + model4 = IBMModel4(corpus, 0, src_classes, trg_classes) + + # act + model4.set_uniform_probabilities(corpus) + + # assert + # number of displacement values = + # 2 *(number of words in longest target sentence - 1) + expected_prob = 1.0 / (2 * (4 - 1)) + + # examine the boundary values for (displacement, src_class, trg_class) + self.assertEqual(model4.head_distortion_table[3][0][0], expected_prob) + self.assertEqual(model4.head_distortion_table[-3][1][2], expected_prob) + self.assertEqual(model4.non_head_distortion_table[3][0], expected_prob) + self.assertEqual(model4.non_head_distortion_table[-3][2], expected_prob) + + def test_set_uniform_distortion_probabilities_of_non_domain_values(self): + # arrange + src_classes = {'schinken': 0, 'eier': 0, 'spam': 1} + trg_classes = {'ham': 0, 'eggs': 1, 'spam': 2} + corpus = [ + AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']), + AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']), + ] + model4 = IBMModel4(corpus, 0, src_classes, trg_classes) + + # act + model4.set_uniform_probabilities(corpus) + + # assert + # examine displacement values that are not in the training data domain + self.assertEqual(model4.head_distortion_table[4][0][0], IBMModel.MIN_PROB) + self.assertEqual(model4.head_distortion_table[100][1][2], IBMModel.MIN_PROB) + self.assertEqual(model4.non_head_distortion_table[4][0], IBMModel.MIN_PROB) + self.assertEqual(model4.non_head_distortion_table[100][2], IBMModel.MIN_PROB) + + def test_prob_t_a_given_s(self): + # arrange + src_sentence = ["ich", 'esse', 'ja', 'gern', 'räucherschinken'] + trg_sentence = ['i', 'love', 'to', 'eat', 'smoked', 'ham'] + src_classes = {'räucherschinken': 0, 'ja': 1, 'ich': 2, 'esse': 3, 'gern': 4} + trg_classes = {'ham': 0, 'smoked': 1, 'i': 3, 'love': 4, 'to': 2, 'eat': 4} + corpus = [AlignedSent(trg_sentence, src_sentence)] + alignment_info = AlignmentInfo( + (0, 1, 4, 0, 2, 5, 5), + [None] + src_sentence, + ['UNUSED'] + trg_sentence, + [[3], [1], [4], [], [2], [5, 6]], + ) + + head_distortion_table = defaultdict( + lambda: defaultdict(lambda: defaultdict(float)) + ) + head_distortion_table[1][None][3] = 0.97 # None, i + head_distortion_table[3][2][4] = 0.97 # ich, eat + head_distortion_table[-2][3][4] = 0.97 # esse, love + head_distortion_table[3][4][1] = 0.97 # gern, smoked + + non_head_distortion_table = defaultdict(lambda: defaultdict(float)) + non_head_distortion_table[1][0] = 0.96 # ham + + translation_table = defaultdict(lambda: defaultdict(float)) + translation_table['i']['ich'] = 0.98 + translation_table['love']['gern'] = 0.98 + translation_table['to'][None] = 0.98 + translation_table['eat']['esse'] = 0.98 + translation_table['smoked']['räucherschinken'] = 0.98 + translation_table['ham']['räucherschinken'] = 0.98 + + fertility_table = defaultdict(lambda: defaultdict(float)) + fertility_table[1]['ich'] = 0.99 + fertility_table[1]['esse'] = 0.99 + fertility_table[0]['ja'] = 0.99 + fertility_table[1]['gern'] = 0.99 + fertility_table[2]['räucherschinken'] = 0.999 + 
fertility_table[1][None] = 0.99 + + probabilities = { + 'p1': 0.167, + 'translation_table': translation_table, + 'head_distortion_table': head_distortion_table, + 'non_head_distortion_table': non_head_distortion_table, + 'fertility_table': fertility_table, + 'alignment_table': None, + } + + model4 = IBMModel4(corpus, 0, src_classes, trg_classes, probabilities) + + # act + probability = model4.prob_t_a_given_s(alignment_info) + + # assert + null_generation = 5 * pow(0.167, 1) * pow(0.833, 4) + fertility = 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 2 * 0.999 + lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98 + distortion = 0.97 * 0.97 * 1 * 0.97 * 0.97 * 0.96 + expected_probability = ( + null_generation * fertility * lexical_translation * distortion + ) + self.assertEqual(round(probability, 4), round(expected_probability, 4)) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm5.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm5.py new file mode 100644 index 0000000..a3eecb3 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm5.py @@ -0,0 +1,164 @@ +# -*- coding: utf-8 -*- +""" +Tests for IBM Model 5 training methods +""" + +import unittest + +from collections import defaultdict +from nltk.translate import AlignedSent +from nltk.translate import IBMModel +from nltk.translate import IBMModel4 +from nltk.translate import IBMModel5 +from nltk.translate.ibm_model import AlignmentInfo + + +class TestIBMModel5(unittest.TestCase): + def test_set_uniform_vacancy_probabilities_of_max_displacements(self): + # arrange + src_classes = {'schinken': 0, 'eier': 0, 'spam': 1} + trg_classes = {'ham': 0, 'eggs': 1, 'spam': 2} + corpus = [ + AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']), + AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']), + ] + model5 = IBMModel5(corpus, 0, src_classes, trg_classes) + + # act + model5.set_uniform_probabilities(corpus) + + # assert + # number of vacancy difference values = + # 2 * number of words in longest target sentence + expected_prob = 1.0 / (2 * 4) + + # examine the boundary values for (dv, max_v, trg_class) + self.assertEqual(model5.head_vacancy_table[4][4][0], expected_prob) + self.assertEqual(model5.head_vacancy_table[-3][1][2], expected_prob) + self.assertEqual(model5.non_head_vacancy_table[4][4][0], expected_prob) + self.assertEqual(model5.non_head_vacancy_table[-3][1][2], expected_prob) + + def test_set_uniform_vacancy_probabilities_of_non_domain_values(self): + # arrange + src_classes = {'schinken': 0, 'eier': 0, 'spam': 1} + trg_classes = {'ham': 0, 'eggs': 1, 'spam': 2} + corpus = [ + AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']), + AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']), + ] + model5 = IBMModel5(corpus, 0, src_classes, trg_classes) + + # act + model5.set_uniform_probabilities(corpus) + + # assert + # examine dv and max_v values that are not in the training data domain + self.assertEqual(model5.head_vacancy_table[5][4][0], IBMModel.MIN_PROB) + self.assertEqual(model5.head_vacancy_table[-4][1][2], IBMModel.MIN_PROB) + self.assertEqual(model5.head_vacancy_table[4][0][0], IBMModel.MIN_PROB) + self.assertEqual(model5.non_head_vacancy_table[5][4][0], IBMModel.MIN_PROB) + self.assertEqual(model5.non_head_vacancy_table[-4][1][2], IBMModel.MIN_PROB) + + def test_prob_t_a_given_s(self): + # arrange + src_sentence = ["ich", 'esse', 'ja', 'gern', 'räucherschinken'] + trg_sentence = ['i', 
'love', 'to', 'eat', 'smoked', 'ham'] + src_classes = {'räucherschinken': 0, 'ja': 1, 'ich': 2, 'esse': 3, 'gern': 4} + trg_classes = {'ham': 0, 'smoked': 1, 'i': 3, 'love': 4, 'to': 2, 'eat': 4} + corpus = [AlignedSent(trg_sentence, src_sentence)] + alignment_info = AlignmentInfo( + (0, 1, 4, 0, 2, 5, 5), + [None] + src_sentence, + ['UNUSED'] + trg_sentence, + [[3], [1], [4], [], [2], [5, 6]], + ) + + head_vacancy_table = defaultdict( + lambda: defaultdict(lambda: defaultdict(float)) + ) + head_vacancy_table[1 - 0][6][3] = 0.97 # ich -> i + head_vacancy_table[3 - 0][5][4] = 0.97 # esse -> eat + head_vacancy_table[1 - 2][4][4] = 0.97 # gern -> love + head_vacancy_table[2 - 0][2][1] = 0.97 # räucherschinken -> smoked + + non_head_vacancy_table = defaultdict( + lambda: defaultdict(lambda: defaultdict(float)) + ) + non_head_vacancy_table[1 - 0][1][0] = 0.96 # räucherschinken -> ham + + translation_table = defaultdict(lambda: defaultdict(float)) + translation_table['i']['ich'] = 0.98 + translation_table['love']['gern'] = 0.98 + translation_table['to'][None] = 0.98 + translation_table['eat']['esse'] = 0.98 + translation_table['smoked']['räucherschinken'] = 0.98 + translation_table['ham']['räucherschinken'] = 0.98 + + fertility_table = defaultdict(lambda: defaultdict(float)) + fertility_table[1]['ich'] = 0.99 + fertility_table[1]['esse'] = 0.99 + fertility_table[0]['ja'] = 0.99 + fertility_table[1]['gern'] = 0.99 + fertility_table[2]['räucherschinken'] = 0.999 + fertility_table[1][None] = 0.99 + + probabilities = { + 'p1': 0.167, + 'translation_table': translation_table, + 'fertility_table': fertility_table, + 'head_vacancy_table': head_vacancy_table, + 'non_head_vacancy_table': non_head_vacancy_table, + 'head_distortion_table': None, + 'non_head_distortion_table': None, + 'alignment_table': None, + } + + model5 = IBMModel5(corpus, 0, src_classes, trg_classes, probabilities) + + # act + probability = model5.prob_t_a_given_s(alignment_info) + + # assert + null_generation = 5 * pow(0.167, 1) * pow(0.833, 4) + fertility = 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 2 * 0.999 + lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98 + vacancy = 0.97 * 0.97 * 1 * 0.97 * 0.97 * 0.96 + expected_probability = ( + null_generation * fertility * lexical_translation * vacancy + ) + self.assertEqual(round(probability, 4), round(expected_probability, 4)) + + def test_prune(self): + # arrange + alignment_infos = [ + AlignmentInfo((1, 1), None, None, None), + AlignmentInfo((1, 2), None, None, None), + AlignmentInfo((2, 1), None, None, None), + AlignmentInfo((2, 2), None, None, None), + AlignmentInfo((0, 0), None, None, None), + ] + min_factor = IBMModel5.MIN_SCORE_FACTOR + best_score = 0.9 + scores = { + (1, 1): min(min_factor * 1.5, 1) * best_score, # above threshold + (1, 2): best_score, + (2, 1): min_factor * best_score, # at threshold + (2, 2): min_factor * best_score * 0.5, # low score + (0, 0): min(min_factor * 1.1, 1) * 1.2, # above threshold + } + corpus = [AlignedSent(['a'], ['b'])] + original_prob_function = IBMModel4.model4_prob_t_a_given_s + # mock static method + IBMModel4.model4_prob_t_a_given_s = staticmethod( + lambda a, model: scores[a.alignment] + ) + model5 = IBMModel5(corpus, 0, None, None) + + # act + pruned_alignments = model5.prune(alignment_infos) + + # assert + self.assertEqual(len(pruned_alignments), 3) + + # restore static method + IBMModel4.model4_prob_t_a_given_s = original_prob_function diff --git 
a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm_model.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm_model.py new file mode 100644 index 0000000..31383bc --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm_model.py @@ -0,0 +1,279 @@ +# -*- coding: utf-8 -*- +""" +Tests for common methods of IBM translation models +""" + +import unittest + +from collections import defaultdict +from nltk.translate import AlignedSent +from nltk.translate import IBMModel +from nltk.translate.ibm_model import AlignmentInfo + + +class TestIBMModel(unittest.TestCase): + __TEST_SRC_SENTENCE = ["j'", 'aime', 'bien', 'jambon'] + __TEST_TRG_SENTENCE = ['i', 'love', 'ham'] + + def test_vocabularies_are_initialized(self): + parallel_corpora = [ + AlignedSent(['one', 'two', 'three', 'four'], ['un', 'deux', 'trois']), + AlignedSent(['five', 'one', 'six'], ['quatre', 'cinq', 'six']), + AlignedSent([], ['sept']), + ] + + ibm_model = IBMModel(parallel_corpora) + self.assertEqual(len(ibm_model.src_vocab), 8) + self.assertEqual(len(ibm_model.trg_vocab), 6) + + def test_vocabularies_are_initialized_even_with_empty_corpora(self): + parallel_corpora = [] + + ibm_model = IBMModel(parallel_corpora) + self.assertEqual(len(ibm_model.src_vocab), 1) # addition of NULL token + self.assertEqual(len(ibm_model.trg_vocab), 0) + + def test_best_model2_alignment(self): + # arrange + sentence_pair = AlignedSent( + TestIBMModel.__TEST_TRG_SENTENCE, TestIBMModel.__TEST_SRC_SENTENCE + ) + # None and 'bien' have zero fertility + translation_table = { + 'i': {"j'": 0.9, 'aime': 0.05, 'bien': 0.02, 'jambon': 0.03, None: 0}, + 'love': {"j'": 0.05, 'aime': 0.9, 'bien': 0.01, 'jambon': 0.01, None: 0.03}, + 'ham': {"j'": 0, 'aime': 0.01, 'bien': 0, 'jambon': 0.99, None: 0}, + } + alignment_table = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.2))) + ) + + ibm_model = IBMModel([]) + ibm_model.translation_table = translation_table + ibm_model.alignment_table = alignment_table + + # act + a_info = ibm_model.best_model2_alignment(sentence_pair) + + # assert + self.assertEqual(a_info.alignment[1:], (1, 2, 4)) # 0th element unused + self.assertEqual(a_info.cepts, [[], [1], [2], [], [3]]) + + def test_best_model2_alignment_does_not_change_pegged_alignment(self): + # arrange + sentence_pair = AlignedSent( + TestIBMModel.__TEST_TRG_SENTENCE, TestIBMModel.__TEST_SRC_SENTENCE + ) + translation_table = { + 'i': {"j'": 0.9, 'aime': 0.05, 'bien': 0.02, 'jambon': 0.03, None: 0}, + 'love': {"j'": 0.05, 'aime': 0.9, 'bien': 0.01, 'jambon': 0.01, None: 0.03}, + 'ham': {"j'": 0, 'aime': 0.01, 'bien': 0, 'jambon': 0.99, None: 0}, + } + alignment_table = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.2))) + ) + + ibm_model = IBMModel([]) + ibm_model.translation_table = translation_table + ibm_model.alignment_table = alignment_table + + # act: force 'love' to be pegged to 'jambon' + a_info = ibm_model.best_model2_alignment(sentence_pair, 2, 4) + # assert + self.assertEqual(a_info.alignment[1:], (1, 4, 4)) + self.assertEqual(a_info.cepts, [[], [1], [], [], [2, 3]]) + + def test_best_model2_alignment_handles_fertile_words(self): + # arrange + sentence_pair = AlignedSent( + ['i', 'really', ',', 'really', 'love', 'ham'], + TestIBMModel.__TEST_SRC_SENTENCE, + ) + # 'bien' produces 2 target words: 'really' and another 'really' + translation_table = { + 'i': {"j'": 0.9, 'aime': 0.05, 'bien': 0.02, 'jambon': 0.03, 
None: 0}, + 'really': {"j'": 0, 'aime': 0, 'bien': 0.9, 'jambon': 0.01, None: 0.09}, + ',': {"j'": 0, 'aime': 0, 'bien': 0.3, 'jambon': 0, None: 0.7}, + 'love': {"j'": 0.05, 'aime': 0.9, 'bien': 0.01, 'jambon': 0.01, None: 0.03}, + 'ham': {"j'": 0, 'aime': 0.01, 'bien': 0, 'jambon': 0.99, None: 0}, + } + alignment_table = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.2))) + ) + + ibm_model = IBMModel([]) + ibm_model.translation_table = translation_table + ibm_model.alignment_table = alignment_table + + # act + a_info = ibm_model.best_model2_alignment(sentence_pair) + + # assert + self.assertEqual(a_info.alignment[1:], (1, 3, 0, 3, 2, 4)) + self.assertEqual(a_info.cepts, [[3], [1], [5], [2, 4], [6]]) + + def test_best_model2_alignment_handles_empty_src_sentence(self): + # arrange + sentence_pair = AlignedSent(TestIBMModel.__TEST_TRG_SENTENCE, []) + ibm_model = IBMModel([]) + + # act + a_info = ibm_model.best_model2_alignment(sentence_pair) + + # assert + self.assertEqual(a_info.alignment[1:], (0, 0, 0)) + self.assertEqual(a_info.cepts, [[1, 2, 3]]) + + def test_best_model2_alignment_handles_empty_trg_sentence(self): + # arrange + sentence_pair = AlignedSent([], TestIBMModel.__TEST_SRC_SENTENCE) + ibm_model = IBMModel([]) + + # act + a_info = ibm_model.best_model2_alignment(sentence_pair) + + # assert + self.assertEqual(a_info.alignment[1:], ()) + self.assertEqual(a_info.cepts, [[], [], [], [], []]) + + def test_neighboring_finds_neighbor_alignments(self): + # arrange + a_info = AlignmentInfo( + (0, 3, 2), + (None, 'des', 'œufs', 'verts'), + ('UNUSED', 'green', 'eggs'), + [[], [], [2], [1]], + ) + ibm_model = IBMModel([]) + + # act + neighbors = ibm_model.neighboring(a_info) + + # assert + neighbor_alignments = set() + for neighbor in neighbors: + neighbor_alignments.add(neighbor.alignment) + expected_alignments = set( + [ + # moves + (0, 0, 2), + (0, 1, 2), + (0, 2, 2), + (0, 3, 0), + (0, 3, 1), + (0, 3, 3), + # swaps + (0, 2, 3), + # original alignment + (0, 3, 2), + ] + ) + self.assertEqual(neighbor_alignments, expected_alignments) + + def test_neighboring_sets_neighbor_alignment_info(self): + # arrange + a_info = AlignmentInfo( + (0, 3, 2), + (None, 'des', 'œufs', 'verts'), + ('UNUSED', 'green', 'eggs'), + [[], [], [2], [1]], + ) + ibm_model = IBMModel([]) + + # act + neighbors = ibm_model.neighboring(a_info) + + # assert: select a few particular alignments + for neighbor in neighbors: + if neighbor.alignment == (0, 2, 2): + moved_alignment = neighbor + elif neighbor.alignment == (0, 3, 2): + swapped_alignment = neighbor + + self.assertEqual(moved_alignment.cepts, [[], [], [1, 2], []]) + self.assertEqual(swapped_alignment.cepts, [[], [], [2], [1]]) + + def test_neighboring_returns_neighbors_with_pegged_alignment(self): + # arrange + a_info = AlignmentInfo( + (0, 3, 2), + (None, 'des', 'œufs', 'verts'), + ('UNUSED', 'green', 'eggs'), + [[], [], [2], [1]], + ) + ibm_model = IBMModel([]) + + # act: peg 'eggs' to align with 'œufs' + neighbors = ibm_model.neighboring(a_info, 2) + + # assert + neighbor_alignments = set() + for neighbor in neighbors: + neighbor_alignments.add(neighbor.alignment) + expected_alignments = set( + [ + # moves + (0, 0, 2), + (0, 1, 2), + (0, 2, 2), + # no swaps + # original alignment + (0, 3, 2), + ] + ) + self.assertEqual(neighbor_alignments, expected_alignments) + + def test_hillclimb(self): + # arrange + initial_alignment = AlignmentInfo((0, 3, 2), None, None, None) + + def neighboring_mock(a, j): + if a.alignment == (0, 3, 
2): + return set( + [ + AlignmentInfo((0, 2, 2), None, None, None), + AlignmentInfo((0, 1, 1), None, None, None), + ] + ) + elif a.alignment == (0, 2, 2): + return set( + [ + AlignmentInfo((0, 3, 3), None, None, None), + AlignmentInfo((0, 4, 4), None, None, None), + ] + ) + return set() + + def prob_t_a_given_s_mock(a): + prob_values = { + (0, 3, 2): 0.5, + (0, 2, 2): 0.6, + (0, 1, 1): 0.4, + (0, 3, 3): 0.6, + (0, 4, 4): 0.7, + } + return prob_values.get(a.alignment, 0.01) + + ibm_model = IBMModel([]) + ibm_model.neighboring = neighboring_mock + ibm_model.prob_t_a_given_s = prob_t_a_given_s_mock + + # act + best_alignment = ibm_model.hillclimb(initial_alignment) + + # assert: hill climbing goes from (0, 3, 2) -> (0, 2, 2) -> (0, 4, 4) + self.assertEqual(best_alignment.alignment, (0, 4, 4)) + + def test_sample(self): + # arrange + sentence_pair = AlignedSent( + TestIBMModel.__TEST_TRG_SENTENCE, TestIBMModel.__TEST_SRC_SENTENCE + ) + ibm_model = IBMModel([]) + ibm_model.prob_t_a_given_s = lambda x: 0.001 + + # act + samples, best_alignment = ibm_model.sample(sentence_pair) + + # assert + self.assertEqual(len(samples), 61) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_nist.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_nist.py new file mode 100644 index 0000000..84e6342 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_nist.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +""" +Tests for NIST translation evaluation metric +""" + +import io +import unittest + +from nltk.data import find +from nltk.translate.nist_score import sentence_nist, corpus_nist + + +class TestNIST(unittest.TestCase): + def test_sentence_nist(self): + ref_file = find('models/wmt15_eval/ref.ru') + hyp_file = find('models/wmt15_eval/google.ru') + mteval_output_file = find('models/wmt15_eval/mteval-13a.output') + + # Reads the NIST scores from the `mteval-13a.output` file. + # The order of the list corresponds to the order of the ngrams. + with open(mteval_output_file, 'r') as mteval_fin: + # The numbers are located in the last 4th line of the file. + # The first and 2nd item in the list are the score and system names. + mteval_nist_scores = map(float, mteval_fin.readlines()[-4].split()[1:-1]) + + with io.open(ref_file, 'r', encoding='utf8') as ref_fin: + with io.open(hyp_file, 'r', encoding='utf8') as hyp_fin: + # Whitespace tokenize the file. + # Note: split() automatically strip(). + hypotheses = list(map(lambda x: x.split(), hyp_fin)) + # Note that the corpus_bleu input is list of list of references. + references = list(map(lambda x: [x.split()], ref_fin)) + # Without smoothing. 
+ for i, mteval_nist in zip(range(1, 10), mteval_nist_scores): + nltk_nist = corpus_nist(references, hypotheses, i) + # Check that the NIST scores difference is less than 0.5 + assert abs(mteval_nist - nltk_nist) < 0.05 diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_stack_decoder.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_stack_decoder.py new file mode 100644 index 0000000..5d5f2d4 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/translate/test_stack_decoder.py @@ -0,0 +1,295 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Stack decoder +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Tah Wei Hoon +# URL: +# For license information, see LICENSE.TXT + +""" +Tests for stack decoder +""" + +import unittest +from collections import defaultdict +from math import log +from nltk.translate import PhraseTable +from nltk.translate import StackDecoder +from nltk.translate.stack_decoder import _Hypothesis, _Stack + + +class TestStackDecoder(unittest.TestCase): + def test_find_all_src_phrases(self): + # arrange + phrase_table = TestStackDecoder.create_fake_phrase_table() + stack_decoder = StackDecoder(phrase_table, None) + sentence = ('my', 'hovercraft', 'is', 'full', 'of', 'eels') + + # act + src_phrase_spans = stack_decoder.find_all_src_phrases(sentence) + + # assert + self.assertEqual(src_phrase_spans[0], [2]) # 'my hovercraft' + self.assertEqual(src_phrase_spans[1], [2]) # 'hovercraft' + self.assertEqual(src_phrase_spans[2], [3]) # 'is' + self.assertEqual(src_phrase_spans[3], [5, 6]) # 'full of', 'full of eels' + self.assertFalse(src_phrase_spans[4]) # no entry starting with 'of' + self.assertEqual(src_phrase_spans[5], [6]) # 'eels' + + def test_distortion_score(self): + # arrange + stack_decoder = StackDecoder(None, None) + stack_decoder.distortion_factor = 0.5 + hypothesis = _Hypothesis() + hypothesis.src_phrase_span = (3, 5) + + # act + score = stack_decoder.distortion_score(hypothesis, (8, 10)) + + # assert + expected_score = log(stack_decoder.distortion_factor) * (8 - 5) + self.assertEqual(score, expected_score) + + def test_distortion_score_of_first_expansion(self): + # arrange + stack_decoder = StackDecoder(None, None) + stack_decoder.distortion_factor = 0.5 + hypothesis = _Hypothesis() + + # act + score = stack_decoder.distortion_score(hypothesis, (8, 10)) + + # assert + # expansion from empty hypothesis always has zero distortion cost + self.assertEqual(score, 0.0) + + def test_compute_future_costs(self): + # arrange + phrase_table = TestStackDecoder.create_fake_phrase_table() + language_model = TestStackDecoder.create_fake_language_model() + stack_decoder = StackDecoder(phrase_table, language_model) + sentence = ('my', 'hovercraft', 'is', 'full', 'of', 'eels') + + # act + future_scores = stack_decoder.compute_future_scores(sentence) + + # assert + self.assertEqual( + future_scores[1][2], + ( + phrase_table.translations_for(('hovercraft',))[0].log_prob + + language_model.probability(('hovercraft',)) + ), + ) + self.assertEqual( + future_scores[0][2], + ( + phrase_table.translations_for(('my', 'hovercraft'))[0].log_prob + + language_model.probability(('my', 'hovercraft')) + ), + ) + + def test_compute_future_costs_for_phrases_not_in_phrase_table(self): + # arrange + phrase_table = TestStackDecoder.create_fake_phrase_table() + language_model = TestStackDecoder.create_fake_language_model() + stack_decoder = StackDecoder(phrase_table, language_model) + sentence = ('my', 'hovercraft', 'is', 
'full', 'of', 'eels') + + # act + future_scores = stack_decoder.compute_future_scores(sentence) + + # assert + self.assertEqual( + future_scores[1][3], # 'hovercraft is' is not in phrase table + future_scores[1][2] + future_scores[2][3], + ) # backoff + + def test_future_score(self): + # arrange: sentence with 8 words; words 2, 3, 4 already translated + hypothesis = _Hypothesis() + hypothesis.untranslated_spans = lambda _: [(0, 2), (5, 8)] # mock + future_score_table = defaultdict(lambda: defaultdict(float)) + future_score_table[0][2] = 0.4 + future_score_table[5][8] = 0.5 + stack_decoder = StackDecoder(None, None) + + # act + future_score = stack_decoder.future_score(hypothesis, future_score_table, 8) + + # assert + self.assertEqual(future_score, 0.4 + 0.5) + + def test_valid_phrases(self): + # arrange + hypothesis = _Hypothesis() + # mock untranslated_spans method + hypothesis.untranslated_spans = lambda _: [(0, 2), (3, 6)] + all_phrases_from = [[1, 4], [2], [], [5], [5, 6, 7], [], [7]] + + # act + phrase_spans = StackDecoder.valid_phrases(all_phrases_from, hypothesis) + + # assert + self.assertEqual(phrase_spans, [(0, 1), (1, 2), (3, 5), (4, 5), (4, 6)]) + + @staticmethod + def create_fake_phrase_table(): + phrase_table = PhraseTable() + phrase_table.add(('hovercraft',), ('',), 0.8) + phrase_table.add(('my', 'hovercraft'), ('', ''), 0.7) + phrase_table.add(('my', 'cheese'), ('', ''), 0.7) + phrase_table.add(('is',), ('',), 0.8) + phrase_table.add(('is',), ('',), 0.5) + phrase_table.add(('full', 'of'), ('', ''), 0.01) + phrase_table.add(('full', 'of', 'eels'), ('', '', ''), 0.5) + phrase_table.add(('full', 'of', 'spam'), ('', ''), 0.5) + phrase_table.add(('eels',), ('',), 0.5) + phrase_table.add(('spam',), ('',), 0.5) + return phrase_table + + @staticmethod + def create_fake_language_model(): + # nltk.model should be used here once it is implemented + language_prob = defaultdict(lambda: -999.0) + language_prob[('my',)] = log(0.1) + language_prob[('hovercraft',)] = log(0.1) + language_prob[('is',)] = log(0.1) + language_prob[('full',)] = log(0.1) + language_prob[('of',)] = log(0.1) + language_prob[('eels',)] = log(0.1) + language_prob[('my', 'hovercraft')] = log(0.3) + language_model = type( + '', (object,), {'probability': lambda _, phrase: language_prob[phrase]} + )() + return language_model + + +class TestHypothesis(unittest.TestCase): + def setUp(self): + root = _Hypothesis() + child = _Hypothesis( + raw_score=0.5, + src_phrase_span=(3, 7), + trg_phrase=('hello', 'world'), + previous=root, + ) + grandchild = _Hypothesis( + raw_score=0.4, + src_phrase_span=(1, 2), + trg_phrase=('and', 'goodbye'), + previous=child, + ) + self.hypothesis_chain = grandchild + + def test_translation_so_far(self): + # act + translation = self.hypothesis_chain.translation_so_far() + + # assert + self.assertEqual(translation, ['hello', 'world', 'and', 'goodbye']) + + def test_translation_so_far_for_empty_hypothesis(self): + # arrange + hypothesis = _Hypothesis() + + # act + translation = hypothesis.translation_so_far() + + # assert + self.assertEqual(translation, []) + + def test_total_translated_words(self): + # act + total_translated_words = self.hypothesis_chain.total_translated_words() + + # assert + self.assertEqual(total_translated_words, 5) + + def test_translated_positions(self): + # act + translated_positions = self.hypothesis_chain.translated_positions() + + # assert + translated_positions.sort() + self.assertEqual(translated_positions, [1, 3, 4, 5, 6]) + + def test_untranslated_spans(self): + # act 
+ untranslated_spans = self.hypothesis_chain.untranslated_spans(10) + + # assert + self.assertEqual(untranslated_spans, [(0, 1), (2, 3), (7, 10)]) + + def test_untranslated_spans_for_empty_hypothesis(self): + # arrange + hypothesis = _Hypothesis() + + # act + untranslated_spans = hypothesis.untranslated_spans(10) + + # assert + self.assertEqual(untranslated_spans, [(0, 10)]) + + +class TestStack(unittest.TestCase): + def test_push_bumps_off_worst_hypothesis_when_stack_is_full(self): + # arrange + stack = _Stack(3) + poor_hypothesis = _Hypothesis(0.01) + + # act + stack.push(_Hypothesis(0.2)) + stack.push(poor_hypothesis) + stack.push(_Hypothesis(0.1)) + stack.push(_Hypothesis(0.3)) + + # assert + self.assertFalse(poor_hypothesis in stack) + + def test_push_removes_hypotheses_that_fall_below_beam_threshold(self): + # arrange + stack = _Stack(3, 0.5) + poor_hypothesis = _Hypothesis(0.01) + worse_hypothesis = _Hypothesis(0.009) + + # act + stack.push(poor_hypothesis) + stack.push(worse_hypothesis) + stack.push(_Hypothesis(0.9)) # greatly superior hypothesis + + # assert + self.assertFalse(poor_hypothesis in stack) + self.assertFalse(worse_hypothesis in stack) + + def test_push_does_not_add_hypothesis_that_falls_below_beam_threshold(self): + # arrange + stack = _Stack(3, 0.5) + poor_hypothesis = _Hypothesis(0.01) + + # act + stack.push(_Hypothesis(0.9)) # greatly superior hypothesis + stack.push(poor_hypothesis) + + # assert + self.assertFalse(poor_hypothesis in stack) + + def test_best_returns_the_best_hypothesis(self): + # arrange + stack = _Stack(3) + best_hypothesis = _Hypothesis(0.99) + + # act + stack.push(_Hypothesis(0.0)) + stack.push(best_hypothesis) + stack.push(_Hypothesis(0.5)) + + # assert + self.assertEqual(stack.best(), best_hypothesis) + + def test_best_returns_none_when_stack_is_empty(self): + # arrange + stack = _Stack(3) + + # assert + self.assertEqual(stack.best(), None) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/unit/utils.py b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/utils.py new file mode 100644 index 0000000..0489b16 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/unit/utils.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import +from unittest import TestCase +from functools import wraps +from nose.plugins.skip import SkipTest +from nltk.util import py26 + + +def skip(reason): + """ + Unconditionally skip a test. + """ + + def decorator(test_item): + is_test_class = isinstance(test_item, type) and issubclass(test_item, TestCase) + + if is_test_class and py26(): + # Patch all test_ methods to raise SkipText exception. + # This is necessary for Python 2.6 because its unittest + # doesn't understand __unittest_skip__. + for meth_name in (m for m in dir(test_item) if m.startswith('test_')): + patched_method = skip(reason)(getattr(test_item, meth_name)) + setattr(test_item, meth_name, patched_method) + + if not is_test_class: + + @wraps(test_item) + def skip_wrapper(*args, **kwargs): + raise SkipTest(reason) + + skip_wrapper.__name__ = test_item.__name__ + test_item = skip_wrapper + + test_item.__unittest_skip__ = True + test_item.__unittest_skip_why__ = reason + return test_item + + return decorator + + +def skipIf(condition, reason): + """ + Skip a test if the condition is true. 
+ """ + if condition: + return skip(reason) + return lambda obj: obj diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/util.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/util.doctest new file mode 100644 index 0000000..7ba6af1 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/util.doctest @@ -0,0 +1,48 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +================= +Utility functions +================= + + >>> from __future__ import print_function + >>> from nltk.util import * + >>> from nltk.tree import Tree + + >>> print_string("This is a long string, therefore it should break", 25) + This is a long string, + therefore it should break + + >>> re_show("[a-z]+", "sdf123") + {sdf}123 + + >>> tree = Tree(5, + ... [Tree(4, [Tree(2, [1, 3])]), + ... Tree(8, [Tree(6, [7]), 9])]) + >>> for x in breadth_first(tree): + ... if isinstance(x, int): print(x) + ... else: print(x.label()) + 5 + 4 + 8 + 2 + 6 + 9 + 1 + 3 + 7 + >>> for x in breadth_first(tree, maxdepth=2): + ... if isinstance(x, int): print(x) + ... else: print(x.label()) + 5 + 4 + 8 + 2 + 6 + 9 + + >>> invert_dict({1: 2}) + defaultdict(<... 'list'>, {2: 1}) + + >>> invert_dict({1: [3, 4, 5]}) + defaultdict(<... 'list'>, {3: [1], 4: [1], 5: [1]}) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/wordnet.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/wordnet.doctest new file mode 100644 index 0000000..5233728 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/wordnet.doctest @@ -0,0 +1,605 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +================= +WordNet Interface +================= + +WordNet is just another NLTK corpus reader, and can be imported like this: + >>> from __future__ import print_function, unicode_literals + >>> from nltk.corpus import wordnet + +For more compact code, we recommend: + + >>> from nltk.corpus import wordnet as wn + +----- +Words +----- + +Look up a word using ``synsets()``; this function has an optional ``pos`` argument +which lets you constrain the part of speech of the word: + + >>> wn.synsets('dog') # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + [Synset('dog.n.01'), Synset('frump.n.01'), Synset('dog.n.03'), Synset('cad.n.01'), + Synset('frank.n.02'), Synset('pawl.n.01'), Synset('andiron.n.01'), Synset('chase.v.01')] + >>> wn.synsets('dog', pos=wn.VERB) + [Synset('chase.v.01')] + +The other parts of speech are ``NOUN``, ``ADJ`` and ``ADV``. +A synset is identified with a 3-part name of the form: word.pos.nn: + + >>> wn.synset('dog.n.01') + Synset('dog.n.01') + >>> print(wn.synset('dog.n.01').definition()) + a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds + >>> len(wn.synset('dog.n.01').examples()) + 1 + >>> print(wn.synset('dog.n.01').examples()[0]) + the dog barked all night + >>> wn.synset('dog.n.01').lemmas() + [Lemma('dog.n.01.dog'), Lemma('dog.n.01.domestic_dog'), Lemma('dog.n.01.Canis_familiaris')] + >>> [str(lemma.name()) for lemma in wn.synset('dog.n.01').lemmas()] + ['dog', 'domestic_dog', 'Canis_familiaris'] + >>> wn.lemma('dog.n.01.dog').synset() + Synset('dog.n.01') + +The WordNet corpus reader gives access to the Open Multilingual +WordNet, using ISO-639 language codes. 
+ + >>> sorted(wn.langs()) # doctest: +NORMALIZE_WHITESPACE + ['als', 'arb', 'bul', 'cat', 'cmn', 'dan', 'ell', 'eng', 'eus', 'fas', + 'fin', 'fra', 'glg', 'heb', 'hrv', 'ind', 'ita', 'jpn', 'nld', 'nno', + 'nob', 'pol', 'por', 'qcn', 'slv', 'spa', 'swe', 'tha', 'zsm'] + >>> wn.synsets(b'\xe7\x8a\xac'.decode('utf-8'), lang='jpn') + [Synset('dog.n.01'), Synset('spy.n.01')] + + wn.synset('spy.n.01').lemma_names('jpn') # doctest: +NORMALIZE_WHITESPACE + ['\u3044\u306c', '\u307e\u308f\u3057\u8005', '\u30b9\u30d1\u30a4', '\u56de\u3057\u8005', + '\u56de\u8005', '\u5bc6\u5075', '\u5de5\u4f5c\u54e1', '\u5efb\u3057\u8005', + '\u5efb\u8005', '\u63a2', '\u63a2\u308a', '\u72ac', '\u79d8\u5bc6\u635c\u67fb\u54e1', + '\u8adc\u5831\u54e1', '\u8adc\u8005', '\u9593\u8005', '\u9593\u8adc', '\u96a0\u5bc6'] + + >>> wn.synset('dog.n.01').lemma_names('ita') + ['cane', 'Canis_familiaris'] + >>> wn.lemmas('cane', lang='ita') # doctest: +NORMALIZE_WHITESPACE + [Lemma('dog.n.01.cane'), Lemma('cramp.n.02.cane'), Lemma('hammer.n.01.cane'), Lemma('bad_person.n.01.cane'), + Lemma('incompetent.n.01.cane')] + >>> sorted(wn.synset('dog.n.01').lemmas('dan')) # doctest: +NORMALIZE_WHITESPACE + [Lemma('dog.n.01.hund'), Lemma('dog.n.01.k\xf8ter'), + Lemma('dog.n.01.vovhund'), Lemma('dog.n.01.vovse')] + + sorted(wn.synset('dog.n.01').lemmas('por')) + [Lemma('dog.n.01.cachorra'), Lemma('dog.n.01.cachorro'), Lemma('dog.n.01.cadela'), Lemma('dog.n.01.c\xe3o')] + + >>> dog_lemma = wn.lemma(b'dog.n.01.c\xc3\xa3o'.decode('utf-8'), lang='por') + >>> dog_lemma + Lemma('dog.n.01.c\xe3o') + >>> dog_lemma.lang() + 'por' + >>> len(list(wordnet.all_lemma_names(pos='n', lang='jpn'))) + 64797 + +------- +Synsets +------- + +`Synset`: a set of synonyms that share a common meaning. + + >>> dog = wn.synset('dog.n.01') + >>> dog.hypernyms() + [Synset('canine.n.02'), Synset('domestic_animal.n.01')] + >>> dog.hyponyms() # doctest: +ELLIPSIS + [Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'), Synset('dalmatian.n.02'), ...] + >>> dog.member_holonyms() + [Synset('canis.n.01'), Synset('pack.n.06')] + >>> dog.root_hypernyms() + [Synset('entity.n.01')] + >>> wn.synset('dog.n.01').lowest_common_hypernyms(wn.synset('cat.n.01')) + [Synset('carnivore.n.01')] + +Each synset contains one or more lemmas, which represent a specific +sense of a specific word. + +Note that some relations are defined by WordNet only over Lemmas: + + >>> good = wn.synset('good.a.01') + >>> good.antonyms() + Traceback (most recent call last): + File "", line 1, in + AttributeError: 'Synset' object has no attribute 'antonyms' + >>> good.lemmas()[0].antonyms() + [Lemma('bad.a.01.bad')] + +The relations that are currently defined in this way are `antonyms`, +`derivationally_related_forms` and `pertainyms`. + +If you know the byte offset used to identify a synset in the original +Princeton WordNet data file, you can use that to instantiate the synset +in NLTK: + + >>> wn.synset_from_pos_and_offset('n', 4543158) + Synset('wagon.n.01') + +------ +Lemmas +------ + + >>> eat = wn.lemma('eat.v.03.eat') + >>> eat + Lemma('feed.v.06.eat') + >>> print(eat.key()) + eat%2:34:02:: + >>> eat.count() + 4 + >>> wn.lemma_from_key(eat.key()) + Lemma('feed.v.06.eat') + >>> wn.lemma_from_key(eat.key()).synset() + Synset('feed.v.06') + >>> wn.lemma_from_key('feebleminded%5:00:00:retarded:00') + Lemma('backward.s.03.feebleminded') + >>> for lemma in wn.synset('eat.v.03').lemmas(): + ... print(lemma, lemma.count()) + ... 
+ Lemma('feed.v.06.feed') 3 + Lemma('feed.v.06.eat') 4 + >>> for lemma in wn.lemmas('eat', 'v'): + ... print(lemma, lemma.count()) + ... + Lemma('eat.v.01.eat') 61 + Lemma('eat.v.02.eat') 13 + Lemma('feed.v.06.eat') 4 + Lemma('eat.v.04.eat') 0 + Lemma('consume.v.05.eat') 0 + Lemma('corrode.v.01.eat') 0 + >>> wn.lemma('jump.v.11.jump') + Lemma('jump.v.11.jump') + +Lemmas can also have relations between them: + + >>> vocal = wn.lemma('vocal.a.01.vocal') + >>> vocal.derivationally_related_forms() + [Lemma('vocalize.v.02.vocalize')] + >>> vocal.pertainyms() + [Lemma('voice.n.02.voice')] + >>> vocal.antonyms() + [Lemma('instrumental.a.01.instrumental')] + +The three relations above exist only on lemmas, not on synsets. + +----------- +Verb Frames +----------- + + >>> wn.synset('think.v.01').frame_ids() + [5, 9] + >>> for lemma in wn.synset('think.v.01').lemmas(): + ... print(lemma, lemma.frame_ids()) + ... print(" | ".join(lemma.frame_strings())) + ... + Lemma('think.v.01.think') [5, 9] + Something think something Adjective/Noun | Somebody think somebody + Lemma('think.v.01.believe') [5, 9] + Something believe something Adjective/Noun | Somebody believe somebody + Lemma('think.v.01.consider') [5, 9] + Something consider something Adjective/Noun | Somebody consider somebody + Lemma('think.v.01.conceive') [5, 9] + Something conceive something Adjective/Noun | Somebody conceive somebody + >>> wn.synset('stretch.v.02').frame_ids() + [8] + >>> for lemma in wn.synset('stretch.v.02').lemmas(): + ... print(lemma, lemma.frame_ids()) + ... print(" | ".join(lemma.frame_strings())) + ... + Lemma('stretch.v.02.stretch') [8, 2] + Somebody stretch something | Somebody stretch + Lemma('stretch.v.02.extend') [8] + Somebody extend something + + +---------- +Similarity +---------- + + >>> dog = wn.synset('dog.n.01') + >>> cat = wn.synset('cat.n.01') + + >>> hit = wn.synset('hit.v.01') + >>> slap = wn.synset('slap.v.01') + + +``synset1.path_similarity(synset2):`` +Return a score denoting how similar two word senses are, based on the +shortest path that connects the senses in the is-a (hypernym/hypnoym) +taxonomy. The score is in the range 0 to 1. By default, there is now +a fake root node added to verbs so for cases where previously a path +could not be found---and None was returned---it should return a value. +The old behavior can be achieved by setting simulate_root to be False. +A score of 1 represents identity i.e. comparing a sense with itself +will return 1. + + >>> dog.path_similarity(cat) # doctest: +ELLIPSIS + 0.2... + + >>> hit.path_similarity(slap) # doctest: +ELLIPSIS + 0.142... + + >>> wn.path_similarity(hit, slap) # doctest: +ELLIPSIS + 0.142... + + >>> print(hit.path_similarity(slap, simulate_root=False)) + None + + >>> print(wn.path_similarity(hit, slap, simulate_root=False)) + None + +``synset1.lch_similarity(synset2):`` +Leacock-Chodorow Similarity: +Return a score denoting how similar two word senses are, based on the +shortest path that connects the senses (as above) and the maximum depth +of the taxonomy in which the senses occur. The relationship is given +as -log(p/2d) where p is the shortest path length and d the taxonomy +depth. + + >>> dog.lch_similarity(cat) # doctest: +ELLIPSIS + 2.028... + + >>> hit.lch_similarity(slap) # doctest: +ELLIPSIS + 1.312... + + >>> wn.lch_similarity(hit, slap) # doctest: +ELLIPSIS + 1.312... 
+ + >>> print(hit.lch_similarity(slap, simulate_root=False)) + None + + >>> print(wn.lch_similarity(hit, slap, simulate_root=False)) + None + +``synset1.wup_similarity(synset2):`` +Wu-Palmer Similarity: +Return a score denoting how similar two word senses are, based on the +depth of the two senses in the taxonomy and that of their Least Common +Subsumer (most specific ancestor node). Note that at this time the +scores given do _not_ always agree with those given by Pedersen's Perl +implementation of Wordnet Similarity. + +The LCS does not necessarily feature in the shortest path connecting the +two senses, as it is by definition the common ancestor deepest in the +taxonomy, not closest to the two senses. Typically, however, it will so +feature. Where multiple candidates for the LCS exist, that whose +shortest path to the root node is the longest will be selected. Where +the LCS has multiple paths to the root, the longer path is used for +the purposes of the calculation. + + >>> dog.wup_similarity(cat) # doctest: +ELLIPSIS + 0.857... + + >>> hit.wup_similarity(slap) + 0.25 + + >>> wn.wup_similarity(hit, slap) + 0.25 + + >>> print(hit.wup_similarity(slap, simulate_root=False)) + None + + >>> print(wn.wup_similarity(hit, slap, simulate_root=False)) + None + +``wordnet_ic`` +Information Content: +Load an information content file from the wordnet_ic corpus. + + >>> from nltk.corpus import wordnet_ic + >>> brown_ic = wordnet_ic.ic('ic-brown.dat') + >>> semcor_ic = wordnet_ic.ic('ic-semcor.dat') + +Or you can create an information content dictionary from a corpus (or +anything that has a words() method). + + >>> from nltk.corpus import genesis + >>> genesis_ic = wn.ic(genesis, False, 0.0) + +``synset1.res_similarity(synset2, ic):`` +Resnik Similarity: +Return a score denoting how similar two word senses are, based on the +Information Content (IC) of the Least Common Subsumer (most specific +ancestor node). Note that for any similarity measure that uses +information content, the result is dependent on the corpus used to +generate the information content and the specifics of how the +information content was created. + + >>> dog.res_similarity(cat, brown_ic) # doctest: +ELLIPSIS + 7.911... + >>> dog.res_similarity(cat, genesis_ic) # doctest: +ELLIPSIS + 7.204... + +``synset1.jcn_similarity(synset2, ic):`` +Jiang-Conrath Similarity +Return a score denoting how similar two word senses are, based on the +Information Content (IC) of the Least Common Subsumer (most specific +ancestor node) and that of the two input Synsets. The relationship is +given by the equation 1 / (IC(s1) + IC(s2) - 2 * IC(lcs)). + + >>> dog.jcn_similarity(cat, brown_ic) # doctest: +ELLIPSIS + 0.449... + >>> dog.jcn_similarity(cat, genesis_ic) # doctest: +ELLIPSIS + 0.285... + +``synset1.lin_similarity(synset2, ic):`` +Lin Similarity: +Return a score denoting how similar two word senses are, based on the +Information Content (IC) of the Least Common Subsumer (most specific +ancestor node) and that of the two input Synsets. The relationship is +given by the equation 2 * IC(lcs) / (IC(s1) + IC(s2)). + + >>> dog.lin_similarity(cat, semcor_ic) # doctest: +ELLIPSIS + 0.886... + + +--------------------- +Access to all Synsets +--------------------- + +Iterate over all the noun synsets: + + >>> for synset in list(wn.all_synsets('n'))[:10]: + ... print(synset) + ... 
+ Synset('entity.n.01') + Synset('physical_entity.n.01') + Synset('abstraction.n.06') + Synset('thing.n.12') + Synset('object.n.01') + Synset('whole.n.02') + Synset('congener.n.03') + Synset('living_thing.n.01') + Synset('organism.n.01') + Synset('benthos.n.02') + +Get all synsets for this word, possibly restricted by POS: + + >>> wn.synsets('dog') # doctest: +ELLIPSIS + [Synset('dog.n.01'), Synset('frump.n.01'), Synset('dog.n.03'), Synset('cad.n.01'), ...] + >>> wn.synsets('dog', pos='v') + [Synset('chase.v.01')] + +Walk through the noun synsets looking at their hypernyms: + + >>> from itertools import islice + >>> for synset in islice(wn.all_synsets('n'), 5): + ... print(synset, synset.hypernyms()) + ... + Synset('entity.n.01') [] + Synset('physical_entity.n.01') [Synset('entity.n.01')] + Synset('abstraction.n.06') [Synset('entity.n.01')] + Synset('thing.n.12') [Synset('physical_entity.n.01')] + Synset('object.n.01') [Synset('physical_entity.n.01')] + + +------ +Morphy +------ + +Look up forms not in WordNet, with the help of Morphy: + + >>> wn.morphy('denied', wn.NOUN) + >>> print(wn.morphy('denied', wn.VERB)) + deny + >>> wn.synsets('denied', wn.NOUN) + [] + >>> wn.synsets('denied', wn.VERB) # doctest: +NORMALIZE_WHITESPACE + [Synset('deny.v.01'), Synset('deny.v.02'), Synset('deny.v.03'), Synset('deny.v.04'), + Synset('deny.v.05'), Synset('traverse.v.03'), Synset('deny.v.07')] + +Morphy uses a combination of inflectional ending rules and exception +lists to handle a variety of different possibilities: + + >>> print(wn.morphy('dogs')) + dog + >>> print(wn.morphy('churches')) + church + >>> print(wn.morphy('aardwolves')) + aardwolf + >>> print(wn.morphy('abaci')) + abacus + >>> print(wn.morphy('book', wn.NOUN)) + book + >>> wn.morphy('hardrock', wn.ADV) + >>> wn.morphy('book', wn.ADJ) + >>> wn.morphy('his', wn.NOUN) + >>> + +--------------- +Synset Closures +--------------- + +Compute transitive closures of synsets + + >>> dog = wn.synset('dog.n.01') + >>> hypo = lambda s: s.hyponyms() + >>> hyper = lambda s: s.hypernyms() + >>> list(dog.closure(hypo, depth=1)) == dog.hyponyms() + True + >>> list(dog.closure(hyper, depth=1)) == dog.hypernyms() + True + >>> list(dog.closure(hypo)) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS + [Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'), + Synset('dalmatian.n.02'), Synset('great_pyrenees.n.01'), + Synset('griffon.n.02'), Synset('hunting_dog.n.01'), Synset('lapdog.n.01'), + Synset('leonberg.n.01'), Synset('mexican_hairless.n.01'), + Synset('newfoundland.n.01'), Synset('pooch.n.01'), Synset('poodle.n.01'), ...] 
+ >>> list(dog.closure(hyper)) # doctest: +NORMALIZE_WHITESPACE + [Synset('canine.n.02'), Synset('domestic_animal.n.01'), Synset('carnivore.n.01'), Synset('animal.n.01'), + Synset('placental.n.01'), Synset('organism.n.01'), Synset('mammal.n.01'), Synset('living_thing.n.01'), + Synset('vertebrate.n.01'), Synset('whole.n.02'), Synset('chordate.n.01'), Synset('object.n.01'), + Synset('physical_entity.n.01'), Synset('entity.n.01')] + + +---------------- +Regression Tests +---------------- + +Bug 85: morphy returns the base form of a word, if it's input is given +as a base form for a POS for which that word is not defined: + + >>> wn.synsets('book', wn.NOUN) + [Synset('book.n.01'), Synset('book.n.02'), Synset('record.n.05'), Synset('script.n.01'), Synset('ledger.n.01'), Synset('book.n.06'), Synset('book.n.07'), Synset('koran.n.01'), Synset('bible.n.01'), Synset('book.n.10'), Synset('book.n.11')] + >>> wn.synsets('book', wn.ADJ) + [] + >>> wn.morphy('book', wn.NOUN) + 'book' + >>> wn.morphy('book', wn.ADJ) + +Bug 160: wup_similarity breaks when the two synsets have no common hypernym + + >>> t = wn.synsets('picasso')[0] + >>> m = wn.synsets('male')[1] + >>> t.wup_similarity(m) # doctest: +ELLIPSIS + 0.631... + + >>> t = wn.synsets('titan')[1] + >>> s = wn.synsets('say', wn.VERB)[0] + >>> print(t.wup_similarity(s)) + None + +Bug 21: "instance of" not included in LCS (very similar to bug 160) + + >>> a = wn.synsets("writings")[0] + >>> b = wn.synsets("scripture")[0] + >>> brown_ic = wordnet_ic.ic('ic-brown.dat') + >>> a.jcn_similarity(b, brown_ic) # doctest: +ELLIPSIS + 0.175... + +Bug 221: Verb root IC is zero + + >>> from nltk.corpus.reader.wordnet import information_content + >>> s = wn.synsets('say', wn.VERB)[0] + >>> information_content(s, brown_ic) # doctest: +ELLIPSIS + 4.623... + +Bug 161: Comparison between WN keys/lemmas should not be case sensitive + + >>> k = wn.synsets("jefferson")[0].lemmas()[0].key() + >>> wn.lemma_from_key(k) + Lemma('jefferson.n.01.Jefferson') + >>> wn.lemma_from_key(k.upper()) + Lemma('jefferson.n.01.Jefferson') + +Bug 99: WordNet root_hypernyms gives incorrect results + + >>> from nltk.corpus import wordnet as wn + >>> for s in wn.all_synsets(wn.NOUN): + ... if s.root_hypernyms()[0] != wn.synset('entity.n.01'): + ... print(s, s.root_hypernyms()) + ... + >>> + +Bug 382: JCN Division by zero error + + >>> tow = wn.synset('tow.v.01') + >>> shlep = wn.synset('shlep.v.02') + >>> from nltk.corpus import wordnet_ic + >>> brown_ic = wordnet_ic.ic('ic-brown.dat') + >>> tow.jcn_similarity(shlep, brown_ic) # doctest: +ELLIPSIS + 1...e+300 + +Bug 428: Depth is zero for instance nouns + + >>> s = wn.synset("lincoln.n.01") + >>> s.max_depth() > 0 + True + +Bug 429: Information content smoothing used old reference to all_synsets + + >>> genesis_ic = wn.ic(genesis, True, 1.0) + +Bug 430: all_synsets used wrong pos lookup when synsets were cached + + >>> for ii in wn.all_synsets(): pass + >>> for ii in wn.all_synsets(): pass + +Bug 470: shortest_path_distance ignored instance hypernyms + + >>> google = wordnet.synsets("google")[0] + >>> earth = wordnet.synsets("earth")[0] + >>> google.wup_similarity(earth) # doctest: +ELLIPSIS + 0.1... 
+ +Bug 484: similarity metrics returned -1 instead of None for no LCS + + >>> t = wn.synsets('fly', wn.VERB)[0] + >>> s = wn.synsets('say', wn.VERB)[0] + >>> print(s.shortest_path_distance(t)) + None + >>> print(s.path_similarity(t, simulate_root=False)) + None + >>> print(s.lch_similarity(t, simulate_root=False)) + None + >>> print(s.wup_similarity(t, simulate_root=False)) + None + +Bug 427: "pants" does not return all the senses it should + + >>> from nltk.corpus import wordnet + >>> wordnet.synsets("pants",'n') + [Synset('bloomers.n.01'), Synset('pant.n.01'), Synset('trouser.n.01'), Synset('gasp.n.01')] + +Bug 482: Some nouns not being lemmatised by WordNetLemmatizer().lemmatize + + >>> from nltk.stem.wordnet import WordNetLemmatizer + >>> WordNetLemmatizer().lemmatize("eggs", pos="n") + 'egg' + >>> WordNetLemmatizer().lemmatize("legs", pos="n") + 'leg' + +Bug 284: instance hypernyms not used in similarity calculations + + >>> wn.synset('john.n.02').lch_similarity(wn.synset('dog.n.01')) # doctest: +ELLIPSIS + 1.335... + >>> wn.synset('john.n.02').wup_similarity(wn.synset('dog.n.01')) # doctest: +ELLIPSIS + 0.571... + >>> wn.synset('john.n.02').res_similarity(wn.synset('dog.n.01'), brown_ic) # doctest: +ELLIPSIS + 2.224... + >>> wn.synset('john.n.02').jcn_similarity(wn.synset('dog.n.01'), brown_ic) # doctest: +ELLIPSIS + 0.075... + >>> wn.synset('john.n.02').lin_similarity(wn.synset('dog.n.01'), brown_ic) # doctest: +ELLIPSIS + 0.252... + >>> wn.synset('john.n.02').hypernym_paths() # doctest: +ELLIPSIS + [[Synset('entity.n.01'), ..., Synset('john.n.02')]] + +Issue 541: add domains to wordnet + + >>> wn.synset('code.n.03').topic_domains() + [Synset('computer_science.n.01')] + >>> wn.synset('pukka.a.01').region_domains() + [Synset('india.n.01')] + >>> wn.synset('freaky.a.01').usage_domains() + [Synset('slang.n.02')] + +Issue 629: wordnet failures when python run with -O optimizations + + >>> # Run the test suite with python -O to check this + >>> wn.synsets("brunch") + [Synset('brunch.n.01'), Synset('brunch.v.01')] + +Issue 395: wordnet returns incorrect result for lowest_common_hypernyms of chef and policeman + + >>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01')) + [Synset('person.n.01')] + +Bug https://github.com/nltk/nltk/issues/1641: Non-English lemmas containing capital letters cannot be looked up using wordnet.lemmas() or wordnet.synsets() + + >>> wn.lemmas('Londres', lang='fra') + [Lemma('united_kingdom.n.01.Londres'), Lemma('london.n.01.Londres'), Lemma('london.n.02.Londres')] + >>> wn.lemmas('londres', lang='fra') + [Lemma('united_kingdom.n.01.Londres'), Lemma('london.n.01.Londres'), Lemma('london.n.02.Londres')] + +Patch-1 https://github.com/nltk/nltk/pull/2065 Adding 3 functions (relations) to WordNet class + >>> wn.synsets("computer_science")[0].in_topic_domains()[2] + Synset('access_time.n.01') + >>> wn.synsets("France")[0].in_region_domains()[18] + Synset('french.n.01') + >>> wn.synsets("slang")[1].in_usage_domains()[18] + Synset('can-do.s.01') diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/wordnet_fixt.py b/venv.bak/lib/python3.7/site-packages/nltk/test/wordnet_fixt.py new file mode 100644 index 0000000..1412c0d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/wordnet_fixt.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + + +def teardown_module(module=None): + from nltk.corpus import wordnet + + wordnet._unload() diff --git 
a/venv.bak/lib/python3.7/site-packages/nltk/test/wordnet_lch.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/wordnet_lch.doctest new file mode 100644 index 0000000..c2536b4 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/wordnet_lch.doctest @@ -0,0 +1,53 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +=============================== +WordNet Lowest Common Hypernyms +=============================== + +Wordnet's lowest_common_hypernyms() method is based used to locate the +lowest single hypernym that is shared by two given words: + + >>> from nltk.corpus import wordnet as wn + >>> wn.synset('kin.n.01').lowest_common_hypernyms(wn.synset('mother.n.01')) + [Synset('relative.n.01')] + + >>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01')) + [Synset('person.n.01')] + +This method generally returns a single result, but in some cases, more than one +valid LCH is possible: + + >>> wn.synset('body.n.09').lowest_common_hypernyms(wn.synset('sidereal_day.n.01')) + [Synset('attribute.n.02'), Synset('measure.n.02')] + +In some cases, lowest_common_hypernyms() can return one of the synsets which was +passed to it as an argument: + + >>> wn.synset('woman.n.01').lowest_common_hypernyms(wn.synset('girlfriend.n.02')) + [Synset('woman.n.01')] + +In NLTK 3.0a2 the behavior of lowest_common_hypernyms() was changed to give more +accurate results in a small set of cases, generally when dealing with nouns describing +social roles or jobs. To emulate the pre v3.0a2 behavior, you can set the use_min_depth=True +flag: + + >>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01')) + [Synset('person.n.01')] + >>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'), use_min_depth=True) + [Synset('organism.n.01')] + +In some cases use_min_depth=True may return more or fewer results than the default +behavior: + + >>> wn.synset('woman.n.01').lowest_common_hypernyms(wn.synset('girlfriend.n.02')) + [Synset('woman.n.01')] + >>> wn.synset('woman.n.01').lowest_common_hypernyms(wn.synset('girlfriend.n.02'), use_min_depth=True) + [Synset('organism.n.01'), Synset('woman.n.01')] + +In the general case, however, they tend to return the same results: + + >>> wn.synset('body.n.09').lowest_common_hypernyms(wn.synset('sidereal_day.n.01')) + [Synset('attribute.n.02'), Synset('measure.n.02')] + >>> wn.synset('body.n.09').lowest_common_hypernyms(wn.synset('sidereal_day.n.01'), use_min_depth=True) + [Synset('attribute.n.02'), Synset('measure.n.02')] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/test/wsd.doctest b/venv.bak/lib/python3.7/site-packages/nltk/test/wsd.doctest new file mode 100644 index 0000000..b4d8f90 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/test/wsd.doctest @@ -0,0 +1,68 @@ +.. Copyright (C) 2001-2019 NLTK Project +.. For license information, see LICENSE.TXT + +.. -*- coding: utf-8 -*- + +========================= +Word Sense Disambiguation +========================= + + +Lesk Algorithm +-------------- + + +Performs the classic Lesk algorithm for Word Sense Disambiguation (WSD) using +a the definitions of the ambiguous word. + +Given an ambiguous word and the context in which the word occurs, Lesk returns +a Synset with the highest number of overlapping words between the context +sentence and different definitions from each Synset. 
+ + >>> from nltk.wsd import lesk + >>> sent = ['I', 'went', 'to', 'the', 'bank', 'to', 'deposit', 'money', '.'] + + >>> print(lesk(sent, 'bank', 'n')) + Synset('savings_bank.n.02') + + >>> print(lesk(sent, 'bank')) + Synset('savings_bank.n.02') + +The definitions for "bank" are: + + >>> from nltk.corpus import wordnet as wn + >>> for ss in wn.synsets('bank'): + ... print(ss, ss.definition()) + ... + Synset('bank.n.01') sloping land (especially the slope beside a body of water) + Synset('depository_financial_institution.n.01') a financial institution that accepts deposits and channels the money into lending activities + Synset('bank.n.03') a long ridge or pile + Synset('bank.n.04') an arrangement of similar objects in a row or in tiers + Synset('bank.n.05') a supply or stock held in reserve for future use (especially in emergencies) + Synset('bank.n.06') the funds held by a gambling house or the dealer in some gambling games + Synset('bank.n.07') a slope in the turn of a road or track; the outside is higher than the inside in order to reduce the effects of centrifugal force + Synset('savings_bank.n.02') a container (usually with a slot in the top) for keeping money at home + Synset('bank.n.09') a building in which the business of banking transacted + Synset('bank.n.10') a flight maneuver; aircraft tips laterally about its longitudinal axis (especially in turning) + Synset('bank.v.01') tip laterally + Synset('bank.v.02') enclose with a bank + Synset('bank.v.03') do business with a bank or keep an account at a bank + Synset('bank.v.04') act as the banker in a game or in gambling + Synset('bank.v.05') be in the banking business + Synset('deposit.v.02') put into a bank account + Synset('bank.v.07') cover with ashes so to control the rate of burning + Synset('trust.v.01') have confidence or faith in + +Test disambiguation of POS tagged `able`. + + >>> [(s, s.pos()) for s in wn.synsets('able')] + [(Synset('able.a.01'), 'a'), (Synset('able.s.02'), 's'), (Synset('able.s.03'), 's'), (Synset('able.s.04'), 's')] + >>> sent = 'people should be able to marry a person of their choice'.split() + >>> lesk(sent, 'able') + Synset('able.s.04') + >>> lesk(sent, 'able', pos='a') + Synset('able.a.01') + +Test behavior if there is are no matching senses. + + >>> lesk('John loves Mary'.split(), 'loves', synsets=[]) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/text.py b/venv.bak/lib/python3.7/site-packages/nltk/text.py new file mode 100644 index 0000000..e8a3061 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/text.py @@ -0,0 +1,763 @@ +# Natural Language Toolkit: Texts +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# Edward Loper +# URL: +# For license information, see LICENSE.TXT + +""" +This module brings together a variety of NLTK functionality for +text analysis, and provides simple, interactive interfaces. +Functionality includes: concordancing, collocation discovery, +regular expression search over tokenized strings, and +distributional similarity. 
+""" +from __future__ import print_function, division, unicode_literals, absolute_import + +from math import log +from collections import defaultdict, Counter, namedtuple +from functools import reduce +import re +import sys + +from six import text_type + +from nltk.lm import MLE +from nltk.lm.preprocessing import padded_everygram_pipeline +from nltk.probability import FreqDist +from nltk.probability import ConditionalFreqDist as CFD +from nltk.util import tokenwrap, LazyConcatenation +from nltk.metrics import f_measure, BigramAssocMeasures +from nltk.collocations import BigramCollocationFinder +from nltk.compat import python_2_unicode_compatible +from nltk.tokenize import sent_tokenize + +ConcordanceLine = namedtuple( + "ConcordanceLine", + ["left", "query", "right", "offset", "left_print", "right_print", "line"], +) + + +class ContextIndex(object): + """ + A bidirectional index between words and their 'contexts' in a text. + The context of a word is usually defined to be the words that occur + in a fixed window around the word; but other definitions may also + be used by providing a custom context function. + """ + + @staticmethod + def _default_context(tokens, i): + """One left token and one right token, normalized to lowercase""" + left = tokens[i - 1].lower() if i != 0 else "*START*" + right = tokens[i + 1].lower() if i != len(tokens) - 1 else "*END*" + return (left, right) + + def __init__(self, tokens, context_func=None, filter=None, key=lambda x: x): + self._key = key + self._tokens = tokens + if context_func: + self._context_func = context_func + else: + self._context_func = self._default_context + if filter: + tokens = [t for t in tokens if filter(t)] + self._word_to_contexts = CFD( + (self._key(w), self._context_func(tokens, i)) for i, w in enumerate(tokens) + ) + self._context_to_words = CFD( + (self._context_func(tokens, i), self._key(w)) for i, w in enumerate(tokens) + ) + + def tokens(self): + """ + :rtype: list(str) + :return: The document that this context index was + created from. + """ + return self._tokens + + def word_similarity_dict(self, word): + """ + Return a dictionary mapping from words to 'similarity scores,' + indicating how often these two words occur in the same + context. + """ + word = self._key(word) + word_contexts = set(self._word_to_contexts[word]) + + scores = {} + for w, w_contexts in self._word_to_contexts.items(): + scores[w] = f_measure(word_contexts, set(w_contexts)) + + return scores + + def similar_words(self, word, n=20): + scores = defaultdict(int) + for c in self._word_to_contexts[self._key(word)]: + for w in self._context_to_words[c]: + if w != word: + scores[w] += ( + self._context_to_words[c][word] * self._context_to_words[c][w] + ) + return sorted(scores, key=scores.get, reverse=True)[:n] + + def common_contexts(self, words, fail_on_unknown=False): + """ + Find contexts where the specified words can all appear; and + return a frequency distribution mapping each context to the + number of times that context was used. + + :param words: The words used to seed the similarity search + :type words: str + :param fail_on_unknown: If true, then raise a value error if + any of the given words do not occur at all in the index. 
+ """ + words = [self._key(w) for w in words] + contexts = [set(self._word_to_contexts[w]) for w in words] + empty = [words[i] for i in range(len(words)) if not contexts[i]] + common = reduce(set.intersection, contexts) + if empty and fail_on_unknown: + raise ValueError("The following word(s) were not found:", " ".join(words)) + elif not common: + # nothing in common -- just return an empty freqdist. + return FreqDist() + else: + fd = FreqDist( + c for w in words for c in self._word_to_contexts[w] if c in common + ) + return fd + + +@python_2_unicode_compatible +class ConcordanceIndex(object): + """ + An index that can be used to look up the offset locations at which + a given word occurs in a document. + """ + + def __init__(self, tokens, key=lambda x: x): + """ + Construct a new concordance index. + + :param tokens: The document (list of tokens) that this + concordance index was created from. This list can be used + to access the context of a given word occurrence. + :param key: A function that maps each token to a normalized + version that will be used as a key in the index. E.g., if + you use ``key=lambda s:s.lower()``, then the index will be + case-insensitive. + """ + self._tokens = tokens + """The document (list of tokens) that this concordance index + was created from.""" + + self._key = key + """Function mapping each token to an index key (or None).""" + + self._offsets = defaultdict(list) + """Dictionary mapping words (or keys) to lists of offset indices.""" + # Initialize the index (self._offsets) + for index, word in enumerate(tokens): + word = self._key(word) + self._offsets[word].append(index) + + def tokens(self): + """ + :rtype: list(str) + :return: The document that this concordance index was + created from. + """ + return self._tokens + + def offsets(self, word): + """ + :rtype: list(int) + :return: A list of the offset positions at which the given + word occurs. If a key function was specified for the + index, then given word's key will be looked up. + """ + word = self._key(word) + return self._offsets[word] + + def __repr__(self): + return "" % ( + len(self._tokens), + len(self._offsets), + ) + + def find_concordance(self, word, width=80): + """ + Find all concordance lines given the query word. + """ + half_width = (width - len(word) - 2) // 2 + context = width // 4 # approx number of words of context + + # Find the instances of the word to create the ConcordanceLine + concordance_list = [] + offsets = self.offsets(word) + if offsets: + for i in offsets: + query_word = self._tokens[i] + # Find the context of query word. + left_context = self._tokens[max(0, i - context) : i] + right_context = self._tokens[i + 1 : i + context] + # Create the pretty lines with the query_word in the middle. + left_print = " ".join(left_context)[-half_width:] + right_print = " ".join(right_context)[:half_width] + # The WYSIWYG line of the concordance. + line_print = " ".join([left_print, query_word, right_print]) + # Create the ConcordanceLine + concordance_line = ConcordanceLine( + left_context, + query_word, + right_context, + i, + left_print, + right_print, + line_print, + ) + concordance_list.append(concordance_line) + return concordance_list + + def print_concordance(self, word, width=80, lines=25): + """ + Print concordance lines given the query word. 
+ :param word: The target word + :type word: str + :param lines: The number of lines to display (default=25) + :type lines: int + :param width: The width of each line, in characters (default=80) + :type width: int + :param save: The option to save the concordance. + :type save: bool + """ + concordance_list = self.find_concordance(word, width=width) + + if not concordance_list: + print("no matches") + else: + lines = min(lines, len(concordance_list)) + print("Displaying {} of {} matches:".format(lines, len(concordance_list))) + for i, concordance_line in enumerate(concordance_list[:lines]): + print(concordance_line.line) + + +class TokenSearcher(object): + """ + A class that makes it easier to use regular expressions to search + over tokenized strings. The tokenized string is converted to a + string where tokens are marked with angle brackets -- e.g., + ``''``. The regular expression + passed to the ``findall()`` method is modified to treat angle + brackets as non-capturing parentheses, in addition to matching the + token boundaries; and to have ``'.'`` not match the angle brackets. + """ + + def __init__(self, tokens): + self._raw = "".join("<" + w + ">" for w in tokens) + + def findall(self, regexp): + """ + Find instances of the regular expression in the text. + The text is a list of tokens, and a regexp pattern to match + a single token must be surrounded by angle brackets. E.g. + + >>> from nltk.text import TokenSearcher + >>> print('hack'); from nltk.book import text1, text5, text9 + hack... + >>> text5.findall("<.*><.*>") + you rule bro; telling you bro; u twizted bro + >>> text1.findall("(<.*>)") + monied; nervous; dangerous; white; white; white; pious; queer; good; + mature; white; Cape; great; wise; wise; butterless; white; fiendish; + pale; furious; better; certain; complete; dismasted; younger; brave; + brave; brave; brave + >>> text9.findall("{3,}") + thread through those; the thought that; that the thing; the thing + that; that that thing; through these than through; them that the; + through the thick; them that they; thought that the + + :param regexp: A regular expression + :type regexp: str + """ + # preprocess the regular expression + regexp = re.sub(r"\s", "", regexp) + regexp = re.sub(r"<", "(?:<(?:", regexp) + regexp = re.sub(r">", ")>)", regexp) + regexp = re.sub(r"(?]", regexp) + + # perform the search + hits = re.findall(regexp, self._raw) + + # Sanity check + for h in hits: + if not h.startswith("<") and h.endswith(">"): + raise ValueError("Bad regexp for TokenSearcher.findall") + + # postprocess the output + hits = [h[1:-1].split("><") for h in hits] + return hits + + +@python_2_unicode_compatible +class Text(object): + """ + A wrapper around a sequence of simple (string) tokens, which is + intended to support initial exploration of texts (via the + interactive console). Its methods perform a variety of analyses + on the text's contexts (e.g., counting, concordancing, collocation + discovery), and display the results. If you wish to write a + program which makes use of these analyses, then you should bypass + the ``Text`` class, and use the appropriate analysis function or + class directly instead. + + A ``Text`` is typically initialized from a given document or + corpus. E.g.: + + >>> import nltk.corpus + >>> from nltk.text import Text + >>> moby = Text(nltk.corpus.gutenberg.words('melville-moby_dick.txt')) + + """ + + # This defeats lazy loading, but makes things faster. 
This + # *shouldn't* be necessary because the corpus view *should* be + # doing intelligent caching, but without this it's running slow. + # Look into whether the caching is working correctly. + _COPY_TOKENS = True + + def __init__(self, tokens, name=None): + """ + Create a Text object. + + :param tokens: The source text. + :type tokens: sequence of str + """ + if self._COPY_TOKENS: + tokens = list(tokens) + self.tokens = tokens + + if name: + self.name = name + elif "]" in tokens[:20]: + end = tokens[:20].index("]") + self.name = " ".join(text_type(tok) for tok in tokens[1:end]) + else: + self.name = " ".join(text_type(tok) for tok in tokens[:8]) + "..." + + # //////////////////////////////////////////////////////////// + # Support item & slice access + # //////////////////////////////////////////////////////////// + + def __getitem__(self, i): + return self.tokens[i] + + def __len__(self): + return len(self.tokens) + + # //////////////////////////////////////////////////////////// + # Interactive console methods + # //////////////////////////////////////////////////////////// + + def concordance(self, word, width=79, lines=25): + """ + Prints a concordance for ``word`` with the specified context window. + Word matching is not case-sensitive. + + :param word: The target word + :type word: str + :param width: The width of each line, in characters (default=80) + :type width: int + :param lines: The number of lines to display (default=25) + :type lines: int + + :seealso: ``ConcordanceIndex`` + """ + if "_concordance_index" not in self.__dict__: + self._concordance_index = ConcordanceIndex( + self.tokens, key=lambda s: s.lower() + ) + + return self._concordance_index.print_concordance(word, width, lines) + + def concordance_list(self, word, width=79, lines=25): + """ + Generate a concordance for ``word`` with the specified context window. + Word matching is not case-sensitive. + + :param word: The target word + :type word: str + :param width: The width of each line, in characters (default=80) + :type width: int + :param lines: The number of lines to display (default=25) + :type lines: int + + :seealso: ``ConcordanceIndex`` + """ + if "_concordance_index" not in self.__dict__: + self._concordance_index = ConcordanceIndex( + self.tokens, key=lambda s: s.lower() + ) + return self._concordance_index.find_concordance(word, width)[:lines] + + def collocation_list(self, num=20, window_size=2): + """ + Return collocations derived from the text, ignoring stopwords. + + :param num: The maximum number of collocations to return. + :type num: int + :param window_size: The number of tokens spanned by a collocation (default=2) + :type window_size: int + """ + if not ( + "_collocations" in self.__dict__ + and self._num == num + and self._window_size == window_size + ): + self._num = num + self._window_size = window_size + + # print("Building collocations list") + from nltk.corpus import stopwords + + ignored_words = stopwords.words("english") + finder = BigramCollocationFinder.from_words(self.tokens, window_size) + finder.apply_freq_filter(2) + finder.apply_word_filter(lambda w: len(w) < 3 or w.lower() in ignored_words) + bigram_measures = BigramAssocMeasures() + self._collocations = finder.nbest(bigram_measures.likelihood_ratio, num) + return [w1 + " " + w2 for w1, w2 in self._collocations] + + def collocations(self, num=20, window_size=2): + """ + Print collocations derived from the text, ignoring stopwords. + + :param num: The maximum number of collocations to print. 
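# Illustrative aside (usage sketch, not part of the module above): the
# interactive Text helpers on a corpus-backed text, mirroring the demo() at the
# end of this file.  Assumes the Brown corpus and the stopwords list have been
# downloaded via nltk.download().
import nltk.corpus
from nltk.text import Text

news = Text(nltk.corpus.brown.words(categories="news"))
news.concordance("news", width=60, lines=5)   # prints up to 5 concordance lines
news.collocation_list(num=10)                 # e.g. ['United States', ...] (top bigrams)
news.count("the")                             # raw token count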
+ :type num: int + :param window_size: The number of tokens spanned by a collocation (default=2) + :type window_size: int + """ + + collocation_strings = [ + w1 + " " + w2 for w1, w2 in self.collocation_list(num, window_size) + ] + print(tokenwrap(collocation_strings, separator="; ")) + + def count(self, word): + """ + Count the number of times this word appears in the text. + """ + return self.tokens.count(word) + + def index(self, word): + """ + Find the index of the first occurrence of the word in the text. + """ + return self.tokens.index(word) + + def readability(self, method): + # code from nltk_contrib.readability + raise NotImplementedError + + def similar(self, word, num=20): + """ + Distributional similarity: find other words which appear in the + same contexts as the specified word; list most similar words first. + + :param word: The word used to seed the similarity search + :type word: str + :param num: The number of words to generate (default=20) + :type num: int + :seealso: ContextIndex.similar_words() + """ + if "_word_context_index" not in self.__dict__: + # print('Building word-context index...') + self._word_context_index = ContextIndex( + self.tokens, filter=lambda x: x.isalpha(), key=lambda s: s.lower() + ) + + # words = self._word_context_index.similar_words(word, num) + + word = word.lower() + wci = self._word_context_index._word_to_contexts + if word in wci.conditions(): + contexts = set(wci[word]) + fd = Counter( + w + for w in wci.conditions() + for c in wci[w] + if c in contexts and not w == word + ) + words = [w for w, _ in fd.most_common(num)] + print(tokenwrap(words)) + else: + print("No matches") + + def common_contexts(self, words, num=20): + """ + Find contexts where the specified words appear; list + most frequent common contexts first. + + :param words: The words used to seed the similarity search + :type words: str + :param num: The number of words to generate (default=20) + :type num: int + :seealso: ContextIndex.common_contexts() + """ + if "_word_context_index" not in self.__dict__: + # print('Building word-context index...') + self._word_context_index = ContextIndex( + self.tokens, key=lambda s: s.lower() + ) + + try: + fd = self._word_context_index.common_contexts(words, True) + if not fd: + print("No common contexts were found") + else: + ranked_contexts = [w for w, _ in fd.most_common(num)] + print(tokenwrap(w1 + "_" + w2 for w1, w2 in ranked_contexts)) + + except ValueError as e: + print(e) + + def dispersion_plot(self, words): + """ + Produce a plot showing the distribution of the words through the text. + Requires pylab to be installed. + + :param words: The words to be plotted + :type words: list(str) + :seealso: nltk.draw.dispersion_plot() + """ + from nltk.draw import dispersion_plot + + dispersion_plot(self, words) + + def _train_default_ngram_lm(self, tokenized_sents, n=3): + train_data, padded_sents = padded_everygram_pipeline(n, tokenized_sents) + model = MLE(order=n) + model.fit(train_data, padded_sents) + return model + + def generate(self, length=100, text_seed=None, random_seed=42): + """ + Print random text, generated using a trigram language model. + See also `help(nltk.lm)`. + + :param length: The length of text to generate (default=100) + :type length: int + + :param text_seed: Generation can be conditioned on preceding context. + :type text_seed: list(str) + + :param random_seed: A random seed or an instance of `random.Random`. If provided, + makes the random sampling part of generation reproducible. 
(default=42) + :type random_seed: int + + """ + # Create the model when using it the first time. + self._tokenized_sents = [ + sent.split(" ") for sent in sent_tokenize(" ".join(self.tokens)) + ] + if not hasattr(self, "trigram_model"): + print("Building ngram index...", file=sys.stderr) + self._trigram_model = self._train_default_ngram_lm( + self._tokenized_sents, n=3 + ) + + generated_tokens = [] + + assert length > 0, "The `length` must be more than 0." + while len(generated_tokens) < length: + for idx, token in enumerate( + self._trigram_model.generate( + length, text_seed=text_seed, random_seed=random_seed + ) + ): + if token == "": + continue + if token == "": + break + generated_tokens.append(token) + random_seed += 1 + + prefix = " ".join(text_seed) + " " if text_seed else "" + output_str = prefix + tokenwrap(generated_tokens[:length]) + print(output_str) + return output_str + + def plot(self, *args): + """ + See documentation for FreqDist.plot() + :seealso: nltk.prob.FreqDist.plot() + """ + self.vocab().plot(*args) + + def vocab(self): + """ + :seealso: nltk.prob.FreqDist + """ + if "_vocab" not in self.__dict__: + # print("Building vocabulary index...") + self._vocab = FreqDist(self) + return self._vocab + + def findall(self, regexp): + """ + Find instances of the regular expression in the text. + The text is a list of tokens, and a regexp pattern to match + a single token must be surrounded by angle brackets. E.g. + + >>> print('hack'); from nltk.book import text1, text5, text9 + hack... + >>> text5.findall("<.*><.*>") + you rule bro; telling you bro; u twizted bro + >>> text1.findall("(<.*>)") + monied; nervous; dangerous; white; white; white; pious; queer; good; + mature; white; Cape; great; wise; wise; butterless; white; fiendish; + pale; furious; better; certain; complete; dismasted; younger; brave; + brave; brave; brave + >>> text9.findall("{3,}") + thread through those; the thought that; that the thing; the thing + that; that that thing; through these than through; them that the; + through the thick; them that they; thought that the + + :param regexp: A regular expression + :type regexp: str + """ + + if "_token_searcher" not in self.__dict__: + self._token_searcher = TokenSearcher(self) + + hits = self._token_searcher.findall(regexp) + hits = [" ".join(h) for h in hits] + print(tokenwrap(hits, "; ")) + + # //////////////////////////////////////////////////////////// + # Helper Methods + # //////////////////////////////////////////////////////////// + + _CONTEXT_RE = re.compile("\w+|[\.\!\?]") + + def _context(self, tokens, i): + """ + One left & one right token, both case-normalized. Skip over + non-sentence-final punctuation. Used by the ``ContextIndex`` + that is created for ``similar()`` and ``common_contexts()``. 
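# Illustrative aside: the same model-building recipe used by
# _train_default_ngram_lm() and generate() above, spelled out on toy data.
from nltk.lm import MLE
from nltk.lm.preprocessing import padded_everygram_pipeline

tokenized_sents = [["the", "cat", "sat"], ["the", "dog", "sat"]]
train_data, padded_sents = padded_everygram_pipeline(3, tokenized_sents)
lm = MLE(order=3)
lm.fit(train_data, padded_sents)
lm.generate(5, text_seed=["the"], random_seed=42)   # list of sampled tokens (may include '</s>')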
+ """ + # Left context + j = i - 1 + while j >= 0 and not self._CONTEXT_RE.match(tokens[j]): + j -= 1 + left = tokens[j] if j != 0 else "*START*" + + # Right context + j = i + 1 + while j < len(tokens) and not self._CONTEXT_RE.match(tokens[j]): + j += 1 + right = tokens[j] if j != len(tokens) else "*END*" + + return (left, right) + + # //////////////////////////////////////////////////////////// + # String Display + # //////////////////////////////////////////////////////////// + + def __str__(self): + return "" % self.name + + def __repr__(self): + return "" % self.name + + +# Prototype only; this approach will be slow to load +class TextCollection(Text): + """A collection of texts, which can be loaded with list of texts, or + with a corpus consisting of one or more texts, and which supports + counting, concordancing, collocation discovery, etc. Initialize a + TextCollection as follows: + + >>> import nltk.corpus + >>> from nltk.text import TextCollection + >>> print('hack'); from nltk.book import text1, text2, text3 + hack... + >>> gutenberg = TextCollection(nltk.corpus.gutenberg) + >>> mytexts = TextCollection([text1, text2, text3]) + + Iterating over a TextCollection produces all the tokens of all the + texts in order. + """ + + def __init__(self, source): + if hasattr(source, "words"): # bridge to the text corpus reader + source = [source.words(f) for f in source.fileids()] + + self._texts = source + Text.__init__(self, LazyConcatenation(source)) + self._idf_cache = {} + + def tf(self, term, text): + """ The frequency of the term in text. """ + return text.count(term) / len(text) + + def idf(self, term): + """ The number of texts in the corpus divided by the + number of texts that the term appears in. + If a term does not appear in the corpus, 0.0 is returned. """ + # idf values are cached for performance. 
+ idf = self._idf_cache.get(term) + if idf is None: + matches = len([True for text in self._texts if term in text]) + if len(self._texts) == 0: + raise ValueError("IDF undefined for empty document collection") + idf = log(len(self._texts) / matches) if matches else 0.0 + self._idf_cache[term] = idf + return idf + + def tf_idf(self, term, text): + return self.tf(term, text) * self.idf(term) + + +def demo(): + from nltk.corpus import brown + + text = Text(brown.words(categories="news")) + print(text) + print() + print("Concordance:") + text.concordance("news") + print() + print("Distributionally similar words:") + text.similar("news") + print() + print("Collocations:") + text.collocations() + print() + # print("Automatically generated text:") + # text.generate() + # print() + print("Dispersion plot:") + text.dispersion_plot(["news", "report", "said", "announced"]) + print() + print("Vocabulary plot:") + text.plot(50) + print() + print("Indexing:") + print("text[3]:", text[3]) + print("text[3:5]:", text[3:5]) + print("text.vocab()['news']:", text.vocab()["news"]) + + +if __name__ == "__main__": + demo() + +__all__ = [ + "ContextIndex", + "ConcordanceIndex", + "TokenSearcher", + "Text", + "TextCollection", +] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tgrep.py b/venv.bak/lib/python3.7/site-packages/nltk/tgrep.py new file mode 100644 index 0000000..d5a315a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tgrep.py @@ -0,0 +1,1055 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Natural Language Toolkit: TGrep search +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Will Roberts +# URL: +# For license information, see LICENSE.TXT + +''' +============================================ + TGrep search implementation for NLTK trees +============================================ + +This module supports TGrep2 syntax for matching parts of NLTK Trees. +Note that many tgrep operators require the tree passed to be a +``ParentedTree``. + +External links: + +- `Tgrep tutorial `_ +- `Tgrep2 manual `_ +- `Tgrep2 source `_ + +Usage +===== + +>>> from nltk.tree import ParentedTree +>>> from nltk.tgrep import tgrep_nodes, tgrep_positions +>>> tree = ParentedTree.fromstring('(S (NP (DT the) (JJ big) (NN dog)) (VP bit) (NP (DT a) (NN cat)))') +>>> list(tgrep_nodes('NN', [tree])) +[[ParentedTree('NN', ['dog']), ParentedTree('NN', ['cat'])]] +>>> list(tgrep_positions('NN', [tree])) +[[(0, 2), (2, 1)]] +>>> list(tgrep_nodes('DT', [tree])) +[[ParentedTree('DT', ['the']), ParentedTree('DT', ['a'])]] +>>> list(tgrep_nodes('DT $ JJ', [tree])) +[[ParentedTree('DT', ['the'])]] + +This implementation adds syntax to select nodes based on their NLTK +tree position. This syntax is ``N`` plus a Python tuple representing +the tree position. For instance, ``N()``, ``N(0,)``, ``N(0,0)`` are +valid node selectors. Example: + +>>> tree = ParentedTree.fromstring('(S (NP (DT the) (JJ big) (NN dog)) (VP bit) (NP (DT a) (NN cat)))') +>>> tree[0,0] +ParentedTree('DT', ['the']) +>>> tree[0,0].treeposition() +(0, 0) +>>> list(tgrep_nodes('N(0,0)', [tree])) +[[ParentedTree('DT', ['the'])]] + +Caveats: +======== + +- Link modifiers: "?" and "=" are not implemented. +- Tgrep compatibility: Using "@" for "!", "{" for "<", "}" for ">" are + not implemented. +- The "=" and "~" links are not implemented. + +Known Issues: +============= + +- There are some issues with link relations involving leaf nodes + (which are represented as bare strings in NLTK trees). 
For + instance, consider the tree:: + + (S (A x)) + + The search string ``* !>> S`` should select all nodes which are not + dominated in some way by an ``S`` node (i.e., all nodes which are + not descendants of an ``S``). Clearly, in this tree, the only node + which fulfills this criterion is the top node (since it is not + dominated by anything). However, the code here will find both the + top node and the leaf node ``x``. This is because we cannot recover + the parent of the leaf, since it is stored as a bare string. + + A possible workaround, when performing this kind of search, would be + to filter out all leaf nodes. + +Implementation notes +==================== + +This implementation is (somewhat awkwardly) based on lambda functions +which are predicates on a node. A predicate is a function which is +either True or False; using a predicate function, we can identify sets +of nodes with particular properties. A predicate function, could, for +instance, return True only if a particular node has a label matching a +particular regular expression, and has a daughter node which has no +sisters. Because tgrep2 search strings can do things statefully (such +as substituting in macros, and binding nodes with node labels), the +actual predicate function is declared with three arguments:: + + pred = lambda n, m, l: return True # some logic here + +``n`` + is a node in a tree; this argument must always be given + +``m`` + contains a dictionary, mapping macro names onto predicate functions + +``l`` + is a dictionary to map node labels onto nodes in the tree + +``m`` and ``l`` are declared to default to ``None``, and so need not be +specified in a call to a predicate. Predicates which call other +predicates must always pass the value of these arguments on. The +top-level predicate (constructed by ``_tgrep_exprs_action``) binds the +macro definitions to ``m`` and initialises ``l`` to an empty dictionary. +''' + +from __future__ import absolute_import, print_function, unicode_literals + +import functools +import re + +from six import binary_type, text_type + +try: + import pyparsing +except ImportError: + print('Warning: nltk.tgrep will not work without the `pyparsing` package') + print('installed.') + +import nltk.tree + + +class TgrepException(Exception): + '''Tgrep exception type.''' + + pass + + +def ancestors(node): + ''' + Returns the list of all nodes dominating the given tree node. + This method will not work with leaf nodes, since there is no way + to recover the parent. + ''' + results = [] + try: + current = node.parent() + except AttributeError: + # if node is a leaf, we cannot retrieve its parent + return results + while current: + results.append(current) + current = current.parent() + return results + + +def unique_ancestors(node): + ''' + Returns the list of all nodes dominating the given node, where + there is only a single path of descent. + ''' + results = [] + try: + current = node.parent() + except AttributeError: + # if node is a leaf, we cannot retrieve its parent + return results + while current and len(current) == 1: + results.append(current) + current = current.parent() + return results + + +def _descendants(node): + ''' + Returns the list of all nodes which are descended from the given + tree node in some way. + ''' + try: + treepos = node.treepositions() + except AttributeError: + return [] + return [node[x] for x in treepos[1:]] + + +def _leftmost_descendants(node): + ''' + Returns the set of all nodes descended in some way through + left branches from this node. 
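# Illustrative aside (not part of nltk/tgrep.py itself): the compiled search
# string is just a predicate over tree nodes, as described in the
# implementation notes above, so it can be applied at every tree position.
from nltk.tree import ParentedTree
from nltk.tgrep import tgrep_compile

tree = ParentedTree.fromstring('(S (NP (DT the) (NN dog)) (VP barked))')
pred = tgrep_compile('NP < DT')       # "an NP that immediately dominates a DT"
[tree[pos] for pos in tree.treepositions() if pred(tree[pos])]
# -> [ParentedTree('NP', [ParentedTree('DT', ['the']), ParentedTree('NN', ['dog'])])]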
+ ''' + try: + treepos = node.treepositions() + except AttributeError: + return [] + return [node[x] for x in treepos[1:] if all(y == 0 for y in x)] + + +def _rightmost_descendants(node): + ''' + Returns the set of all nodes descended in some way through + right branches from this node. + ''' + try: + rightmost_leaf = max(node.treepositions()) + except AttributeError: + return [] + return [node[rightmost_leaf[:i]] for i in range(1, len(rightmost_leaf) + 1)] + + +def _istree(obj): + '''Predicate to check whether `obj` is a nltk.tree.Tree.''' + return isinstance(obj, nltk.tree.Tree) + + +def _unique_descendants(node): + ''' + Returns the list of all nodes descended from the given node, where + there is only a single path of descent. + ''' + results = [] + current = node + while current and _istree(current) and len(current) == 1: + current = current[0] + results.append(current) + return results + + +def _before(node): + ''' + Returns the set of all nodes that are before the given node. + ''' + try: + pos = node.treeposition() + tree = node.root() + except AttributeError: + return [] + return [tree[x] for x in tree.treepositions() if x[: len(pos)] < pos[: len(x)]] + + +def _immediately_before(node): + ''' + Returns the set of all nodes that are immediately before the given + node. + + Tree node A immediately precedes node B if the last terminal + symbol (word) produced by A immediately precedes the first + terminal symbol produced by B. + ''' + try: + pos = node.treeposition() + tree = node.root() + except AttributeError: + return [] + # go "upwards" from pos until there is a place we can go to the left + idx = len(pos) - 1 + while 0 <= idx and pos[idx] == 0: + idx -= 1 + if idx < 0: + return [] + pos = list(pos[: idx + 1]) + pos[-1] -= 1 + before = tree[pos] + return [before] + _rightmost_descendants(before) + + +def _after(node): + ''' + Returns the set of all nodes that are after the given node. + ''' + try: + pos = node.treeposition() + tree = node.root() + except AttributeError: + return [] + return [tree[x] for x in tree.treepositions() if x[: len(pos)] > pos[: len(x)]] + + +def _immediately_after(node): + ''' + Returns the set of all nodes that are immediately after the given + node. + + Tree node A immediately follows node B if the first terminal + symbol (word) produced by A immediately follows the last + terminal symbol produced by B. + ''' + try: + pos = node.treeposition() + tree = node.root() + current = node.parent() + except AttributeError: + return [] + # go "upwards" from pos until there is a place we can go to the + # right + idx = len(pos) - 1 + while 0 <= idx and pos[idx] == len(current) - 1: + idx -= 1 + current = current.parent() + if idx < 0: + return [] + pos = list(pos[: idx + 1]) + pos[-1] += 1 + after = tree[pos] + return [after] + _leftmost_descendants(after) + + +def _tgrep_node_literal_value(node): + ''' + Gets the string value of a given parse tree node, for comparison + using the tgrep node literal predicates. + ''' + return node.label() if _istree(node) else text_type(node) + + +def _tgrep_macro_use_action(_s, _l, tokens): + ''' + Builds a lambda function which looks up the macro name used. 
+ ''' + assert len(tokens) == 1 + assert tokens[0][0] == '@' + macro_name = tokens[0][1:] + + def macro_use(n, m=None, l=None): + if m is None or macro_name not in m: + raise TgrepException('macro {0} not defined'.format(macro_name)) + return m[macro_name](n, m, l) + + return macro_use + + +def _tgrep_node_action(_s, _l, tokens): + ''' + Builds a lambda function representing a predicate on a tree node + depending on the name of its node. + ''' + # print 'node tokens: ', tokens + if tokens[0] == "'": + # strip initial apostrophe (tgrep2 print command) + tokens = tokens[1:] + if len(tokens) > 1: + # disjunctive definition of a node name + assert list(set(tokens[1::2])) == ['|'] + # recursively call self to interpret each node name definition + tokens = [_tgrep_node_action(None, None, [node]) for node in tokens[::2]] + # capture tokens and return the disjunction + return (lambda t: lambda n, m=None, l=None: any(f(n, m, l) for f in t))(tokens) + else: + if hasattr(tokens[0], '__call__'): + # this is a previously interpreted parenthetical node + # definition (lambda function) + return tokens[0] + elif tokens[0] == '*' or tokens[0] == '__': + return lambda n, m=None, l=None: True + elif tokens[0].startswith('"'): + assert tokens[0].endswith('"') + node_lit = tokens[0][1:-1].replace('\\"', '"').replace('\\\\', '\\') + return ( + lambda s: lambda n, m=None, l=None: _tgrep_node_literal_value(n) == s + )(node_lit) + elif tokens[0].startswith('/'): + assert tokens[0].endswith('/') + node_lit = tokens[0][1:-1] + return ( + lambda r: lambda n, m=None, l=None: r.search( + _tgrep_node_literal_value(n) + ) + )(re.compile(node_lit)) + elif tokens[0].startswith('i@'): + node_func = _tgrep_node_action(_s, _l, [tokens[0][2:].lower()]) + return ( + lambda f: lambda n, m=None, l=None: f( + _tgrep_node_literal_value(n).lower() + ) + )(node_func) + else: + return ( + lambda s: lambda n, m=None, l=None: _tgrep_node_literal_value(n) == s + )(tokens[0]) + + +def _tgrep_parens_action(_s, _l, tokens): + ''' + Builds a lambda function representing a predicate on a tree node + from a parenthetical notation. + ''' + # print 'parenthetical tokens: ', tokens + assert len(tokens) == 3 + assert tokens[0] == '(' + assert tokens[2] == ')' + return tokens[1] + + +def _tgrep_nltk_tree_pos_action(_s, _l, tokens): + ''' + Builds a lambda function representing a predicate on a tree node + which returns true if the node is located at a specific tree + position. + ''' + # recover the tuple from the parsed sting + node_tree_position = tuple(int(x) for x in tokens if x.isdigit()) + # capture the node's tree position + return ( + lambda i: lambda n, m=None, l=None: ( + hasattr(n, 'treeposition') and n.treeposition() == i + ) + )(node_tree_position) + + +def _tgrep_relation_action(_s, _l, tokens): + ''' + Builds a lambda function representing a predicate on a tree node + depending on its relation to other nodes in the tree. + ''' + # print 'relation tokens: ', tokens + # process negation first if needed + negated = False + if tokens[0] == '!': + negated = True + tokens = tokens[1:] + if tokens[0] == '[': + # process square-bracketed relation expressions + assert len(tokens) == 3 + assert tokens[2] == ']' + retval = tokens[1] + else: + # process operator-node relation expressions + assert len(tokens) == 2 + operator, predicate = tokens + # A < B A is the parent of (immediately dominates) B. + if operator == '<': + retval = lambda n, m=None, l=None: ( + _istree(n) and any(predicate(x, m, l) for x in n) + ) + # A > B A is the child of B. 
+ elif operator == '>': + retval = lambda n, m=None, l=None: ( + hasattr(n, 'parent') + and bool(n.parent()) + and predicate(n.parent(), m, l) + ) + # A <, B Synonymous with A <1 B. + elif operator == '<,' or operator == '<1': + retval = lambda n, m=None, l=None: ( + _istree(n) and bool(list(n)) and predicate(n[0], m, l) + ) + # A >, B Synonymous with A >1 B. + elif operator == '>,' or operator == '>1': + retval = lambda n, m=None, l=None: ( + hasattr(n, 'parent') + and bool(n.parent()) + and (n is n.parent()[0]) + and predicate(n.parent(), m, l) + ) + # A N B A is the Nth child of B (the first child is >1). + elif operator[0] == '>' and operator[1:].isdigit(): + idx = int(operator[1:]) + # capture the index parameter + retval = ( + lambda i: lambda n, m=None, l=None: ( + hasattr(n, 'parent') + and bool(n.parent()) + and 0 <= i < len(n.parent()) + and (n is n.parent()[i]) + and predicate(n.parent(), m, l) + ) + )(idx - 1) + # A <' B B is the last child of A (also synonymous with A <-1 B). + # A <- B B is the last child of A (synonymous with A <-1 B). + elif operator == '<\'' or operator == '<-' or operator == '<-1': + retval = lambda n, m=None, l=None: ( + _istree(n) and bool(list(n)) and predicate(n[-1], m, l) + ) + # A >' B A is the last child of B (also synonymous with A >-1 B). + # A >- B A is the last child of B (synonymous with A >-1 B). + elif operator == '>\'' or operator == '>-' or operator == '>-1': + retval = lambda n, m=None, l=None: ( + hasattr(n, 'parent') + and bool(n.parent()) + and (n is n.parent()[-1]) + and predicate(n.parent(), m, l) + ) + # A <-N B B is the N th-to-last child of A (the last child is <-1). + elif operator[:2] == '<-' and operator[2:].isdigit(): + idx = -int(operator[2:]) + # capture the index parameter + retval = ( + lambda i: lambda n, m=None, l=None: ( + _istree(n) + and bool(list(n)) + and 0 <= (i + len(n)) < len(n) + and predicate(n[i + len(n)], m, l) + ) + )(idx) + # A >-N B A is the N th-to-last child of B (the last child is >-1). + elif operator[:2] == '>-' and operator[2:].isdigit(): + idx = -int(operator[2:]) + # capture the index parameter + retval = ( + lambda i: lambda n, m=None, l=None: ( + hasattr(n, 'parent') + and bool(n.parent()) + and 0 <= (i + len(n.parent())) < len(n.parent()) + and (n is n.parent()[i + len(n.parent())]) + and predicate(n.parent(), m, l) + ) + )(idx) + # A <: B B is the only child of A + elif operator == '<:': + retval = lambda n, m=None, l=None: ( + _istree(n) and len(n) == 1 and predicate(n[0], m, l) + ) + # A >: B A is the only child of B. + elif operator == '>:': + retval = lambda n, m=None, l=None: ( + hasattr(n, 'parent') + and bool(n.parent()) + and len(n.parent()) == 1 + and predicate(n.parent(), m, l) + ) + # A << B A dominates B (A is an ancestor of B). + elif operator == '<<': + retval = lambda n, m=None, l=None: ( + _istree(n) and any(predicate(x, m, l) for x in _descendants(n)) + ) + # A >> B A is dominated by B (A is a descendant of B). + elif operator == '>>': + retval = lambda n, m=None, l=None: any( + predicate(x, m, l) for x in ancestors(n) + ) + # A <<, B B is a left-most descendant of A. + elif operator == '<<,' or operator == '<<1': + retval = lambda n, m=None, l=None: ( + _istree(n) and any(predicate(x, m, l) for x in _leftmost_descendants(n)) + ) + # A >>, B A is a left-most descendant of B. + elif operator == '>>,': + retval = lambda n, m=None, l=None: any( + (predicate(x, m, l) and n in _leftmost_descendants(x)) + for x in ancestors(n) + ) + # A <<' B B is a right-most descendant of A. 
+ elif operator == '<<\'': + retval = lambda n, m=None, l=None: ( + _istree(n) + and any(predicate(x, m, l) for x in _rightmost_descendants(n)) + ) + # A >>' B A is a right-most descendant of B. + elif operator == '>>\'': + retval = lambda n, m=None, l=None: any( + (predicate(x, m, l) and n in _rightmost_descendants(x)) + for x in ancestors(n) + ) + # A <<: B There is a single path of descent from A and B is on it. + elif operator == '<<:': + retval = lambda n, m=None, l=None: ( + _istree(n) and any(predicate(x, m, l) for x in _unique_descendants(n)) + ) + # A >>: B There is a single path of descent from B and A is on it. + elif operator == '>>:': + retval = lambda n, m=None, l=None: any( + predicate(x, m, l) for x in unique_ancestors(n) + ) + # A . B A immediately precedes B. + elif operator == '.': + retval = lambda n, m=None, l=None: any( + predicate(x, m, l) for x in _immediately_after(n) + ) + # A , B A immediately follows B. + elif operator == ',': + retval = lambda n, m=None, l=None: any( + predicate(x, m, l) for x in _immediately_before(n) + ) + # A .. B A precedes B. + elif operator == '..': + retval = lambda n, m=None, l=None: any( + predicate(x, m, l) for x in _after(n) + ) + # A ,, B A follows B. + elif operator == ',,': + retval = lambda n, m=None, l=None: any( + predicate(x, m, l) for x in _before(n) + ) + # A $ B A is a sister of B (and A != B). + elif operator == '$' or operator == '%': + retval = lambda n, m=None, l=None: ( + hasattr(n, 'parent') + and bool(n.parent()) + and any(predicate(x, m, l) for x in n.parent() if x is not n) + ) + # A $. B A is a sister of and immediately precedes B. + elif operator == '$.' or operator == '%.': + retval = lambda n, m=None, l=None: ( + hasattr(n, 'right_sibling') + and bool(n.right_sibling()) + and predicate(n.right_sibling(), m, l) + ) + # A $, B A is a sister of and immediately follows B. + elif operator == '$,' or operator == '%,': + retval = lambda n, m=None, l=None: ( + hasattr(n, 'left_sibling') + and bool(n.left_sibling()) + and predicate(n.left_sibling(), m, l) + ) + # A $.. B A is a sister of and precedes B. + elif operator == '$..' or operator == '%..': + retval = lambda n, m=None, l=None: ( + hasattr(n, 'parent') + and hasattr(n, 'parent_index') + and bool(n.parent()) + and any(predicate(x, m, l) for x in n.parent()[n.parent_index() + 1 :]) + ) + # A $,, B A is a sister of and follows B. + elif operator == '$,,' or operator == '%,,': + retval = lambda n, m=None, l=None: ( + hasattr(n, 'parent') + and hasattr(n, 'parent_index') + and bool(n.parent()) + and any(predicate(x, m, l) for x in n.parent()[: n.parent_index()]) + ) + else: + raise TgrepException( + 'cannot interpret tgrep operator "{0}"'.format(operator) + ) + # now return the built function + if negated: + return (lambda r: (lambda n, m=None, l=None: not r(n, m, l)))(retval) + else: + return retval + + +def _tgrep_conjunction_action(_s, _l, tokens, join_char='&'): + ''' + Builds a lambda function representing a predicate on a tree node + from the conjunction of several other such lambda functions. + + This is prototypically called for expressions like + (`tgrep_rel_conjunction`):: + + < NP & < AP < VP + + where tokens is a list of predicates representing the relations + (`< NP`, `< AP`, and `< VP`), possibly with the character `&` + included (as in the example here). + + This is also called for expressions like (`tgrep_node_expr2`):: + + NP < NN + S=s < /NP/=n : s < /VP/=v : n .. 
v + + tokens[0] is a tgrep_expr predicate; tokens[1:] are an (optional) + list of segmented patterns (`tgrep_expr_labeled`, processed by + `_tgrep_segmented_pattern_action`). + ''' + # filter out the ampersand + tokens = [x for x in tokens if x != join_char] + # print 'relation conjunction tokens: ', tokens + if len(tokens) == 1: + return tokens[0] + else: + return ( + lambda ts: lambda n, m=None, l=None: all( + predicate(n, m, l) for predicate in ts + ) + )(tokens) + + +def _tgrep_segmented_pattern_action(_s, _l, tokens): + ''' + Builds a lambda function representing a segmented pattern. + + Called for expressions like (`tgrep_expr_labeled`):: + + =s .. =v < =n + + This is a segmented pattern, a tgrep2 expression which begins with + a node label. + + The problem is that for segemented_pattern_action (': =v < =s'), + the first element (in this case, =v) is specifically selected by + virtue of matching a particular node in the tree; to retrieve + the node, we need the label, not a lambda function. For node + labels inside a tgrep_node_expr, we need a lambda function which + returns true if the node visited is the same as =v. + + We solve this by creating two copies of a node_label_use in the + grammar; the label use inside a tgrep_expr_labeled has a separate + parse action to the pred use inside a node_expr. See + `_tgrep_node_label_use_action` and + `_tgrep_node_label_pred_use_action`. + ''' + # tokens[0] is a string containing the node label + node_label = tokens[0] + # tokens[1:] is an (optional) list of predicates which must all + # hold of the bound node + reln_preds = tokens[1:] + + def pattern_segment_pred(n, m=None, l=None): + '''This predicate function ignores its node argument.''' + # look up the bound node using its label + if l is None or node_label not in l: + raise TgrepException( + 'node_label ={0} not bound in pattern'.format(node_label) + ) + node = l[node_label] + # match the relation predicates against the node + return all(pred(node, m, l) for pred in reln_preds) + + return pattern_segment_pred + + +def _tgrep_node_label_use_action(_s, _l, tokens): + ''' + Returns the node label used to begin a tgrep_expr_labeled. See + `_tgrep_segmented_pattern_action`. + + Called for expressions like (`tgrep_node_label_use`):: + + =s + + when they appear as the first element of a `tgrep_expr_labeled` + expression (see `_tgrep_segmented_pattern_action`). + + It returns the node label. + ''' + assert len(tokens) == 1 + assert tokens[0].startswith('=') + return tokens[0][1:] + + +def _tgrep_node_label_pred_use_action(_s, _l, tokens): + ''' + Builds a lambda function representing a predicate on a tree node + which describes the use of a previously bound node label. + + Called for expressions like (`tgrep_node_label_use_pred`):: + + =s + + when they appear inside a tgrep_node_expr (for example, inside a + relation). The predicate returns true if and only if its node + argument is identical the the node looked up in the node label + dictionary using the node's label. 
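# Illustrative aside (hedged sketch of the node-label machinery described
# above): `NP=n` binds the matching NP into the label dictionary, and the
# segmented pattern after ':' re-uses that binding via `=n`.
from nltk.tree import ParentedTree
from nltk.tgrep import tgrep_nodes

tree = ParentedTree.fromstring('(S (NP (DT the) (NN dog)) (VP barked))')
list(tgrep_nodes('S < NP=n : =n < DT', [tree]))   # the S node, since its bound NP has a DT child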
+ ''' + assert len(tokens) == 1 + assert tokens[0].startswith('=') + node_label = tokens[0][1:] + + def node_label_use_pred(n, m=None, l=None): + # look up the bound node using its label + if l is None or node_label not in l: + raise TgrepException( + 'node_label ={0} not bound in pattern'.format(node_label) + ) + node = l[node_label] + # truth means the given node is this node + return n is node + + return node_label_use_pred + + +def _tgrep_bind_node_label_action(_s, _l, tokens): + ''' + Builds a lambda function representing a predicate on a tree node + which can optionally bind a matching node into the tgrep2 string's + label_dict. + + Called for expressions like (`tgrep_node_expr2`):: + + /NP/ + @NP=n + ''' + # tokens[0] is a tgrep_node_expr + if len(tokens) == 1: + return tokens[0] + else: + # if present, tokens[1] is the character '=', and tokens[2] is + # a tgrep_node_label, a string value containing the node label + assert len(tokens) == 3 + assert tokens[1] == '=' + node_pred = tokens[0] + node_label = tokens[2] + + def node_label_bind_pred(n, m=None, l=None): + if node_pred(n, m, l): + # bind `n` into the dictionary `l` + if l is None: + raise TgrepException( + 'cannot bind node_label {0}: label_dict is None'.format( + node_label + ) + ) + l[node_label] = n + return True + else: + return False + + return node_label_bind_pred + + +def _tgrep_rel_disjunction_action(_s, _l, tokens): + ''' + Builds a lambda function representing a predicate on a tree node + from the disjunction of several other such lambda functions. + ''' + # filter out the pipe + tokens = [x for x in tokens if x != '|'] + # print 'relation disjunction tokens: ', tokens + if len(tokens) == 1: + return tokens[0] + elif len(tokens) == 2: + return (lambda a, b: lambda n, m=None, l=None: a(n, m, l) or b(n, m, l))( + tokens[0], tokens[1] + ) + + +def _macro_defn_action(_s, _l, tokens): + ''' + Builds a dictionary structure which defines the given macro. + ''' + assert len(tokens) == 3 + assert tokens[0] == '@' + return {tokens[1]: tokens[2]} + + +def _tgrep_exprs_action(_s, _l, tokens): + ''' + This is the top-lebel node in a tgrep2 search string; the + predicate function it returns binds together all the state of a + tgrep2 search string. + + Builds a lambda function representing a predicate on a tree node + from the disjunction of several tgrep expressions. Also handles + macro definitions and macro name binding, and node label + definitions and node label binding. + ''' + if len(tokens) == 1: + return lambda n, m=None, l=None: tokens[0](n, None, {}) + # filter out all the semicolons + tokens = [x for x in tokens if x != ';'] + # collect all macro definitions + macro_dict = {} + macro_defs = [tok for tok in tokens if isinstance(tok, dict)] + for macro_def in macro_defs: + macro_dict.update(macro_def) + # collect all tgrep expressions + tgrep_exprs = [tok for tok in tokens if not isinstance(tok, dict)] + # create a new scope for the node label dictionary + def top_level_pred(n, m=macro_dict, l=None): + label_dict = {} + # bind macro definitions and OR together all tgrep_exprs + return any(predicate(n, m, label_dict) for predicate in tgrep_exprs) + + return top_level_pred + + +def _build_tgrep_parser(set_parse_actions=True): + ''' + Builds a pyparsing-based parser object for tokenizing and + interpreting tgrep search strings. 
+ ''' + tgrep_op = pyparsing.Optional('!') + pyparsing.Regex('[$%,.<>][%,.<>0-9-\':]*') + tgrep_qstring = pyparsing.QuotedString( + quoteChar='"', escChar='\\', unquoteResults=False + ) + tgrep_node_regex = pyparsing.QuotedString( + quoteChar='/', escChar='\\', unquoteResults=False + ) + tgrep_qstring_icase = pyparsing.Regex('i@\\"(?:[^"\\n\\r\\\\]|(?:\\\\.))*\\"') + tgrep_node_regex_icase = pyparsing.Regex('i@\\/(?:[^/\\n\\r\\\\]|(?:\\\\.))*\\/') + tgrep_node_literal = pyparsing.Regex('[^][ \r\t\n;:.,&|<>()$!@%\'^=]+') + tgrep_expr = pyparsing.Forward() + tgrep_relations = pyparsing.Forward() + tgrep_parens = pyparsing.Literal('(') + tgrep_expr + ')' + tgrep_nltk_tree_pos = ( + pyparsing.Literal('N(') + + pyparsing.Optional( + pyparsing.Word(pyparsing.nums) + + ',' + + pyparsing.Optional( + pyparsing.delimitedList(pyparsing.Word(pyparsing.nums), delim=',') + + pyparsing.Optional(',') + ) + ) + + ')' + ) + tgrep_node_label = pyparsing.Regex('[A-Za-z0-9]+') + tgrep_node_label_use = pyparsing.Combine('=' + tgrep_node_label) + # see _tgrep_segmented_pattern_action + tgrep_node_label_use_pred = tgrep_node_label_use.copy() + macro_name = pyparsing.Regex('[^];:.,&|<>()[$!@%\'^=\r\t\n ]+') + macro_name.setWhitespaceChars('') + macro_use = pyparsing.Combine('@' + macro_name) + tgrep_node_expr = ( + tgrep_node_label_use_pred + | macro_use + | tgrep_nltk_tree_pos + | tgrep_qstring_icase + | tgrep_node_regex_icase + | tgrep_qstring + | tgrep_node_regex + | '*' + | tgrep_node_literal + ) + tgrep_node_expr2 = ( + tgrep_node_expr + + pyparsing.Literal('=').setWhitespaceChars('') + + tgrep_node_label.copy().setWhitespaceChars('') + ) | tgrep_node_expr + tgrep_node = tgrep_parens | ( + pyparsing.Optional("'") + + tgrep_node_expr2 + + pyparsing.ZeroOrMore("|" + tgrep_node_expr) + ) + tgrep_brackets = pyparsing.Optional('!') + '[' + tgrep_relations + ']' + tgrep_relation = tgrep_brackets | (tgrep_op + tgrep_node) + tgrep_rel_conjunction = pyparsing.Forward() + tgrep_rel_conjunction << ( + tgrep_relation + + pyparsing.ZeroOrMore(pyparsing.Optional('&') + tgrep_rel_conjunction) + ) + tgrep_relations << tgrep_rel_conjunction + pyparsing.ZeroOrMore( + "|" + tgrep_relations + ) + tgrep_expr << tgrep_node + pyparsing.Optional(tgrep_relations) + tgrep_expr_labeled = tgrep_node_label_use + pyparsing.Optional(tgrep_relations) + tgrep_expr2 = tgrep_expr + pyparsing.ZeroOrMore(':' + tgrep_expr_labeled) + macro_defn = ( + pyparsing.Literal('@') + pyparsing.White().suppress() + macro_name + tgrep_expr2 + ) + tgrep_exprs = ( + pyparsing.Optional(macro_defn + pyparsing.ZeroOrMore(';' + macro_defn) + ';') + + tgrep_expr2 + + pyparsing.ZeroOrMore(';' + (macro_defn | tgrep_expr2)) + + pyparsing.ZeroOrMore(';').suppress() + ) + if set_parse_actions: + tgrep_node_label_use.setParseAction(_tgrep_node_label_use_action) + tgrep_node_label_use_pred.setParseAction(_tgrep_node_label_pred_use_action) + macro_use.setParseAction(_tgrep_macro_use_action) + tgrep_node.setParseAction(_tgrep_node_action) + tgrep_node_expr2.setParseAction(_tgrep_bind_node_label_action) + tgrep_parens.setParseAction(_tgrep_parens_action) + tgrep_nltk_tree_pos.setParseAction(_tgrep_nltk_tree_pos_action) + tgrep_relation.setParseAction(_tgrep_relation_action) + tgrep_rel_conjunction.setParseAction(_tgrep_conjunction_action) + tgrep_relations.setParseAction(_tgrep_rel_disjunction_action) + macro_defn.setParseAction(_macro_defn_action) + # the whole expression is also the conjunction of two + # predicates: the first node predicate, and the remaining + # relation 
predicates + tgrep_expr.setParseAction(_tgrep_conjunction_action) + tgrep_expr_labeled.setParseAction(_tgrep_segmented_pattern_action) + tgrep_expr2.setParseAction( + functools.partial(_tgrep_conjunction_action, join_char=':') + ) + tgrep_exprs.setParseAction(_tgrep_exprs_action) + return tgrep_exprs.ignore('#' + pyparsing.restOfLine) + + +def tgrep_tokenize(tgrep_string): + ''' + Tokenizes a TGrep search string into separate tokens. + ''' + parser = _build_tgrep_parser(False) + if isinstance(tgrep_string, binary_type): + tgrep_string = tgrep_string.decode() + return list(parser.parseString(tgrep_string)) + + +def tgrep_compile(tgrep_string): + ''' + Parses (and tokenizes, if necessary) a TGrep search string into a + lambda function. + ''' + parser = _build_tgrep_parser(True) + if isinstance(tgrep_string, binary_type): + tgrep_string = tgrep_string.decode() + return list(parser.parseString(tgrep_string, parseAll=True))[0] + + +def treepositions_no_leaves(tree): + ''' + Returns all the tree positions in the given tree which are not + leaf nodes. + ''' + treepositions = tree.treepositions() + # leaves are treeposition tuples that are not prefixes of any + # other treeposition + prefixes = set() + for pos in treepositions: + for length in range(len(pos)): + prefixes.add(pos[:length]) + return [pos for pos in treepositions if pos in prefixes] + + +def tgrep_positions(pattern, trees, search_leaves=True): + """ + Return the tree positions in the trees which match the given pattern. + + :param pattern: a tgrep search pattern + :type pattern: str or output of tgrep_compile() + :param trees: a sequence of NLTK trees (usually ParentedTrees) + :type trees: iter(ParentedTree) or iter(Tree) + :param search_leaves: whether ot return matching leaf nodes + :type search_leaves: bool + :rtype: iter(tree positions) + """ + + if isinstance(pattern, (binary_type, text_type)): + pattern = tgrep_compile(pattern) + + for tree in trees: + try: + if search_leaves: + positions = tree.treepositions() + else: + positions = treepositions_no_leaves(tree) + yield [position for position in positions if pattern(tree[position])] + except AttributeError: + yield [] + + +def tgrep_nodes(pattern, trees, search_leaves=True): + """ + Return the tree nodes in the trees which match the given pattern. 
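# Illustrative aside (usage sketch of the two generators defined here): each
# yields one result list per input tree, so results stay aligned with the
# tree sequence.
from nltk.tree import ParentedTree
from nltk.tgrep import tgrep_positions, tgrep_nodes

trees = [ParentedTree.fromstring('(S (NP (NN dog)) (VP barked))'),
         ParentedTree.fromstring('(S (NP (DT a) (NN cat)) (VP slept))')]
list(tgrep_positions('NN', trees))   # -> [[(0, 0)], [(0, 1)]]
list(tgrep_nodes('NN', trees))       # -> [[ParentedTree('NN', ['dog'])], [ParentedTree('NN', ['cat'])]]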
+ + :param pattern: a tgrep search pattern + :type pattern: str or output of tgrep_compile() + :param trees: a sequence of NLTK trees (usually ParentedTrees) + :type trees: iter(ParentedTree) or iter(Tree) + :param search_leaves: whether ot return matching leaf nodes + :type search_leaves: bool + :rtype: iter(tree nodes) + """ + + if isinstance(pattern, (binary_type, text_type)): + pattern = tgrep_compile(pattern) + + for tree in trees: + try: + if search_leaves: + positions = tree.treepositions() + else: + positions = treepositions_no_leaves(tree) + yield [tree[position] for position in positions if pattern(tree[position])] + except AttributeError: + yield [] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__init__.py new file mode 100644 index 0000000..0bf5d67 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__init__.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Tokenizers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird (minor additions) +# Contributors: matthewmc, clouds56 +# URL: +# For license information, see LICENSE.TXT + +r""" +NLTK Tokenizer Package + +Tokenizers divide strings into lists of substrings. For example, +tokenizers can be used to find the words and punctuation in a string: + + >>> from nltk.tokenize import word_tokenize + >>> s = '''Good muffins cost $3.88\nin New York. Please buy me + ... two of them.\n\nThanks.''' + >>> word_tokenize(s) + ['Good', 'muffins', 'cost', '$', '3.88', 'in', 'New', 'York', '.', + 'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.'] + +This particular tokenizer requires the Punkt sentence tokenization +models to be installed. NLTK also provides a simpler, +regular-expression based tokenizer, which splits text on whitespace +and punctuation: + + >>> from nltk.tokenize import wordpunct_tokenize + >>> wordpunct_tokenize(s) + ['Good', 'muffins', 'cost', '$', '3', '.', '88', 'in', 'New', 'York', '.', + 'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.'] + +We can also operate at the level of sentences, using the sentence +tokenizer directly as follows: + + >>> from nltk.tokenize import sent_tokenize, word_tokenize + >>> sent_tokenize(s) + ['Good muffins cost $3.88\nin New York.', 'Please buy me\ntwo of them.', 'Thanks.'] + >>> [word_tokenize(t) for t in sent_tokenize(s)] + [['Good', 'muffins', 'cost', '$', '3.88', 'in', 'New', 'York', '.'], + ['Please', 'buy', 'me', 'two', 'of', 'them', '.'], ['Thanks', '.']] + +Caution: when tokenizing a Unicode string, make sure you are not +using an encoded version of the string (it may be necessary to +decode it first, e.g. with ``s.decode("utf8")``. + +NLTK tokenizers can produce token-spans, represented as tuples of integers +having the same semantics as string slices, to support efficient comparison +of tokenizers. (These methods are implemented as generators.) + + >>> from nltk.tokenize import WhitespaceTokenizer + >>> list(WhitespaceTokenizer().span_tokenize(s)) + [(0, 4), (5, 12), (13, 17), (18, 23), (24, 26), (27, 30), (31, 36), (38, 44), + (45, 48), (49, 51), (52, 55), (56, 58), (59, 64), (66, 73)] + +There are numerous ways to tokenize text. If you need more control over +tokenization, see the other methods provided in this package. + +For further information, please see Chapter 3 of the NLTK book. 
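# Illustrative aside (not part of the package __init__ above): the spans
# produced by span_tokenize() follow string-slice semantics, so each pair of
# offsets recovers the original substring.
from nltk.tokenize import WhitespaceTokenizer

s = "Good muffins cost $3.88"
spans = list(WhitespaceTokenizer().span_tokenize(s))
[s[start:end] for start, end in spans]   # -> ['Good', 'muffins', 'cost', '$3.88']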
+""" + +import re + +from nltk.data import load +from nltk.tokenize.casual import TweetTokenizer, casual_tokenize +from nltk.tokenize.mwe import MWETokenizer +from nltk.tokenize.punkt import PunktSentenceTokenizer +from nltk.tokenize.regexp import ( + RegexpTokenizer, + WhitespaceTokenizer, + BlanklineTokenizer, + WordPunctTokenizer, + wordpunct_tokenize, + regexp_tokenize, + blankline_tokenize, +) +from nltk.tokenize.repp import ReppTokenizer +from nltk.tokenize.sexpr import SExprTokenizer, sexpr_tokenize +from nltk.tokenize.simple import ( + SpaceTokenizer, + TabTokenizer, + LineTokenizer, + line_tokenize, +) +from nltk.tokenize.texttiling import TextTilingTokenizer +from nltk.tokenize.toktok import ToktokTokenizer +from nltk.tokenize.treebank import TreebankWordTokenizer +from nltk.tokenize.util import string_span_tokenize, regexp_span_tokenize +from nltk.tokenize.stanford_segmenter import StanfordSegmenter +from nltk.tokenize.sonority_sequencing import SyllableTokenizer + + +# Standard sentence tokenizer. +def sent_tokenize(text, language='english'): + """ + Return a sentence-tokenized copy of *text*, + using NLTK's recommended sentence tokenizer + (currently :class:`.PunktSentenceTokenizer` + for the specified language). + + :param text: text to split into sentences + :param language: the model name in the Punkt corpus + """ + tokenizer = load('tokenizers/punkt/{0}.pickle'.format(language)) + return tokenizer.tokenize(text) + + +# Standard word tokenizer. +_treebank_word_tokenizer = TreebankWordTokenizer() + +# See discussion on https://github.com/nltk/nltk/pull/1437 +# Adding to TreebankWordTokenizer, nltk.word_tokenize now splits on +# - chervon quotes u'\xab' and u'\xbb' . +# - unicode quotes u'\u2018', u'\u2019', u'\u201c' and u'\u201d' +# See https://github.com/nltk/nltk/issues/1995#issuecomment-376741608 +# Also, behavior of splitting on clitics now follows Stanford CoreNLP +# - clitics covered (?!re|ve|ll|m|t|s|d)(\w)\b +improved_open_quote_regex = re.compile(u'([«“‘„]|[`]+)', re.U) +improved_open_single_quote_regex = re.compile(r"(?i)(\')(?!re|ve|ll|m|t|s|d)(\w)\b", re.U) +improved_close_quote_regex = re.compile(u'([»”’])', re.U) +improved_punct_regex = re.compile(r'([^\.])(\.)([\]\)}>"\'' u'»”’ ' r']*)\s*$', re.U) +_treebank_word_tokenizer.STARTING_QUOTES.insert(0, (improved_open_quote_regex, r' \1 ')) +_treebank_word_tokenizer.STARTING_QUOTES.append((improved_open_single_quote_regex, r'\1 \2')) +_treebank_word_tokenizer.ENDING_QUOTES.insert(0, (improved_close_quote_regex, r' \1 ')) +_treebank_word_tokenizer.PUNCTUATION.insert(0, (improved_punct_regex, r'\1 \2 \3 ')) + + +def word_tokenize(text, language='english', preserve_line=False): + """ + Return a tokenized copy of *text*, + using NLTK's recommended word tokenizer + (currently an improved :class:`.TreebankWordTokenizer` + along with :class:`.PunktSentenceTokenizer` + for the specified language). + + :param text: text to split into words + :type text: str + :param language: the model name in the Punkt corpus + :type language: str + :param preserve_line: An option to keep the preserve the sentence and not sentence tokenize it. 
+ :type preserve_line: bool + """ + sentences = [text] if preserve_line else sent_tokenize(text, language) + return [ + token for sent in sentences for token in _treebank_word_tokenizer.tokenize(sent) + ] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..76ce994 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/api.cpython-37.pyc new file mode 100644 index 0000000..f0b6ab3 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/api.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/casual.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/casual.cpython-37.pyc new file mode 100644 index 0000000..0177527 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/casual.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/mwe.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/mwe.cpython-37.pyc new file mode 100644 index 0000000..378e6bf Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/mwe.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/nist.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/nist.cpython-37.pyc new file mode 100644 index 0000000..8473148 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/nist.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/punkt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/punkt.cpython-37.pyc new file mode 100644 index 0000000..df5ddfc Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/punkt.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/regexp.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/regexp.cpython-37.pyc new file mode 100644 index 0000000..11bbfc5 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/regexp.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/repp.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/repp.cpython-37.pyc new file mode 100644 index 0000000..863e53d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/repp.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/sexpr.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/sexpr.cpython-37.pyc new file mode 100644 index 0000000..837cd30 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/sexpr.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/simple.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/simple.cpython-37.pyc new file mode 100644 index 0000000..113aad7 
Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/simple.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/sonority_sequencing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/sonority_sequencing.cpython-37.pyc new file mode 100644 index 0000000..ec24fc1 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/sonority_sequencing.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/stanford.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/stanford.cpython-37.pyc new file mode 100644 index 0000000..967d4ac Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/stanford.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/stanford_segmenter.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/stanford_segmenter.cpython-37.pyc new file mode 100644 index 0000000..ac01fd0 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/stanford_segmenter.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/texttiling.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/texttiling.cpython-37.pyc new file mode 100644 index 0000000..c0ae9c4 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/texttiling.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/toktok.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/toktok.cpython-37.pyc new file mode 100644 index 0000000..d0d5cef Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/toktok.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/treebank.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/treebank.cpython-37.pyc new file mode 100644 index 0000000..0689d4c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/treebank.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000..98efb10 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/__pycache__/util.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/api.py b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/api.py new file mode 100644 index 0000000..476db21 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/api.py @@ -0,0 +1,78 @@ +# Natural Language Toolkit: Tokenizer Interface +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird +# URL: +# For license information, see LICENSE.TXT + +""" +Tokenizer Interface +""" + +from abc import ABCMeta, abstractmethod +from six import add_metaclass + +from nltk.internals import overridden +from nltk.tokenize.util import string_span_tokenize + + +@add_metaclass(ABCMeta) +class TokenizerI(object): + """ + A processing interface for tokenizing a string. + Subclasses must define ``tokenize()`` or ``tokenize_sents()`` (or both). 
+ """ + + @abstractmethod + def tokenize(self, s): + """ + Return a tokenized copy of *s*. + + :rtype: list of str + """ + if overridden(self.tokenize_sents): + return self.tokenize_sents([s])[0] + + def span_tokenize(self, s): + """ + Identify the tokens using integer offsets ``(start_i, end_i)``, + where ``s[start_i:end_i]`` is the corresponding token. + + :rtype: iter(tuple(int, int)) + """ + raise NotImplementedError() + + def tokenize_sents(self, strings): + """ + Apply ``self.tokenize()`` to each element of ``strings``. I.e.: + + return [self.tokenize(s) for s in strings] + + :rtype: list(list(str)) + """ + return [self.tokenize(s) for s in strings] + + def span_tokenize_sents(self, strings): + """ + Apply ``self.span_tokenize()`` to each element of ``strings``. I.e.: + + return [self.span_tokenize(s) for s in strings] + + :rtype: iter(list(tuple(int, int))) + """ + for s in strings: + yield list(self.span_tokenize(s)) + + +class StringTokenizer(TokenizerI): + """A tokenizer that divides a string into substrings by splitting + on the specified string (defined in subclasses). + """ + + def tokenize(self, s): + return s.split(self._string) + + def span_tokenize(self, s): + for span in string_span_tokenize(s, self._string): + yield span diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/casual.py b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/casual.py new file mode 100644 index 0000000..fc288d5 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/casual.py @@ -0,0 +1,347 @@ +# coding: utf-8 +# +# Natural Language Toolkit: Twitter Tokenizer +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Christopher Potts +# Ewan Klein (modifications) +# Pierpaolo Pantone <> (modifications) +# URL: +# For license information, see LICENSE.TXT +# + + +""" +Twitter-aware tokenizer, designed to be flexible and easy to adapt to new +domains and tasks. The basic logic is this: + +1. The tuple regex_strings defines a list of regular expression + strings. + +2. The regex_strings strings are put, in order, into a compiled + regular expression object called word_re. + +3. The tokenization is done by word_re.findall(s), where s is the + user-supplied string, inside the tokenize() method of the class + Tokenizer. + +4. When instantiating Tokenizer objects, there is a single option: + preserve_case. By default, it is set to True. If it is set to + False, then the tokenizer will downcase everything except for + emoticons. + +""" + + +###################################################################### + +from __future__ import unicode_literals +import re + +from six import int2byte, unichr +from six.moves import html_entities + +###################################################################### +# The following strings are components in the regular expression +# that is used for tokenizing. It's important that phone_number +# appears first in the final regex (since it can contain whitespace). +# It also could matter that tags comes after emoticons, due to the +# possibility of having text like +# +# <:| and some text >:) +# +# Most importantly, the final element should always be last, since it +# does a last ditch whitespace-based tokenization of whatever is left. + +# ToDo: Update with http://en.wikipedia.org/wiki/List_of_emoticons ? + +# This particular element is used in a couple ways, so we define it +# with a name: +EMOTICONS = r""" + (?: + [<>]? + [:;=8] # eyes + [\-o\*\']? 
# optional nose + [\)\]\(\[dDpP/\:\}\{@\|\\] # mouth + | + [\)\]\(\[dDpP/\:\}\{@\|\\] # mouth + [\-o\*\']? # optional nose + [:;=8] # eyes + [<>]? + | + <3 # heart + )""" + +# URL pattern due to John Gruber, modified by Tom Winzig. See +# https://gist.github.com/winzig/8894715 + +URLS = r""" # Capture 1: entire matched URL + (?: + https?: # URL protocol and colon + (?: + /{1,3} # 1-3 slashes + | # or + [a-z0-9%] # Single letter or digit or '%' + # (Trying not to match e.g. "URI::Escape") + ) + | # or + # looks like domain name followed by a slash: + [a-z0-9.\-]+[.] + (?:[a-z]{2,13}) + / + ) + (?: # One or more: + [^\s()<>{}\[\]]+ # Run of non-space, non-()<>{}[] + | # or + \([^\s()]*?\([^\s()]+\)[^\s()]*?\) # balanced parens, one level deep: (...(...)...) + | + \([^\s]+?\) # balanced parens, non-recursive: (...) + )+ + (?: # End with: + \([^\s()]*?\([^\s()]+\)[^\s()]*?\) # balanced parens, one level deep: (...(...)...) + | + \([^\s]+?\) # balanced parens, non-recursive: (...) + | # or + [^\s`!()\[\]{};:'".,<>?«»“”‘’] # not a space or one of these punct chars + ) + | # OR, the following to match naked domains: + (?: + (?\s]+>""", + # ASCII Arrows + r"""[\-]+>|<[\-]+""", + # Twitter username: + r"""(?:@[\w_]+)""", + # Twitter hashtags: + r"""(?:\#+[\w_]+[\w\'_\-]*[\w_]+)""", + # email addresses + r"""[\w.+-]+@[\w-]+\.(?:[\w-]\.?)+[\w-]""", + # Remaining word types: + r""" + (?:[^\W\d_](?:[^\W\d_]|['\-_])+[^\W\d_]) # Words with apostrophes or dashes. + | + (?:[+\-]?\d+[,/.:-]\d+[+\-]?) # Numbers, including fractions, decimals. + | + (?:[\w_]+) # Words without apostrophes or dashes. + | + (?:\.(?:\s*\.){1,}) # Ellipsis dots. + | + (?:\S) # Everything else that isn't whitespace. + """, +) + +###################################################################### +# This is the core tokenizing regex: + +WORD_RE = re.compile(r"""(%s)""" % "|".join(REGEXPS), re.VERBOSE | re.I | re.UNICODE) + +# WORD_RE performs poorly on these patterns: +HANG_RE = re.compile(r'([^a-zA-Z0-9])\1{3,}') + +# The emoticon string gets its own regex so that we can preserve case for +# them as needed: +EMOTICON_RE = re.compile(EMOTICONS, re.VERBOSE | re.I | re.UNICODE) + +# These are for regularizing HTML entities to Unicode: +ENT_RE = re.compile(r'&(#?(x?))([^&;\s]+);') + + +###################################################################### +# Functions for converting html entities +###################################################################### + + +def _str_to_unicode(text, encoding=None, errors='strict'): + if encoding is None: + encoding = 'utf-8' + if isinstance(text, bytes): + return text.decode(encoding, errors) + return text + + +def _replace_html_entities(text, keep=(), remove_illegal=True, encoding='utf-8'): + """ + Remove entities from text by converting them to their + corresponding unicode character. + + :param text: a unicode string or a byte string encoded in the given + `encoding` (which defaults to 'utf-8'). + + :param list keep: list of entity names which should not be replaced.\ + This supports both numeric entities (``&#nnnn;`` and ``&#hhhh;``) + and named entities (such as `` `` or ``>``). + + :param bool remove_illegal: If `True`, entities that can't be converted are\ + removed. Otherwise, entities that can't be converted are kept "as + is". + + :returns: A unicode string with the entities removed. 
+ + See https://github.com/scrapy/w3lib/blob/master/w3lib/html.py + + >>> from nltk.tokenize.casual import _replace_html_entities + >>> _replace_html_entities(b'Price: £100') + 'Price: \\xa3100' + >>> print(_replace_html_entities(b'Price: £100')) + Price: £100 + >>> + """ + + def _convert_entity(match): + entity_body = match.group(3) + if match.group(1): + try: + if match.group(2): + number = int(entity_body, 16) + else: + number = int(entity_body, 10) + # Numeric character references in the 80-9F range are typically + # interpreted by browsers as representing the characters mapped + # to bytes 80-9F in the Windows-1252 encoding. For more info + # see: https://en.wikipedia.org/wiki/ISO/IEC_8859-1#Similar_character_sets + if 0x80 <= number <= 0x9F: + return int2byte(number).decode('cp1252') + except ValueError: + number = None + else: + if entity_body in keep: + return match.group(0) + else: + number = html_entities.name2codepoint.get(entity_body) + if number is not None: + try: + return unichr(number) + except ValueError: + pass + + return "" if remove_illegal else match.group(0) + + return ENT_RE.sub(_convert_entity, _str_to_unicode(text, encoding)) + + +###################################################################### + + +class TweetTokenizer: + r""" + Tokenizer for tweets. + + >>> from nltk.tokenize import TweetTokenizer + >>> tknzr = TweetTokenizer() + >>> s0 = "This is a cooool #dummysmiley: :-) :-P <3 and some arrows < > -> <--" + >>> tknzr.tokenize(s0) + ['This', 'is', 'a', 'cooool', '#dummysmiley', ':', ':-)', ':-P', '<3', 'and', 'some', 'arrows', '<', '>', '->', '<--'] + + Examples using `strip_handles` and `reduce_len parameters`: + + >>> tknzr = TweetTokenizer(strip_handles=True, reduce_len=True) + >>> s1 = '@remy: This is waaaaayyyy too much for you!!!!!!' + >>> tknzr.tokenize(s1) + [':', 'This', 'is', 'waaayyy', 'too', 'much', 'for', 'you', '!', '!', '!'] + """ + + def __init__(self, preserve_case=True, reduce_len=False, strip_handles=False): + self.preserve_case = preserve_case + self.reduce_len = reduce_len + self.strip_handles = strip_handles + + def tokenize(self, text): + """ + :param text: str + :rtype: list(str) + :return: a tokenized list of strings; concatenating this list returns\ + the original string if `preserve_case=False` + """ + # Fix HTML character entities: + text = _replace_html_entities(text) + # Remove username handles + if self.strip_handles: + text = remove_handles(text) + # Normalize word lengthening + if self.reduce_len: + text = reduce_lengthening(text) + # Shorten problematic sequences of characters + safe_text = HANG_RE.sub(r'\1\1\1', text) + # Tokenize: + words = WORD_RE.findall(safe_text) + # Possibly alter the case, but avoid changing emoticons like :D into :d: + if not self.preserve_case: + words = list( + map((lambda x: x if EMOTICON_RE.search(x) else x.lower()), words) + ) + return words + + +###################################################################### +# Normalization Functions +###################################################################### + + +def reduce_lengthening(text): + """ + Replace repeated character sequences of length 3 or greater with sequences + of length 3. + """ + pattern = re.compile(r"(.)\1{2,}") + return pattern.sub(r"\1\1\1", text) + + +def remove_handles(text): + """ + Remove Twitter username handles from text. + """ + pattern = re.compile( + r"(? 
+# URL: +# For license information, see LICENSE.TXT + +""" +Multi-Word Expression Tokenizer + +A ``MWETokenizer`` takes a string which has already been divided into tokens and +retokenizes it, merging multi-word expressions into single tokens, using a lexicon +of MWEs: + + + >>> from nltk.tokenize import MWETokenizer + + >>> tokenizer = MWETokenizer([('a', 'little'), ('a', 'little', 'bit'), ('a', 'lot')]) + >>> tokenizer.add_mwe(('in', 'spite', 'of')) + + >>> tokenizer.tokenize('Testing testing testing one two three'.split()) + ['Testing', 'testing', 'testing', 'one', 'two', 'three'] + + >>> tokenizer.tokenize('This is a test in spite'.split()) + ['This', 'is', 'a', 'test', 'in', 'spite'] + + >>> tokenizer.tokenize('In a little or a little bit or a lot in spite of'.split()) + ['In', 'a_little', 'or', 'a_little_bit', 'or', 'a_lot', 'in_spite_of'] + +""" +from nltk.util import Trie + +from nltk.tokenize.api import TokenizerI + + +class MWETokenizer(TokenizerI): + """A tokenizer that processes tokenized text and merges multi-word expressions + into single tokens. + """ + + def __init__(self, mwes=None, separator='_'): + """Initialize the multi-word tokenizer with a list of expressions and a + separator + + :type mwes: list(list(str)) + :param mwes: A sequence of multi-word expressions to be merged, where + each MWE is a sequence of strings. + :type separator: str + :param separator: String that should be inserted between words in a multi-word + expression token. (Default is '_') + + """ + if not mwes: + mwes = [] + self._mwes = Trie(mwes) + self._separator = separator + + def add_mwe(self, mwe): + """Add a multi-word expression to the lexicon (stored as a word trie) + + We use ``util.Trie`` to represent the trie. Its form is a dict of dicts. + The key True marks the end of a valid MWE. + + :param mwe: The multi-word expression we're adding into the word trie + :type mwe: tuple(str) or list(str) + + :Example: + + >>> tokenizer = MWETokenizer() + >>> tokenizer.add_mwe(('a', 'b')) + >>> tokenizer.add_mwe(('a', 'b', 'c')) + >>> tokenizer.add_mwe(('a', 'x')) + >>> expected = {'a': {'x': {True: None}, 'b': {True: None, 'c': {True: None}}}} + >>> tokenizer._mwes == expected + True + + """ + self._mwes.insert(mwe) + + def tokenize(self, text): + """ + + :param text: A list containing tokenized text + :type text: list(str) + :return: A list of the tokenized text with multi-words merged together + :rtype: list(str) + + :Example: + + >>> tokenizer = MWETokenizer([('hors', "d'oeuvre")], separator='+') + >>> tokenizer.tokenize("An hors d'oeuvre tonight, sir?".split()) + ['An', "hors+d'oeuvre", 'tonight,', 'sir?'] + + """ + i = 0 + n = len(text) + result = [] + + while i < n: + if text[i] in self._mwes: + # possible MWE match + j = i + trie = self._mwes + while j < n and text[j] in trie: + trie = trie[text[j]] + j = j + 1 + else: + if Trie.LEAF in trie: + # success! + result.append(self._separator.join(text[i:j])) + i = j + else: + # no match, so backtrack + result.append(text[i]) + i += 1 + else: + result.append(text[i]) + i += 1 + + return result diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/nist.py b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/nist.py new file mode 100644 index 0000000..28d7e08 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/nist.py @@ -0,0 +1,183 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Python port of the mteval-v14.pl tokenizer. 
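# A minimal usage sketch for the MWETokenizer defined above. It expects text
# that is already split into tokens, so nltk.tokenize.word_tokenize (which
# needs the 'punkt' model to be downloaded) is a natural first stage; the
# sentence and expressions below are made-up example data.
from nltk.tokenize import MWETokenizer, word_tokenize

mwe_tokenizer = MWETokenizer([('New', 'York')], separator='_')
mwe_tokenizer.add_mwe(('in', 'spite', 'of'))

tokens = word_tokenize("She moved to New York in spite of the cost.")
print(mwe_tokenizer.tokenize(tokens))
# roughly: ['She', 'moved', 'to', 'New_York', 'in_spite_of', 'the', 'cost', '.']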
+# +# Copyright (C) 2001-2015 NLTK Project +# Author: Liling Tan (ported from ftp://jaguar.ncsl.nist.gov/mt/resources/mteval-v14.pl) +# Contributors: Ozan Caglayan, Wiktor Stribizew +# +# URL: +# For license information, see LICENSE.TXT + +""" +This is a NLTK port of the tokenizer used in the NIST BLEU evaluation script, +https://github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/mteval-v14.pl#L926 +which was also ported into Python in +https://github.com/lium-lst/nmtpy/blob/master/nmtpy/metrics/mtevalbleu.py#L162 +""" + +from __future__ import unicode_literals + +import io +import re +from six import text_type + +from nltk.corpus import perluniprops +from nltk.tokenize.api import TokenizerI +from nltk.tokenize.util import xml_unescape + + +class NISTTokenizer(TokenizerI): + """ + This NIST tokenizer is sentence-based instead of the original + paragraph-based tokenization from mteval-14.pl; The sentence-based + tokenization is consistent with the other tokenizers available in NLTK. + + >>> from six import text_type + >>> from nltk.tokenize.nist import NISTTokenizer + >>> nist = NISTTokenizer() + >>> s = "Good muffins cost $3.88 in New York." + >>> expected_lower = [u'good', u'muffins', u'cost', u'$', u'3.88', u'in', u'new', u'york', u'.'] + >>> expected_cased = [u'Good', u'muffins', u'cost', u'$', u'3.88', u'in', u'New', u'York', u'.'] + >>> nist.tokenize(s, lowercase=False) == expected_cased + True + >>> nist.tokenize(s, lowercase=True) == expected_lower # Lowercased. + True + + The international_tokenize() is the preferred function when tokenizing + non-european text, e.g. + + >>> from nltk.tokenize.nist import NISTTokenizer + >>> nist = NISTTokenizer() + + # Input strings. + >>> albb = u'Alibaba Group Holding Limited (Chinese: 阿里巴巴集团控股 有限公司) us a Chinese e-commerce company...' + >>> amz = u'Amazon.com, Inc. (/ˈæməzɒn/) is an American electronic commerce...' + >>> rkt = u'Rakuten, Inc. (楽天株式会社 Rakuten Kabushiki-gaisha) is a Japanese electronic commerce and Internet company based in Tokyo.' + + # Expected tokens. + >>> expected_albb = [u'Alibaba', u'Group', u'Holding', u'Limited', u'(', u'Chinese', u':', u'\u963f\u91cc\u5df4\u5df4\u96c6\u56e2\u63a7\u80a1', u'\u6709\u9650\u516c\u53f8', u')'] + >>> expected_amz = [u'Amazon', u'.', u'com', u',', u'Inc', u'.', u'(', u'/', u'\u02c8\xe6', u'm'] + >>> expected_rkt = [u'Rakuten', u',', u'Inc', u'.', u'(', u'\u697d\u5929\u682a\u5f0f\u4f1a\u793e', u'Rakuten', u'Kabushiki', u'-', u'gaisha'] + + >>> nist.international_tokenize(albb)[:10] == expected_albb + True + >>> nist.international_tokenize(amz)[:10] == expected_amz + True + >>> nist.international_tokenize(rkt)[:10] == expected_rkt + True + + # Doctest for patching issue #1926 + >>> sent = u'this is a foo\u2604sentence.' + >>> expected_sent = [u'this', u'is', u'a', u'foo', u'\u2604', u'sentence', u'.'] + >>> nist.international_tokenize(sent) == expected_sent + True + """ + + # Strip "skipped" tags + STRIP_SKIP = re.compile(''), '' + # Strip end-of-line hyphenation and join lines + STRIP_EOL_HYPHEN = re.compile(u'\u2028'), ' ' + # Tokenize punctuation. + PUNCT = re.compile('([\{-\~\[-\` -\&\(-\+\:-\@\/])'), ' \\1 ' + # Tokenize period and comma unless preceded by a digit. + PERIOD_COMMA_PRECEED = re.compile('([^0-9])([\.,])'), '\\1 \\2 ' + # Tokenize period and comma unless followed by a digit. 
+ PERIOD_COMMA_FOLLOW = re.compile('([\.,])([^0-9])'), ' \\1 \\2' + # Tokenize dash when preceded by a digit + DASH_PRECEED_DIGIT = re.compile('([0-9])(-)'), '\\1 \\2 ' + + LANG_DEPENDENT_REGEXES = [ + PUNCT, + PERIOD_COMMA_PRECEED, + PERIOD_COMMA_FOLLOW, + DASH_PRECEED_DIGIT, + ] + + # Perluniprops characters used in NIST tokenizer. + pup_number = text_type(''.join(set(perluniprops.chars('Number')))) # i.e. \p{N} + pup_punct = text_type(''.join(set(perluniprops.chars('Punctuation')))) # i.e. \p{P} + pup_symbol = text_type(''.join(set(perluniprops.chars('Symbol')))) # i.e. \p{S} + + # Python regexes needs to escape some special symbols, see + # see https://stackoverflow.com/q/45670950/610569 + number_regex = re.sub(r'[]^\\-]', r'\\\g<0>', pup_number) + punct_regex = re.sub(r'[]^\\-]', r'\\\g<0>', pup_punct) + symbol_regex = re.sub(r'[]^\\-]', r'\\\g<0>', pup_symbol) + + # Note: In the original perl implementation, \p{Z} and \p{Zl} were used to + # (i) strip trailing and heading spaces and + # (ii) de-deuplicate spaces. + # In Python, this would do: ' '.join(str.strip().split()) + # Thus, the next two lines were commented out. + # Line_Separator = text_type(''.join(perluniprops.chars('Line_Separator'))) # i.e. \p{Zl} + # Separator = text_type(''.join(perluniprops.chars('Separator'))) # i.e. \p{Z} + + # Pads non-ascii strings with space. + NONASCII = re.compile('([\x00-\x7f]+)'), r' \1 ' + # Tokenize any punctuation unless followed AND preceded by a digit. + PUNCT_1 = ( + re.compile(u"([{n}])([{p}])".format(n=number_regex, p=punct_regex)), + '\\1 \\2 ', + ) + PUNCT_2 = ( + re.compile(u"([{p}])([{n}])".format(n=number_regex, p=punct_regex)), + ' \\1 \\2', + ) + # Tokenize symbols + SYMBOLS = re.compile(u"([{s}])".format(s=symbol_regex)), ' \\1 ' + + INTERNATIONAL_REGEXES = [NONASCII, PUNCT_1, PUNCT_2, SYMBOLS] + + def lang_independent_sub(self, text): + """Performs the language independent string substituitions. """ + # It's a strange order of regexes. + # It'll be better to unescape after STRIP_EOL_HYPHEN + # but let's keep it close to the original NIST implementation. + regexp, substitution = self.STRIP_SKIP + text = regexp.sub(substitution, text) + text = xml_unescape(text) + regexp, substitution = self.STRIP_EOL_HYPHEN + text = regexp.sub(substitution, text) + return text + + def tokenize(self, text, lowercase=False, western_lang=True, return_str=False): + text = text_type(text) + # Language independent regex. + text = self.lang_independent_sub(text) + # Language dependent regex. + if western_lang: + # Pad string with whitespace. + text = ' ' + text + ' ' + if lowercase: + text = text.lower() + for regexp, substitution in self.LANG_DEPENDENT_REGEXES: + text = regexp.sub(substitution, text) + # Remove contiguous whitespaces. + text = ' '.join(text.split()) + # Finally, strips heading and trailing spaces + # and converts output string into unicode. + text = text_type(text.strip()) + return text if return_str else text.split() + + def international_tokenize( + self, text, lowercase=False, split_non_ascii=True, return_str=False + ): + text = text_type(text) + # Different from the 'normal' tokenize(), STRIP_EOL_HYPHEN is applied + # first before unescaping. 
+ regexp, substitution = self.STRIP_SKIP + text = regexp.sub(substitution, text) + regexp, substitution = self.STRIP_EOL_HYPHEN + text = regexp.sub(substitution, text) + text = xml_unescape(text) + + if lowercase: + text = text.lower() + + for regexp, substitution in self.INTERNATIONAL_REGEXES: + text = regexp.sub(substitution, text) + + # Make sure that there's only one space only between words. + # Strip leading and trailing spaces. + text = ' '.join(text.strip().split()) + return text if return_str else text.split() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/punkt.py b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/punkt.py new file mode 100644 index 0000000..3c1cabe --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/punkt.py @@ -0,0 +1,1666 @@ +# Natural Language Toolkit: Punkt sentence tokenizer +# +# Copyright (C) 2001-2019 NLTK Project +# Algorithm: Kiss & Strunk (2006) +# Author: Willy (original Python port) +# Steven Bird (additions) +# Edward Loper (rewrite) +# Joel Nothman (almost rewrite) +# Arthur Darcet (fixes) +# URL: +# For license information, see LICENSE.TXT + +r""" +Punkt Sentence Tokenizer + +This tokenizer divides a text into a list of sentences +by using an unsupervised algorithm to build a model for abbreviation +words, collocations, and words that start sentences. It must be +trained on a large collection of plaintext in the target language +before it can be used. + +The NLTK data package includes a pre-trained Punkt tokenizer for +English. + + >>> import nltk.data + >>> text = ''' + ... Punkt knows that the periods in Mr. Smith and Johann S. Bach + ... do not mark sentence boundaries. And sometimes sentences + ... can start with non-capitalized words. i is a good variable + ... name. + ... ''' + >>> sent_detector = nltk.data.load('tokenizers/punkt/english.pickle') + >>> print('\n-----\n'.join(sent_detector.tokenize(text.strip()))) + Punkt knows that the periods in Mr. Smith and Johann S. Bach + do not mark sentence boundaries. + ----- + And sometimes sentences + can start with non-capitalized words. + ----- + i is a good variable + name. + +(Note that whitespace from the original text, including newlines, is +retained in the output.) + +Punctuation following sentences is also included by default +(from NLTK 3.0 onwards). It can be excluded with the realign_boundaries +flag. + + >>> text = ''' + ... (How does it deal with this parenthesis?) "It should be part of the + ... previous sentence." "(And the same with this one.)" ('And this one!') + ... "('(And (this)) '?)" [(and this. )] + ... ''' + >>> print('\n-----\n'.join( + ... sent_detector.tokenize(text.strip()))) + (How does it deal with this parenthesis?) + ----- + "It should be part of the + previous sentence." + ----- + "(And the same with this one.)" + ----- + ('And this one!') + ----- + "('(And (this)) '?)" + ----- + [(and this. )] + >>> print('\n-----\n'.join( + ... sent_detector.tokenize(text.strip(), realign_boundaries=False))) + (How does it deal with this parenthesis? + ----- + ) "It should be part of the + previous sentence. + ----- + " "(And the same with this one. + ----- + )" ('And this one! + ----- + ') + "('(And (this)) '? + ----- + )" [(and this. + ----- + )] + +However, Punkt is designed to learn parameters (a list of abbreviations, etc.) +unsupervised from a corpus similar to the target domain. The pre-packaged models +may therefore be unsuitable: use ``PunktSentenceTokenizer(text)`` to learn +parameters from the given text. 
+ +:class:`.PunktTrainer` learns parameters such as a list of abbreviations +(without supervision) from portions of text. Using a ``PunktTrainer`` directly +allows for incremental training and modification of the hyper-parameters used +to decide what is considered an abbreviation, etc. + +The algorithm for this tokenizer is described in:: + + Kiss, Tibor and Strunk, Jan (2006): Unsupervised Multilingual Sentence + Boundary Detection. Computational Linguistics 32: 485-525. +""" +from __future__ import print_function, unicode_literals, division + +# TODO: Make orthographic heuristic less susceptible to overtraining +# TODO: Frequent sentence starters optionally exclude always-capitalised words +# FIXME: Problem with ending string with e.g. '!!!' -> '!! !' + +import re +import math +from collections import defaultdict + +from six import string_types + +from nltk.compat import unicode_repr, python_2_unicode_compatible +from nltk.probability import FreqDist +from nltk.tokenize.api import TokenizerI + +###################################################################### +# { Orthographic Context Constants +###################################################################### +# The following constants are used to describe the orthographic +# contexts in which a word can occur. BEG=beginning, MID=middle, +# UNK=unknown, UC=uppercase, LC=lowercase, NC=no case. + +_ORTHO_BEG_UC = 1 << 1 +"""Orthographic context: beginning of a sentence with upper case.""" + +_ORTHO_MID_UC = 1 << 2 +"""Orthographic context: middle of a sentence with upper case.""" + +_ORTHO_UNK_UC = 1 << 3 +"""Orthographic context: unknown position in a sentence with upper case.""" + +_ORTHO_BEG_LC = 1 << 4 +"""Orthographic context: beginning of a sentence with lower case.""" + +_ORTHO_MID_LC = 1 << 5 +"""Orthographic context: middle of a sentence with lower case.""" + +_ORTHO_UNK_LC = 1 << 6 +"""Orthographic context: unknown position in a sentence with lower case.""" + +_ORTHO_UC = _ORTHO_BEG_UC + _ORTHO_MID_UC + _ORTHO_UNK_UC +"""Orthographic context: occurs with upper case.""" + +_ORTHO_LC = _ORTHO_BEG_LC + _ORTHO_MID_LC + _ORTHO_UNK_LC +"""Orthographic context: occurs with lower case.""" + +_ORTHO_MAP = { + ('initial', 'upper'): _ORTHO_BEG_UC, + ('internal', 'upper'): _ORTHO_MID_UC, + ('unknown', 'upper'): _ORTHO_UNK_UC, + ('initial', 'lower'): _ORTHO_BEG_LC, + ('internal', 'lower'): _ORTHO_MID_LC, + ('unknown', 'lower'): _ORTHO_UNK_LC, +} +"""A map from context position and first-letter case to the +appropriate orthographic context flag.""" + +# } (end orthographic context constants) +###################################################################### + +###################################################################### +# { Decision reasons for debugging +###################################################################### + +REASON_DEFAULT_DECISION = 'default decision' +REASON_KNOWN_COLLOCATION = 'known collocation (both words)' +REASON_ABBR_WITH_ORTHOGRAPHIC_HEURISTIC = 'abbreviation + orthographic heuristic' +REASON_ABBR_WITH_SENTENCE_STARTER = 'abbreviation + frequent sentence starter' +REASON_INITIAL_WITH_ORTHOGRAPHIC_HEURISTIC = 'initial + orthographic heuristic' +REASON_NUMBER_WITH_ORTHOGRAPHIC_HEURISTIC = 'initial + orthographic heuristic' +REASON_INITIAL_WITH_SPECIAL_ORTHOGRAPHIC_HEURISTIC = ( + 'initial + special orthographic heuristic' +) + + +# } (end decision reasons for debugging) +###################################################################### + 
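# A short sketch of the unsupervised training route described in the module
# docstring above: raw plaintext passed to the constructor is used to learn
# abbreviations, collocations and frequent sentence starters. The string here
# is only a placeholder; real use needs a large corpus from the target domain.
from nltk.tokenize.punkt import PunktSentenceTokenizer

domain_text = (
    "Dr. Watson met Prof. Moriarty at 10 a.m. on Monday. "
    "They argued for hours. Nothing was resolved."
)
sent_detector = PunktSentenceTokenizer(domain_text)   # trains on construction
print(sent_detector.tokenize("The meeting ran late. Everyone went home."))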
+###################################################################### +# { Language-dependent variables +###################################################################### + + +class PunktLanguageVars(object): + """ + Stores variables, mostly regular expressions, which may be + language-dependent for correct application of the algorithm. + An extension of this class may modify its properties to suit + a language other than English; an instance can then be passed + as an argument to PunktSentenceTokenizer and PunktTrainer + constructors. + """ + + __slots__ = ('_re_period_context', '_re_word_tokenizer') + + def __getstate__(self): + # All modifications to the class are performed by inheritance. + # Non-default parameters to be pickled must be defined in the inherited + # class. + return 1 + + def __setstate__(self, state): + return 1 + + sent_end_chars = ('.', '?', '!') + """Characters which are candidates for sentence boundaries""" + + @property + def _re_sent_end_chars(self): + return '[%s]' % re.escape(''.join(self.sent_end_chars)) + + internal_punctuation = ',:;' # might want to extend this.. + """sentence internal punctuation, which indicates an abbreviation if + preceded by a period-final token.""" + + re_boundary_realignment = re.compile(r'["\')\]}]+?(?:\s+|(?=--)|$)', re.MULTILINE) + """Used to realign punctuation that should be included in a sentence + although it follows the period (or ?, !).""" + + _re_word_start = r"[^\(\"\`{\[:;&\#\*@\)}\]\-,]" + """Excludes some characters from starting word tokens""" + + _re_non_word_chars = r"(?:[?!)\";}\]\*:@\'\({\[])" + """Characters that cannot appear within words""" + + _re_multi_char_punct = r"(?:\-{2,}|\.{2,}|(?:\.\s){2,}\.)" + """Hyphen and ellipsis are multi-character punctuation""" + + _word_tokenize_fmt = r'''( + %(MultiChar)s + | + (?=%(WordStart)s)\S+? # Accept word characters until end is found + (?= # Sequences marking a word's end + \s| # White-space + $| # End-of-string + %(NonWord)s|%(MultiChar)s| # Punctuation + ,(?=$|\s|%(NonWord)s|%(MultiChar)s) # Comma if at end of word + ) + | + \S + )''' + """Format of a regular expression to split punctuation from words, + excluding period.""" + + def _word_tokenizer_re(self): + """Compiles and returns a regular expression for word tokenization""" + try: + return self._re_word_tokenizer + except AttributeError: + self._re_word_tokenizer = re.compile( + self._word_tokenize_fmt + % { + 'NonWord': self._re_non_word_chars, + 'MultiChar': self._re_multi_char_punct, + 'WordStart': self._re_word_start, + }, + re.UNICODE | re.VERBOSE, + ) + return self._re_word_tokenizer + + def word_tokenize(self, s): + """Tokenize a string to split off punctuation other than periods""" + return self._word_tokenizer_re().findall(s) + + _period_context_fmt = r""" + \S* # some word material + %(SentEndChars)s # a potential sentence ending + (?=(?P + %(NonWord)s # either other punctuation + | + \s+(?P\S+) # or whitespace and some other token + ))""" + """Format of a regular expression to find contexts including possible + sentence boundaries. 
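# A hedged sketch of the extension point described in the PunktLanguageVars
# docstring above: subclass it, override a property such as sent_end_chars,
# and pass an instance to the tokenizer. Treating ';' as a sentence end is an
# illustration only, not a recommendation.
from nltk.tokenize.punkt import PunktLanguageVars, PunktSentenceTokenizer

class SemicolonLanguageVars(PunktLanguageVars):
    # '.', '?' and '!' are the defaults; ';' is added for the demonstration.
    sent_end_chars = ('.', '?', '!', ';')

tokenizer = PunktSentenceTokenizer(lang_vars=SemicolonLanguageVars())
print(tokenizer.tokenize("First clause; second clause. Third sentence."))
# roughly: ['First clause;', 'second clause.', 'Third sentence.']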
Matches token which the possible sentence boundary + ends, and matches the following token within a lookahead expression.""" + + def period_context_re(self): + """Compiles and returns a regular expression to find contexts + including possible sentence boundaries.""" + try: + return self._re_period_context + except: + self._re_period_context = re.compile( + self._period_context_fmt + % { + 'NonWord': self._re_non_word_chars, + 'SentEndChars': self._re_sent_end_chars, + }, + re.UNICODE | re.VERBOSE, + ) + return self._re_period_context + + +_re_non_punct = re.compile(r'[^\W\d]', re.UNICODE) +"""Matches token types that are not merely punctuation. (Types for +numeric tokens are changed to ##number## and hence contain alpha.)""" + + +# } +###################################################################### + + +# //////////////////////////////////////////////////////////// +# { Helper Functions +# //////////////////////////////////////////////////////////// + + +def _pair_iter(it): + """ + Yields pairs of tokens from the given iterator such that each input + token will appear as the first element in a yielded tuple. The last + pair will have None as its second element. + """ + it = iter(it) + try: + prev = next(it) + except StopIteration: + return + for el in it: + yield (prev, el) + prev = el + yield (prev, None) + + +###################################################################### +# { Punkt Parameters +###################################################################### + + +class PunktParameters(object): + """Stores data used to perform sentence boundary detection with Punkt.""" + + def __init__(self): + self.abbrev_types = set() + """A set of word types for known abbreviations.""" + + self.collocations = set() + """A set of word type tuples for known common collocations + where the first word ends in a period. E.g., ('S.', 'Bach') + is a common collocation in a text that discusses 'Johann + S. Bach'. These count as negative evidence for sentence + boundaries.""" + + self.sent_starters = set() + """A set of word types for words that often appear at the + beginning of sentences.""" + + self.ortho_context = defaultdict(int) + """A dictionary mapping word types to the set of orthographic + contexts that word type appears in. 
Contexts are represented + by adding orthographic context flags: ...""" + + def clear_abbrevs(self): + self.abbrev_types = set() + + def clear_collocations(self): + self.collocations = set() + + def clear_sent_starters(self): + self.sent_starters = set() + + def clear_ortho_context(self): + self.ortho_context = defaultdict(int) + + def add_ortho_context(self, typ, flag): + self.ortho_context[typ] |= flag + + def _debug_ortho_context(self, typ): + c = self.ortho_context[typ] + if c & _ORTHO_BEG_UC: + yield 'BEG-UC' + if c & _ORTHO_MID_UC: + yield 'MID-UC' + if c & _ORTHO_UNK_UC: + yield 'UNK-UC' + if c & _ORTHO_BEG_LC: + yield 'BEG-LC' + if c & _ORTHO_MID_LC: + yield 'MID-LC' + if c & _ORTHO_UNK_LC: + yield 'UNK-LC' + + +###################################################################### +# { PunktToken +###################################################################### + + +@python_2_unicode_compatible +class PunktToken(object): + """Stores a token of text with annotations produced during + sentence boundary detection.""" + + _properties = ['parastart', 'linestart', 'sentbreak', 'abbr', 'ellipsis'] + __slots__ = ['tok', 'type', 'period_final'] + _properties + + def __init__(self, tok, **params): + self.tok = tok + self.type = self._get_type(tok) + self.period_final = tok.endswith('.') + + for p in self._properties: + setattr(self, p, None) + for k in params: + setattr(self, k, params[k]) + + # //////////////////////////////////////////////////////////// + # { Regular expressions for properties + # //////////////////////////////////////////////////////////// + # Note: [A-Za-z] is approximated by [^\W\d] in the general case. + _RE_ELLIPSIS = re.compile(r'\.\.+$') + _RE_NUMERIC = re.compile(r'^-?[\.,]?\d[\d,\.-]*\.?$') + _RE_INITIAL = re.compile(r'[^\W\d]\.$', re.UNICODE) + _RE_ALPHA = re.compile(r'[^\W\d]+$', re.UNICODE) + + # //////////////////////////////////////////////////////////// + # { Derived properties + # //////////////////////////////////////////////////////////// + + def _get_type(self, tok): + """Returns a case-normalized representation of the token.""" + return self._RE_NUMERIC.sub('##number##', tok.lower()) + + @property + def type_no_period(self): + """ + The type with its final period removed if it has one. + """ + if len(self.type) > 1 and self.type[-1] == '.': + return self.type[:-1] + return self.type + + @property + def type_no_sentperiod(self): + """ + The type with its final period removed if it is marked as a + sentence break. 
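# A small sketch of filling PunktParameters by hand instead of training: the
# PunktSentenceTokenizer constructor (further down) also accepts a ready-made
# parameters object, and abbrev_types stores lower-cased types without their
# trailing period. The abbreviations and sentences are illustrative only.
from nltk.tokenize.punkt import PunktParameters, PunktSentenceTokenizer

params = PunktParameters()
params.abbrev_types.update({'dr', 'prof', 'e.g'})   # for 'Dr.', 'Prof.', 'e.g.'

tokenizer = PunktSentenceTokenizer(params)
print(tokenizer.tokenize("Dr. Smith visited. Prof. Jones stayed home."))
# roughly: ['Dr. Smith visited.', 'Prof. Jones stayed home.']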
+ """ + if self.sentbreak: + return self.type_no_period + return self.type + + @property + def first_upper(self): + """True if the token's first character is uppercase.""" + return self.tok[0].isupper() + + @property + def first_lower(self): + """True if the token's first character is lowercase.""" + return self.tok[0].islower() + + @property + def first_case(self): + if self.first_lower: + return 'lower' + elif self.first_upper: + return 'upper' + return 'none' + + @property + def is_ellipsis(self): + """True if the token text is that of an ellipsis.""" + return self._RE_ELLIPSIS.match(self.tok) + + @property + def is_number(self): + """True if the token text is that of a number.""" + return self.type.startswith('##number##') + + @property + def is_initial(self): + """True if the token text is that of an initial.""" + return self._RE_INITIAL.match(self.tok) + + @property + def is_alpha(self): + """True if the token text is all alphabetic.""" + return self._RE_ALPHA.match(self.tok) + + @property + def is_non_punct(self): + """True if the token is either a number or is alphabetic.""" + return _re_non_punct.search(self.type) + + # //////////////////////////////////////////////////////////// + # { String representation + # //////////////////////////////////////////////////////////// + + def __repr__(self): + """ + A string representation of the token that can reproduce it + with eval(), which lists all the token's non-default + annotations. + """ + typestr = ' type=%s,' % unicode_repr(self.type) if self.type != self.tok else '' + + propvals = ', '.join( + '%s=%s' % (p, unicode_repr(getattr(self, p))) + for p in self._properties + if getattr(self, p) + ) + + return '%s(%s,%s %s)' % ( + self.__class__.__name__, + unicode_repr(self.tok), + typestr, + propvals, + ) + + def __str__(self): + """ + A string representation akin to that used by Kiss and Strunk. + """ + res = self.tok + if self.abbr: + res += '' + if self.ellipsis: + res += '' + if self.sentbreak: + res += '' + return res + + +###################################################################### +# { Punkt base class +###################################################################### + + +class PunktBaseClass(object): + """ + Includes common components of PunktTrainer and PunktSentenceTokenizer. + """ + + def __init__(self, lang_vars=None, token_cls=PunktToken, params=None): + if lang_vars is None: + lang_vars = PunktLanguageVars() + if params is None: + params = PunktParameters() + self._params = params + self._lang_vars = lang_vars + self._Token = token_cls + """The collection of parameters that determines the behavior + of the punkt tokenizer.""" + + # //////////////////////////////////////////////////////////// + # { Word tokenization + # //////////////////////////////////////////////////////////// + + def _tokenize_words(self, plaintext): + """ + Divide the given text into tokens, using the punkt word + segmentation regular expression, and generate the resulting list + of tokens augmented as three-tuples with two boolean values for whether + the given token occurs at the start of a paragraph or a new line, + respectively. 
+ """ + parastart = False + for line in plaintext.split('\n'): + if line.strip(): + line_toks = iter(self._lang_vars.word_tokenize(line)) + + try: + tok = next(line_toks) + except StopIteration: + continue + + yield self._Token(tok, parastart=parastart, linestart=True) + parastart = False + + for t in line_toks: + yield self._Token(t) + else: + parastart = True + + # //////////////////////////////////////////////////////////// + # { Annotation Procedures + # //////////////////////////////////////////////////////////// + + def _annotate_first_pass(self, tokens): + """ + Perform the first pass of annotation, which makes decisions + based purely based on the word type of each word: + + - '?', '!', and '.' are marked as sentence breaks. + - sequences of two or more periods are marked as ellipsis. + - any word ending in '.' that's a known abbreviation is + marked as an abbreviation. + - any other word ending in '.' is marked as a sentence break. + + Return these annotations as a tuple of three sets: + + - sentbreak_toks: The indices of all sentence breaks. + - abbrev_toks: The indices of all abbreviations. + - ellipsis_toks: The indices of all ellipsis marks. + """ + for aug_tok in tokens: + self._first_pass_annotation(aug_tok) + yield aug_tok + + def _first_pass_annotation(self, aug_tok): + """ + Performs type-based annotation on a single token. + """ + + tok = aug_tok.tok + + if tok in self._lang_vars.sent_end_chars: + aug_tok.sentbreak = True + elif aug_tok.is_ellipsis: + aug_tok.ellipsis = True + elif aug_tok.period_final and not tok.endswith('..'): + if ( + tok[:-1].lower() in self._params.abbrev_types + or tok[:-1].lower().split('-')[-1] in self._params.abbrev_types + ): + + aug_tok.abbr = True + else: + aug_tok.sentbreak = True + + return + + +###################################################################### +# { Punkt Trainer +###################################################################### + + +class PunktTrainer(PunktBaseClass): + """Learns parameters used in Punkt sentence boundary detection.""" + + def __init__( + self, train_text=None, verbose=False, lang_vars=None, token_cls=PunktToken + ): + + PunktBaseClass.__init__(self, lang_vars=lang_vars, token_cls=token_cls) + + self._type_fdist = FreqDist() + """A frequency distribution giving the frequency of each + case-normalized token type in the training data.""" + + self._num_period_toks = 0 + """The number of words ending in period in the training data.""" + + self._collocation_fdist = FreqDist() + """A frequency distribution giving the frequency of all + bigrams in the training data where the first word ends in a + period. Bigrams are encoded as tuples of word types. + Especially common collocations are extracted from this + frequency distribution, and stored in + ``_params``.``collocations ``.""" + + self._sent_starter_fdist = FreqDist() + """A frequency distribution giving the frequency of all words + that occur at the training data at the beginning of a sentence + (after the first pass of annotation). Especially common + sentence starters are extracted from this frequency + distribution, and stored in ``_params.sent_starters``. 
+ """ + + self._sentbreak_count = 0 + """The total number of sentence breaks identified in training, used for + calculating the frequent sentence starter heuristic.""" + + self._finalized = True + """A flag as to whether the training has been finalized by finding + collocations and sentence starters, or whether finalize_training() + still needs to be called.""" + + if train_text: + self.train(train_text, verbose, finalize=True) + + def get_params(self): + """ + Calculates and returns parameters for sentence boundary detection as + derived from training.""" + if not self._finalized: + self.finalize_training() + return self._params + + # //////////////////////////////////////////////////////////// + # { Customization Variables + # //////////////////////////////////////////////////////////// + + ABBREV = 0.3 + """cut-off value whether a 'token' is an abbreviation""" + + IGNORE_ABBREV_PENALTY = False + """allows the disabling of the abbreviation penalty heuristic, which + exponentially disadvantages words that are found at times without a + final period.""" + + ABBREV_BACKOFF = 5 + """upper cut-off for Mikheev's(2002) abbreviation detection algorithm""" + + COLLOCATION = 7.88 + """minimal log-likelihood value that two tokens need to be considered + as a collocation""" + + SENT_STARTER = 30 + """minimal log-likelihood value that a token requires to be considered + as a frequent sentence starter""" + + INCLUDE_ALL_COLLOCS = False + """this includes as potential collocations all word pairs where the first + word ends in a period. It may be useful in corpora where there is a lot + of variation that makes abbreviations like Mr difficult to identify.""" + + INCLUDE_ABBREV_COLLOCS = False + """this includes as potential collocations all word pairs where the first + word is an abbreviation. Such collocations override the orthographic + heuristic, but not the sentence starter heuristic. This is overridden by + INCLUDE_ALL_COLLOCS, and if both are false, only collocations with initials + and ordinals are considered.""" + """""" + + MIN_COLLOC_FREQ = 1 + """this sets a minimum bound on the number of times a bigram needs to + appear before it can be considered a collocation, in addition to log + likelihood statistics. This is useful when INCLUDE_ALL_COLLOCS is True.""" + + # //////////////////////////////////////////////////////////// + # { Training.. + # //////////////////////////////////////////////////////////// + + def train(self, text, verbose=False, finalize=True): + """ + Collects training data from a given text. If finalize is True, it + will determine all the parameters for sentence boundary detection. If + not, this will be delayed until get_params() or finalize_training() is + called. If verbose is True, abbreviations found will be listed. + """ + # Break the text into tokens; record which token indices correspond to + # line starts and paragraph starts; and determine their types. + self._train_tokens(self._tokenize_words(text), verbose) + if finalize: + self.finalize_training(verbose) + + def train_tokens(self, tokens, verbose=False, finalize=True): + """ + Collects training data from a given list of tokens. + """ + self._train_tokens((self._Token(t) for t in tokens), verbose) + if finalize: + self.finalize_training(verbose) + + def _train_tokens(self, tokens, verbose): + self._finalized = False + + # Ensure tokens are a list + tokens = list(tokens) + + # Find the frequency of each case-normalized type. (Don't + # strip off final periods.) 
Also keep track of the number of + # tokens that end in periods. + for aug_tok in tokens: + self._type_fdist[aug_tok.type] += 1 + if aug_tok.period_final: + self._num_period_toks += 1 + + # Look for new abbreviations, and for types that no longer are + unique_types = self._unique_types(tokens) + for abbr, score, is_add in self._reclassify_abbrev_types(unique_types): + if score >= self.ABBREV: + if is_add: + self._params.abbrev_types.add(abbr) + if verbose: + print((' Abbreviation: [%6.4f] %s' % (score, abbr))) + else: + if not is_add: + self._params.abbrev_types.remove(abbr) + if verbose: + print((' Removed abbreviation: [%6.4f] %s' % (score, abbr))) + + # Make a preliminary pass through the document, marking likely + # sentence breaks, abbreviations, and ellipsis tokens. + tokens = list(self._annotate_first_pass(tokens)) + + # Check what contexts each word type can appear in, given the + # case of its first letter. + self._get_orthography_data(tokens) + + # We need total number of sentence breaks to find sentence starters + self._sentbreak_count += self._get_sentbreak_count(tokens) + + # The remaining heuristics relate to pairs of tokens where the first + # ends in a period. + for aug_tok1, aug_tok2 in _pair_iter(tokens): + if not aug_tok1.period_final or not aug_tok2: + continue + + # Is the first token a rare abbreviation? + if self._is_rare_abbrev_type(aug_tok1, aug_tok2): + self._params.abbrev_types.add(aug_tok1.type_no_period) + if verbose: + print((' Rare Abbrev: %s' % aug_tok1.type)) + + # Does second token have a high likelihood of starting a sentence? + if self._is_potential_sent_starter(aug_tok2, aug_tok1): + self._sent_starter_fdist[aug_tok2.type] += 1 + + # Is this bigram a potential collocation? + if self._is_potential_collocation(aug_tok1, aug_tok2): + self._collocation_fdist[ + (aug_tok1.type_no_period, aug_tok2.type_no_sentperiod) + ] += 1 + + def _unique_types(self, tokens): + return set(aug_tok.type for aug_tok in tokens) + + def finalize_training(self, verbose=False): + """ + Uses data that has been gathered in training to determine likely + collocations and sentence starters. + """ + self._params.clear_sent_starters() + for typ, ll in self._find_sent_starters(): + self._params.sent_starters.add(typ) + if verbose: + print((' Sent Starter: [%6.4f] %r' % (ll, typ))) + + self._params.clear_collocations() + for (typ1, typ2), ll in self._find_collocations(): + self._params.collocations.add((typ1, typ2)) + if verbose: + print((' Collocation: [%6.4f] %r+%r' % (ll, typ1, typ2))) + + self._finalized = True + + # //////////////////////////////////////////////////////////// + # { Overhead reduction + # //////////////////////////////////////////////////////////// + + def freq_threshold( + self, ortho_thresh=2, type_thresh=2, colloc_thres=2, sentstart_thresh=2 + ): + """ + Allows memory use to be reduced after much training by removing data + about rare tokens that are unlikely to have a statistical effect with + further training. Entries occurring above the given thresholds will be + retained. 
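# A sketch of the incremental workflow described in train() above: feed text
# in batches with finalize=False, optionally loosen the collocation heuristic
# through the class-level knobs, finalize once, and hand the learned
# parameters to a PunktSentenceTokenizer. text_batches is a stand-in corpus.
from nltk.tokenize.punkt import PunktTrainer, PunktSentenceTokenizer

trainer = PunktTrainer()
trainer.INCLUDE_ALL_COLLOCS = True     # consider every word pair ending in '.'
trainer.MIN_COLLOC_FREQ = 2            # ...but require the pair to occur twice

text_batches = ["First batch of plaintext goes here.",
                "Second batch of plaintext goes here."]
for batch in text_batches:
    trainer.train(batch, finalize=False)
trainer.finalize_training()

sent_detector = PunktSentenceTokenizer(trainer.get_params())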
+ """ + if ortho_thresh > 1: + old_oc = self._params.ortho_context + self._params.clear_ortho_context() + for tok in self._type_fdist: + count = self._type_fdist[tok] + if count >= ortho_thresh: + self._params.ortho_context[tok] = old_oc[tok] + + self._type_fdist = self._freq_threshold(self._type_fdist, type_thresh) + self._collocation_fdist = self._freq_threshold( + self._collocation_fdist, colloc_thres + ) + self._sent_starter_fdist = self._freq_threshold( + self._sent_starter_fdist, sentstart_thresh + ) + + def _freq_threshold(self, fdist, threshold): + """ + Returns a FreqDist containing only data with counts below a given + threshold, as well as a mapping (None -> count_removed). + """ + # We assume that there is more data below the threshold than above it + # and so create a new FreqDist rather than working in place. + res = FreqDist() + num_removed = 0 + for tok in fdist: + count = fdist[tok] + if count < threshold: + num_removed += 1 + else: + res[tok] += count + res[None] += num_removed + return res + + # //////////////////////////////////////////////////////////// + # { Orthographic data + # //////////////////////////////////////////////////////////// + + def _get_orthography_data(self, tokens): + """ + Collect information about whether each token type occurs + with different case patterns (i) overall, (ii) at + sentence-initial positions, and (iii) at sentence-internal + positions. + """ + # 'initial' or 'internal' or 'unknown' + context = 'internal' + tokens = list(tokens) + + for aug_tok in tokens: + # If we encounter a paragraph break, then it's a good sign + # that it's a sentence break. But err on the side of + # caution (by not positing a sentence break) if we just + # saw an abbreviation. + if aug_tok.parastart and context != 'unknown': + context = 'initial' + + # If we're at the beginning of a line, then we can't decide + # between 'internal' and 'initial'. + if aug_tok.linestart and context == 'internal': + context = 'unknown' + + # Find the case-normalized type of the token. If it's a + # sentence-final token, strip off the period. + typ = aug_tok.type_no_sentperiod + + # Update the orthographic context table. + flag = _ORTHO_MAP.get((context, aug_tok.first_case), 0) + if flag: + self._params.add_ortho_context(typ, flag) + + # Decide whether the next word is at a sentence boundary. + if aug_tok.sentbreak: + if not (aug_tok.is_number or aug_tok.is_initial): + context = 'initial' + else: + context = 'unknown' + elif aug_tok.ellipsis or aug_tok.abbr: + context = 'unknown' + else: + context = 'internal' + + # //////////////////////////////////////////////////////////// + # { Abbreviations + # //////////////////////////////////////////////////////////// + + def _reclassify_abbrev_types(self, types): + """ + (Re)classifies each given token if + - it is period-final and not a known abbreviation; or + - it is not period-final and is otherwise a known abbreviation + by checking whether its previous classification still holds according + to the heuristics of section 3. + Yields triples (abbr, score, is_add) where abbr is the type in question, + score is its log-likelihood with penalties applied, and is_add specifies + whether the present type is a candidate for inclusion or exclusion as an + abbreviation, such that: + - (is_add and score >= 0.3) suggests a new abbreviation; and + - (not is_add and score < 0.3) suggests excluding an abbreviation. 
+ """ + # (While one could recalculate abbreviations from all .-final tokens at + # every iteration, in cases requiring efficiency, the number of tokens + # in the present training document will be much less.) + + for typ in types: + # Check some basic conditions, to rule out words that are + # clearly not abbrev_types. + if not _re_non_punct.search(typ) or typ == '##number##': + continue + + if typ.endswith('.'): + if typ in self._params.abbrev_types: + continue + typ = typ[:-1] + is_add = True + else: + if typ not in self._params.abbrev_types: + continue + is_add = False + + # Count how many periods & nonperiods are in the + # candidate. + num_periods = typ.count('.') + 1 + num_nonperiods = len(typ) - num_periods + 1 + + # Let be the candidate without the period, and + # be the period. Find a log likelihood ratio that + # indicates whether occurs as a single unit (high + # value of ll), or as two independent units and + # (low value of ll). + count_with_period = self._type_fdist[typ + '.'] + count_without_period = self._type_fdist[typ] + ll = self._dunning_log_likelihood( + count_with_period + count_without_period, + self._num_period_toks, + count_with_period, + self._type_fdist.N(), + ) + + # Apply three scaling factors to 'tweak' the basic log + # likelihood ratio: + # F_length: long word -> less likely to be an abbrev + # F_periods: more periods -> more likely to be an abbrev + # F_penalty: penalize occurrences w/o a period + f_length = math.exp(-num_nonperiods) + f_periods = num_periods + f_penalty = int(self.IGNORE_ABBREV_PENALTY) or math.pow( + num_nonperiods, -count_without_period + ) + score = ll * f_length * f_periods * f_penalty + + yield typ, score, is_add + + def find_abbrev_types(self): + """ + Recalculates abbreviations given type frequencies, despite no prior + determination of abbreviations. + This fails to include abbreviations otherwise found as "rare". + """ + self._params.clear_abbrevs() + tokens = (typ for typ in self._type_fdist if typ and typ.endswith('.')) + for abbr, score, is_add in self._reclassify_abbrev_types(tokens): + if score >= self.ABBREV: + self._params.abbrev_types.add(abbr) + + # This function combines the work done by the original code's + # functions `count_orthography_context`, `get_orthography_count`, + # and `get_rare_abbreviations`. + def _is_rare_abbrev_type(self, cur_tok, next_tok): + """ + A word type is counted as a rare abbreviation if... + - it's not already marked as an abbreviation + - it occurs fewer than ABBREV_BACKOFF times + - either it is followed by a sentence-internal punctuation + mark, *or* it is followed by a lower-case word that + sometimes appears with upper case, but never occurs with + lower case at the beginning of sentences. + """ + if cur_tok.abbr or not cur_tok.sentbreak: + return False + + # Find the case-normalized type of the token. If it's + # a sentence-final token, strip off the period. + typ = cur_tok.type_no_sentperiod + + # Proceed only if the type hasn't been categorized as an + # abbreviation already, and is sufficiently rare... + count = self._type_fdist[typ] + self._type_fdist[typ[:-1]] + if typ in self._params.abbrev_types or count >= self.ABBREV_BACKOFF: + return False + + # Record this token as an abbreviation if the next + # token is a sentence-internal punctuation mark. + # [XX] :1 or check the whole thing?? + if next_tok.tok[:1] in self._lang_vars.internal_punctuation: + return True + + # Record this type as an abbreviation if the next + # token... 
(i) starts with a lower case letter, + # (ii) sometimes occurs with an uppercase letter, + # and (iii) never occus with an uppercase letter + # sentence-internally. + # [xx] should the check for (ii) be modified?? + elif next_tok.first_lower: + typ2 = next_tok.type_no_sentperiod + typ2ortho_context = self._params.ortho_context[typ2] + if (typ2ortho_context & _ORTHO_BEG_UC) and not ( + typ2ortho_context & _ORTHO_MID_UC + ): + return True + + # //////////////////////////////////////////////////////////// + # { Log Likelihoods + # //////////////////////////////////////////////////////////// + + # helper for _reclassify_abbrev_types: + @staticmethod + def _dunning_log_likelihood(count_a, count_b, count_ab, N): + """ + A function that calculates the modified Dunning log-likelihood + ratio scores for abbreviation candidates. The details of how + this works is available in the paper. + """ + p1 = count_b / N + p2 = 0.99 + + null_hypo = count_ab * math.log(p1) + (count_a - count_ab) * math.log(1.0 - p1) + alt_hypo = count_ab * math.log(p2) + (count_a - count_ab) * math.log(1.0 - p2) + + likelihood = null_hypo - alt_hypo + + return -2.0 * likelihood + + @staticmethod + def _col_log_likelihood(count_a, count_b, count_ab, N): + """ + A function that will just compute log-likelihood estimate, in + the original paper it's described in algorithm 6 and 7. + + This *should* be the original Dunning log-likelihood values, + unlike the previous log_l function where it used modified + Dunning log-likelihood values + """ + p = count_b / N + p1 = count_ab / count_a + try: + p2 = (count_b - count_ab) / (N - count_a) + except ZeroDivisionError as e: + p2 = 1 + + try: + summand1 = count_ab * math.log(p) + (count_a - count_ab) * math.log(1.0 - p) + except ValueError as e: + summand1 = 0 + + try: + summand2 = (count_b - count_ab) * math.log(p) + ( + N - count_a - count_b + count_ab + ) * math.log(1.0 - p) + except ValueError as e: + summand2 = 0 + + if count_a == count_ab or p1 <= 0 or p1 >= 1: + summand3 = 0 + else: + summand3 = count_ab * math.log(p1) + (count_a - count_ab) * math.log( + 1.0 - p1 + ) + + if count_b == count_ab or p2 <= 0 or p2 >= 1: + summand4 = 0 + else: + summand4 = (count_b - count_ab) * math.log(p2) + ( + N - count_a - count_b + count_ab + ) * math.log(1.0 - p2) + + likelihood = summand1 + summand2 - summand3 - summand4 + + return -2.0 * likelihood + + # //////////////////////////////////////////////////////////// + # { Collocation Finder + # //////////////////////////////////////////////////////////// + + def _is_potential_collocation(self, aug_tok1, aug_tok2): + """ + Returns True if the pair of tokens may form a collocation given + log-likelihood statistics. + """ + return ( + ( + self.INCLUDE_ALL_COLLOCS + or (self.INCLUDE_ABBREV_COLLOCS and aug_tok1.abbr) + or (aug_tok1.sentbreak and (aug_tok1.is_number or aug_tok1.is_initial)) + ) + and aug_tok1.is_non_punct + and aug_tok2.is_non_punct + ) + + def _find_collocations(self): + """ + Generates likely collocations and their log-likelihood. 
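The abbreviation score used by `_reclassify_abbrev_types` is easier to follow with concrete numbers. The sketch below restates `_dunning_log_likelihood` and the three scaling factors as standalone code; the counts are invented purely for illustration, and the 0.3 cut-off is the ABBREV threshold mentioned in the docstring above:

    import math

    def modified_dunning_ll(count_a, count_b, count_ab, n):
        # Same computation as _dunning_log_likelihood: compare the null
        # hypothesis P(period | word) == P(period) against the fixed
        # alternative P(period | word) == 0.99.
        p1 = count_b / n
        p2 = 0.99
        null_hypo = count_ab * math.log(p1) + (count_a - count_ab) * math.log(1.0 - p1)
        alt_hypo = count_ab * math.log(p2) + (count_a - count_ab) * math.log(1.0 - p2)
        return -2.0 * (null_hypo - alt_hypo)

    # Hypothetical counts for the candidate type "etc":
    count_with_period = 40     # "etc." in the training text
    count_without_period = 2   # bare "etc"
    num_period_tokens = 5000   # all period-final tokens
    total_tokens = 100000      # _type_fdist.N()

    ll = modified_dunning_ll(
        count_with_period + count_without_period,  # count_a
        num_period_tokens,                         # count_b
        count_with_period,                         # count_ab
        total_tokens,                              # N
    )

    # Scale as in _reclassify_abbrev_types; "etc" has no internal periods.
    num_periods = 1
    num_nonperiods = len("etc") - num_periods + 1
    score = (
        ll
        * math.exp(-num_nonperiods)                        # F_length
        * num_periods                                      # F_periods
        * math.pow(num_nonperiods, -count_without_period)  # F_penalty
    )
    # With these counts the score works out to roughly 1.2, comfortably above
    # the 0.3 threshold, so "etc" would be recorded as an abbreviation.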
+ """ + for types in self._collocation_fdist: + try: + typ1, typ2 = types + except TypeError: + # types may be None after calling freq_threshold() + continue + if typ2 in self._params.sent_starters: + continue + + col_count = self._collocation_fdist[types] + typ1_count = self._type_fdist[typ1] + self._type_fdist[typ1 + '.'] + typ2_count = self._type_fdist[typ2] + self._type_fdist[typ2 + '.'] + if ( + typ1_count > 1 + and typ2_count > 1 + and self.MIN_COLLOC_FREQ < col_count <= min(typ1_count, typ2_count) + ): + + ll = self._col_log_likelihood( + typ1_count, typ2_count, col_count, self._type_fdist.N() + ) + # Filter out the not-so-collocative + if ll >= self.COLLOCATION and ( + self._type_fdist.N() / typ1_count > typ2_count / col_count + ): + yield (typ1, typ2), ll + + # //////////////////////////////////////////////////////////// + # { Sentence-Starter Finder + # //////////////////////////////////////////////////////////// + + def _is_potential_sent_starter(self, cur_tok, prev_tok): + """ + Returns True given a token and the token that preceds it if it + seems clear that the token is beginning a sentence. + """ + # If a token (i) is preceded by a sentece break that is + # not a potential ordinal number or initial, and (ii) is + # alphabetic, then it is a a sentence-starter. + return ( + prev_tok.sentbreak + and not (prev_tok.is_number or prev_tok.is_initial) + and cur_tok.is_alpha + ) + + def _find_sent_starters(self): + """ + Uses collocation heuristics for each candidate token to + determine if it frequently starts sentences. + """ + for typ in self._sent_starter_fdist: + if not typ: + continue + + typ_at_break_count = self._sent_starter_fdist[typ] + typ_count = self._type_fdist[typ] + self._type_fdist[typ + '.'] + if typ_count < typ_at_break_count: + # needed after freq_threshold + continue + + ll = self._col_log_likelihood( + self._sentbreak_count, + typ_count, + typ_at_break_count, + self._type_fdist.N(), + ) + + if ( + ll >= self.SENT_STARTER + and self._type_fdist.N() / self._sentbreak_count + > typ_count / typ_at_break_count + ): + yield typ, ll + + def _get_sentbreak_count(self, tokens): + """ + Returns the number of sentence breaks marked in a given set of + augmented tokens. + """ + return sum(1 for aug_tok in tokens if aug_tok.sentbreak) + + +###################################################################### +# { Punkt Sentence Tokenizer +###################################################################### + + +class PunktSentenceTokenizer(PunktBaseClass, TokenizerI): + """ + A sentence tokenizer which uses an unsupervised algorithm to build + a model for abbreviation words, collocations, and words that start + sentences; and then uses that model to find sentence boundaries. + This approach has been shown to work well for many European + languages. + """ + + def __init__( + self, train_text=None, verbose=False, lang_vars=None, token_cls=PunktToken + ): + """ + train_text can either be the sole training text for this sentence + boundary detector, or can be a PunktParameters object. + """ + PunktBaseClass.__init__(self, lang_vars=lang_vars, token_cls=token_cls) + + if train_text: + self._params = self.train(train_text, verbose) + + def train(self, train_text, verbose=False): + """ + Derives parameters from a given training text, or uses the parameters + given. Repeated calls to this method destroy previous parameters. For + incremental training, instantiate a separate PunktTrainer instance. 
+ """ + if not isinstance(train_text, string_types): + return train_text + return PunktTrainer( + train_text, lang_vars=self._lang_vars, token_cls=self._Token + ).get_params() + + # //////////////////////////////////////////////////////////// + # { Tokenization + # //////////////////////////////////////////////////////////// + + def tokenize(self, text, realign_boundaries=True): + """ + Given a text, returns a list of the sentences in that text. + """ + return list(self.sentences_from_text(text, realign_boundaries)) + + def debug_decisions(self, text): + """ + Classifies candidate periods as sentence breaks, yielding a dict for + each that may be used to understand why the decision was made. + + See format_debug_decision() to help make this output readable. + """ + + for match in self._lang_vars.period_context_re().finditer(text): + decision_text = match.group() + match.group('after_tok') + tokens = self._tokenize_words(decision_text) + tokens = list(self._annotate_first_pass(tokens)) + while not tokens[0].period_final: + tokens.pop(0) + yield dict( + period_index=match.end() - 1, + text=decision_text, + type1=tokens[0].type, + type2=tokens[1].type, + type1_in_abbrs=bool(tokens[0].abbr), + type1_is_initial=bool(tokens[0].is_initial), + type2_is_sent_starter=tokens[1].type_no_sentperiod + in self._params.sent_starters, + type2_ortho_heuristic=self._ortho_heuristic(tokens[1]), + type2_ortho_contexts=set( + self._params._debug_ortho_context(tokens[1].type_no_sentperiod) + ), + collocation=(tokens[0].type_no_sentperiod, tokens[1].type_no_sentperiod) + in self._params.collocations, + reason=self._second_pass_annotation(tokens[0], tokens[1]) + or REASON_DEFAULT_DECISION, + break_decision=tokens[0].sentbreak, + ) + + def span_tokenize(self, text, realign_boundaries=True): + """ + Given a text, generates (start, end) spans of sentences + in the text. + """ + slices = self._slices_from_text(text) + if realign_boundaries: + slices = self._realign_boundaries(text, slices) + for sl in slices: + yield (sl.start, sl.stop) + + def sentences_from_text(self, text, realign_boundaries=True): + """ + Given a text, generates the sentences in that text by only + testing candidate sentence breaks. If realign_boundaries is + True, includes in the sentence closing punctuation that + follows the period. + """ + return [text[s:e] for s, e in self.span_tokenize(text, realign_boundaries)] + + def _slices_from_text(self, text): + last_break = 0 + for match in self._lang_vars.period_context_re().finditer(text): + context = match.group() + match.group('after_tok') + if self.text_contains_sentbreak(context): + yield slice(last_break, match.end()) + if match.group('next_tok'): + # next sentence starts after whitespace + last_break = match.start('next_tok') + else: + # next sentence starts at following punctuation + last_break = match.end() + # The last sentence should not contain trailing whitespace. + yield slice(last_break, len(text.rstrip())) + + def _realign_boundaries(self, text, slices): + """ + Attempts to realign punctuation that falls after the period but + should otherwise be included in the same sentence. + + For example: "(Sent1.) Sent2." will otherwise be split as:: + + ["(Sent1.", ") Sent1."]. + + This method will produce:: + + ["(Sent1.)", "Sent2."]. 
+ """ + realign = 0 + for sl1, sl2 in _pair_iter(slices): + sl1 = slice(sl1.start + realign, sl1.stop) + if not sl2: + if text[sl1]: + yield sl1 + continue + + m = self._lang_vars.re_boundary_realignment.match(text[sl2]) + if m: + yield slice(sl1.start, sl2.start + len(m.group(0).rstrip())) + realign = m.end() + else: + realign = 0 + if text[sl1]: + yield sl1 + + def text_contains_sentbreak(self, text): + """ + Returns True if the given text includes a sentence break. + """ + found = False # used to ignore last token + for t in self._annotate_tokens(self._tokenize_words(text)): + if found: + return True + if t.sentbreak: + found = True + return False + + def sentences_from_text_legacy(self, text): + """ + Given a text, generates the sentences in that text. Annotates all + tokens, rather than just those with possible sentence breaks. Should + produce the same results as ``sentences_from_text``. + """ + tokens = self._annotate_tokens(self._tokenize_words(text)) + return self._build_sentence_list(text, tokens) + + def sentences_from_tokens(self, tokens): + """ + Given a sequence of tokens, generates lists of tokens, each list + corresponding to a sentence. + """ + tokens = iter(self._annotate_tokens(self._Token(t) for t in tokens)) + sentence = [] + for aug_tok in tokens: + sentence.append(aug_tok.tok) + if aug_tok.sentbreak: + yield sentence + sentence = [] + if sentence: + yield sentence + + def _annotate_tokens(self, tokens): + """ + Given a set of tokens augmented with markers for line-start and + paragraph-start, returns an iterator through those tokens with full + annotation including predicted sentence breaks. + """ + # Make a preliminary pass through the document, marking likely + # sentence breaks, abbreviations, and ellipsis tokens. + tokens = self._annotate_first_pass(tokens) + + # Make a second pass through the document, using token context + # information to change our preliminary decisions about where + # sentence breaks, abbreviations, and ellipsis occurs. + tokens = self._annotate_second_pass(tokens) + + ## [XX] TESTING + # tokens = list(tokens) + # self.dump(tokens) + + return tokens + + def _build_sentence_list(self, text, tokens): + """ + Given the original text and the list of augmented word tokens, + construct and return a tokenized list of sentence strings. + """ + # Most of the work here is making sure that we put the right + # pieces of whitespace back in all the right places. + + # Our position in the source text, used to keep track of which + # whitespace to add: + pos = 0 + + # A regular expression that finds pieces of whitespace: + WS_REGEXP = re.compile(r'\s*') + + sentence = '' + for aug_tok in tokens: + tok = aug_tok.tok + + # Find the whitespace before this token, and update pos. + ws = WS_REGEXP.match(text, pos).group() + pos += len(ws) + + # Some of the rules used by the punkt word tokenizer + # strip whitespace out of the text, resulting in tokens + # that contain whitespace in the source text. If our + # token doesn't match, see if adding whitespace helps. + # If so, then use the version with whitespace. + if text[pos : pos + len(tok)] != tok: + pat = '\s*'.join(re.escape(c) for c in tok) + m = re.compile(pat).match(text, pos) + if m: + tok = m.group() + + # Move our position pointer to the end of the token. + assert text[pos : pos + len(tok)] == tok + pos += len(tok) + + # Add this token. If it's not at the beginning of the + # sentence, then include any whitespace that separated it + # from the previous token. 
+ if sentence: + sentence += ws + sentence += tok + + # If we're at a sentence break, then start a new sentence. + if aug_tok.sentbreak: + yield sentence + sentence = '' + + # If the last sentence is emtpy, discard it. + if sentence: + yield sentence + + # [XX] TESTING + def dump(self, tokens): + print('writing to /tmp/punkt.new...') + with open('/tmp/punkt.new', 'w') as outfile: + for aug_tok in tokens: + if aug_tok.parastart: + outfile.write('\n\n') + elif aug_tok.linestart: + outfile.write('\n') + else: + outfile.write(' ') + + outfile.write(str(aug_tok)) + + # //////////////////////////////////////////////////////////// + # { Customization Variables + # //////////////////////////////////////////////////////////// + + PUNCTUATION = tuple(';:,.!?') + + # //////////////////////////////////////////////////////////// + # { Annotation Procedures + # //////////////////////////////////////////////////////////// + + def _annotate_second_pass(self, tokens): + """ + Performs a token-based classification (section 4) over the given + tokens, making use of the orthographic heuristic (4.1.1), collocation + heuristic (4.1.2) and frequent sentence starter heuristic (4.1.3). + """ + for t1, t2 in _pair_iter(tokens): + self._second_pass_annotation(t1, t2) + yield t1 + + def _second_pass_annotation(self, aug_tok1, aug_tok2): + """ + Performs token-based classification over a pair of contiguous tokens + updating the first. + """ + # Is it the last token? We can't do anything then. + if not aug_tok2: + return + + tok = aug_tok1.tok + if not aug_tok1.period_final: + # We only care about words ending in periods. + return + + typ = aug_tok1.type_no_period + next_tok = aug_tok2.tok + next_typ = aug_tok2.type_no_sentperiod + tok_is_initial = aug_tok1.is_initial + + # [4.1.2. Collocation Heuristic] If there's a + # collocation between the word before and after the + # period, then label tok as an abbreviation and NOT + # a sentence break. Note that collocations with + # frequent sentence starters as their second word are + # excluded in training. + if (typ, next_typ) in self._params.collocations: + aug_tok1.sentbreak = False + aug_tok1.abbr = True + return REASON_KNOWN_COLLOCATION + + # [4.2. Token-Based Reclassification of Abbreviations] If + # the token is an abbreviation or an ellipsis, then decide + # whether we should *also* classify it as a sentbreak. + if (aug_tok1.abbr or aug_tok1.ellipsis) and (not tok_is_initial): + # [4.1.1. Orthographic Heuristic] Check if there's + # orthogrpahic evidence about whether the next word + # starts a sentence or not. + is_sent_starter = self._ortho_heuristic(aug_tok2) + if is_sent_starter == True: + aug_tok1.sentbreak = True + return REASON_ABBR_WITH_ORTHOGRAPHIC_HEURISTIC + + # [4.1.3. Frequent Sentence Starter Heruistic] If the + # next word is capitalized, and is a member of the + # frequent-sentence-starters list, then label tok as a + # sentence break. + if aug_tok2.first_upper and next_typ in self._params.sent_starters: + aug_tok1.sentbreak = True + return REASON_ABBR_WITH_SENTENCE_STARTER + + # [4.3. Token-Based Detection of Initials and Ordinals] + # Check if any initials or ordinals tokens that are marked + # as sentbreaks should be reclassified as abbreviations. + if tok_is_initial or typ == '##number##': + + # [4.1.1. Orthographic Heuristic] Check if there's + # orthogrpahic evidence about whether the next word + # starts a sentence or not. 
+ is_sent_starter = self._ortho_heuristic(aug_tok2) + + if is_sent_starter == False: + aug_tok1.sentbreak = False + aug_tok1.abbr = True + if tok_is_initial: + return REASON_INITIAL_WITH_ORTHOGRAPHIC_HEURISTIC + else: + return REASON_NUMBER_WITH_ORTHOGRAPHIC_HEURISTIC + + # Special heuristic for initials: if orthogrpahic + # heuristc is unknown, and next word is always + # capitalized, then mark as abbrev (eg: J. Bach). + if ( + is_sent_starter == 'unknown' + and tok_is_initial + and aug_tok2.first_upper + and not (self._params.ortho_context[next_typ] & _ORTHO_LC) + ): + aug_tok1.sentbreak = False + aug_tok1.abbr = True + return REASON_INITIAL_WITH_SPECIAL_ORTHOGRAPHIC_HEURISTIC + + return + + def _ortho_heuristic(self, aug_tok): + """ + Decide whether the given token is the first token in a sentence. + """ + # Sentences don't start with punctuation marks: + if aug_tok.tok in self.PUNCTUATION: + return False + + ortho_context = self._params.ortho_context[aug_tok.type_no_sentperiod] + + # If the word is capitalized, occurs at least once with a + # lower case first letter, and never occurs with an upper case + # first letter sentence-internally, then it's a sentence starter. + if ( + aug_tok.first_upper + and (ortho_context & _ORTHO_LC) + and not (ortho_context & _ORTHO_MID_UC) + ): + return True + + # If the word is lower case, and either (a) we've seen it used + # with upper case, or (b) we've never seen it used + # sentence-initially with lower case, then it's not a sentence + # starter. + if aug_tok.first_lower and ( + (ortho_context & _ORTHO_UC) or not (ortho_context & _ORTHO_BEG_LC) + ): + return False + + # Otherwise, we're not sure. + return 'unknown' + + +DEBUG_DECISION_FMT = '''Text: %(text)r (at offset %(period_index)d) +Sentence break? %(break_decision)s (%(reason)s) +Collocation? %(collocation)s +%(type1)r: + known abbreviation: %(type1_in_abbrs)s + is initial: %(type1_is_initial)s +%(type2)r: + known sentence starter: %(type2_is_sent_starter)s + orthographic heuristic suggests is a sentence starter? %(type2_ortho_heuristic)s + orthographic contexts in training: %(type2_ortho_contexts)s +''' + + +def format_debug_decision(d): + return DEBUG_DECISION_FMT % d + + +def demo(text, tok_cls=PunktSentenceTokenizer, train_cls=PunktTrainer): + """Builds a punkt model and applies it to the same text""" + cleanup = ( + lambda s: re.compile(r'(?:\r|^\s+)', re.MULTILINE).sub('', s).replace('\n', ' ') + ) + trainer = train_cls() + trainer.INCLUDE_ALL_COLLOCS = True + trainer.train(text) + sbd = tok_cls(trainer.get_params()) + for l in sbd.sentences_from_text(text): + print(cleanup(l)) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/regexp.py b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/regexp.py new file mode 100644 index 0000000..9f7a1ee --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/regexp.py @@ -0,0 +1,223 @@ +# Natural Language Toolkit: Tokenizers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird +# Trevor Cohn +# URL: +# For license information, see LICENSE.TXT + +r""" +Regular-Expression Tokenizers + +A ``RegexpTokenizer`` splits a string into substrings using a regular expression. +For example, the following tokenizer forms tokens out of alphabetic sequences, +money expressions, and any other non-whitespace sequences: + + >>> from nltk.tokenize import RegexpTokenizer + >>> s = "Good muffins cost $3.88\nin New York. Please buy me\ntwo of them.\n\nThanks." 
+ >>> tokenizer = RegexpTokenizer('\w+|\$[\d\.]+|\S+') + >>> tokenizer.tokenize(s) + ['Good', 'muffins', 'cost', '$3.88', 'in', 'New', 'York', '.', + 'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.'] + +A ``RegexpTokenizer`` can use its regexp to match delimiters instead: + + >>> tokenizer = RegexpTokenizer('\s+', gaps=True) + >>> tokenizer.tokenize(s) + ['Good', 'muffins', 'cost', '$3.88', 'in', 'New', 'York.', + 'Please', 'buy', 'me', 'two', 'of', 'them.', 'Thanks.'] + +Note that empty tokens are not returned when the delimiter appears at +the start or end of the string. + +The material between the tokens is discarded. For example, +the following tokenizer selects just the capitalized words: + + >>> capword_tokenizer = RegexpTokenizer('[A-Z]\w+') + >>> capword_tokenizer.tokenize(s) + ['Good', 'New', 'York', 'Please', 'Thanks'] + +This module contains several subclasses of ``RegexpTokenizer`` +that use pre-defined regular expressions. + + >>> from nltk.tokenize import BlanklineTokenizer + >>> # Uses '\s*\n\s*\n\s*': + >>> BlanklineTokenizer().tokenize(s) + ['Good muffins cost $3.88\nin New York. Please buy me\ntwo of them.', + 'Thanks.'] + +All of the regular expression tokenizers are also available as functions: + + >>> from nltk.tokenize import regexp_tokenize, wordpunct_tokenize, blankline_tokenize + >>> regexp_tokenize(s, pattern='\w+|\$[\d\.]+|\S+') + ['Good', 'muffins', 'cost', '$3.88', 'in', 'New', 'York', '.', + 'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.'] + >>> wordpunct_tokenize(s) + ['Good', 'muffins', 'cost', '$', '3', '.', '88', 'in', 'New', 'York', + '.', 'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.'] + >>> blankline_tokenize(s) + ['Good muffins cost $3.88\nin New York. Please buy me\ntwo of them.', 'Thanks.'] + +Caution: The function ``regexp_tokenize()`` takes the text as its +first argument, and the regular expression pattern as its second +argument. This differs from the conventions used by Python's +``re`` functions, where the pattern is always the first argument. +(This is for consistency with the other NLTK tokenizers.) +""" +from __future__ import unicode_literals + +import re + +from nltk.tokenize.api import TokenizerI +from nltk.tokenize.util import regexp_span_tokenize +from nltk.compat import python_2_unicode_compatible + + +@python_2_unicode_compatible +class RegexpTokenizer(TokenizerI): + """ + A tokenizer that splits a string using a regular expression, which + matches either the tokens or the separators between tokens. + + >>> tokenizer = RegexpTokenizer('\w+|\$[\d\.]+|\S+') + + :type pattern: str + :param pattern: The pattern used to build this tokenizer. + (This pattern must not contain capturing parentheses; + Use non-capturing parentheses, e.g. (?:...), instead) + :type gaps: bool + :param gaps: True if this tokenizer's pattern should be used + to find separators between tokens; False if this + tokenizer's pattern should be used to find the tokens + themselves. + :type discard_empty: bool + :param discard_empty: True if any empty tokens `''` + generated by the tokenizer should be discarded. Empty + tokens can only be generated if `_gaps == True`. + :type flags: int + :param flags: The regexp flags used to compile this + tokenizer's pattern. By default, the following flags are + used: `re.UNICODE | re.MULTILINE | re.DOTALL`. 
+ + """ + + def __init__( + self, + pattern, + gaps=False, + discard_empty=True, + flags=re.UNICODE | re.MULTILINE | re.DOTALL, + ): + # If they gave us a regexp object, extract the pattern. + pattern = getattr(pattern, 'pattern', pattern) + + self._pattern = pattern + self._gaps = gaps + self._discard_empty = discard_empty + self._flags = flags + self._regexp = None + + def _check_regexp(self): + if self._regexp is None: + self._regexp = re.compile(self._pattern, self._flags) + + def tokenize(self, text): + self._check_regexp() + # If our regexp matches gaps, use re.split: + if self._gaps: + if self._discard_empty: + return [tok for tok in self._regexp.split(text) if tok] + else: + return self._regexp.split(text) + + # If our regexp matches tokens, use re.findall: + else: + return self._regexp.findall(text) + + def span_tokenize(self, text): + self._check_regexp() + + if self._gaps: + for left, right in regexp_span_tokenize(text, self._regexp): + if not (self._discard_empty and left == right): + yield left, right + else: + for m in re.finditer(self._regexp, text): + yield m.span() + + def __repr__(self): + return '%s(pattern=%r, gaps=%r, discard_empty=%r, flags=%r)' % ( + self.__class__.__name__, + self._pattern, + self._gaps, + self._discard_empty, + self._flags, + ) + + +class WhitespaceTokenizer(RegexpTokenizer): + r""" + Tokenize a string on whitespace (space, tab, newline). + In general, users should use the string ``split()`` method instead. + + >>> from nltk.tokenize import WhitespaceTokenizer + >>> s = "Good muffins cost $3.88\nin New York. Please buy me\ntwo of them.\n\nThanks." + >>> WhitespaceTokenizer().tokenize(s) + ['Good', 'muffins', 'cost', '$3.88', 'in', 'New', 'York.', + 'Please', 'buy', 'me', 'two', 'of', 'them.', 'Thanks.'] + """ + + def __init__(self): + RegexpTokenizer.__init__(self, r'\s+', gaps=True) + + +class BlanklineTokenizer(RegexpTokenizer): + """ + Tokenize a string, treating any sequence of blank lines as a delimiter. + Blank lines are defined as lines containing no characters, except for + space or tab characters. + """ + + def __init__(self): + RegexpTokenizer.__init__(self, r'\s*\n\s*\n\s*', gaps=True) + + +class WordPunctTokenizer(RegexpTokenizer): + """ + Tokenize a text into a sequence of alphabetic and + non-alphabetic characters, using the regexp ``\w+|[^\w\s]+``. + + >>> from nltk.tokenize import WordPunctTokenizer + >>> s = "Good muffins cost $3.88\\nin New York. Please buy me\\ntwo of them.\\n\\nThanks." + >>> WordPunctTokenizer().tokenize(s) + ['Good', 'muffins', 'cost', '$', '3', '.', '88', 'in', 'New', 'York', + '.', 'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.'] + """ + + def __init__(self): + RegexpTokenizer.__init__(self, r'\w+|[^\w\s]+') + + +###################################################################### +# { Tokenization Functions +###################################################################### + + +def regexp_tokenize( + text, + pattern, + gaps=False, + discard_empty=True, + flags=re.UNICODE | re.MULTILINE | re.DOTALL, +): + """ + Return a tokenized copy of *text*. See :class:`.RegexpTokenizer` + for descriptions of the arguments. 
+ """ + tokenizer = RegexpTokenizer(pattern, gaps, discard_empty, flags) + return tokenizer.tokenize(text) + + +blankline_tokenize = BlanklineTokenizer().tokenize +wordpunct_tokenize = WordPunctTokenizer().tokenize diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/repp.py b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/repp.py new file mode 100644 index 0000000..2cf7a50 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/repp.py @@ -0,0 +1,155 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Interface to the Repp Tokenizer +# +# Copyright (C) 2001-2015 NLTK Project +# Authors: Rebecca Dridan and Stephan Oepen +# Contributors: Liling Tan +# +# URL: +# For license information, see LICENSE.TXT + +from __future__ import unicode_literals, print_function + +import os +import re +import sys +import subprocess +import tempfile + +from six import text_type + +from nltk.data import ZipFilePathPointer +from nltk.internals import find_dir + +from nltk.tokenize.api import TokenizerI + + +class ReppTokenizer(TokenizerI): + """ + A class for word tokenization using the REPP parser described in + Rebecca Dridan and Stephan Oepen (2012) Tokenization: Returning to a + Long Solved Problem - A Survey, Contrastive Experiment, Recommendations, + and Toolkit. In ACL. http://anthology.aclweb.org/P/P12/P12-2.pdf#page=406 + + >>> sents = ['Tokenization is widely regarded as a solved problem due to the high accuracy that rulebased tokenizers achieve.' , + ... 'But rule-based tokenizers are hard to maintain and their rules language specific.' , + ... 'We evaluated our method on three languages and obtained error rates of 0.27% (English), 0.35% (Dutch) and 0.76% (Italian) for our best models.' + ... ] + >>> tokenizer = ReppTokenizer('/home/alvas/repp/') # doctest: +SKIP + >>> for sent in sents: # doctest: +SKIP + ... tokenizer.tokenize(sent) # doctest: +SKIP + ... + (u'Tokenization', u'is', u'widely', u'regarded', u'as', u'a', u'solved', u'problem', u'due', u'to', u'the', u'high', u'accuracy', u'that', u'rulebased', u'tokenizers', u'achieve', u'.') + (u'But', u'rule-based', u'tokenizers', u'are', u'hard', u'to', u'maintain', u'and', u'their', u'rules', u'language', u'specific', u'.') + (u'We', u'evaluated', u'our', u'method', u'on', u'three', u'languages', u'and', u'obtained', u'error', u'rates', u'of', u'0.27', u'%', u'(', u'English', u')', u',', u'0.35', u'%', u'(', u'Dutch', u')', u'and', u'0.76', u'%', u'(', u'Italian', u')', u'for', u'our', u'best', u'models', u'.') + + >>> for sent in tokenizer.tokenize_sents(sents): # doctest: +SKIP + ... print sent # doctest: +SKIP + ... + (u'Tokenization', u'is', u'widely', u'regarded', u'as', u'a', u'solved', u'problem', u'due', u'to', u'the', u'high', u'accuracy', u'that', u'rulebased', u'tokenizers', u'achieve', u'.') + (u'But', u'rule-based', u'tokenizers', u'are', u'hard', u'to', u'maintain', u'and', u'their', u'rules', u'language', u'specific', u'.') + (u'We', u'evaluated', u'our', u'method', u'on', u'three', u'languages', u'and', u'obtained', u'error', u'rates', u'of', u'0.27', u'%', u'(', u'English', u')', u',', u'0.35', u'%', u'(', u'Dutch', u')', u'and', u'0.76', u'%', u'(', u'Italian', u')', u'for', u'our', u'best', u'models', u'.') + >>> for sent in tokenizer.tokenize_sents(sents, keep_token_positions=True): # doctest: +SKIP + ... print sent # doctest: +SKIP + ... 
+ [(u'Tokenization', 0, 12), (u'is', 13, 15), (u'widely', 16, 22), (u'regarded', 23, 31), (u'as', 32, 34), (u'a', 35, 36), (u'solved', 37, 43), (u'problem', 44, 51), (u'due', 52, 55), (u'to', 56, 58), (u'the', 59, 62), (u'high', 63, 67), (u'accuracy', 68, 76), (u'that', 77, 81), (u'rulebased', 82, 91), (u'tokenizers', 92, 102), (u'achieve', 103, 110), (u'.', 110, 111)] + [(u'But', 0, 3), (u'rule-based', 4, 14), (u'tokenizers', 15, 25), (u'are', 26, 29), (u'hard', 30, 34), (u'to', 35, 37), (u'maintain', 38, 46), (u'and', 47, 50), (u'their', 51, 56), (u'rules', 57, 62), (u'language', 63, 71), (u'specific', 72, 80), (u'.', 80, 81)] + [(u'We', 0, 2), (u'evaluated', 3, 12), (u'our', 13, 16), (u'method', 17, 23), (u'on', 24, 26), (u'three', 27, 32), (u'languages', 33, 42), (u'and', 43, 46), (u'obtained', 47, 55), (u'error', 56, 61), (u'rates', 62, 67), (u'of', 68, 70), (u'0.27', 71, 75), (u'%', 75, 76), (u'(', 77, 78), (u'English', 78, 85), (u')', 85, 86), (u',', 86, 87), (u'0.35', 88, 92), (u'%', 92, 93), (u'(', 94, 95), (u'Dutch', 95, 100), (u')', 100, 101), (u'and', 102, 105), (u'0.76', 106, 110), (u'%', 110, 111), (u'(', 112, 113), (u'Italian', 113, 120), (u')', 120, 121), (u'for', 122, 125), (u'our', 126, 129), (u'best', 130, 134), (u'models', 135, 141), (u'.', 141, 142)] + """ + + def __init__(self, repp_dir, encoding='utf8'): + self.repp_dir = self.find_repptokenizer(repp_dir) + # Set a directory to store the temporary files. + self.working_dir = tempfile.gettempdir() + # Set an encoding for the input strings. + self.encoding = encoding + + def tokenize(self, sentence): + """ + Use Repp to tokenize a single sentence. + + :param sentence: A single sentence string. + :type sentence: str + :return: A tuple of tokens. + :rtype: tuple(str) + """ + return next(self.tokenize_sents([sentence])) + + def tokenize_sents(self, sentences, keep_token_positions=False): + """ + Tokenize multiple sentences using Repp. + + :param sentences: A list of sentence strings. + :type sentences: list(str) + :return: A list of tuples of tokens + :rtype: iter(tuple(str)) + """ + with tempfile.NamedTemporaryFile( + prefix='repp_input.', dir=self.working_dir, mode='w', delete=False + ) as input_file: + # Write sentences to temporary input file. + for sent in sentences: + input_file.write(text_type(sent) + '\n') + input_file.close() + # Generate command to run REPP. + cmd = self.generate_repp_command(input_file.name) + # Decode the stdout and strips the ending newline. + repp_output = self._execute(cmd).decode(self.encoding).strip() + for tokenized_sent in self.parse_repp_outputs(repp_output): + if not keep_token_positions: + # Removes token position information. + tokenized_sent, starts, ends = zip(*tokenized_sent) + yield tokenized_sent + + def generate_repp_command(self, inputfilename): + """ + This module generates the REPP command to be used at the terminal. + + :param inputfilename: path to the input file + :type inputfilename: str + """ + cmd = [self.repp_dir + '/src/repp'] + cmd += ['-c', self.repp_dir + '/erg/repp.set'] + cmd += ['--format', 'triple'] + cmd += [inputfilename] + return cmd + + @staticmethod + def _execute(cmd): + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = p.communicate() + return stdout + + @staticmethod + def parse_repp_outputs(repp_output): + """ + This module parses the tri-tuple format that REPP outputs using the + "--format triple" option and returns an generator with tuple of string + tokens. 
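The triple format that `parse_repp_outputs` consumes is simple enough to show in isolation. The sample below is invented (REPP itself is an external binary), but it follows the `(start, end, token)` line layout the method parses:

    import re

    # Invented stand-in for one sentence of REPP "--format triple" output.
    sample = "(0, 3, But)\n(4, 14, rule-based)\n(15, 25, tokenizers)"

    line_regex = re.compile(r'^\((\d+), (\d+), (.+)\)$', re.MULTILINE)
    triples = [(token, int(start), int(end))
               for start, end, token in line_regex.findall(sample)]
    # triples == [('But', 0, 3), ('rule-based', 4, 14), ('tokenizers', 15, 25)]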
+ + :param repp_output: + :type repp_output: type + :return: an iterable of the tokenized sentences as tuples of strings + :rtype: iter(tuple) + """ + line_regex = re.compile('^\((\d+), (\d+), (.+)\)$', re.MULTILINE) + for section in repp_output.split('\n\n'): + words_with_positions = [ + (token, int(start), int(end)) + for start, end, token in line_regex.findall(section) + ] + words = tuple(t[2] for t in words_with_positions) + yield words_with_positions + + def find_repptokenizer(self, repp_dirname): + """ + A module to find REPP tokenizer binary and its *repp.set* config file. + """ + if os.path.exists(repp_dirname): # If a full path is given. + _repp_dir = repp_dirname + else: # Try to find path to REPP directory in environment variables. + _repp_dir = find_dir(repp_dirname, env_vars=('REPP_TOKENIZER',)) + # Checks for the REPP binary and erg/repp.set config file. + assert os.path.exists(_repp_dir + '/src/repp') + assert os.path.exists(_repp_dir + '/erg/repp.set') + return _repp_dir diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/sexpr.py b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/sexpr.py new file mode 100644 index 0000000..e2a1dd6 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/sexpr.py @@ -0,0 +1,140 @@ +# Natural Language Toolkit: Tokenizers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Yoav Goldberg +# Steven Bird (minor edits) +# URL: +# For license information, see LICENSE.TXT + +""" +S-Expression Tokenizer + +``SExprTokenizer`` is used to find parenthesized expressions in a +string. In particular, it divides a string into a sequence of +substrings that are either parenthesized expressions (including any +nested parenthesized expressions), or other whitespace-separated +tokens. + + >>> from nltk.tokenize import SExprTokenizer + >>> SExprTokenizer().tokenize('(a b (c d)) e f (g)') + ['(a b (c d))', 'e', 'f', '(g)'] + +By default, `SExprTokenizer` will raise a ``ValueError`` exception if +used to tokenize an expression with non-matching parentheses: + + >>> SExprTokenizer().tokenize('c) d) e (f (g') + Traceback (most recent call last): + ... + ValueError: Un-matched close paren at char 1 + +The ``strict`` argument can be set to False to allow for +non-matching parentheses. Any unmatched close parentheses will be +listed as their own s-expression; and the last partial sexpr with +unmatched open parentheses will be listed as its own sexpr: + + >>> SExprTokenizer(strict=False).tokenize('c) d) e (f (g') + ['c', ')', 'd', ')', 'e', '(f (g'] + +The characters used for open and close parentheses may be customized +using the ``parens`` argument to the `SExprTokenizer` constructor: + + >>> SExprTokenizer(parens='{}').tokenize('{a b {c d}} e f {g}') + ['{a b {c d}}', 'e', 'f', '{g}'] + +The s-expression tokenizer is also available as a function: + + >>> from nltk.tokenize import sexpr_tokenize + >>> sexpr_tokenize('(a b (c d)) e f (g)') + ['(a b (c d))', 'e', 'f', '(g)'] + +""" + +import re + +from nltk.tokenize.api import TokenizerI + + +class SExprTokenizer(TokenizerI): + """ + A tokenizer that divides strings into s-expressions. + An s-expresion can be either: + + - a parenthesized expression, including any nested parenthesized + expressions, or + - a sequence of non-whitespace non-parenthesis characters. + + For example, the string ``(a (b c)) d e (f)`` consists of four + s-expressions: ``(a (b c))``, ``d``, ``e``, and ``(f)``. 
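The depth-tracking idea behind `SExprTokenizer.tokenize()` (defined below) fits in a few lines. This is a simplified sketch that assumes single-character parentheses and silently tolerates unmatched ones, ignoring the strict-mode error handling of the real class:

    def sexpr_split(text, open_paren='(', close_paren=')'):
        # Text at depth 0 is split on whitespace; everything from a depth-0
        # open paren to its matching close paren is kept as a single token.
        result, pos, depth = [], 0, 0
        for i, ch in enumerate(text):
            if ch == open_paren:
                if depth == 0:
                    result += text[pos:i].split()
                    pos = i
                depth += 1
            elif ch == close_paren:
                depth = max(0, depth - 1)
                if depth == 0:
                    result.append(text[pos:i + 1])
                    pos = i + 1
        result += text[pos:].split()
        return result

    assert sexpr_split('(a b (c d)) e f (g)') == ['(a b (c d))', 'e', 'f', '(g)']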
+ + By default, the characters ``(`` and ``)`` are treated as open and + close parentheses, but alternative strings may be specified. + + :param parens: A two-element sequence specifying the open and close parentheses + that should be used to find sexprs. This will typically be either a + two-character string, or a list of two strings. + :type parens: str or list + :param strict: If true, then raise an exception when tokenizing an ill-formed sexpr. + """ + + def __init__(self, parens='()', strict=True): + if len(parens) != 2: + raise ValueError('parens must contain exactly two strings') + self._strict = strict + self._open_paren = parens[0] + self._close_paren = parens[1] + self._paren_regexp = re.compile( + '%s|%s' % (re.escape(parens[0]), re.escape(parens[1])) + ) + + def tokenize(self, text): + """ + Return a list of s-expressions extracted from *text*. + For example: + + >>> SExprTokenizer().tokenize('(a b (c d)) e f (g)') + ['(a b (c d))', 'e', 'f', '(g)'] + + All parentheses are assumed to mark s-expressions. + (No special processing is done to exclude parentheses that occur + inside strings, or following backslash characters.) + + If the given expression contains non-matching parentheses, + then the behavior of the tokenizer depends on the ``strict`` + parameter to the constructor. If ``strict`` is ``True``, then + raise a ``ValueError``. If ``strict`` is ``False``, then any + unmatched close parentheses will be listed as their own + s-expression; and the last partial s-expression with unmatched open + parentheses will be listed as its own s-expression: + + >>> SExprTokenizer(strict=False).tokenize('c) d) e (f (g') + ['c', ')', 'd', ')', 'e', '(f (g'] + + :param text: the string to be tokenized + :type text: str or iter(str) + :rtype: iter(str) + """ + result = [] + pos = 0 + depth = 0 + for m in self._paren_regexp.finditer(text): + paren = m.group() + if depth == 0: + result += text[pos : m.start()].split() + pos = m.start() + if paren == self._open_paren: + depth += 1 + if paren == self._close_paren: + if self._strict and depth == 0: + raise ValueError('Un-matched close paren at char %d' % m.start()) + depth = max(0, depth - 1) + if depth == 0: + result.append(text[pos : m.end()]) + pos = m.end() + if self._strict and depth > 0: + raise ValueError('Un-matched open paren at char %d' % pos) + if pos < len(text): + result.append(text[pos:]) + return result + + +sexpr_tokenize = SExprTokenizer().tokenize diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/simple.py b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/simple.py new file mode 100644 index 0000000..c467678 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/simple.py @@ -0,0 +1,140 @@ +# Natural Language Toolkit: Simple Tokenizers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird +# URL: +# For license information, see LICENSE.TXT + +r""" +Simple Tokenizers + +These tokenizers divide strings into substrings using the string +``split()`` method. +When tokenizing using a particular delimiter string, use +the string ``split()`` method directly, as this is more efficient. + +The simple tokenizers are *not* available as separate functions; +instead, you should just use the string ``split()`` method directly: + + >>> s = "Good muffins cost $3.88\nin New York. Please buy me\ntwo of them.\n\nThanks." 
+ >>> s.split() + ['Good', 'muffins', 'cost', '$3.88', 'in', 'New', 'York.', + 'Please', 'buy', 'me', 'two', 'of', 'them.', 'Thanks.'] + >>> s.split(' ') + ['Good', 'muffins', 'cost', '$3.88\nin', 'New', 'York.', '', + 'Please', 'buy', 'me\ntwo', 'of', 'them.\n\nThanks.'] + >>> s.split('\n') + ['Good muffins cost $3.88', 'in New York. Please buy me', + 'two of them.', '', 'Thanks.'] + +The simple tokenizers are mainly useful because they follow the +standard ``TokenizerI`` interface, and so can be used with any code +that expects a tokenizer. For example, these tokenizers can be used +to specify the tokenization conventions when building a `CorpusReader`. + +""" +from __future__ import unicode_literals +from nltk.tokenize.api import TokenizerI, StringTokenizer +from nltk.tokenize.util import string_span_tokenize, regexp_span_tokenize + + +class SpaceTokenizer(StringTokenizer): + r"""Tokenize a string using the space character as a delimiter, + which is the same as ``s.split(' ')``. + + >>> from nltk.tokenize import SpaceTokenizer + >>> s = "Good muffins cost $3.88\nin New York. Please buy me\ntwo of them.\n\nThanks." + >>> SpaceTokenizer().tokenize(s) + ['Good', 'muffins', 'cost', '$3.88\nin', 'New', 'York.', '', + 'Please', 'buy', 'me\ntwo', 'of', 'them.\n\nThanks.'] + """ + + _string = ' ' + + +class TabTokenizer(StringTokenizer): + r"""Tokenize a string use the tab character as a delimiter, + the same as ``s.split('\t')``. + + >>> from nltk.tokenize import TabTokenizer + >>> TabTokenizer().tokenize('a\tb c\n\t d') + ['a', 'b c\n', ' d'] + """ + + _string = '\t' + + +class CharTokenizer(StringTokenizer): + """Tokenize a string into individual characters. If this functionality + is ever required directly, use ``for char in string``. + """ + + def tokenize(self, s): + return list(s) + + def span_tokenize(self, s): + for i, j in enumerate(range(1, len(s) + 1)): + yield i, j + + +class LineTokenizer(TokenizerI): + r"""Tokenize a string into its lines, optionally discarding blank lines. + This is similar to ``s.split('\n')``. + + >>> from nltk.tokenize import LineTokenizer + >>> s = "Good muffins cost $3.88\nin New York. Please buy me\ntwo of them.\n\nThanks." + >>> LineTokenizer(blanklines='keep').tokenize(s) + ['Good muffins cost $3.88', 'in New York. Please buy me', + 'two of them.', '', 'Thanks.'] + >>> # same as [l for l in s.split('\n') if l.strip()]: + >>> LineTokenizer(blanklines='discard').tokenize(s) + ['Good muffins cost $3.88', 'in New York. Please buy me', + 'two of them.', 'Thanks.'] + + :param blanklines: Indicates how blank lines should be handled. Valid values are: + + - ``discard``: strip blank lines out of the token list before returning it. + A line is considered blank if it contains only whitespace characters. + - ``keep``: leave all blank lines in the token list. + - ``discard-eof``: if the string ends with a newline, then do not generate + a corresponding token ``''`` after that newline. + """ + + def __init__(self, blanklines='discard'): + valid_blanklines = ('discard', 'keep', 'discard-eof') + if blanklines not in valid_blanklines: + raise ValueError( + 'Blank lines must be one of: %s' % ' '.join(valid_blanklines) + ) + + self._blanklines = blanklines + + def tokenize(self, s): + lines = s.splitlines() + # If requested, strip off blank lines. 
+ if self._blanklines == 'discard': + lines = [l for l in lines if l.rstrip()] + elif self._blanklines == 'discard-eof': + if lines and not lines[-1].strip(): + lines.pop() + return lines + + # discard-eof not implemented + def span_tokenize(self, s): + if self._blanklines == 'keep': + for span in string_span_tokenize(s, r'\n'): + yield span + else: + for span in regexp_span_tokenize(s, r'\n(\s+\n)*'): + yield span + + +###################################################################### +# { Tokenization Functions +###################################################################### +# XXX: it is stated in module docs that there is no function versions + + +def line_tokenize(text, blanklines='discard'): + return LineTokenizer(blanklines).tokenize(text) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/sonority_sequencing.py b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/sonority_sequencing.py new file mode 100644 index 0000000..1ac71b4 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/sonority_sequencing.py @@ -0,0 +1,187 @@ +# Natural Language Toolkit: Tokenizers +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Christopher Hench +# Alex Estes +# URL: +# For license information, see LICENSE.TXT + +""" +The Sonority Sequencing Principle (SSP) is a language agnostic algorithm proposed +by Otto Jesperson in 1904. The sonorous quality of a phoneme is judged by the +openness of the lips. Syllable breaks occur before troughs in sonority. For more +on the SSP see Selkirk (1984). + +The default implementation uses the English alphabet, but the `sonority_hiearchy` +can be modified to IPA or any other alphabet for the use-case. The SSP is a +universal syllabification algorithm, but that does not mean it performs equally +across languages. Bartlett et al. (2009) is a good benchmark for English accuracy +if utilizing IPA (pg. 311). + +Importantly, if a custom hiearchy is supplied and vowels span across more than +one level, they should be given separately to the `vowels` class attribute. + +References: +- Otto Jespersen. 1904. Lehrbuch der Phonetik. + Leipzig, Teubner. Chapter 13, Silbe, pp. 185-203. +- Elisabeth Selkirk. 1984. On the major class features and syllable theory. + In Aronoff & Oehrle (eds.) Language Sound Structure: Studies in Phonology. + Cambridge, MIT Press. pp. 107-136. +- Susan Bartlett, et al. 2009. On the Syllabification of Phonemes. + In HLT-NAACL. pp. 308-316. +""" + +from __future__ import unicode_literals +import warnings + +import re +from string import punctuation + +from nltk.tokenize.api import TokenizerI +from nltk.util import ngrams + + +class SyllableTokenizer(TokenizerI): + """ + Syllabifies words based on the Sonority Sequencing Principle (SSP). + + >>> from nltk import word_tokenize + >>> SSP = SyllableTokenizer() + >>> SSP.tokenize('justification') + ['jus', 'ti', 'fi', 'ca', 'tion'] + >>> text = "This is a foobar-like sentence." + >>> [SSP.tokenize(token) for token in word_tokenize(text)] + [['This'], ['is'], ['a'], ['foo', 'bar', '-', 'li', 'ke'], ['sen', 'ten', 'ce'], ['.']] + """ + + def __init__(self, lang='en', sonority_hierarchy=False): + """ + :param lang: Language parameter, default is English, 'en' + :type lang: str + :param sonority_hierarchy: Sonority hierarchy according to the + Sonority Sequencing Principle. + :type sonority_hierarchy: list(str) + """ + # Sonority hierarchy should be provided in descending order. 
+ # If vowels are spread across multiple levels, they should be + # passed assigned self.vowels var together, otherwise should be + # placed in first index of hierarchy. + if not sonority_hierarchy and lang == 'en': + sonority_hierarchy = ['aeiouy', # vowels. + 'lmnrw', # nasals. + 'zvsf', # fricatives. + 'bcdgtkpqxhj' # stops. + ] + + self.vowels = sonority_hierarchy[0] + self.phoneme_map = {} + for i, level in enumerate(sonority_hierarchy): + for c in level: + sonority_level = len(sonority_hierarchy) - i + self.phoneme_map[c] = sonority_level + self.phoneme_map[c.upper()] = sonority_level + + def assign_values(self, token): + """ + Assigns each phoneme its value from the sonority hierarchy. + Note: Sentence/text has to be tokenized first. + + :param token: Single word or token + :type token: str + :return: List of tuples, first element is character/phoneme and + second is the soronity value. + :rtype: list(tuple(str, int)) + """ + syllables_values = [] + for c in token: + try: + syllables_values.append((c, self.phoneme_map[c])) + except KeyError: + if c not in punctuation: + warnings.warn("Character not defined in sonority_hierarchy," + " assigning as vowel: '{}'".format(c)) + syllables_values.append((c, max(self.phoneme_map.values()))) + self.vowels += c + else: # If it's a punctuation, assing -1. + syllables_values.append((c, -1)) + return syllables_values + + def validate_syllables(self, syllable_list): + """ + Ensures each syllable has at least one vowel. + If the following syllable doesn't have vowel, add it to the current one. + + :param syllable_list: Single word or token broken up into syllables. + :type syllable_list: list(str) + :return: Single word or token broken up into syllables + (with added syllables if necessary) + :rtype: list(str) + """ + valid_syllables = [] + front = "" + for i, syllable in enumerate(syllable_list): + if syllable in punctuation: + valid_syllables.append(syllable) + continue + if not re.search('|'.join(self.vowels), syllable): + if len(valid_syllables) == 0: + front += syllable + else: + valid_syllables = valid_syllables[:-1] + [valid_syllables[-1] + syllable] + else: + if len(valid_syllables) == 0: + valid_syllables.append(front + syllable) + else: + valid_syllables.append(syllable) + + return valid_syllables + + def tokenize(self, token): + """ + Apply the SSP to return a list of syllables. + Note: Sentence/text has to be tokenized first. + + :param token: Single word or token + :type token: str + :return syllable_list: Single word or token broken up into syllables. + :rtype: list(str) + """ + # assign values from hierarchy + syllables_values = self.assign_values(token) + + # if only one vowel return word + if sum(token.count(x) for x in self.vowels) <= 1: + return [token] + + syllable_list = [] + syllable = syllables_values[0][0] # start syllable with first phoneme + for trigram in ngrams(syllables_values, n=3): + phonemes, values = zip(*trigram) + # Sonority of previous, focal and following phoneme + prev_value, focal_value, next_value = values + # Focal phoneme. + focal_phoneme = phonemes[1] + + # These cases trigger syllable break. + if focal_value == -1: # If it's a punctuation, just break. 
+ syllable_list.append(syllable) + syllable_list.append(focal_phoneme) + syllable = "" + elif prev_value >= focal_value == next_value: + syllable += focal_phoneme + syllable_list.append(syllable) + syllable = "" + + elif prev_value > focal_value < next_value: + syllable_list.append(syllable) + syllable = "" + syllable += focal_phoneme + + # no syllable break + else: + syllable += focal_phoneme + + syllable += syllables_values[-1][0] # append last phoneme + syllable_list.append(syllable) + + return self.validate_syllables(syllable_list) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/stanford.py b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/stanford.py new file mode 100644 index 0000000..93fb219 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/stanford.py @@ -0,0 +1,133 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Interface to the Stanford Tokenizer +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Xu +# +# URL: +# For license information, see LICENSE.TXT + +from __future__ import unicode_literals, print_function + +import tempfile +import os +import json +from subprocess import PIPE +import warnings + +from six import text_type + +from nltk.internals import find_jar, config_java, java, _java_options +from nltk.tokenize.api import TokenizerI +from nltk.parse.corenlp import CoreNLPParser + +_stanford_url = 'https://nlp.stanford.edu/software/tokenizer.shtml' + + +class StanfordTokenizer(TokenizerI): + r""" + Interface to the Stanford Tokenizer + + >>> from nltk.tokenize.stanford import StanfordTokenizer + >>> s = "Good muffins cost $3.88\nin New York. Please buy me\ntwo of them.\nThanks." + >>> StanfordTokenizer().tokenize(s) + ['Good', 'muffins', 'cost', '$', '3.88', 'in', 'New', 'York', '.', 'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.'] + >>> s = "The colour of the wall is blue." + >>> StanfordTokenizer(options={"americanize": True}).tokenize(s) + ['The', 'color', 'of', 'the', 'wall', 'is', 'blue', '.'] + """ + + _JAR = 'stanford-postagger.jar' + + def __init__( + self, + path_to_jar=None, + encoding='utf8', + options=None, + verbose=False, + java_options='-mx1000m', + ): + # Raise deprecation warning. + warnings.warn( + str( + "\nThe StanfordTokenizer will " + "be deprecated in version 3.2.5.\n" + "Please use \033[91mnltk.parse.corenlp.CoreNLPParser\033[0m instead.'" + ), + DeprecationWarning, + stacklevel=2, + ) + + self._stanford_jar = find_jar( + self._JAR, + path_to_jar, + env_vars=('STANFORD_POSTAGGER',), + searchpath=(), + url=_stanford_url, + verbose=verbose, + ) + + self._encoding = encoding + self.java_options = java_options + + options = {} if options is None else options + self._options_cmd = ','.join( + '{0}={1}'.format(key, val) for key, val in options.items() + ) + + @staticmethod + def _parse_tokenized_output(s): + return s.splitlines() + + def tokenize(self, s): + """ + Use stanford tokenizer's PTBTokenizer to tokenize multiple sentences. + """ + cmd = ['edu.stanford.nlp.process.PTBTokenizer'] + return self._parse_tokenized_output(self._execute(cmd, s)) + + def _execute(self, cmd, input_, verbose=False): + encoding = self._encoding + cmd.extend(['-charset', encoding]) + _options_cmd = self._options_cmd + if _options_cmd: + cmd.extend(['-options', self._options_cmd]) + + default_options = ' '.join(_java_options) + + # Configure java. + config_java(options=self.java_options, verbose=verbose) + + # Windows is incompatible with NamedTemporaryFile() without passing in delete=False. 
+ with tempfile.NamedTemporaryFile(mode='wb', delete=False) as input_file: + # Write the actual sentences to the temporary input file + if isinstance(input_, text_type) and encoding: + input_ = input_.encode(encoding) + input_file.write(input_) + input_file.flush() + + cmd.append(input_file.name) + + # Run the tagger and get the output. + stdout, stderr = java( + cmd, classpath=self._stanford_jar, stdout=PIPE, stderr=PIPE + ) + stdout = stdout.decode(encoding) + + os.unlink(input_file.name) + + # Return java configurations to their default values. + config_java(options=default_options, verbose=False) + + return stdout + + +def setup_module(module): + from nose import SkipTest + + try: + StanfordTokenizer() + except LookupError: + raise SkipTest( + 'doctests from nltk.tokenize.stanford are skipped because the stanford postagger jar doesn\'t exist' + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/stanford_segmenter.py b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/stanford_segmenter.py new file mode 100644 index 0000000..858c4d8 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/stanford_segmenter.py @@ -0,0 +1,314 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Interface to the Stanford Segmenter +# for Chinese and Arabic +# +# Copyright (C) 2001-2019 NLTK Project +# Author: 52nlp <52nlpcn@gmail.com> +# Casper Lehmann-Strøm +# Alex Constantin +# +# URL: +# For license information, see LICENSE.TXT + +from __future__ import unicode_literals, print_function + +import tempfile +import os +import json +import warnings +from subprocess import PIPE + +from six import text_type + +from nltk import compat +from nltk.internals import ( + find_jar, + find_file, + find_dir, + config_java, + java, + _java_options, +) +from nltk.tokenize.api import TokenizerI + + +_stanford_url = 'https://nlp.stanford.edu/software' + + +class StanfordSegmenter(TokenizerI): + """Interface to the Stanford Segmenter + + If stanford-segmenter version is older than 2016-10-31, then path_to_slf4j + should be provieded, for example:: + + seg = StanfordSegmenter(path_to_slf4j='/YOUR_PATH/slf4j-api.jar') + + >>> from nltk.tokenize.stanford_segmenter import StanfordSegmenter + >>> seg = StanfordSegmenter() + >>> seg.default_config('zh') + >>> sent = u'这是斯坦福中文分词器测试' + >>> print(seg.segment(sent)) + \u8fd9 \u662f \u65af\u5766\u798f \u4e2d\u6587 \u5206\u8bcd\u5668 \u6d4b\u8bd5 + + >>> seg.default_config('ar') + >>> sent = u'هذا هو تصنيف ستانفورد العربي للكلمات' + >>> print(seg.segment(sent.split())) + \u0647\u0630\u0627 \u0647\u0648 \u062a\u0635\u0646\u064a\u0641 \u0633\u062a\u0627\u0646\u0641\u0648\u0631\u062f \u0627\u0644\u0639\u0631\u0628\u064a \u0644 \u0627\u0644\u0643\u0644\u0645\u0627\u062a + + """ + + _JAR = 'stanford-segmenter.jar' + + def __init__( + self, + path_to_jar=None, + path_to_slf4j=None, + java_class=None, + path_to_model=None, + path_to_dict=None, + path_to_sihan_corpora_dict=None, + sihan_post_processing='false', + keep_whitespaces='false', + encoding='UTF-8', + options=None, + verbose=False, + java_options='-mx2g', + ): + # Raise deprecation warning. 
+ warnings.simplefilter('always', DeprecationWarning) + warnings.warn( + str( + "\nThe StanfordTokenizer will " + "be deprecated in version 3.2.5.\n" + "Please use \033[91mnltk.parse.corenlp.CoreNLPTokenizer\033[0m instead.'" + ), + DeprecationWarning, + stacklevel=2, + ) + warnings.simplefilter('ignore', DeprecationWarning) + + stanford_segmenter = find_jar( + self._JAR, + path_to_jar, + env_vars=('STANFORD_SEGMENTER',), + searchpath=(), + url=_stanford_url, + verbose=verbose, + ) + if path_to_slf4j is not None: + slf4j = find_jar( + 'slf4j-api.jar', + path_to_slf4j, + env_vars=('SLF4J', 'STANFORD_SEGMENTER'), + searchpath=(), + url=_stanford_url, + verbose=verbose, + ) + else: + slf4j = None + + # This is passed to java as the -cp option, the old version of segmenter needs slf4j. + # The new version of stanford-segmenter-2016-10-31 doesn't need slf4j + self._stanford_jar = os.pathsep.join( + _ for _ in [stanford_segmenter, slf4j] if _ is not None + ) + + self._java_class = java_class + self._model = path_to_model + self._sihan_corpora_dict = path_to_sihan_corpora_dict + self._sihan_post_processing = sihan_post_processing + self._keep_whitespaces = keep_whitespaces + self._dict = path_to_dict + + self._encoding = encoding + self.java_options = java_options + options = {} if options is None else options + self._options_cmd = ','.join( + '{0}={1}'.format(key, json.dumps(val)) for key, val in options.items() + ) + + def default_config(self, lang): + """ + Attempt to intialize Stanford Word Segmenter for the specified language + using the STANFORD_SEGMENTER and STANFORD_MODELS environment variables + """ + + search_path = () + if os.environ.get('STANFORD_SEGMENTER'): + search_path = {os.path.join(os.environ.get('STANFORD_SEGMENTER'), 'data')} + + # init for Chinese-specific files + self._dict = None + self._sihan_corpora_dict = None + self._sihan_post_processing = 'false' + + if lang == 'ar': + self._java_class = ( + 'edu.stanford.nlp.international.arabic.process.ArabicSegmenter' + ) + model = 'arabic-segmenter-atb+bn+arztrain.ser.gz' + + elif lang == 'zh': + self._java_class = 'edu.stanford.nlp.ie.crf.CRFClassifier' + model = 'pku.gz' + self._sihan_post_processing = 'true' + + path_to_dict = 'dict-chris6.ser.gz' + try: + self._dict = find_file( + path_to_dict, + searchpath=search_path, + url=_stanford_url, + verbose=False, + env_vars=('STANFORD_MODELS',), + ) + except LookupError: + raise LookupError( + "Could not find '%s' (tried using env. " + "variables STANFORD_MODELS and /data/)" + % path_to_dict + ) + + sihan_dir = './data/' + try: + path_to_sihan_dir = find_dir( + sihan_dir, + url=_stanford_url, + verbose=False, + env_vars=('STANFORD_SEGMENTER',), + ) + self._sihan_corpora_dict = os.path.join(path_to_sihan_dir, sihan_dir) + except LookupError: + raise LookupError( + "Could not find '%s' (tried using the " + "STANFORD_SEGMENTER environment variable)" % sihan_dir + ) + else: + raise LookupError("Unsupported language {}".format(lang)) + + try: + self._model = find_file( + model, + searchpath=search_path, + url=_stanford_url, + verbose=False, + env_vars=('STANFORD_MODELS', 'STANFORD_SEGMENTER'), + ) + except LookupError: + raise LookupError( + "Could not find '%s' (tried using env. 
" + "variables STANFORD_MODELS and /data/)" % model + ) + + def tokenize(self, s): + super().tokenize(s) + + def segment_file(self, input_file_path): + """ + """ + cmd = [ + self._java_class, + '-loadClassifier', + self._model, + '-keepAllWhitespaces', + self._keep_whitespaces, + '-textFile', + input_file_path, + ] + if self._sihan_corpora_dict is not None: + cmd.extend( + [ + '-serDictionary', + self._dict, + '-sighanCorporaDict', + self._sihan_corpora_dict, + '-sighanPostProcessing', + self._sihan_post_processing, + ] + ) + + stdout = self._execute(cmd) + + return stdout + + def segment(self, tokens): + return self.segment_sents([tokens]) + + def segment_sents(self, sentences): + """ + """ + encoding = self._encoding + # Create a temporary input file + _input_fh, self._input_file_path = tempfile.mkstemp(text=True) + + # Write the actural sentences to the temporary input file + _input_fh = os.fdopen(_input_fh, 'wb') + _input = '\n'.join((' '.join(x) for x in sentences)) + if isinstance(_input, text_type) and encoding: + _input = _input.encode(encoding) + _input_fh.write(_input) + _input_fh.close() + + cmd = [ + self._java_class, + '-loadClassifier', + self._model, + '-keepAllWhitespaces', + self._keep_whitespaces, + '-textFile', + self._input_file_path, + ] + if self._sihan_corpora_dict is not None: + cmd.extend( + [ + '-serDictionary', + self._dict, + '-sighanCorporaDict', + self._sihan_corpora_dict, + '-sighanPostProcessing', + self._sihan_post_processing, + ] + ) + + stdout = self._execute(cmd) + + # Delete the temporary file + os.unlink(self._input_file_path) + + return stdout + + def _execute(self, cmd, verbose=False): + encoding = self._encoding + cmd.extend(['-inputEncoding', encoding]) + _options_cmd = self._options_cmd + if _options_cmd: + cmd.extend(['-options', self._options_cmd]) + + default_options = ' '.join(_java_options) + + # Configure java. + config_java(options=self.java_options, verbose=verbose) + + stdout, _stderr = java( + cmd, classpath=self._stanford_jar, stdout=PIPE, stderr=PIPE + ) + stdout = stdout.decode(encoding) + + # Return java configurations to their default values. + config_java(options=default_options, verbose=False) + + return stdout + + +def setup_module(module): + from nose import SkipTest + + try: + seg = StanfordSegmenter() + seg.default_config('ar') + seg.default_config('zh') + except LookupError as e: + raise SkipTest( + 'Tests for nltk.tokenize.stanford_segmenter skipped: %s' % str(e) + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/texttiling.py b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/texttiling.py new file mode 100644 index 0000000..83da7bf --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/texttiling.py @@ -0,0 +1,477 @@ +# Natural Language Toolkit: TextTiling +# +# Copyright (C) 2001-2019 NLTK Project +# Author: George Boutsioukis +# +# URL: +# For license information, see LICENSE.TXT + +import re +import math + +try: + import numpy +except ImportError: + pass + +from nltk.tokenize.api import TokenizerI + +BLOCK_COMPARISON, VOCABULARY_INTRODUCTION = 0, 1 +LC, HC = 0, 1 +DEFAULT_SMOOTHING = [0] + + +class TextTilingTokenizer(TokenizerI): + """Tokenize a document into topical sections using the TextTiling algorithm. + This algorithm detects subtopic shifts based on the analysis of lexical + co-occurrence patterns. + + The process starts by tokenizing the text into pseudosentences of + a fixed size w. Then, depending on the method used, similarity + scores are assigned at sentence gaps. 
The algorithm proceeds by + detecting the peak differences between these scores and marking + them as boundaries. The boundaries are normalized to the closest + paragraph break and the segmented text is returned. + + :param w: Pseudosentence size + :type w: int + :param k: Size (in sentences) of the block used in the block comparison method + :type k: int + :param similarity_method: The method used for determining similarity scores: + `BLOCK_COMPARISON` (default) or `VOCABULARY_INTRODUCTION`. + :type similarity_method: constant + :param stopwords: A list of stopwords that are filtered out (defaults to NLTK's stopwords corpus) + :type stopwords: list(str) + :param smoothing_method: The method used for smoothing the score plot: + `DEFAULT_SMOOTHING` (default) + :type smoothing_method: constant + :param smoothing_width: The width of the window used by the smoothing method + :type smoothing_width: int + :param smoothing_rounds: The number of smoothing passes + :type smoothing_rounds: int + :param cutoff_policy: The policy used to determine the number of boundaries: + `HC` (default) or `LC` + :type cutoff_policy: constant + + >>> from nltk.corpus import brown + >>> tt = TextTilingTokenizer(demo_mode=True) + >>> text = brown.raw()[:4000] + >>> s, ss, d, b = tt.tokenize(text) + >>> b + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0] + """ + + def __init__( + self, + w=20, + k=10, + similarity_method=BLOCK_COMPARISON, + stopwords=None, + smoothing_method=DEFAULT_SMOOTHING, + smoothing_width=2, + smoothing_rounds=1, + cutoff_policy=HC, + demo_mode=False, + ): + + if stopwords is None: + from nltk.corpus import stopwords + + stopwords = stopwords.words('english') + self.__dict__.update(locals()) + del self.__dict__['self'] + + def tokenize(self, text): + """Return a tokenized copy of *text*, where each "token" represents + a separate topic.""" + + lowercase_text = text.lower() + paragraph_breaks = self._mark_paragraph_breaks(text) + text_length = len(lowercase_text) + + # Tokenization step starts here + + # Remove punctuation + nopunct_text = ''.join( + c for c in lowercase_text if re.match("[a-z\-\' \n\t]", c) + ) + nopunct_par_breaks = self._mark_paragraph_breaks(nopunct_text) + + tokseqs = self._divide_to_tokensequences(nopunct_text) + + # The morphological stemming step mentioned in the TextTile + # paper is not implemented. A comment in the original C + # implementation states that it offers no benefit to the + # process. It might be interesting to test the existing + # stemmers though. 
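+        # (the commented-out call below marks where stemming would slot in; the
+        # steps that follow are stopword filtering, token-table construction,
+        # lexical scoring, smoothing and boundary identification)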
+ # words = _stem_words(words) + + # Filter stopwords + for ts in tokseqs: + ts.wrdindex_list = [ + wi for wi in ts.wrdindex_list if wi[0] not in self.stopwords + ] + + token_table = self._create_token_table(tokseqs, nopunct_par_breaks) + # End of the Tokenization step + + # Lexical score determination + if self.similarity_method == BLOCK_COMPARISON: + gap_scores = self._block_comparison(tokseqs, token_table) + elif self.similarity_method == VOCABULARY_INTRODUCTION: + raise NotImplementedError("Vocabulary introduction not implemented") + else: + raise ValueError( + "Similarity method {} not recognized".format(self.similarity_method) + ) + + if self.smoothing_method == DEFAULT_SMOOTHING: + smooth_scores = self._smooth_scores(gap_scores) + else: + raise ValueError( + "Smoothing method {} not recognized".format(self.smoothing_method) + ) + # End of Lexical score Determination + + # Boundary identification + depth_scores = self._depth_scores(smooth_scores) + segment_boundaries = self._identify_boundaries(depth_scores) + + normalized_boundaries = self._normalize_boundaries( + text, segment_boundaries, paragraph_breaks + ) + # End of Boundary Identification + segmented_text = [] + prevb = 0 + + for b in normalized_boundaries: + if b == 0: + continue + segmented_text.append(text[prevb:b]) + prevb = b + + if prevb < text_length: # append any text that may be remaining + segmented_text.append(text[prevb:]) + + if not segmented_text: + segmented_text = [text] + + if self.demo_mode: + return gap_scores, smooth_scores, depth_scores, segment_boundaries + return segmented_text + + def _block_comparison(self, tokseqs, token_table): + """Implements the block comparison method""" + + def blk_frq(tok, block): + ts_occs = filter(lambda o: o[0] in block, token_table[tok].ts_occurences) + freq = sum([tsocc[1] for tsocc in ts_occs]) + return freq + + gap_scores = [] + numgaps = len(tokseqs) - 1 + + for curr_gap in range(numgaps): + score_dividend, score_divisor_b1, score_divisor_b2 = 0.0, 0.0, 0.0 + score = 0.0 + # adjust window size for boundary conditions + if curr_gap < self.k - 1: + window_size = curr_gap + 1 + elif curr_gap > numgaps - self.k: + window_size = numgaps - curr_gap + else: + window_size = self.k + + b1 = [ts.index for ts in tokseqs[curr_gap - window_size + 1 : curr_gap + 1]] + b2 = [ts.index for ts in tokseqs[curr_gap + 1 : curr_gap + window_size + 1]] + + for t in token_table: + score_dividend += blk_frq(t, b1) * blk_frq(t, b2) + score_divisor_b1 += blk_frq(t, b1) ** 2 + score_divisor_b2 += blk_frq(t, b2) ** 2 + try: + score = score_dividend / math.sqrt(score_divisor_b1 * score_divisor_b2) + except ZeroDivisionError: + pass # score += 0.0 + + gap_scores.append(score) + + return gap_scores + + def _smooth_scores(self, gap_scores): + "Wraps the smooth function from the SciPy Cookbook" + return list( + smooth(numpy.array(gap_scores[:]), window_len=self.smoothing_width + 1) + ) + + def _mark_paragraph_breaks(self, text): + """Identifies indented text or line breaks as the beginning of + paragraphs""" + MIN_PARAGRAPH = 100 + pattern = re.compile("[ \t\r\f\v]*\n[ \t\r\f\v]*\n[ \t\r\f\v]*") + matches = pattern.finditer(text) + + last_break = 0 + pbreaks = [0] + for pb in matches: + if pb.start() - last_break < MIN_PARAGRAPH: + continue + else: + pbreaks.append(pb.start()) + last_break = pb.start() + + return pbreaks + + def _divide_to_tokensequences(self, text): + "Divides the text into pseudosentences of fixed size" + w = self.w + wrdindex_list = [] + matches = re.finditer("\w+", text) + for match 
in matches: + wrdindex_list.append((match.group(), match.start())) + return [ + TokenSequence(i / w, wrdindex_list[i : i + w]) + for i in range(0, len(wrdindex_list), w) + ] + + def _create_token_table(self, token_sequences, par_breaks): + "Creates a table of TokenTableFields" + token_table = {} + current_par = 0 + current_tok_seq = 0 + pb_iter = par_breaks.__iter__() + current_par_break = next(pb_iter) + if current_par_break == 0: + try: + current_par_break = next(pb_iter) # skip break at 0 + except StopIteration: + raise ValueError( + "No paragraph breaks were found(text too short perhaps?)" + ) + for ts in token_sequences: + for word, index in ts.wrdindex_list: + try: + while index > current_par_break: + current_par_break = next(pb_iter) + current_par += 1 + except StopIteration: + # hit bottom + pass + + if word in token_table: + token_table[word].total_count += 1 + + if token_table[word].last_par != current_par: + token_table[word].last_par = current_par + token_table[word].par_count += 1 + + if token_table[word].last_tok_seq != current_tok_seq: + token_table[word].last_tok_seq = current_tok_seq + token_table[word].ts_occurences.append([current_tok_seq, 1]) + else: + token_table[word].ts_occurences[-1][1] += 1 + else: # new word + token_table[word] = TokenTableField( + first_pos=index, + ts_occurences=[[current_tok_seq, 1]], + total_count=1, + par_count=1, + last_par=current_par, + last_tok_seq=current_tok_seq, + ) + + current_tok_seq += 1 + + return token_table + + def _identify_boundaries(self, depth_scores): + """Identifies boundaries at the peaks of similarity score + differences""" + + boundaries = [0 for x in depth_scores] + + avg = sum(depth_scores) / len(depth_scores) + stdev = numpy.std(depth_scores) + + # SB: what is the purpose of this conditional? + if self.cutoff_policy == LC: + cutoff = avg - stdev / 2.0 + else: + cutoff = avg - stdev / 2.0 + + depth_tuples = sorted(zip(depth_scores, range(len(depth_scores)))) + depth_tuples.reverse() + hp = list(filter(lambda x: x[0] > cutoff, depth_tuples)) + + for dt in hp: + boundaries[dt[1]] = 1 + for dt2 in hp: # undo if there is a boundary close already + if ( + dt[1] != dt2[1] + and abs(dt2[1] - dt[1]) < 4 + and boundaries[dt2[1]] == 1 + ): + boundaries[dt[1]] = 0 + return boundaries + + def _depth_scores(self, scores): + """Calculates the depth of each gap, i.e. the average difference + between the left and right peaks and the gap's score""" + + depth_scores = [0 for x in scores] + # clip boundaries: this holds on the rule of thumb(my thumb) + # that a section shouldn't be smaller than at least 2 + # pseudosentences for small texts and around 5 for larger ones. 
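+        # i.e. clip is roughly a tenth of the number of gaps, bounded to [2, 5];
+        # the depth at a gap is (left peak + right peak) - 2 * gap score.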
+ + clip = min(max(len(scores) // 10, 2), 5) + index = clip + + for gapscore in scores[clip:-clip]: + lpeak = gapscore + for score in scores[index::-1]: + if score >= lpeak: + lpeak = score + else: + break + rpeak = gapscore + for score in scores[index:]: + if score >= rpeak: + rpeak = score + else: + break + depth_scores[index] = lpeak + rpeak - 2 * gapscore + index += 1 + + return depth_scores + + def _normalize_boundaries(self, text, boundaries, paragraph_breaks): + """Normalize the boundaries identified to the original text's + paragraph breaks""" + + norm_boundaries = [] + char_count, word_count, gaps_seen = 0, 0, 0 + seen_word = False + + for char in text: + char_count += 1 + if char in " \t\n" and seen_word: + seen_word = False + word_count += 1 + if char not in " \t\n" and not seen_word: + seen_word = True + if gaps_seen < len(boundaries) and word_count > ( + max(gaps_seen * self.w, self.w) + ): + if boundaries[gaps_seen] == 1: + # find closest paragraph break + best_fit = len(text) + for br in paragraph_breaks: + if best_fit > abs(br - char_count): + best_fit = abs(br - char_count) + bestbr = br + else: + break + if bestbr not in norm_boundaries: # avoid duplicates + norm_boundaries.append(bestbr) + gaps_seen += 1 + + return norm_boundaries + + +class TokenTableField(object): + """A field in the token table holding parameters for each token, + used later in the process""" + + def __init__( + self, + first_pos, + ts_occurences, + total_count=1, + par_count=1, + last_par=0, + last_tok_seq=None, + ): + self.__dict__.update(locals()) + del self.__dict__['self'] + + +class TokenSequence(object): + "A token list with its original length and its index" + + def __init__(self, index, wrdindex_list, original_length=None): + original_length = original_length or len(wrdindex_list) + self.__dict__.update(locals()) + del self.__dict__['self'] + + +# Pasted from the SciPy cookbook: http://www.scipy.org/Cookbook/SignalSmooth +def smooth(x, window_len=11, window='flat'): + """smooth the data using a window with requested size. + + This method is based on the convolution of a scaled window with the signal. + The signal is prepared by introducing reflected copies of the signal + (with the window size) in both ends so that transient parts are minimized + in the beginning and end part of the output signal. + + :param x: the input signal + :param window_len: the dimension of the smoothing window; should be an odd integer + :param window: the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman' + flat window will produce a moving average smoothing. + + :return: the smoothed signal + + example:: + + t=linspace(-2,2,0.1) + x=sin(t)+randn(len(t))*0.1 + y=smooth(x) + + :see also: numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve, + scipy.signal.lfilter + + TODO: the window parameter could be the window itself if an array instead of a string + """ + + if x.ndim != 1: + raise ValueError("smooth only accepts 1 dimension arrays.") + + if x.size < window_len: + raise ValueError("Input vector needs to be bigger than window size.") + + if window_len < 3: + return x + + if window not in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']: + raise ValueError( + "Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'" + ) + + s = numpy.r_[2 * x[0] - x[window_len:1:-1], x, 2 * x[-1] - x[-1:-window_len:-1]] + + # print(len(s)) + if window == 'flat': # moving average + w = numpy.ones(window_len, 'd') + else: + w = eval('numpy.' 
+ window + '(window_len)') + + y = numpy.convolve(w / w.sum(), s, mode='same') + + return y[window_len - 1 : -window_len + 1] + + +def demo(text=None): + from nltk.corpus import brown + from matplotlib import pylab + + tt = TextTilingTokenizer(demo_mode=True) + if text is None: + text = brown.raw()[:10000] + s, ss, d, b = tt.tokenize(text) + pylab.xlabel("Sentence Gap index") + pylab.ylabel("Gap Scores") + pylab.plot(range(len(s)), s, label="Gap Scores") + pylab.plot(range(len(ss)), ss, label="Smoothed Gap scores") + pylab.plot(range(len(d)), d, label="Depth scores") + pylab.stem(range(len(b)), b) + pylab.legend() + pylab.show() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/toktok.py b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/toktok.py new file mode 100644 index 0000000..9779725 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/toktok.py @@ -0,0 +1,181 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Python port of the tok-tok.pl tokenizer. +# +# Copyright (C) 2001-2015 NLTK Project +# Author: Jon Dehdari +# Contributors: Liling Tan, Selcuk Ayguney, ikegami, Martijn Pieters +# +# URL: +# For license information, see LICENSE.TXT + +""" +The tok-tok tokenizer is a simple, general tokenizer, where the input has one +sentence per line; thus only final period is tokenized. + +Tok-tok has been tested on, and gives reasonably good results for English, +Persian, Russian, Czech, French, German, Vietnamese, Tajik, and a few others. +The input should be in UTF-8 encoding. + +Reference: +Jon Dehdari. 2014. A Neurophysiologically-Inspired Statistical Language +Model (Doctoral dissertation). Columbus, OH, USA: The Ohio State University. +""" + +import re +from six import text_type + +from nltk.tokenize.api import TokenizerI + + +class ToktokTokenizer(TokenizerI): + """ + This is a Python port of the tok-tok.pl from + https://github.com/jonsafari/tok-tok/blob/master/tok-tok.pl + + >>> toktok = ToktokTokenizer() + >>> text = u'Is 9.5 or 525,600 my favorite number?' + >>> print (toktok.tokenize(text, return_str=True)) + Is 9.5 or 525,600 my favorite number ? + >>> text = u'The https://github.com/jonsafari/tok-tok/blob/master/tok-tok.pl is a website with/and/or slashes and sort of weird : things' + >>> print (toktok.tokenize(text, return_str=True)) + The https://github.com/jonsafari/tok-tok/blob/master/tok-tok.pl is a website with/and/or slashes and sort of weird : things + >>> text = u'\xa1This, is a sentence with weird\xbb symbols\u2026 appearing everywhere\xbf' + >>> expected = u'\xa1 This , is a sentence with weird \xbb symbols \u2026 appearing everywhere \xbf' + >>> assert toktok.tokenize(text, return_str=True) == expected + >>> toktok.tokenize(text) == [u'\xa1', u'This', u',', u'is', u'a', u'sentence', u'with', u'weird', u'\xbb', u'symbols', u'\u2026', u'appearing', u'everywhere', u'\xbf'] + True + """ + + # Replace non-breaking spaces with normal spaces. + NON_BREAKING = re.compile(u"\u00A0"), " " + + # Pad some funky punctuation. + FUNKY_PUNCT_1 = re.compile(u'([،;؛¿!"\])}»›”؟¡%٪°±©®।॥…])'), r" \1 " + # Pad more funky punctuation. + FUNKY_PUNCT_2 = re.compile(u'([({\[“‘„‚«‹「『])'), r" \1 " + # Pad En dash and em dash + EN_EM_DASHES = re.compile(u'([–—])'), r" \1 " + + # Replace problematic character with numeric character reference. + AMPERCENT = re.compile('& '), '& ' + TAB = re.compile('\t'), ' ' + PIPE = re.compile('\|'), ' | ' + + # Pad numbers with commas to keep them from further tokenization. + COMMA_IN_NUM = re.compile(r'(? 
"something ..." + # "something." -> "something ." + FINAL_PERIOD_1 = re.compile(r"(? "... stuff ." + FINAL_PERIOD_2 = re.compile(r"""(? +# Michael Heilman (re-port from http://www.cis.upenn.edu/~treebank/tokenizer.sed) +# +# URL: +# For license information, see LICENSE.TXT + +r""" + +Penn Treebank Tokenizer + +The Treebank tokenizer uses regular expressions to tokenize text as in Penn Treebank. +This implementation is a port of the tokenizer sed script written by Robert McIntyre +and available at http://www.cis.upenn.edu/~treebank/tokenizer.sed. +""" + +import re +from nltk.tokenize.api import TokenizerI +from nltk.tokenize.util import align_tokens + + +class MacIntyreContractions: + """ + List of contractions adapted from Robert MacIntyre's tokenizer. + """ + + CONTRACTIONS2 = [ + r"(?i)\b(can)(?#X)(not)\b", + r"(?i)\b(d)(?#X)('ye)\b", + r"(?i)\b(gim)(?#X)(me)\b", + r"(?i)\b(gon)(?#X)(na)\b", + r"(?i)\b(got)(?#X)(ta)\b", + r"(?i)\b(lem)(?#X)(me)\b", + r"(?i)\b(mor)(?#X)('n)\b", + r"(?i)\b(wan)(?#X)(na)\s", + ] + CONTRACTIONS3 = [r"(?i) ('t)(?#X)(is)\b", r"(?i) ('t)(?#X)(was)\b"] + CONTRACTIONS4 = [r"(?i)\b(whad)(dd)(ya)\b", r"(?i)\b(wha)(t)(cha)\b"] + + +class TreebankWordTokenizer(TokenizerI): + """ + The Treebank tokenizer uses regular expressions to tokenize text as in Penn Treebank. + This is the method that is invoked by ``word_tokenize()``. It assumes that the + text has already been segmented into sentences, e.g. using ``sent_tokenize()``. + + This tokenizer performs the following steps: + + - split standard contractions, e.g. ``don't`` -> ``do n't`` and ``they'll`` -> ``they 'll`` + - treat most punctuation characters as separate tokens + - split off commas and single quotes, when followed by whitespace + - separate periods that appear at the end of line + + >>> from nltk.tokenize import TreebankWordTokenizer + >>> s = '''Good muffins cost $3.88\\nin New York. Please buy me\\ntwo of them.\\nThanks.''' + >>> TreebankWordTokenizer().tokenize(s) + ['Good', 'muffins', 'cost', '$', '3.88', 'in', 'New', 'York.', 'Please', 'buy', 'me', 'two', 'of', 'them.', 'Thanks', '.'] + >>> s = "They'll save and invest more." + >>> TreebankWordTokenizer().tokenize(s) + ['They', "'ll", 'save', 'and', 'invest', 'more', '.'] + >>> s = "hi, my name can't hello," + >>> TreebankWordTokenizer().tokenize(s) + ['hi', ',', 'my', 'name', 'ca', "n't", 'hello', ','] + """ + + # starting quotes + STARTING_QUOTES = [ + (re.compile(r'^\"'), r'``'), + (re.compile(r'(``)'), r' \1 '), + (re.compile(r"([ \(\[{<])(\"|\'{2})"), r'\1 `` '), + ] + + # punctuation + PUNCTUATION = [ + (re.compile(r'([:,])([^\d])'), r' \1 \2'), + (re.compile(r'([:,])$'), r' \1 '), + (re.compile(r'\.\.\.'), r' ... '), + (re.compile(r'[;@#$%&]'), r' \g<0> '), + ( + re.compile(r'([^\.])(\.)([\]\)}>"\']*)\s*$'), + r'\1 \2\3 ', + ), # Handles the final period. + (re.compile(r'[?!]'), r' \g<0> '), + (re.compile(r"([^'])' "), r"\1 ' "), + ] + + # Pads parentheses + PARENS_BRACKETS = (re.compile(r'[\]\[\(\)\{\}\<\>]'), r' \g<0> ') + + # Optionally: Convert parentheses, brackets and converts them to PTB symbols. 
+ CONVERT_PARENTHESES = [ + (re.compile(r'\('), '-LRB-'), + (re.compile(r'\)'), '-RRB-'), + (re.compile(r'\['), '-LSB-'), + (re.compile(r'\]'), '-RSB-'), + (re.compile(r'\{'), '-LCB-'), + (re.compile(r'\}'), '-RCB-'), + ] + + DOUBLE_DASHES = (re.compile(r'--'), r' -- ') + + # ending quotes + ENDING_QUOTES = [ + (re.compile(r'"'), " '' "), + (re.compile(r'(\S)(\'\')'), r'\1 \2 '), + (re.compile(r"([^' ])('[sS]|'[mM]|'[dD]|') "), r"\1 \2 "), + (re.compile(r"([^' ])('ll|'LL|'re|'RE|'ve|'VE|n't|N'T) "), r"\1 \2 "), + ] + + # List of contractions adapted from Robert MacIntyre's tokenizer. + _contractions = MacIntyreContractions() + CONTRACTIONS2 = list(map(re.compile, _contractions.CONTRACTIONS2)) + CONTRACTIONS3 = list(map(re.compile, _contractions.CONTRACTIONS3)) + + def tokenize(self, text, convert_parentheses=False, return_str=False): + for regexp, substitution in self.STARTING_QUOTES: + text = regexp.sub(substitution, text) + + for regexp, substitution in self.PUNCTUATION: + text = regexp.sub(substitution, text) + + # Handles parentheses. + regexp, substitution = self.PARENS_BRACKETS + text = regexp.sub(substitution, text) + # Optionally convert parentheses + if convert_parentheses: + for regexp, substitution in self.CONVERT_PARENTHESES: + text = regexp.sub(substitution, text) + + # Handles double dash. + regexp, substitution = self.DOUBLE_DASHES + text = regexp.sub(substitution, text) + + # add extra space to make things easier + text = " " + text + " " + + for regexp, substitution in self.ENDING_QUOTES: + text = regexp.sub(substitution, text) + + for regexp in self.CONTRACTIONS2: + text = regexp.sub(r' \1 \2 ', text) + for regexp in self.CONTRACTIONS3: + text = regexp.sub(r' \1 \2 ', text) + + # We are not using CONTRACTIONS4 since + # they are also commented out in the SED scripts + # for regexp in self._contractions.CONTRACTIONS4: + # text = regexp.sub(r' \1 \2 \3 ', text) + + return text if return_str else text.split() + + def span_tokenize(self, text): + """ + Uses the post-hoc nltk.tokens.align_tokens to return the offset spans. + + >>> from nltk.tokenize import TreebankWordTokenizer + >>> s = '''Good muffins cost $3.88\\nin New (York). Please (buy) me\\ntwo of them.\\n(Thanks).''' + >>> expected = [(0, 4), (5, 12), (13, 17), (18, 19), (19, 23), + ... (24, 26), (27, 30), (31, 32), (32, 36), (36, 37), (37, 38), + ... (40, 46), (47, 48), (48, 51), (51, 52), (53, 55), (56, 59), + ... (60, 62), (63, 68), (69, 70), (70, 76), (76, 77), (77, 78)] + >>> list(TreebankWordTokenizer().span_tokenize(s)) == expected + True + >>> expected = ['Good', 'muffins', 'cost', '$', '3.88', 'in', + ... 'New', '(', 'York', ')', '.', 'Please', '(', 'buy', ')', + ... 'me', 'two', 'of', 'them.', '(', 'Thanks', ')', '.'] + >>> [s[start:end] for start, end in TreebankWordTokenizer().span_tokenize(s)] == expected + True + + Additional example + >>> from nltk.tokenize import TreebankWordTokenizer + >>> s = '''I said, "I'd like to buy some ''good muffins" which cost $3.88\\n each in New (York)."''' + >>> expected = [(0, 1), (2, 6), (6, 7), (8, 9), (9, 10), (10, 12), + ... (13, 17), (18, 20), (21, 24), (25, 29), (30, 32), (32, 36), + ... (37, 44), (44, 45), (46, 51), (52, 56), (57, 58), (58, 62), + ... (64, 68), (69, 71), (72, 75), (76, 77), (77, 81), (81, 82), + ... (82, 83), (83, 84)] + >>> list(TreebankWordTokenizer().span_tokenize(s)) == expected + True + >>> expected = ['I', 'said', ',', '"', 'I', "'d", 'like', 'to', + ... 'buy', 'some', "''", "good", 'muffins', '"', 'which', 'cost', + ... 
'$', '3.88', 'each', 'in', 'New', '(', 'York', ')', '.', '"'] + >>> [s[start:end] for start, end in TreebankWordTokenizer().span_tokenize(s)] == expected + True + + """ + raw_tokens = self.tokenize(text) + + # Convert converted quotes back to original double quotes + # Do this only if original text contains double quote(s) or double + # single-quotes (because '' might be transformed to `` if it is + # treated as starting quotes). + if ('"' in text) or ("''" in text): + # Find double quotes and converted quotes + matched = [m.group() for m in re.finditer(r"``|'{2}|\"", text)] + + # Replace converted quotes back to double quotes + tokens = [ + matched.pop(0) if tok in ['"', "``", "''"] else tok + for tok in raw_tokens + ] + else: + tokens = raw_tokens + + for tok in align_tokens(tokens, text): + yield tok + + +class TreebankWordDetokenizer(TokenizerI): + """ + The Treebank detokenizer uses the reverse regex operations corresponding to + the Treebank tokenizer's regexes. + + Note: + - There're additional assumption mades when undoing the padding of [;@#$%&] + punctuation symbols that isn't presupposed in the TreebankTokenizer. + - There're additional regexes added in reversing the parentheses tokenization, + - the r'([\]\)\}\>])\s([:;,.])' removes the additional right padding added + to the closing parentheses precedding [:;,.]. + - It's not possible to return the original whitespaces as they were because + there wasn't explicit records of where '\n', '\t' or '\s' were removed at + the text.split() operation. + + >>> from nltk.tokenize.treebank import TreebankWordTokenizer, TreebankWordDetokenizer + >>> s = '''Good muffins cost $3.88\\nin New York. Please buy me\\ntwo of them.\\nThanks.''' + >>> d = TreebankWordDetokenizer() + >>> t = TreebankWordTokenizer() + >>> toks = t.tokenize(s) + >>> d.detokenize(toks) + 'Good muffins cost $3.88 in New York. Please buy me two of them. Thanks.' + + The MXPOST parentheses substitution can be undone using the `convert_parentheses` + parameter: + + >>> s = '''Good muffins cost $3.88\\nin New (York). Please (buy) me\\ntwo of them.\\n(Thanks).''' + >>> expected_tokens = ['Good', 'muffins', 'cost', '$', '3.88', 'in', + ... 'New', '-LRB-', 'York', '-RRB-', '.', 'Please', '-LRB-', 'buy', + ... '-RRB-', 'me', 'two', 'of', 'them.', '-LRB-', 'Thanks', '-RRB-', '.'] + >>> expected_tokens == t.tokenize(s, convert_parentheses=True) + True + >>> expected_detoken = 'Good muffins cost $3.88 in New (York). Please (buy) me two of them. (Thanks).' + >>> expected_detoken == d.detokenize(t.tokenize(s, convert_parentheses=True), convert_parentheses=True) + True + + During tokenization it's safe to add more spaces but during detokenization, + simply undoing the padding doesn't really help. + + - During tokenization, left and right pad is added to [!?], when + detokenizing, only left shift the [!?] is needed. + Thus (re.compile(r'\s([?!])'), r'\g<1>') + + - During tokenization [:,] are left and right padded but when detokenizing, + only left shift is necessary and we keep right pad after comma/colon + if the string after is a non-digit. + Thus (re.compile(r'\s([:,])\s([^\d])'), r'\1 \2') + + >>> from nltk.tokenize.treebank import TreebankWordDetokenizer + >>> toks = ['hello', ',', 'i', 'ca', "n't", 'feel', 'my', 'feet', '!', 'Help', '!', '!'] + >>> twd = TreebankWordDetokenizer() + >>> twd.detokenize(toks) + "hello, i can't feel my feet! Help!!" + + >>> toks = ['hello', ',', 'i', "can't", 'feel', ';', 'my', 'feet', '!', + ... 
'Help', '!', '!', 'He', 'said', ':', 'Help', ',', 'help', '?', '!'] + >>> twd.detokenize(toks) + "hello, i can't feel; my feet! Help!! He said: Help, help?!" + """ + + _contractions = MacIntyreContractions() + CONTRACTIONS2 = [ + re.compile(pattern.replace('(?#X)', '\s')) + for pattern in _contractions.CONTRACTIONS2 + ] + CONTRACTIONS3 = [ + re.compile(pattern.replace('(?#X)', '\s')) + for pattern in _contractions.CONTRACTIONS3 + ] + + # ending quotes + ENDING_QUOTES = [ + (re.compile(r"([^' ])\s('ll|'LL|'re|'RE|'ve|'VE|n't|N'T) "), r"\1\2 "), + (re.compile(r"([^' ])\s('[sS]|'[mM]|'[dD]|') "), r"\1\2 "), + (re.compile(r'(\S)(\'\')'), r'\1\2 '), + (re.compile(r" '' "), '"'), + ] + + # Handles double dashes + DOUBLE_DASHES = (re.compile(r' -- '), r'--') + + # Optionally: Convert parentheses, brackets and converts them from PTB symbols. + CONVERT_PARENTHESES = [ + (re.compile('-LRB-'), '('), + (re.compile('-RRB-'), ')'), + (re.compile('-LSB-'), '['), + (re.compile('-RSB-'), ']'), + (re.compile('-LCB-'), '{'), + (re.compile('-RCB-'), '}'), + ] + + # Undo padding on parentheses. + PARENS_BRACKETS = [ + (re.compile(r'\s([\[\(\{\<])\s'), r' \g<1>'), + (re.compile(r'\s([\]\)\}\>])\s'), r'\g<1> '), + (re.compile(r'([\]\)\}\>])\s([:;,.])'), r'\1\2'), + ] + + # punctuation + PUNCTUATION = [ + (re.compile(r"([^'])\s'\s"), r"\1' "), + (re.compile(r'\s([?!])'), r'\g<1>'), # Strip left pad for [?!] + # (re.compile(r'\s([?!])\s'), r'\g<1>'), + (re.compile(r'([^\.])\s(\.)([\]\)}>"\']*)\s*$'), r'\1\2\3'), + # When tokenizing, [;@#$%&] are padded with whitespace regardless of + # whether there are spaces before or after them. + # But during detokenization, we need to distinguish between left/right + # pad, so we split this up. + (re.compile(r'\s([#$])\s'), r' \g<1>'), # Left pad. + (re.compile(r'\s([;%])\s'), r'\g<1> '), # Right pad. + (re.compile(r'\s([&])\s'), r' \g<1> '), # Unknown pad. + (re.compile(r'\s\.\.\.\s'), r'...'), + (re.compile(r'\s([:,])\s$'), r'\1'), + ( + re.compile(r'\s([:,])\s([^\d])'), + r'\1 \2', + ) # Keep right pad after comma/colon before non-digits. + # (re.compile(r'\s([:,])\s([^\d])'), r'\1\2') + ] + + # starting quotes + STARTING_QUOTES = [ + (re.compile(r'([ (\[{<])\s``'), r'\1"'), + (re.compile(r'\s(``)\s'), r'\1'), + (re.compile(r'^``'), r'\"'), + ] + + def tokenize(self, tokens, convert_parentheses=False): + """ + Treebank detokenizer, created by undoing the regexes from + the TreebankWordTokenizer.tokenize. + + :param tokens: A list of strings, i.e. tokenized text. + :type tokens: list(str) + :return: str + """ + text = ' '.join(tokens) + # Reverse the contractions regexes. + # Note: CONTRACTIONS4 are not used in tokenization. + for regexp in self.CONTRACTIONS3: + text = regexp.sub(r'\1\2', text) + for regexp in self.CONTRACTIONS2: + text = regexp.sub(r'\1\2', text) + + # Reverse the regexes applied for ending quotes. + for regexp, substitution in self.ENDING_QUOTES: + text = regexp.sub(substitution, text) + + # Undo the space padding. + text = text.strip() + + # Reverse the padding on double dashes. + regexp, substitution = self.DOUBLE_DASHES + text = regexp.sub(substitution, text) + + if convert_parentheses: + for regexp, substitution in self.CONVERT_PARENTHESES: + text = regexp.sub(substitution, text) + + # Reverse the padding regexes applied for parenthesis/brackets. + for regexp, substitution in self.PARENS_BRACKETS: + text = regexp.sub(substitution, text) + + # Reverse the regexes applied for punctuations. 
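+        # e.g. turns ' !' into '!' and ' ... ' into '...'.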
+ for regexp, substitution in self.PUNCTUATION: + text = regexp.sub(substitution, text) + + # Reverse the regexes applied for starting quotes. + for regexp, substitution in self.STARTING_QUOTES: + text = regexp.sub(substitution, text) + + return text.strip() + + def detokenize(self, tokens, convert_parentheses=False): + """ Duck-typing the abstract *tokenize()*.""" + return self.tokenize(tokens, convert_parentheses) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tokenize/util.py b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/util.py new file mode 100644 index 0000000..a91f129 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tokenize/util.py @@ -0,0 +1,296 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Tokenizer Utilities +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# URL: +# For license information, see LICENSE.TXT + +from re import finditer +from xml.sax.saxutils import escape, unescape + + +def string_span_tokenize(s, sep): + r""" + Return the offsets of the tokens in *s*, as a sequence of ``(start, end)`` + tuples, by splitting the string at each occurrence of *sep*. + + >>> from nltk.tokenize.util import string_span_tokenize + >>> s = '''Good muffins cost $3.88\nin New York. Please buy me + ... two of them.\n\nThanks.''' + >>> list(string_span_tokenize(s, " ")) + [(0, 4), (5, 12), (13, 17), (18, 26), (27, 30), (31, 36), (37, 37), + (38, 44), (45, 48), (49, 55), (56, 58), (59, 73)] + + :param s: the string to be tokenized + :type s: str + :param sep: the token separator + :type sep: str + :rtype: iter(tuple(int, int)) + """ + if len(sep) == 0: + raise ValueError("Token delimiter must not be empty") + left = 0 + while True: + try: + right = s.index(sep, left) + if right != 0: + yield left, right + except ValueError: + if left != len(s): + yield left, len(s) + break + + left = right + len(sep) + + +def regexp_span_tokenize(s, regexp): + r""" + Return the offsets of the tokens in *s*, as a sequence of ``(start, end)`` + tuples, by splitting the string at each successive match of *regexp*. + + >>> from nltk.tokenize.util import regexp_span_tokenize + >>> s = '''Good muffins cost $3.88\nin New York. Please buy me + ... two of them.\n\nThanks.''' + >>> list(regexp_span_tokenize(s, r'\s')) + [(0, 4), (5, 12), (13, 17), (18, 23), (24, 26), (27, 30), (31, 36), + (38, 44), (45, 48), (49, 51), (52, 55), (56, 58), (59, 64), (66, 73)] + + :param s: the string to be tokenized + :type s: str + :param regexp: regular expression that matches token separators (must not be empty) + :type regexp: str + :rtype: iter(tuple(int, int)) + """ + left = 0 + for m in finditer(regexp, s): + right, next = m.span() + if right != left: + yield left, right + left = next + yield left, len(s) + + +def spans_to_relative(spans): + r""" + Return a sequence of relative spans, given a sequence of spans. + + >>> from nltk.tokenize import WhitespaceTokenizer + >>> from nltk.tokenize.util import spans_to_relative + >>> s = '''Good muffins cost $3.88\nin New York. Please buy me + ... 
two of them.\n\nThanks.''' + >>> list(spans_to_relative(WhitespaceTokenizer().span_tokenize(s))) + [(0, 4), (1, 7), (1, 4), (1, 5), (1, 2), (1, 3), (1, 5), (2, 6), + (1, 3), (1, 2), (1, 3), (1, 2), (1, 5), (2, 7)] + + :param spans: a sequence of (start, end) offsets of the tokens + :type spans: iter(tuple(int, int)) + :rtype: iter(tuple(int, int)) + """ + prev = 0 + for left, right in spans: + yield left - prev, right - left + prev = right + + +class CJKChars(object): + """ + An object that enumerates the code points of the CJK characters as listed on + http://en.wikipedia.org/wiki/Basic_Multilingual_Plane#Basic_Multilingual_Plane + + This is a Python port of the CJK code point enumerations of Moses tokenizer: + https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/detokenizer.perl#L309 + """ + + # Hangul Jamo (1100–11FF) + Hangul_Jamo = (4352, 4607) # (ord(u"\u1100"), ord(u"\u11ff")) + + # CJK Radicals Supplement (2E80–2EFF) + # Kangxi Radicals (2F00–2FDF) + # Ideographic Description Characters (2FF0–2FFF) + # CJK Symbols and Punctuation (3000–303F) + # Hiragana (3040–309F) + # Katakana (30A0–30FF) + # Bopomofo (3100–312F) + # Hangul Compatibility Jamo (3130–318F) + # Kanbun (3190–319F) + # Bopomofo Extended (31A0–31BF) + # CJK Strokes (31C0–31EF) + # Katakana Phonetic Extensions (31F0–31FF) + # Enclosed CJK Letters and Months (3200–32FF) + # CJK Compatibility (3300–33FF) + # CJK Unified Ideographs Extension A (3400–4DBF) + # Yijing Hexagram Symbols (4DC0–4DFF) + # CJK Unified Ideographs (4E00–9FFF) + # Yi Syllables (A000–A48F) + # Yi Radicals (A490–A4CF) + CJK_Radicals = (11904, 42191) # (ord(u"\u2e80"), ord(u"\ua4cf")) + + # Phags-pa (A840–A87F) + Phags_Pa = (43072, 43135) # (ord(u"\ua840"), ord(u"\ua87f")) + + # Hangul Syllables (AC00–D7AF) + Hangul_Syllables = (44032, 55215) # (ord(u"\uAC00"), ord(u"\uD7AF")) + + # CJK Compatibility Ideographs (F900–FAFF) + CJK_Compatibility_Ideographs = (63744, 64255) # (ord(u"\uF900"), ord(u"\uFAFF")) + + # CJK Compatibility Forms (FE30–FE4F) + CJK_Compatibility_Forms = (65072, 65103) # (ord(u"\uFE30"), ord(u"\uFE4F")) + + # Range U+FF65–FFDC encodes halfwidth forms, of Katakana and Hangul characters + Katakana_Hangul_Halfwidth = (65381, 65500) # (ord(u"\uFF65"), ord(u"\uFFDC")) + + # Supplementary Ideographic Plane 20000–2FFFF + Supplementary_Ideographic_Plane = ( + 131072, + 196607, + ) # (ord(u"\U00020000"), ord(u"\U0002FFFF")) + + ranges = [ + Hangul_Jamo, + CJK_Radicals, + Phags_Pa, + Hangul_Syllables, + CJK_Compatibility_Ideographs, + CJK_Compatibility_Forms, + Katakana_Hangul_Halfwidth, + Supplementary_Ideographic_Plane, + ] + + +def is_cjk(character): + """ + Python port of Moses' code to check for CJK character. + + >>> CJKChars().ranges + [(4352, 4607), (11904, 42191), (43072, 43135), (44032, 55215), (63744, 64255), (65072, 65103), (65381, 65500), (131072, 196607)] + >>> is_cjk(u'\u33fe') + True + >>> is_cjk(u'\uFE5F') + False + + :param character: The character that needs to be checked. + :type character: char + :return: bool + """ + return any( + [ + start <= ord(character) <= end + for start, end in [ + (4352, 4607), + (11904, 42191), + (43072, 43135), + (44032, 55215), + (63744, 64255), + (65072, 65103), + (65381, 65500), + (131072, 196607), + ] + ] + ) + + +def xml_escape(text): + """ + This function transforms the input text into an "escaped" version suitable + for well-formed XML formatting. 
+ + Note that the default xml.sax.saxutils.escape() function don't escape + some characters that Moses does so we have to manually add them to the + entities dictionary. + + >>> input_str = ''')| & < > ' " ] [''' + >>> expected_output = ''')| & < > ' " ] [''' + >>> escape(input_str) == expected_output + True + >>> xml_escape(input_str) + ')| & < > ' " ] [' + + :param text: The text that needs to be escaped. + :type text: str + :rtype: str + """ + return escape( + text, + entities={ + r"'": r"'", + r'"': r""", + r"|": r"|", + r"[": r"[", + r"]": r"]", + }, + ) + + +def xml_unescape(text): + """ + This function transforms the "escaped" version suitable + for well-formed XML formatting into humanly-readable string. + + Note that the default xml.sax.saxutils.unescape() function don't unescape + some characters that Moses does so we have to manually add them to the + entities dictionary. + + >>> from xml.sax.saxutils import unescape + >>> s = ')| & < > ' " ] [' + >>> expected = ''')| & < > \' " ] [''' + >>> xml_unescape(s) == expected + True + + :param text: The text that needs to be unescaped. + :type text: str + :rtype: str + """ + return unescape( + text, + entities={ + r"'": r"'", + r""": r'"', + r"|": r"|", + r"[": r"[", + r"]": r"]", + }, + ) + + +def align_tokens(tokens, sentence): + """ + This module attempt to find the offsets of the tokens in *s*, as a sequence + of ``(start, end)`` tuples, given the tokens and also the source string. + + >>> from nltk.tokenize import TreebankWordTokenizer + >>> from nltk.tokenize.util import align_tokens + >>> s = str("The plane, bound for St Petersburg, crashed in Egypt's " + ... "Sinai desert just 23 minutes after take-off from Sharm el-Sheikh " + ... "on Saturday.") + >>> tokens = TreebankWordTokenizer().tokenize(s) + >>> expected = [(0, 3), (4, 9), (9, 10), (11, 16), (17, 20), (21, 23), + ... (24, 34), (34, 35), (36, 43), (44, 46), (47, 52), (52, 54), + ... (55, 60), (61, 67), (68, 72), (73, 75), (76, 83), (84, 89), + ... (90, 98), (99, 103), (104, 109), (110, 119), (120, 122), + ... (123, 131), (131, 132)] + >>> output = list(align_tokens(tokens, s)) + >>> len(tokens) == len(expected) == len(output) # Check that length of tokens and tuples are the same. + True + >>> expected == list(align_tokens(tokens, s)) # Check that the output is as expected. + True + >>> tokens == [s[start:end] for start, end in output] # Check that the slices of the string corresponds to the tokens. + True + + :param tokens: The list of strings that are the result of tokenization + :type tokens: list(str) + :param sentence: The original string + :type sentence: str + :rtype: list(tuple(int,int)) + """ + point = 0 + offsets = [] + for token in tokens: + try: + start = sentence.index(token, point) + except ValueError: + raise ValueError('substring "{}" not found in "{}"'.format(token, sentence)) + point = start + len(token) + offsets.append((start, point)) + return offsets diff --git a/venv.bak/lib/python3.7/site-packages/nltk/toolbox.py b/venv.bak/lib/python3.7/site-packages/nltk/toolbox.py new file mode 100644 index 0000000..74f4dbc --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/toolbox.py @@ -0,0 +1,539 @@ +# coding: utf-8 +# Natural Language Toolkit: Toolbox Reader +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Greg Aumann +# URL: +# For license information, see LICENSE.TXT + +""" +Module for reading, writing and manipulating +Toolbox databases and settings files. 
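+A Toolbox database is a plain-text "standard format marker" file in which each
+field begins with a backslash marker such as \lx; ToolboxData parses such files
+and to_sfm_string() writes them back out.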
+""" +from __future__ import print_function + +import re, codecs +from xml.etree.ElementTree import ElementTree, TreeBuilder, Element, SubElement + +from six import u + +from nltk.compat import StringIO, PY3 +from nltk.data import PathPointer, find + + +class StandardFormat(object): + """ + Class for reading and processing standard format marker files and strings. + """ + + def __init__(self, filename=None, encoding=None): + self._encoding = encoding + if filename is not None: + self.open(filename) + + def open(self, sfm_file): + """ + Open a standard format marker file for sequential reading. + + :param sfm_file: name of the standard format marker input file + :type sfm_file: str + """ + if isinstance(sfm_file, PathPointer): + # [xx] We don't use 'rU' mode here -- do we need to? + # (PathPointer.open doesn't take a mode option) + self._file = sfm_file.open(self._encoding) + else: + self._file = codecs.open(sfm_file, 'rU', self._encoding) + + def open_string(self, s): + """ + Open a standard format marker string for sequential reading. + + :param s: string to parse as a standard format marker input file + :type s: str + """ + self._file = StringIO(s) + + def raw_fields(self): + """ + Return an iterator that returns the next field in a (marker, value) + tuple. Linebreaks and trailing white space are preserved except + for the final newline in each field. + + :rtype: iter(tuple(str, str)) + """ + join_string = '\n' + line_regexp = r'^%s(?:\\(\S+)\s*)?(.*)$' + # discard a BOM in the first line + first_line_pat = re.compile(line_regexp % '(?:\xef\xbb\xbf)?') + line_pat = re.compile(line_regexp % '') + # need to get first line outside the loop for correct handling + # of the first marker if it spans multiple lines + file_iter = iter(self._file) + # PEP 479, prevent RuntimeError when StopIteration is raised inside generator + try: + line = next(file_iter) + except StopIteration: + # no more data is available, terminate the generator + return + mobj = re.match(first_line_pat, line) + mkr, line_value = mobj.groups() + value_lines = [line_value] + self.line_num = 0 + for line in file_iter: + self.line_num += 1 + mobj = re.match(line_pat, line) + line_mkr, line_value = mobj.groups() + if line_mkr: + yield (mkr, join_string.join(value_lines)) + mkr = line_mkr + value_lines = [line_value] + else: + value_lines.append(line_value) + self.line_num += 1 + yield (mkr, join_string.join(value_lines)) + + def fields( + self, + strip=True, + unwrap=True, + encoding=None, + errors='strict', + unicode_fields=None, + ): + """ + Return an iterator that returns the next field in a ``(marker, value)`` + tuple, where ``marker`` and ``value`` are unicode strings if an ``encoding`` + was specified in the ``fields()`` method. Otherwise they are non-unicode strings. + + :param strip: strip trailing whitespace from the last line of each field + :type strip: bool + :param unwrap: Convert newlines in a field to spaces. + :type unwrap: bool + :param encoding: Name of an encoding to use. If it is specified then + the ``fields()`` method returns unicode strings rather than non + unicode strings. + :type encoding: str or None + :param errors: Error handling scheme for codec. Same as the ``decode()`` + builtin string method. + :type errors: str + :param unicode_fields: Set of marker names whose values are UTF-8 encoded. + Ignored if encoding is None. If the whole file is UTF-8 encoded set + ``encoding='utf8'`` and leave ``unicode_fields`` with its default + value of None. 
+ :type unicode_fields: sequence + :rtype: iter(tuple(str, str)) + """ + if encoding is None and unicode_fields is not None: + raise ValueError('unicode_fields is set but not encoding.') + unwrap_pat = re.compile(r'\n+') + for mkr, val in self.raw_fields(): + if encoding and not PY3: # kludge - already decoded in PY3? + if unicode_fields is not None and mkr in unicode_fields: + val = val.decode('utf8', errors) + else: + val = val.decode(encoding, errors) + mkr = mkr.decode(encoding, errors) + if unwrap: + val = unwrap_pat.sub(' ', val) + if strip: + val = val.rstrip() + yield (mkr, val) + + def close(self): + """Close a previously opened standard format marker file or string.""" + self._file.close() + try: + del self.line_num + except AttributeError: + pass + + +class ToolboxData(StandardFormat): + def parse(self, grammar=None, **kwargs): + if grammar: + return self._chunk_parse(grammar=grammar, **kwargs) + else: + return self._record_parse(**kwargs) + + def _record_parse(self, key=None, **kwargs): + """ + Returns an element tree structure corresponding to a toolbox data file with + all markers at the same level. + + Thus the following Toolbox database:: + \_sh v3.0 400 Rotokas Dictionary + \_DateStampHasFourDigitYear + + \lx kaa + \ps V.A + \ge gag + \gp nek i pas + + \lx kaa + \ps V.B + \ge strangle + \gp pasim nek + + after parsing will end up with the same structure (ignoring the extra + whitespace) as the following XML fragment after being parsed by + ElementTree:: + +
+        <toolbox_data>
+            <header>
+                <_sh>v3.0 400 Rotokas Dictionary</_sh>
+                <_DateStampHasFourDigitYear/>
+            </header>
+
+            <record>
+                <lx>kaa</lx>
+                <ps>V.A</ps>
+                <ge>gag</ge>
+                <gp>nek i pas</gp>
+            </record>
+
+            <record>
+                <lx>kaa</lx>
+                <ps>V.B</ps>
+                <ge>strangle</ge>
+                <gp>pasim nek</gp>
+            </record>
+        </toolbox_data>
    + + :param key: Name of key marker at the start of each record. If set to + None (the default value) the first marker that doesn't begin with + an underscore is assumed to be the key. + :type key: str + :param kwargs: Keyword arguments passed to ``StandardFormat.fields()`` + :type kwargs: dict + :rtype: ElementTree._ElementInterface + :return: contents of toolbox data divided into header and records + """ + builder = TreeBuilder() + builder.start('toolbox_data', {}) + builder.start('header', {}) + in_records = False + for mkr, value in self.fields(**kwargs): + if key is None and not in_records and mkr[0] != '_': + key = mkr + if mkr == key: + if in_records: + builder.end('record') + else: + builder.end('header') + in_records = True + builder.start('record', {}) + builder.start(mkr, {}) + builder.data(value) + builder.end(mkr) + if in_records: + builder.end('record') + else: + builder.end('header') + builder.end('toolbox_data') + return builder.close() + + def _tree2etree(self, parent): + from nltk.tree import Tree + + root = Element(parent.label()) + for child in parent: + if isinstance(child, Tree): + root.append(self._tree2etree(child)) + else: + text, tag = child + e = SubElement(root, tag) + e.text = text + return root + + def _chunk_parse(self, grammar=None, root_label='record', trace=0, **kwargs): + """ + Returns an element tree structure corresponding to a toolbox data file + parsed according to the chunk grammar. + + :type grammar: str + :param grammar: Contains the chunking rules used to parse the + database. See ``chunk.RegExp`` for documentation. + :type root_label: str + :param root_label: The node value that should be used for the + top node of the chunk structure. + :type trace: int + :param trace: The level of tracing that should be used when + parsing a text. ``0`` will generate no tracing output; + ``1`` will generate normal tracing output; and ``2`` or + higher will generate verbose tracing output. + :type kwargs: dict + :param kwargs: Keyword arguments passed to ``toolbox.StandardFormat.fields()`` + :rtype: ElementTree._ElementInterface + """ + from nltk import chunk + from nltk.tree import Tree + + cp = chunk.RegexpParser(grammar, root_label=root_label, trace=trace) + db = self.parse(**kwargs) + tb_etree = Element('toolbox_data') + header = db.find('header') + tb_etree.append(header) + for record in db.findall('record'): + parsed = cp.parse([(elem.text, elem.tag) for elem in record]) + tb_etree.append(self._tree2etree(parsed)) + return tb_etree + + +_is_value = re.compile(r"\S") + + +def to_sfm_string(tree, encoding=None, errors='strict', unicode_fields=None): + """ + Return a string with a standard format representation of the toolbox + data in tree (tree can be a toolbox database or a single record). + + :param tree: flat representation of toolbox data (whole database or single record) + :type tree: ElementTree._ElementInterface + :param encoding: Name of an encoding to use. + :type encoding: str + :param errors: Error handling scheme for codec. Same as the ``encode()`` + builtin string method. 
+ :type errors: str + :param unicode_fields: + :type unicode_fields: dict(str) or set(str) + :rtype: str + """ + if tree.tag == 'record': + root = Element('toolbox_data') + root.append(tree) + tree = root + + if tree.tag != 'toolbox_data': + raise ValueError("not a toolbox_data element structure") + if encoding is None and unicode_fields is not None: + raise ValueError( + "if encoding is not specified then neither should unicode_fields" + ) + l = [] + for rec in tree: + l.append('\n') + for field in rec: + mkr = field.tag + value = field.text + if encoding is not None: + if unicode_fields is not None and mkr in unicode_fields: + cur_encoding = 'utf8' + else: + cur_encoding = encoding + if re.search(_is_value, value): + l.append( + (u("\\%s %s\n") % (mkr, value)).encode(cur_encoding, errors) + ) + else: + l.append( + (u("\\%s%s\n") % (mkr, value)).encode(cur_encoding, errors) + ) + else: + if re.search(_is_value, value): + l.append("\\%s %s\n" % (mkr, value)) + else: + l.append("\\%s%s\n" % (mkr, value)) + return ''.join(l[1:]) + + +class ToolboxSettings(StandardFormat): + """This class is the base class for settings files.""" + + def __init__(self): + super(ToolboxSettings, self).__init__() + + def parse(self, encoding=None, errors='strict', **kwargs): + """ + Return the contents of toolbox settings file with a nested structure. + + :param encoding: encoding used by settings file + :type encoding: str + :param errors: Error handling scheme for codec. Same as ``decode()`` builtin method. + :type errors: str + :param kwargs: Keyword arguments passed to ``StandardFormat.fields()`` + :type kwargs: dict + :rtype: ElementTree._ElementInterface + """ + builder = TreeBuilder() + for mkr, value in self.fields(encoding=encoding, errors=errors, **kwargs): + # Check whether the first char of the field marker + # indicates a block start (+) or end (-) + block = mkr[0] + if block in ("+", "-"): + mkr = mkr[1:] + else: + block = None + # Build tree on the basis of block char + if block == "+": + builder.start(mkr, {}) + builder.data(value) + elif block == '-': + builder.end(mkr) + else: + builder.start(mkr, {}) + builder.data(value) + builder.end(mkr) + return builder.close() + + +def to_settings_string(tree, encoding=None, errors='strict', unicode_fields=None): + # write XML to file + l = list() + _to_settings_string( + tree.getroot(), + l, + encoding=encoding, + errors=errors, + unicode_fields=unicode_fields, + ) + return ''.join(l) + + +def _to_settings_string(node, l, **kwargs): + # write XML to file + tag = node.tag + text = node.text + if len(node) == 0: + if text: + l.append('\\%s %s\n' % (tag, text)) + else: + l.append('\\%s\n' % tag) + else: + if text: + l.append('\\+%s %s\n' % (tag, text)) + else: + l.append('\\+%s\n' % tag) + for n in node: + _to_settings_string(n, l, **kwargs) + l.append('\\-%s\n' % tag) + return + + +def remove_blanks(elem): + """ + Remove all elements and subelements with no text and no child elements. + + :param elem: toolbox data in an elementtree structure + :type elem: ElementTree._ElementInterface + """ + out = list() + for child in elem: + remove_blanks(child) + if child.text or len(child) > 0: + out.append(child) + elem[:] = out + + +def add_default_fields(elem, default_fields): + """ + Add blank elements and subelements specified in default_fields. 
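+    For each element, any field listed under its tag in ``default_fields`` that
+    is not already present is added as an empty subelement.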
+ + :param elem: toolbox data in an elementtree structure + :type elem: ElementTree._ElementInterface + :param default_fields: fields to add to each type of element and subelement + :type default_fields: dict(tuple) + """ + for field in default_fields.get(elem.tag, []): + if elem.find(field) is None: + SubElement(elem, field) + for child in elem: + add_default_fields(child, default_fields) + + +def sort_fields(elem, field_orders): + """ + Sort the elements and subelements in order specified in field_orders. + + :param elem: toolbox data in an elementtree structure + :type elem: ElementTree._ElementInterface + :param field_orders: order of fields for each type of element and subelement + :type field_orders: dict(tuple) + """ + order_dicts = dict() + for field, order in field_orders.items(): + order_dicts[field] = order_key = dict() + for i, subfield in enumerate(order): + order_key[subfield] = i + _sort_fields(elem, order_dicts) + + +def _sort_fields(elem, orders_dicts): + """sort the children of elem""" + try: + order = orders_dicts[elem.tag] + except KeyError: + pass + else: + tmp = sorted( + [((order.get(child.tag, 1e9), i), child) for i, child in enumerate(elem)] + ) + elem[:] = [child for key, child in tmp] + for child in elem: + if len(child): + _sort_fields(child, orders_dicts) + + +def add_blank_lines(tree, blanks_before, blanks_between): + """ + Add blank lines before all elements and subelements specified in blank_before. + + :param elem: toolbox data in an elementtree structure + :type elem: ElementTree._ElementInterface + :param blank_before: elements and subelements to add blank lines before + :type blank_before: dict(tuple) + """ + try: + before = blanks_before[tree.tag] + between = blanks_between[tree.tag] + except KeyError: + for elem in tree: + if len(elem): + add_blank_lines(elem, blanks_before, blanks_between) + else: + last_elem = None + for elem in tree: + tag = elem.tag + if last_elem is not None and last_elem.tag != tag: + if tag in before and last_elem is not None: + e = last_elem.getiterator()[-1] + e.text = (e.text or "") + "\n" + else: + if tag in between: + e = last_elem.getiterator()[-1] + e.text = (e.text or "") + "\n" + if len(elem): + add_blank_lines(elem, blanks_before, blanks_between) + last_elem = elem + + +def demo(): + from itertools import islice + + # zip_path = find('corpora/toolbox.zip') + # lexicon = ToolboxData(ZipFilePathPointer(zip_path, 'toolbox/rotokas.dic')).parse() + file_path = find('corpora/toolbox/rotokas.dic') + lexicon = ToolboxData(file_path).parse() + print('first field in fourth record:') + print(lexicon[3][0].tag) + print(lexicon[3][0].text) + + print('\nfields in sequential order:') + for field in islice(lexicon.find('record'), 10): + print(field.tag, field.text) + + print('\nlx fields:') + for field in islice(lexicon.findall('record/lx'), 10): + print(field.text) + + settings = ToolboxSettings() + file_path = find('corpora/toolbox/MDF/MDF_AltH.typ') + settings.open(file_path) + # settings.open(ZipFilePathPointer(zip_path, entry='toolbox/MDF/MDF_AltH.typ')) + tree = settings.parse(unwrap=False, encoding='cp1252') + print(tree.find('expset/expMDF/rtfPageSetup/paperSize').text) + settings_tree = ElementTree(tree) + print(to_settings_string(settings_tree).encode('utf8')) + + +if __name__ == '__main__': + demo() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/__init__.py new file mode 100644 index 0000000..f4d3a00 --- /dev/null +++ 
b/venv.bak/lib/python3.7/site-packages/nltk/translate/__init__.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Machine Translation +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird , Tah Wei Hoon +# URL: +# For license information, see LICENSE.TXT + +""" +Experimental features for machine translation. +These interfaces are prone to change. +""" + +from nltk.translate.api import AlignedSent, Alignment, PhraseTable +from nltk.translate.ibm_model import IBMModel +from nltk.translate.ibm1 import IBMModel1 +from nltk.translate.ibm2 import IBMModel2 +from nltk.translate.ibm3 import IBMModel3 +from nltk.translate.ibm4 import IBMModel4 +from nltk.translate.ibm5 import IBMModel5 +from nltk.translate.bleu_score import sentence_bleu as bleu +from nltk.translate.ribes_score import sentence_ribes as ribes +from nltk.translate.meteor_score import meteor_score as meteor +from nltk.translate.metrics import alignment_error_rate +from nltk.translate.stack_decoder import StackDecoder diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..a549d28 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/api.cpython-37.pyc new file mode 100644 index 0000000..acb51e9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/api.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/bleu_score.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/bleu_score.cpython-37.pyc new file mode 100644 index 0000000..3ae8bd6 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/bleu_score.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/chrf_score.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/chrf_score.cpython-37.pyc new file mode 100644 index 0000000..3bc7d7b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/chrf_score.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/gale_church.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/gale_church.cpython-37.pyc new file mode 100644 index 0000000..1099cdb Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/gale_church.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/gdfa.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/gdfa.cpython-37.pyc new file mode 100644 index 0000000..d5b4713 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/gdfa.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/gleu_score.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/gleu_score.cpython-37.pyc new file mode 100644 index 0000000..afb99f6 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/gleu_score.cpython-37.pyc differ diff --git 
a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm1.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm1.cpython-37.pyc new file mode 100644 index 0000000..8145df7 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm1.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm2.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm2.cpython-37.pyc new file mode 100644 index 0000000..3a28cdc Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm2.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm3.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm3.cpython-37.pyc new file mode 100644 index 0000000..8a6e03d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm3.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm4.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm4.cpython-37.pyc new file mode 100644 index 0000000..70f004f Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm4.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm5.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm5.cpython-37.pyc new file mode 100644 index 0000000..6553e60 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm5.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm_model.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm_model.cpython-37.pyc new file mode 100644 index 0000000..483205a Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ibm_model.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/meteor_score.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/meteor_score.cpython-37.pyc new file mode 100644 index 0000000..8db2392 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/meteor_score.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/metrics.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/metrics.cpython-37.pyc new file mode 100644 index 0000000..bded43f Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/metrics.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/nist_score.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/nist_score.cpython-37.pyc new file mode 100644 index 0000000..23554a2 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/nist_score.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/phrase_based.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/phrase_based.cpython-37.pyc new file mode 100644 index 0000000..61ed27d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/phrase_based.cpython-37.pyc differ diff 
--git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ribes_score.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ribes_score.cpython-37.pyc new file mode 100644 index 0000000..8fd3a89 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/ribes_score.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/stack_decoder.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/stack_decoder.cpython-37.pyc new file mode 100644 index 0000000..251e023 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/translate/__pycache__/stack_decoder.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/api.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/api.py new file mode 100644 index 0000000..b889410 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/api.py @@ -0,0 +1,339 @@ +# Natural Language Toolkit: API for alignment and translation objects +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Will Zhang +# Guan Gui +# Steven Bird +# Tah Wei Hoon +# URL: +# For license information, see LICENSE.TXT + +from __future__ import print_function, unicode_literals +import subprocess +from collections import namedtuple + +from nltk.compat import python_2_unicode_compatible + + +@python_2_unicode_compatible +class AlignedSent(object): + """ + Return an aligned sentence object, which encapsulates two sentences + along with an ``Alignment`` between them. + + Typically used in machine translation to represent a sentence and + its translation. + + >>> from nltk.translate import AlignedSent, Alignment + >>> algnsent = AlignedSent(['klein', 'ist', 'das', 'Haus'], + ... ['the', 'house', 'is', 'small'], Alignment.fromstring('0-3 1-2 2-0 3-1')) + >>> algnsent.words + ['klein', 'ist', 'das', 'Haus'] + >>> algnsent.mots + ['the', 'house', 'is', 'small'] + >>> algnsent.alignment + Alignment([(0, 3), (1, 2), (2, 0), (3, 1)]) + >>> from nltk.corpus import comtrans + >>> print(comtrans.aligned_sents()[54]) + 'So why should EU arm...'> + >>> print(comtrans.aligned_sents()[54].alignment) + 0-0 0-1 1-0 2-2 3-4 3-5 4-7 5-8 6-3 7-9 8-9 9-10 9-11 10-12 11-6 12-6 13-13 + + :param words: Words in the target language sentence + :type words: list(str) + :param mots: Words in the source language sentence + :type mots: list(str) + :param alignment: Word-level alignments between ``words`` and ``mots``. + Each alignment is represented as a 2-tuple (words_index, mots_index). + :type alignment: Alignment + """ + + def __init__(self, words, mots, alignment=None): + self._words = words + self._mots = mots + if alignment is None: + self.alignment = Alignment([]) + else: + assert type(alignment) is Alignment + self.alignment = alignment + + @property + def words(self): + return self._words + + @property + def mots(self): + return self._mots + + def _get_alignment(self): + return self._alignment + + def _set_alignment(self, alignment): + _check_alignment(len(self.words), len(self.mots), alignment) + self._alignment = alignment + + alignment = property(_get_alignment, _set_alignment) + + def __repr__(self): + """ + Return a string representation for this ``AlignedSent``. 
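# Editorial illustration of the validation wired into the ``alignment`` property
# above: assigning an alignment that points outside either sentence raises
# IndexError (the check itself, _check_alignment, is defined later in this module).
# The German/English pair is a toy example.
from nltk.translate import AlignedSent, Alignment

pair = AlignedSent(['ein', 'Haus'], ['a', 'house'], Alignment.fromstring('0-0 1-1'))
try:
    pair.alignment = Alignment.fromstring('0-0 5-1')   # source index 5 is out of range
except IndexError as err:
    print(err)   # Alignment is outside boundary of words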
+ + :rtype: str + """ + words = "[%s]" % (", ".join("'%s'" % w for w in self._words)) + mots = "[%s]" % (", ".join("'%s'" % w for w in self._mots)) + + return "AlignedSent(%s, %s, %r)" % (words, mots, self._alignment) + + def _to_dot(self): + """ + Dot representation of the aligned sentence + """ + s = 'graph align {\n' + s += 'node[shape=plaintext]\n' + + # Declare node + for w in self._words: + s += '"%s_source" [label="%s"] \n' % (w, w) + + for w in self._mots: + s += '"%s_target" [label="%s"] \n' % (w, w) + + # Alignment + for u, v in self._alignment: + s += '"%s_source" -- "%s_target" \n' % (self._words[u], self._mots[v]) + + # Connect the source words + for i in range(len(self._words) - 1): + s += '"%s_source" -- "%s_source" [style=invis]\n' % ( + self._words[i], + self._words[i + 1], + ) + + # Connect the target words + for i in range(len(self._mots) - 1): + s += '"%s_target" -- "%s_target" [style=invis]\n' % ( + self._mots[i], + self._mots[i + 1], + ) + + # Put it in the same rank + s += '{rank = same; %s}\n' % (' '.join('"%s_source"' % w for w in self._words)) + s += '{rank = same; %s}\n' % (' '.join('"%s_target"' % w for w in self._mots)) + + s += '}' + + return s + + def _repr_svg_(self): + """ + Ipython magic : show SVG representation of this ``AlignedSent``. + """ + dot_string = self._to_dot().encode('utf8') + output_format = 'svg' + try: + process = subprocess.Popen( + ['dot', '-T%s' % output_format], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + except OSError: + raise Exception('Cannot find the dot binary from Graphviz package') + out, err = process.communicate(dot_string) + + return out.decode('utf8') + + def __str__(self): + """ + Return a human-readable string representation for this ``AlignedSent``. + + :rtype: str + """ + source = " ".join(self._words)[:20] + "..." + target = " ".join(self._mots)[:20] + "..." + return " '%s'>" % (source, target) + + def invert(self): + """ + Return the aligned sentence pair, reversing the directionality + + :rtype: AlignedSent + """ + return AlignedSent(self._mots, self._words, self._alignment.invert()) + + +@python_2_unicode_compatible +class Alignment(frozenset): + """ + A storage class for representing alignment between two sequences, s1, s2. + In general, an alignment is a set of tuples of the form (i, j, ...) + representing an alignment between the i-th element of s1 and the + j-th element of s2. Tuples are extensible (they might contain + additional data, such as a boolean to indicate sure vs possible alignments). + + >>> from nltk.translate import Alignment + >>> a = Alignment([(0, 0), (0, 1), (1, 2), (2, 2)]) + >>> a.invert() + Alignment([(0, 0), (1, 0), (2, 1), (2, 2)]) + >>> print(a.invert()) + 0-0 1-0 2-1 2-2 + >>> a[0] + [(0, 1), (0, 0)] + >>> a.invert()[2] + [(2, 1), (2, 2)] + >>> b = Alignment([(0, 0), (0, 1)]) + >>> b.issubset(a) + True + >>> c = Alignment.fromstring('0-0 0-1') + >>> b == c + True + """ + + def __new__(cls, pairs): + self = frozenset.__new__(cls, pairs) + self._len = max(p[0] for p in self) if self != frozenset([]) else 0 + self._index = None + return self + + @classmethod + def fromstring(cls, s): + """ + Read a giza-formatted string and return an Alignment object. + + >>> Alignment.fromstring('0-0 2-1 9-2 21-3 10-4 7-5') + Alignment([(0, 0), (2, 1), (7, 5), (9, 2), (10, 4), (21, 3)]) + + :type s: str + :param s: the positional alignments in giza format + :rtype: Alignment + :return: An Alignment object corresponding to the string representation ``s``. 
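# Editorial sketch: _to_dot() above builds Graphviz DOT source for an AlignedSent,
# and _repr_svg_ pipes that text through the external `dot` binary. Both are
# private helpers, so treat this strictly as an illustration of the mechanism.
from nltk.translate import AlignedSent, Alignment

pair = AlignedSent(['klein', 'ist', 'das', 'Haus'],
                   ['the', 'house', 'is', 'small'],
                   Alignment.fromstring('0-3 1-2 2-0 3-1'))
dot_source = pair._to_dot()           # undirected graph: word_source -- word_target edges
print(dot_source.splitlines()[0])     # graph align {
# Feeding dot_source to `dot -Tsvg` (as _repr_svg_ does) yields the SVG rendering.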
+ """ + + return Alignment([_giza2pair(a) for a in s.split()]) + + def __getitem__(self, key): + """ + Look up the alignments that map from a given index or slice. + """ + if not self._index: + self._build_index() + return self._index.__getitem__(key) + + def invert(self): + """ + Return an Alignment object, being the inverted mapping. + """ + return Alignment(((p[1], p[0]) + p[2:]) for p in self) + + def range(self, positions=None): + """ + Work out the range of the mapping from the given positions. + If no positions are specified, compute the range of the entire mapping. + """ + image = set() + if not self._index: + self._build_index() + if not positions: + positions = list(range(len(self._index))) + for p in positions: + image.update(f for _, f in self._index[p]) + return sorted(image) + + def __repr__(self): + """ + Produce a Giza-formatted string representing the alignment. + """ + return "Alignment(%r)" % sorted(self) + + def __str__(self): + """ + Produce a Giza-formatted string representing the alignment. + """ + return " ".join("%d-%d" % p[:2] for p in sorted(self)) + + def _build_index(self): + """ + Build a list self._index such that self._index[i] is a list + of the alignments originating from word i. + """ + self._index = [[] for _ in range(self._len + 1)] + for p in self: + self._index[p[0]].append(p) + + +def _giza2pair(pair_string): + i, j = pair_string.split("-") + return int(i), int(j) + + +def _naacl2pair(pair_string): + i, j, p = pair_string.split("-") + return int(i), int(j) + + +def _check_alignment(num_words, num_mots, alignment): + """ + Check whether the alignments are legal. + + :param num_words: the number of source language words + :type num_words: int + :param num_mots: the number of target language words + :type num_mots: int + :param alignment: alignment to be checked + :type alignment: Alignment + :raise IndexError: if alignment falls outside the sentence + """ + + assert type(alignment) is Alignment + + if not all(0 <= pair[0] < num_words for pair in alignment): + raise IndexError("Alignment is outside boundary of words") + if not all(pair[1] is None or 0 <= pair[1] < num_mots for pair in alignment): + raise IndexError("Alignment is outside boundary of mots") + + +PhraseTableEntry = namedtuple('PhraseTableEntry', ['trg_phrase', 'log_prob']) + + +class PhraseTable(object): + """ + In-memory store of translations for a given phrase, and the log + probability of the those translations + """ + + def __init__(self): + self.src_phrases = dict() + + def translations_for(self, src_phrase): + """ + Get the translations for a source language phrase + + :param src_phrase: Source language phrase of interest + :type src_phrase: tuple(str) + + :return: A list of target language phrases that are translations + of ``src_phrase``, ordered in decreasing order of + likelihood. Each list element is a tuple of the target + phrase and its log probability. 
+ :rtype: list(PhraseTableEntry) + """ + return self.src_phrases[src_phrase] + + def add(self, src_phrase, trg_phrase, log_prob): + """ + :type src_phrase: tuple(str) + :type trg_phrase: tuple(str) + + :param log_prob: Log probability that given ``src_phrase``, + ``trg_phrase`` is its translation + :type log_prob: float + """ + entry = PhraseTableEntry(trg_phrase=trg_phrase, log_prob=log_prob) + if src_phrase not in self.src_phrases: + self.src_phrases[src_phrase] = [] + self.src_phrases[src_phrase].append(entry) + self.src_phrases[src_phrase].sort(key=lambda e: e.log_prob, reverse=True) + + def __contains__(self, src_phrase): + return src_phrase in self.src_phrases diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/bleu_score.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/bleu_score.py new file mode 100644 index 0000000..b81c360 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/bleu_score.py @@ -0,0 +1,642 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: BLEU Score +# +# Copyright (C) 2001-2019 NLTK Project +# Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim +# Contributors: Björn Mattsson, Dmitrijs Milajevs, Liling Tan +# URL: +# For license information, see LICENSE.TXT + +"""BLEU score implementation.""" +from __future__ import division + +import math +import sys +import fractions +import warnings +from collections import Counter + +from nltk.util import ngrams + +try: + fractions.Fraction(0, 1000, _normalize=False) + from fractions import Fraction +except TypeError: + from nltk.compat import Fraction + + +def sentence_bleu( + references, + hypothesis, + weights=(0.25, 0.25, 0.25, 0.25), + smoothing_function=None, + auto_reweigh=False, +): + """ + Calculate BLEU score (Bilingual Evaluation Understudy) from + Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. + "BLEU: a method for automatic evaluation of machine translation." + In Proceedings of ACL. http://www.aclweb.org/anthology/P02-1040.pdf + + >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', + ... 'ensures', 'that', 'the', 'military', 'always', + ... 'obeys', 'the', 'commands', 'of', 'the', 'party'] + + >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops', + ... 'forever', 'hearing', 'the', 'activity', 'guidebook', + ... 'that', 'party', 'direct'] + + >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', + ... 'ensures', 'that', 'the', 'military', 'will', 'forever', + ... 'heed', 'Party', 'commands'] + + >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which', + ... 'guarantees', 'the', 'military', 'forces', 'always', + ... 'being', 'under', 'the', 'command', 'of', 'the', + ... 'Party'] + + >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the', + ... 'army', 'always', 'to', 'heed', 'the', 'directions', + ... 'of', 'the', 'party'] + + >>> sentence_bleu([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS + 0.5045... + + If there is no ngrams overlap for any order of n-grams, BLEU returns the + value 0. This is because the precision for the order of n-grams without + overlap is 0, and the geometric mean in the final BLEU score computation + multiplies the 0 with the precision of other n-grams. This results in 0 + (independently of the precision of the othe n-gram orders). 
The following + example has zero 3-gram and 4-gram overlaps: + + >>> round(sentence_bleu([reference1, reference2, reference3], hypothesis2),4) # doctest: +ELLIPSIS + 0.0 + + To avoid this harsh behaviour when no ngram overlaps are found a smoothing + function can be used. + + >>> chencherry = SmoothingFunction() + >>> sentence_bleu([reference1, reference2, reference3], hypothesis2, + ... smoothing_function=chencherry.method1) # doctest: +ELLIPSIS + 0.0370... + + The default BLEU calculates a score for up to 4-grams using uniform + weights (this is called BLEU-4). To evaluate your translations with + higher/lower order ngrams, use customized weights. E.g. when accounting + for up to 5-grams with uniform weights (this is called BLEU-5) use: + + >>> weights = (1./5., 1./5., 1./5., 1./5., 1./5.) + >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS + 0.3920... + + :param references: reference sentences + :type references: list(list(str)) + :param hypothesis: a hypothesis sentence + :type hypothesis: list(str) + :param weights: weights for unigrams, bigrams, trigrams and so on + :type weights: list(float) + :param smoothing_function: + :type smoothing_function: SmoothingFunction + :param auto_reweigh: Option to re-normalize the weights uniformly. + :type auto_reweigh: bool + :return: The sentence-level BLEU score. + :rtype: float + """ + return corpus_bleu( + [references], [hypothesis], weights, smoothing_function, auto_reweigh + ) + + +def corpus_bleu( + list_of_references, + hypotheses, + weights=(0.25, 0.25, 0.25, 0.25), + smoothing_function=None, + auto_reweigh=False, +): + """ + Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all + the hypotheses and their respective references. + + Instead of averaging the sentence level BLEU scores (i.e. marco-average + precision), the original BLEU metric (Papineni et al. 2002) accounts for + the micro-average precision (i.e. summing the numerators and denominators + for each hypothesis-reference(s) pairs before the division). + + >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', + ... 'ensures', 'that', 'the', 'military', 'always', + ... 'obeys', 'the', 'commands', 'of', 'the', 'party'] + >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', + ... 'ensures', 'that', 'the', 'military', 'will', 'forever', + ... 'heed', 'Party', 'commands'] + >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which', + ... 'guarantees', 'the', 'military', 'forces', 'always', + ... 'being', 'under', 'the', 'command', 'of', 'the', 'Party'] + >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the', + ... 'army', 'always', 'to', 'heed', 'the', 'directions', + ... 'of', 'the', 'party'] + + >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was', + ... 'interested', 'in', 'world', 'history'] + >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history', + ... 'because', 'he', 'read', 'the', 'book'] + + >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]] + >>> hypotheses = [hyp1, hyp2] + >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS + 0.5920... + + The example below show that corpus_bleu() is different from averaging + sentence_bleu() for hypotheses + + >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1) + >>> score2 = sentence_bleu([ref2a], hyp2) + >>> (score1 + score2) / 2 # doctest: +ELLIPSIS + 0.6223... + + :param list_of_references: a corpus of lists of reference sentences, w.r.t. 
hypotheses + :type list_of_references: list(list(list(str))) + :param hypotheses: a list of hypothesis sentences + :type hypotheses: list(list(str)) + :param weights: weights for unigrams, bigrams, trigrams and so on + :type weights: list(float) + :param smoothing_function: + :type smoothing_function: SmoothingFunction + :param auto_reweigh: Option to re-normalize the weights uniformly. + :type auto_reweigh: bool + :return: The corpus-level BLEU score. + :rtype: float + """ + # Before proceeding to compute BLEU, perform sanity checks. + + p_numerators = Counter() # Key = ngram order, and value = no. of ngram matches. + p_denominators = Counter() # Key = ngram order, and value = no. of ngram in ref. + hyp_lengths, ref_lengths = 0, 0 + + assert len(list_of_references) == len(hypotheses), ( + "The number of hypotheses and their reference(s) should be the " "same " + ) + + # Iterate through each hypothesis and their corresponding references. + for references, hypothesis in zip(list_of_references, hypotheses): + # For each order of ngram, calculate the numerator and + # denominator for the corpus-level modified precision. + for i, _ in enumerate(weights, start=1): + p_i = modified_precision(references, hypothesis, i) + p_numerators[i] += p_i.numerator + p_denominators[i] += p_i.denominator + + # Calculate the hypothesis length and the closest reference length. + # Adds them to the corpus-level hypothesis and reference counts. + hyp_len = len(hypothesis) + hyp_lengths += hyp_len + ref_lengths += closest_ref_length(references, hyp_len) + + # Calculate corpus-level brevity penalty. + bp = brevity_penalty(ref_lengths, hyp_lengths) + + # Uniformly re-weighting based on maximum hypothesis lengths if largest + # order of n-grams < 4 and weights is set at default. + if auto_reweigh: + if hyp_lengths < 4 and weights == (0.25, 0.25, 0.25, 0.25): + weights = (1 / hyp_lengths,) * hyp_lengths + + # Collects the various precision values for the different ngram orders. + p_n = [ + Fraction(p_numerators[i], p_denominators[i], _normalize=False) + for i, _ in enumerate(weights, start=1) + ] + + # Returns 0 if there's no matching n-grams + # We only need to check for p_numerators[1] == 0, since if there's + # no unigrams, there won't be any higher order ngrams. + if p_numerators[1] == 0: + return 0 + + # If there's no smoothing, set use method0 from SmoothinFunction class. + if not smoothing_function: + smoothing_function = SmoothingFunction().method0 + # Smoothen the modified precision. + # Note: smoothing_function() may convert values into floats; + # it tries to retain the Fraction object as much as the + # smoothing method allows. + p_n = smoothing_function( + p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths + ) + s = (w_i * math.log(p_i) for w_i, p_i in zip(weights, p_n)) + s = bp * math.exp(math.fsum(s)) + return s + + +def modified_precision(references, hypothesis, n): + """ + Calculate modified ngram precision. + + The normal precision method may lead to some wrong translations with + high-precision, e.g., the translation, in which a word of reference + repeats several times, has very high precision. + + This function only returns the Fraction object that contains the numerator + and denominator necessary to calculate the corpus-level precision. + To calculate the modified precision for a single pair of hypothesis and + references, cast the Fraction object into a float. + + The famous "the the the ... 
" example shows that you can get BLEU precision + by duplicating high frequency words. + + >>> reference1 = 'the cat is on the mat'.split() + >>> reference2 = 'there is a cat on the mat'.split() + >>> hypothesis1 = 'the the the the the the the'.split() + >>> references = [reference1, reference2] + >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS + 0.2857... + + In the modified n-gram precision, a reference word will be considered + exhausted after a matching hypothesis word is identified, e.g. + + >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', + ... 'ensures', 'that', 'the', 'military', 'will', + ... 'forever', 'heed', 'Party', 'commands'] + >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which', + ... 'guarantees', 'the', 'military', 'forces', 'always', + ... 'being', 'under', 'the', 'command', 'of', 'the', + ... 'Party'] + >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the', + ... 'army', 'always', 'to', 'heed', 'the', 'directions', + ... 'of', 'the', 'party'] + >>> hypothesis = 'of the'.split() + >>> references = [reference1, reference2, reference3] + >>> float(modified_precision(references, hypothesis, n=1)) + 1.0 + >>> float(modified_precision(references, hypothesis, n=2)) + 1.0 + + An example of a normal machine translation hypothesis: + + >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', + ... 'ensures', 'that', 'the', 'military', 'always', + ... 'obeys', 'the', 'commands', 'of', 'the', 'party'] + + >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops', + ... 'forever', 'hearing', 'the', 'activity', 'guidebook', + ... 'that', 'party', 'direct'] + + >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', + ... 'ensures', 'that', 'the', 'military', 'will', + ... 'forever', 'heed', 'Party', 'commands'] + + >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which', + ... 'guarantees', 'the', 'military', 'forces', 'always', + ... 'being', 'under', 'the', 'command', 'of', 'the', + ... 'Party'] + + >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the', + ... 'army', 'always', 'to', 'heed', 'the', 'directions', + ... 'of', 'the', 'party'] + >>> references = [reference1, reference2, reference3] + >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS + 0.9444... + >>> float(modified_precision(references, hypothesis2, n=1)) # doctest: +ELLIPSIS + 0.5714... + >>> float(modified_precision(references, hypothesis1, n=2)) # doctest: +ELLIPSIS + 0.5882352941176471 + >>> float(modified_precision(references, hypothesis2, n=2)) # doctest: +ELLIPSIS + 0.07692... + + + :param references: A list of reference translations. + :type references: list(list(str)) + :param hypothesis: A hypothesis translation. + :type hypothesis: list(str) + :param n: The ngram order. + :type n: int + :return: BLEU's modified precision for the nth order ngram. + :rtype: Fraction + """ + # Extracts all ngrams in hypothesis + # Set an empty Counter if hypothesis is empty. + counts = Counter(ngrams(hypothesis, n)) if len(hypothesis) >= n else Counter() + # Extract a union of references' counts. 
+ # max_counts = reduce(or_, [Counter(ngrams(ref, n)) for ref in references]) + max_counts = {} + for reference in references: + reference_counts = ( + Counter(ngrams(reference, n)) if len(reference) >= n else Counter() + ) + for ngram in counts: + max_counts[ngram] = max(max_counts.get(ngram, 0), reference_counts[ngram]) + + # Assigns the intersection between hypothesis and references' counts. + clipped_counts = { + ngram: min(count, max_counts[ngram]) for ngram, count in counts.items() + } + + numerator = sum(clipped_counts.values()) + # Ensures that denominator is minimum 1 to avoid ZeroDivisionError. + # Usually this happens when the ngram order is > len(reference). + denominator = max(1, sum(counts.values())) + + return Fraction(numerator, denominator, _normalize=False) + + +def closest_ref_length(references, hyp_len): + """ + This function finds the reference that is the closest length to the + hypothesis. The closest reference length is referred to as *r* variable + from the brevity penalty formula in Papineni et. al. (2002) + + :param references: A list of reference translations. + :type references: list(list(str)) + :param hyp_len: The length of the hypothesis. + :type hyp_len: int + :return: The length of the reference that's closest to the hypothesis. + :rtype: int + """ + ref_lens = (len(reference) for reference in references) + closest_ref_len = min( + ref_lens, key=lambda ref_len: (abs(ref_len - hyp_len), ref_len) + ) + return closest_ref_len + + +def brevity_penalty(closest_ref_len, hyp_len): + """ + Calculate brevity penalty. + + As the modified n-gram precision still has the problem from the short + length sentence, brevity penalty is used to modify the overall BLEU + score according to length. + + An example from the paper. There are three references with length 12, 15 + and 17. And a concise hypothesis of the length 12. The brevity penalty is 1. + + >>> reference1 = list('aaaaaaaaaaaa') # i.e. ['a'] * 12 + >>> reference2 = list('aaaaaaaaaaaaaaa') # i.e. ['a'] * 15 + >>> reference3 = list('aaaaaaaaaaaaaaaaa') # i.e. ['a'] * 17 + >>> hypothesis = list('aaaaaaaaaaaa') # i.e. ['a'] * 12 + >>> references = [reference1, reference2, reference3] + >>> hyp_len = len(hypothesis) + >>> closest_ref_len = closest_ref_length(references, hyp_len) + >>> brevity_penalty(closest_ref_len, hyp_len) + 1.0 + + In case a hypothesis translation is shorter than the references, penalty is + applied. + + >>> references = [['a'] * 28, ['a'] * 28] + >>> hypothesis = ['a'] * 12 + >>> hyp_len = len(hypothesis) + >>> closest_ref_len = closest_ref_length(references, hyp_len) + >>> brevity_penalty(closest_ref_len, hyp_len) + 0.2635971381157267 + + The length of the closest reference is used to compute the penalty. If the + length of a hypothesis is 12, and the reference lengths are 13 and 2, the + penalty is applied because the hypothesis length (12) is less then the + closest reference length (13). + + >>> references = [['a'] * 13, ['a'] * 2] + >>> hypothesis = ['a'] * 12 + >>> hyp_len = len(hypothesis) + >>> closest_ref_len = closest_ref_length(references, hyp_len) + >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS + 0.9200... + + The brevity penalty doesn't depend on reference order. More importantly, + when two reference sentences are at the same distance, the shortest + reference sentence length is used. 
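# Editorial side calculation: the penalty implemented just below is exp(1 - r / c),
# with r the closest reference length and c the hypothesis length, so for the
# 28-vs-12 example above:
import math
print(math.exp(1 - 28 / 12))   # 0.2635971381157267, the value quoted in the doctest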
+ + >>> references = [['a'] * 13, ['a'] * 11] + >>> hypothesis = ['a'] * 12 + >>> hyp_len = len(hypothesis) + >>> closest_ref_len = closest_ref_length(references, hyp_len) + >>> bp1 = brevity_penalty(closest_ref_len, hyp_len) + >>> hyp_len = len(hypothesis) + >>> closest_ref_len = closest_ref_length(reversed(references), hyp_len) + >>> bp2 = brevity_penalty(closest_ref_len, hyp_len) + >>> bp1 == bp2 == 1 + True + + A test example from mteval-v13a.pl (starting from the line 705): + + >>> references = [['a'] * 11, ['a'] * 8] + >>> hypothesis = ['a'] * 7 + >>> hyp_len = len(hypothesis) + >>> closest_ref_len = closest_ref_length(references, hyp_len) + >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS + 0.8668... + + >>> references = [['a'] * 11, ['a'] * 8, ['a'] * 6, ['a'] * 7] + >>> hypothesis = ['a'] * 7 + >>> hyp_len = len(hypothesis) + >>> closest_ref_len = closest_ref_length(references, hyp_len) + >>> brevity_penalty(closest_ref_len, hyp_len) + 1.0 + + :param hyp_len: The length of the hypothesis for a single sentence OR the + sum of all the hypotheses' lengths for a corpus + :type hyp_len: int + :param closest_ref_len: The length of the closest reference for a single + hypothesis OR the sum of all the closest references for every hypotheses. + :type closest_ref_len: int + :return: BLEU's brevity penalty. + :rtype: float + """ + if hyp_len > closest_ref_len: + return 1 + # If hypothesis is empty, brevity penalty = 0 should result in BLEU = 0.0 + elif hyp_len == 0: + return 0 + else: + return math.exp(1 - closest_ref_len / hyp_len) + + +class SmoothingFunction: + """ + This is an implementation of the smoothing techniques + for segment-level BLEU scores that was presented in + Boxing Chen and Collin Cherry (2014) A Systematic Comparison of + Smoothing Techniques for Sentence-Level BLEU. In WMT14. + http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf + """ + + def __init__(self, epsilon=0.1, alpha=5, k=5): + """ + This will initialize the parameters required for the various smoothing + techniques, the default values are set to the numbers used in the + experiments from Chen and Cherry (2014). + + >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures', + ... 'that', 'the', 'military', 'always', 'obeys', 'the', + ... 'commands', 'of', 'the', 'party'] + >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', 'ensures', + ... 'that', 'the', 'military', 'will', 'forever', 'heed', + ... 'Party', 'commands'] + + >>> chencherry = SmoothingFunction() + >>> print (sentence_bleu([reference1], hypothesis1)) # doctest: +ELLIPSIS + 0.4118... + >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method0)) # doctest: +ELLIPSIS + 0.4118... + >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method1)) # doctest: +ELLIPSIS + 0.4118... + >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method2)) # doctest: +ELLIPSIS + 0.4489... + >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method3)) # doctest: +ELLIPSIS + 0.4118... + >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method4)) # doctest: +ELLIPSIS + 0.4118... + >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method5)) # doctest: +ELLIPSIS + 0.4905... + >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method6)) # doctest: +ELLIPSIS + 0.4135... 
+ >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method7)) # doctest: +ELLIPSIS + 0.4905... + + :param epsilon: the epsilon value use in method 1 + :type epsilon: float + :param alpha: the alpha value use in method 6 + :type alpha: int + :param k: the k value use in method 4 + :type k: int + """ + self.epsilon = epsilon + self.alpha = alpha + self.k = k + + def method0(self, p_n, *args, **kwargs): + """ + No smoothing. + """ + p_n_new = [] + for i, p_i in enumerate(p_n): + if p_i.numerator != 0: + p_n_new.append(p_i) + else: + _msg = str( + "\nThe hypothesis contains 0 counts of {}-gram overlaps.\n" + "Therefore the BLEU score evaluates to 0, independently of\n" + "how many N-gram overlaps of lower order it contains.\n" + "Consider using lower n-gram order or use " + "SmoothingFunction()" + ).format(i + 1) + warnings.warn(_msg) + # When numerator==0 where denonminator==0 or !=0, the result + # for the precision score should be equal to 0 or undefined. + # Due to BLEU geometric mean computation in logarithm space, + # we we need to take the return sys.float_info.min such that + # math.log(sys.float_info.min) returns a 0 precision score. + p_n_new.append(sys.float_info.min) + return p_n_new + + def method1(self, p_n, *args, **kwargs): + """ + Smoothing method 1: Add *epsilon* counts to precision with 0 counts. + """ + return [ + (p_i.numerator + self.epsilon) / p_i.denominator + if p_i.numerator == 0 + else p_i + for p_i in p_n + ] + + def method2(self, p_n, *args, **kwargs): + """ + Smoothing method 2: Add 1 to both numerator and denominator from + Chin-Yew Lin and Franz Josef Och (2004) Automatic evaluation of + machine translation quality using longest common subsequence and + skip-bigram statistics. In ACL04. + """ + return [ + Fraction(p_i.numerator + 1, p_i.denominator + 1, _normalize=False) + for p_i in p_n + ] + + def method3(self, p_n, *args, **kwargs): + """ + Smoothing method 3: NIST geometric sequence smoothing + The smoothing is computed by taking 1 / ( 2^k ), instead of 0, for each + precision score whose matching n-gram count is null. + k is 1 for the first 'n' value for which the n-gram match count is null/ + For example, if the text contains: + - one 2-gram match + - and (consequently) two 1-gram matches + the n-gram count for each individual precision score would be: + - n=1 => prec_count = 2 (two unigrams) + - n=2 => prec_count = 1 (one bigram) + - n=3 => prec_count = 1/2 (no trigram, taking 'smoothed' value of 1 / ( 2^k ), with k=1) + - n=4 => prec_count = 1/4 (no fourgram, taking 'smoothed' value of 1 / ( 2^k ), with k=2) + """ + incvnt = 1 # From the mteval-v13a.pl, it's referred to as k. + for i, p_i in enumerate(p_n): + if p_i.numerator == 0: + p_n[i] = 1 / (2 ** incvnt * p_i.denominator) + incvnt += 1 + return p_n + + def method4(self, p_n, references, hypothesis, hyp_len, *args, **kwargs): + """ + Smoothing method 4: + Shorter translations may have inflated precision values due to having + smaller denominators; therefore, we give them proportionally + smaller smoothed counts. Instead of scaling to 1/(2^k), Chen and Cherry + suggests dividing by 1/ln(len(T)), where T is the length of the translation. + """ + for i, p_i in enumerate(p_n): + if p_i.numerator == 0 and hyp_len != 0: + incvnt = i + 1 * self.k / math.log( + hyp_len + ) # Note that this K is different from the K from NIST. 
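                # Editorial note: by Python operator precedence the expression
                # above evaluates as i + (self.k / math.log(hyp_len)), i.e. only
                # the k term is scaled by 1 / ln(hyp_len) before the reciprocal
                # taken on the next line.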
+ p_n[i] = 1 / incvnt + return p_n + + def method5(self, p_n, references, hypothesis, hyp_len, *args, **kwargs): + """ + Smoothing method 5: + The matched counts for similar values of n should be similar. To a + calculate the n-gram matched count, it averages the n−1, n and n+1 gram + matched counts. + """ + m = {} + # Requires an precision value for an addition ngram order. + p_n_plus1 = p_n + [modified_precision(references, hypothesis, 5)] + m[-1] = p_n[0] + 1 + for i, p_i in enumerate(p_n): + p_n[i] = (m[i - 1] + p_i + p_n_plus1[i + 1]) / 3 + m[i] = p_n[i] + return p_n + + def method6(self, p_n, references, hypothesis, hyp_len, *args, **kwargs): + """ + Smoothing method 6: + Interpolates the maximum likelihood estimate of the precision *p_n* with + a prior estimate *pi0*. The prior is estimated by assuming that the ratio + between pn and pn−1 will be the same as that between pn−1 and pn−2; from + Gao and He (2013) Training MRF-Based Phrase Translation Models using + Gradient Ascent. In NAACL. + """ + # This smoothing only works when p_1 and p_2 is non-zero. + # Raise an error with an appropriate message when the input is too short + # to use this smoothing technique. + assert p_n[2], "This smoothing method requires non-zero precision for bigrams." + for i, p_i in enumerate(p_n): + if i in [0, 1]: # Skips the first 2 orders of ngrams. + continue + else: + pi0 = 0 if p_n[i - 2] == 0 else p_n[i - 1] ** 2 / p_n[i - 2] + # No. of ngrams in translation that matches the reference. + m = p_i.numerator + # No. of ngrams in translation. + l = sum(1 for _ in ngrams(hypothesis, i + 1)) + # Calculates the interpolated precision. + p_n[i] = (m + self.alpha * pi0) / (l + self.alpha) + return p_n + + def method7(self, p_n, references, hypothesis, hyp_len, *args, **kwargs): + """ + Smoothing method 7: + Interpolates methods 5 and 6. + """ + p_n = self.method4(p_n, references, hypothesis, hyp_len) + p_n = self.method5(p_n, references, hypothesis, hyp_len) + return p_n diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/chrf_score.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/chrf_score.py new file mode 100644 index 0000000..f77a026 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/chrf_score.py @@ -0,0 +1,224 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: ChrF score +# +# Copyright (C) 2001-2019 NLTK Project +# Authors: Maja Popovic +# Contributors: Liling Tan, Aleš Tamchyna (Memsource) +# URL: +# For license information, see LICENSE.TXT + +""" ChrF score implementation """ +from __future__ import division +from collections import Counter, defaultdict +import re + +from nltk.util import ngrams + + +def sentence_chrf( + reference, hypothesis, min_len=1, max_len=6, beta=3.0, ignore_whitespace=True +): + """ + Calculates the sentence level CHRF (Character n-gram F-score) described in + - Maja Popovic. 2015. CHRF: Character n-gram F-score for Automatic MT Evaluation. + In Proceedings of the 10th Workshop on Machine Translation. + http://www.statmt.org/wmt15/pdf/WMT49.pdf + - Maja Popovic. 2016. CHRF Deconstructed: β Parameters and n-gram Weights. + In Proceedings of the 1st Conference on Machine Translation. + http://www.statmt.org/wmt16/pdf/W16-2341.pdf + + This implementation of CHRF only supports a single reference at the moment. 
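# Editorial illustration using chrf_precision_recall_fscore_support(), defined
# further down in this module, on toy three-character "sentences": 'm' and 'a'
# overlap, 't'/'p' do not, so precision = recall = 2/3, and because precision
# equals recall the beta-weighted F-score is also 2/3.
from nltk.translate.chrf_score import chrf_precision_recall_fscore_support

prec, rec, fscore, tp = chrf_precision_recall_fscore_support('mat', 'map', n=1, beta=3.0)
print(round(prec, 4), round(rec, 4), tp)   # 0.6667 0.6667 2
print(round(fscore, 4))                    # 0.6667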
+ + For details not reported in the paper, consult Maja Popovic's original + implementation: https://github.com/m-popovic/chrF + + The code should output results equivalent to running CHRF++ with the + following options: -nw 0 -b 3 + + An example from the original BLEU paper + http://www.aclweb.org/anthology/P02-1040.pdf + + >>> ref1 = str('It is a guide to action that ensures that the military ' + ... 'will forever heed Party commands').split() + >>> hyp1 = str('It is a guide to action which ensures that the military ' + ... 'always obeys the commands of the party').split() + >>> hyp2 = str('It is to insure the troops forever hearing the activity ' + ... 'guidebook that party direct').split() + >>> sentence_chrf(ref1, hyp1) # doctest: +ELLIPSIS + 0.6349... + >>> sentence_chrf(ref1, hyp2) # doctest: +ELLIPSIS + 0.3330... + + The infamous "the the the ... " example + + >>> ref = 'the cat is on the mat'.split() + >>> hyp = 'the the the the the the the'.split() + >>> sentence_chrf(ref, hyp) # doctest: +ELLIPSIS + 0.1468... + + An example to show that this function allows users to use strings instead of + tokens, i.e. list(str) as inputs. + + >>> ref1 = str('It is a guide to action that ensures that the military ' + ... 'will forever heed Party commands') + >>> hyp1 = str('It is a guide to action which ensures that the military ' + ... 'always obeys the commands of the party') + >>> sentence_chrf(ref1, hyp1) # doctest: +ELLIPSIS + 0.6349... + >>> type(ref1) == type(hyp1) == str + True + >>> sentence_chrf(ref1.split(), hyp1.split()) # doctest: +ELLIPSIS + 0.6349... + + To skip the unigrams and only use 2- to 3-grams: + + >>> sentence_chrf(ref1, hyp1, min_len=2, max_len=3) # doctest: +ELLIPSIS + 0.6617... + + :param references: reference sentence + :type references: list(str) / str + :param hypothesis: a hypothesis sentence + :type hypothesis: list(str) / str + :param min_len: The minimum order of n-gram this function should extract. + :type min_len: int + :param max_len: The maximum order of n-gram this function should extract. + :type max_len: int + :param beta: the parameter to assign more importance to recall over precision + :type beta: float + :param ignore_whitespace: ignore whitespace characters in scoring + :type ignore_whitespace: bool + :return: the sentence level CHRF score. + :rtype: float + """ + return corpus_chrf( + [reference], + [hypothesis], + min_len, + max_len, + beta=beta, + ignore_whitespace=ignore_whitespace, + ) + + +def _preprocess(sent, ignore_whitespace): + if type(sent) != str: + # turn list of tokens into a string + sent = ' '.join(sent) + + if ignore_whitespace: + sent = re.sub(r'\s+', '', sent) + return sent + + +def chrf_precision_recall_fscore_support( + reference, hypothesis, n, beta=3.0, epsilon=1e-16 +): + """ + This function computes the precision, recall and fscore from the ngram + overlaps. It returns the `support` which is the true positive score. + + By underspecifying the input type, the function will be agnostic as to how + it computes the ngrams and simply take the whichever element in the list; + it could be either token or character. + + :param reference: The reference sentence. + :type reference: list + :param hypothesis: The hypothesis sentence. + :type hypothesis: list + :param n: Extract up to the n-th order ngrams + :type n: int + :param beta: The parameter to assign more importance to recall over precision. + :type beta: float + :param epsilon: The fallback value if the hypothesis or reference is empty. 
+ :type epsilon: float + :return: Returns the precision, recall and f-score and support (true positive). + :rtype: tuple(float) + """ + ref_ngrams = Counter(ngrams(reference, n)) + hyp_ngrams = Counter(ngrams(hypothesis, n)) + + # calculate the number of ngram matches + overlap_ngrams = ref_ngrams & hyp_ngrams + tp = sum(overlap_ngrams.values()) # True positives. + tpfp = sum(hyp_ngrams.values()) # True positives + False positives. + tpfn = sum(ref_ngrams.values()) # True positives + False negatives. + + try: + prec = tp / tpfp # precision + rec = tp / tpfn # recall + factor = beta ** 2 + fscore = (1 + factor) * (prec * rec) / (factor * prec + rec) + except ZeroDivisionError: + prec = rec = fscore = epsilon + return prec, rec, fscore, tp + + +def corpus_chrf( + references, hypotheses, min_len=1, max_len=6, beta=3.0, ignore_whitespace=True +): + """ + Calculates the corpus level CHRF (Character n-gram F-score), it is the + macro-averaged value of the sentence/segment level CHRF score. + + This implementation of CHRF only supports a single reference at the moment. + + >>> ref1 = str('It is a guide to action that ensures that the military ' + ... 'will forever heed Party commands').split() + >>> ref2 = str('It is the guiding principle which guarantees the military ' + ... 'forces always being under the command of the Party').split() + >>> + >>> hyp1 = str('It is a guide to action which ensures that the military ' + ... 'always obeys the commands of the party').split() + >>> hyp2 = str('It is to insure the troops forever hearing the activity ' + ... 'guidebook that party direct') + >>> corpus_chrf([ref1, ref2, ref1, ref2], [hyp1, hyp2, hyp2, hyp1]) # doctest: +ELLIPSIS + 0.3910... + + :param references: a corpus of list of reference sentences, w.r.t. hypotheses + :type references: list(list(str)) + :param hypotheses: a list of hypothesis sentences + :type hypotheses: list(list(str)) + :param min_len: The minimum order of n-gram this function should extract. + :type min_len: int + :param max_len: The maximum order of n-gram this function should extract. + :type max_len: int + :param beta: the parameter to assign more importance to recall over precision + :type beta: float + :param ignore_whitespace: ignore whitespace characters in scoring + :type ignore_whitespace: bool + :return: the sentence level CHRF score. + :rtype: float + """ + + assert len(references) == len( + hypotheses + ), "The number of hypotheses and their references should be the same" + num_sents = len(hypotheses) + + # Keep f-scores for each n-gram order separate + ngram_fscores = defaultdict(lambda: list()) + + # Iterate through each hypothesis and their corresponding references. + for reference, hypothesis in zip(references, hypotheses): + + # preprocess both reference and hypothesis + reference = _preprocess(reference, ignore_whitespace) + hypothesis = _preprocess(hypothesis, ignore_whitespace) + + # Calculate f-scores for each sentence and for each n-gram order + # separately. + for n in range(min_len, max_len + 1): + # Compute the precision, recall, fscore and support. 
+ prec, rec, fscore, tp = chrf_precision_recall_fscore_support( + reference, hypothesis, n, beta=beta + ) + ngram_fscores[n].append(fscore) + + # how many n-gram sizes + num_ngram_sizes = len(ngram_fscores) + + # sum of f-scores over all sentences for each n-gram order + total_scores = [sum(fscores) for n, fscores in ngram_fscores.items()] + + # macro-average over n-gram orders and over all sentences + return (sum(total_scores) / num_ngram_sizes) / num_sents diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/gale_church.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/gale_church.py new file mode 100644 index 0000000..582951c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/gale_church.py @@ -0,0 +1,275 @@ +# -*- coding: utf-8 -*- + +# Natural Language Toolkit: Gale-Church Aligner +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Torsten Marek +# Contributor: Cassidy Laidlaw, Liling Tan +# URL: +# For license information, see LICENSE.TXT + +""" + +A port of the Gale-Church Aligner. + +Gale & Church (1993), A Program for Aligning Sentences in Bilingual Corpora. +http://aclweb.org/anthology/J93-1004.pdf + +""" + +from __future__ import division +import math + +try: + from scipy.stats import norm + from norm import logsf as norm_logsf +except ImportError: + + def erfcc(x): + """Complementary error function.""" + z = abs(x) + t = 1 / (1 + 0.5 * z) + r = t * math.exp( + -z * z + - 1.26551223 + + t + * ( + 1.00002368 + + t + * ( + 0.37409196 + + t + * ( + 0.09678418 + + t + * ( + -0.18628806 + + t + * ( + 0.27886807 + + t + * ( + -1.13520398 + + t + * (1.48851587 + t * (-0.82215223 + t * 0.17087277)) + ) + ) + ) + ) + ) + ) + ) + if x >= 0.0: + return r + else: + return 2.0 - r + + def norm_cdf(x): + """Return the area under the normal distribution from M{-∞..x}.""" + return 1 - 0.5 * erfcc(x / math.sqrt(2)) + + def norm_logsf(x): + try: + return math.log(1 - norm_cdf(x)) + except ValueError: + return float('-inf') + + +LOG2 = math.log(2) + + +class LanguageIndependent(object): + # These are the language-independent probabilities and parameters + # given in Gale & Church + + # for the computation, l_1 is always the language with less characters + PRIORS = { + (1, 0): 0.0099, + (0, 1): 0.0099, + (1, 1): 0.89, + (2, 1): 0.089, + (1, 2): 0.089, + (2, 2): 0.011, + } + + AVERAGE_CHARACTERS = 1 + VARIANCE_CHARACTERS = 6.8 + + +def trace(backlinks, source_sents_lens, target_sents_lens): + """ + Traverse the alignment cost from the tracebacks and retrieves + appropriate sentence pairs. 
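# Editorial toy example for trace(): backlinks maps each DP cell to the alignment
# type (s, t) that produced it, and walking back from the far corner recovers the
# aligned sentence-index pairs. The dictionary below is hand-made for illustration.
from nltk.translate.gale_church import trace

backlinks = {(2, 2): (1, 1), (1, 1): (1, 1)}   # two 1-1 beads
print(trace(backlinks, [5, 5], [7, 7]))        # [(0, 0), (1, 1)]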
+ + :param backlinks: A dictionary where the key is the alignment points and value is the cost (referencing the LanguageIndependent.PRIORS) + :type backlinks: dict + :param source_sents_lens: A list of target sentences' lengths + :type source_sents_lens: list(int) + :param target_sents_lens: A list of target sentences' lengths + :type target_sents_lens: list(int) + """ + links = [] + position = (len(source_sents_lens), len(target_sents_lens)) + while position != (0, 0) and all(p >= 0 for p in position): + try: + s, t = backlinks[position] + except TypeError: + position = (position[0] - 1, position[1] - 1) + continue + for i in range(s): + for j in range(t): + links.append((position[0] - i - 1, position[1] - j - 1)) + position = (position[0] - s, position[1] - t) + + return links[::-1] + + +def align_log_prob(i, j, source_sents, target_sents, alignment, params): + """Returns the log probability of the two sentences C{source_sents[i]}, C{target_sents[j]} + being aligned with a specific C{alignment}. + + @param i: The offset of the source sentence. + @param j: The offset of the target sentence. + @param source_sents: The list of source sentence lengths. + @param target_sents: The list of target sentence lengths. + @param alignment: The alignment type, a tuple of two integers. + @param params: The sentence alignment parameters. + + @returns: The log probability of a specific alignment between the two sentences, given the parameters. + """ + l_s = sum(source_sents[i - offset - 1] for offset in range(alignment[0])) + l_t = sum(target_sents[j - offset - 1] for offset in range(alignment[1])) + try: + # actually, the paper says l_s * params.VARIANCE_CHARACTERS, this is based on the C + # reference implementation. With l_s in the denominator, insertions are impossible. + m = (l_s + l_t / params.AVERAGE_CHARACTERS) / 2 + delta = (l_s * params.AVERAGE_CHARACTERS - l_t) / math.sqrt( + m * params.VARIANCE_CHARACTERS + ) + except ZeroDivisionError: + return float('-inf') + + return -(LOG2 + norm_logsf(abs(delta)) + math.log(params.PRIORS[alignment])) + + +def align_blocks(source_sents_lens, target_sents_lens, params=LanguageIndependent): + """Return the sentence alignment of two text blocks (usually paragraphs). + + >>> align_blocks([5,5,5], [7,7,7]) + [(0, 0), (1, 1), (2, 2)] + >>> align_blocks([10,5,5], [12,20]) + [(0, 0), (1, 1), (2, 1)] + >>> align_blocks([12,20], [10,5,5]) + [(0, 0), (1, 1), (1, 2)] + >>> align_blocks([10,2,10,10,2,10], [12,3,20,3,12]) + [(0, 0), (1, 1), (2, 2), (3, 2), (4, 3), (5, 4)] + + @param source_sents_lens: The list of source sentence lengths. + @param target_sents_lens: The list of target sentence lengths. + @param params: the sentence alignment parameters. + @return: The sentence alignments, a list of index pairs. 
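# Editorial illustration: align_texts() (defined below) simply runs align_blocks()
# over each block pair, so wrapping the doctest inputs above as blocks reproduces
# the same alignments.
from nltk.translate.gale_church import align_texts

source_blocks = [[5, 5, 5], [10, 5, 5]]   # two "paragraphs" of sentence lengths
target_blocks = [[7, 7, 7], [12, 20]]
print(align_texts(source_blocks, target_blocks))
# [[(0, 0), (1, 1), (2, 2)], [(0, 0), (1, 1), (2, 1)]]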
+    """
+
+    alignment_types = list(params.PRIORS.keys())
+
+    # there are always three rows in the history (with the last of them being filled)
+    D = [[]]
+
+    backlinks = {}
+
+    for i in range(len(source_sents_lens) + 1):
+        for j in range(len(target_sents_lens) + 1):
+            min_dist = float('inf')
+            min_align = None
+            for a in alignment_types:
+                prev_i = -1 - a[0]
+                prev_j = j - a[1]
+                if prev_i < -len(D) or prev_j < 0:
+                    continue
+                p = D[prev_i][prev_j] + align_log_prob(
+                    i, j, source_sents_lens, target_sents_lens, a, params
+                )
+                if p < min_dist:
+                    min_dist = p
+                    min_align = a
+
+            if min_dist == float('inf'):
+                min_dist = 0
+
+            backlinks[(i, j)] = min_align
+            D[-1].append(min_dist)
+
+        if len(D) > 2:
+            D.pop(0)
+        D.append([])
+
+    return trace(backlinks, source_sents_lens, target_sents_lens)
+
+
+def align_texts(source_blocks, target_blocks, params=LanguageIndependent):
+    """Creates the sentence alignment of two texts.
+
+    Texts can consist of several blocks. Block boundaries cannot be crossed by sentence
+    alignment links.
+
+    Each block consists of a list that contains the lengths (in characters) of the sentences
+    in this block.
+
+    @param source_blocks: The list of blocks in the source text.
+    @param target_blocks: The list of blocks in the target text.
+    @param params: the sentence alignment parameters.
+
+    @returns: A list of sentence alignment lists
+    """
+    if len(source_blocks) != len(target_blocks):
+        raise ValueError(
+            "Source and target texts do not have the same number of blocks."
+        )
+
+    return [
+        align_blocks(source_block, target_block, params)
+        for source_block, target_block in zip(source_blocks, target_blocks)
+    ]
+
+
+# File I/O functions; may belong in a corpus reader
+
+
+def split_at(it, split_value):
+    """Splits an iterator C{it} at values of C{split_value}.
+
+    Each instance of C{split_value} is swallowed. The iterator produces
+    subiterators which need to be consumed fully before the next subiterator
+    can be used.
+    """
+
+    def _chunk_iterator(first):
+        v = first
+        while v != split_value:
+            yield v
+            # Python 3: use next(); end the chunk cleanly if the stream runs
+            # out before the next delimiter (avoids a PEP 479 RuntimeError).
+            try:
+                v = next(it)
+            except StopIteration:
+                return
+
+    while True:
+        # start the next chunk; stop cleanly once the underlying iterator is exhausted
+        try:
+            first = next(it)
+        except StopIteration:
+            return
+        yield _chunk_iterator(first)
+
+
+def parse_token_stream(stream, soft_delimiter, hard_delimiter):
+    """Parses a stream of tokens and splits it into sentences (using C{soft_delimiter} tokens)
+    and blocks (using C{hard_delimiter} tokens) for use with the L{align_texts} function.
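+
+    For example (illustrative), the token stream
+    ``['foo', 'bar', '.EOS', 'baz', '.EOS', '.EOP', 'qux', '.EOS']`` with
+    soft delimiter ``'.EOS'`` and hard delimiter ``'.EOP'`` is parsed into
+    ``[[6, 3], [3]]``: two blocks, the first containing sentences of 6 and
+    3 characters, the second a single 3-character sentence.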
+    """
+    return [
+        [
+            sum(len(token) for token in sentence_it)
+            for sentence_it in split_at(block_it, soft_delimiter)
+        ]
+        for block_it in split_at(stream, hard_delimiter)
+    ]
+
+
+# Code for test files in nltk_contrib/align/data/*.tok
+# import sys
+# from contextlib import nested
+# with nested(open(sys.argv[1], "r"), open(sys.argv[2], "r")) as (s, t):
+#     source = parse_token_stream((l.strip() for l in s), ".EOS", ".EOP")
+#     target = parse_token_stream((l.strip() for l in t), ".EOS", ".EOP")
+#     print align_texts(source, target)
diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/gdfa.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/gdfa.py
new file mode 100644
index 0000000..bdea805
--- /dev/null
+++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/gdfa.py
@@ -0,0 +1,139 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: GDFA word alignment symmetrization
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Authors: Liling Tan
+# URL:
+# For license information, see LICENSE.TXT

+from collections import defaultdict
+
+
+def grow_diag_final_and(srclen, trglen, e2f, f2e):
+    """
+    This function symmetrizes the source-to-target and target-to-source
+    word alignment outputs and produces the combined alignment using the
+    grow-diag-final-and (GDFA) algorithm (Koehn, 2005).
+
+    Step 1: Find the intersection of the bidirectional alignment.
+
+    Step 2: Search for additional neighbor alignment points to be added, given
+            these criteria: (i) neighbor alignment points are not in the
+            intersection and (ii) neighbor alignments are in the union.
+
+    Step 3: Add all other alignment points that are not in the intersection and not in
+            the neighboring alignments that met the criteria, but are in the original
+            forward/backward alignment outputs.
+
+    >>> forw = ('0-0 2-1 9-2 21-3 10-4 7-5 11-6 9-7 12-8 1-9 3-10 '
+    ...         '4-11 17-12 17-13 25-14 13-15 24-16 11-17 28-18')
+    >>> back = ('0-0 1-9 2-9 3-10 4-11 5-12 6-6 7-5 8-6 9-7 10-4 '
+    ...         '11-6 12-8 13-12 15-12 17-13 18-13 19-12 20-13 '
+    ...         '21-3 22-12 23-14 24-17 25-15 26-17 27-18 28-18')
+    >>> srctext = ("この よう な ハロー 白色 わい 星 の L 関数 "
+    ...            "は L と 共 に 不連続 に 増加 する こと が "
+    ...            "期待 さ れる こと を 示し た 。")
+    >>> trgtext = ("Therefore , we expect that the luminosity function "
+    ...            "of such halo white dwarfs increases discontinuously "
+    ...            "with the luminosity .")
+    >>> srclen = len(srctext.split())
+    >>> trglen = len(trgtext.split())
+    >>>
+    >>> gdfa = grow_diag_final_and(srclen, trglen, forw, back)
+    >>> gdfa == sorted(set([(28, 18), (6, 6), (24, 17), (2, 1), (15, 12), (13, 12),
+    ...                     (2, 9), (3, 10), (26, 17), (25, 15), (8, 6), (9, 7),
+    ...                     (20, 13), (18, 13), (0, 0), (10, 4), (13, 15), (23, 14),
+    ...                     (7, 5), (25, 14), (1, 9), (17, 13), (4, 11), (11, 17),
+    ...                     (9, 2), (22, 12), (27, 18), (24, 16), (21, 3), (19, 12),
+    ...                     (17, 12), (5, 12), (11, 6), (12, 8)]))
+    True
+
+    References:
+    Koehn, P., A. Axelrod, A. Birch, C. Callison-Burch, M. Osborne, and D. Talbot.
+    2005. Edinburgh System Description for the 2005 IWSLT Speech
+    Translation Evaluation. In MT Eval Workshop.
+ + :type srclen: int + :param srclen: the number of tokens in the source language + :type trglen: int + :param trglen: the number of tokens in the target language + :type e2f: str + :param e2f: the forward word alignment outputs from source-to-target + language (in pharaoh output format) + :type f2e: str + :param f2e: the backward word alignment outputs from target-to-source + language (in pharaoh output format) + :rtype: set(tuple(int)) + :return: the symmetrized alignment points from the GDFA algorithm + """ + + # Converts pharaoh text format into list of tuples. + e2f = [tuple(map(int, a.split('-'))) for a in e2f.split()] + f2e = [tuple(map(int, a.split('-'))) for a in f2e.split()] + + neighbors = [(-1, 0), (0, -1), (1, 0), (0, 1), (-1, -1), (-1, 1), (1, -1), (1, 1)] + alignment = set(e2f).intersection(set(f2e)) # Find the intersection. + union = set(e2f).union(set(f2e)) + + # *aligned* is used to check if neighbors are aligned in grow_diag() + aligned = defaultdict(set) + for i, j in alignment: + aligned['e'].add(i) + aligned['f'].add(j) + + def grow_diag(): + """ + Search for the neighbor points and them to the intersected alignment + points if criteria are met. + """ + prev_len = len(alignment) - 1 + # iterate until no new points added + while prev_len < len(alignment): + no_new_points = True + # for english word e = 0 ... en + for e in range(srclen): + # for foreign word f = 0 ... fn + for f in range(trglen): + # if ( e aligned with f) + if (e, f) in alignment: + # for each neighboring point (e-new, f-new) + for neighbor in neighbors: + neighbor = tuple(i + j for i, j in zip((e, f), neighbor)) + e_new, f_new = neighbor + # if ( ( e-new not aligned and f-new not aligned) + # and (e-new, f-new in union(e2f, f2e) ) + if ( + e_new not in aligned and f_new not in aligned + ) and neighbor in union: + alignment.add(neighbor) + aligned['e'].add(e_new) + aligned['f'].add(f_new) + prev_len += 1 + no_new_points = False + # iterate until no new points added + if no_new_points: + break + + def final_and(a): + """ + Adds remaining points that are not in the intersection, not in the + neighboring alignments but in the original *e2f* and *f2e* alignments + """ + # for english word e = 0 ... en + for e_new in range(srclen): + # for foreign word f = 0 ... fn + for f_new in range(trglen): + # if ( ( e-new not aligned and f-new not aligned) + # and (e-new, f-new in union(e2f, f2e) ) + if ( + e_new not in aligned + and f_new not in aligned + and (e_new, f_new) in union + ): + alignment.add((e_new, f_new)) + aligned['e'].add(e_new) + aligned['f'].add(f_new) + + grow_diag() + final_and(e2f) + final_and(f2e) + return sorted(alignment) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/gleu_score.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/gleu_score.py new file mode 100644 index 0000000..43c3e99 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/gleu_score.py @@ -0,0 +1,191 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: GLEU Score +# +# Copyright (C) 2001-2019 NLTK Project +# Authors: +# Contributors: Mike Schuster, Michael Wayne Goodman, Liling Tan +# URL: +# For license information, see LICENSE.TXT + +""" GLEU score implementation. """ +from __future__ import division +from collections import Counter + +from nltk.util import ngrams, everygrams + + +def sentence_gleu(references, hypothesis, min_len=1, max_len=4): + """ + Calculates the sentence level GLEU (Google-BLEU) score described in + + Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc V. 
Le, Mohammad Norouzi, + Wolfgang Macherey, Maxim Krikun, Yuan Cao, Qin Gao, Klaus Macherey, + Jeff Klingner, Apurva Shah, Melvin Johnson, Xiaobing Liu, Lukasz Kaiser, + Stephan Gouws, Yoshikiyo Kato, Taku Kudo, Hideto Kazawa, Keith Stevens, + George Kurian, Nishant Patil, Wei Wang, Cliff Young, Jason Smith, + Jason Riesa, Alex Rudnick, Oriol Vinyals, Greg Corrado, Macduff Hughes, + Jeffrey Dean. (2016) Google’s Neural Machine Translation System: + Bridging the Gap between Human and Machine Translation. + eprint arXiv:1609.08144. https://arxiv.org/pdf/1609.08144v2.pdf + Retrieved on 27 Oct 2016. + + From Wu et al. (2016): + "The BLEU score has some undesirable properties when used for single + sentences, as it was designed to be a corpus measure. We therefore + use a slightly different score for our RL experiments which we call + the 'GLEU score'. For the GLEU score, we record all sub-sequences of + 1, 2, 3 or 4 tokens in output and target sequence (n-grams). We then + compute a recall, which is the ratio of the number of matching n-grams + to the number of total n-grams in the target (ground truth) sequence, + and a precision, which is the ratio of the number of matching n-grams + to the number of total n-grams in the generated output sequence. Then + GLEU score is simply the minimum of recall and precision. This GLEU + score's range is always between 0 (no matches) and 1 (all match) and + it is symmetrical when switching output and target. According to + our experiments, GLEU score correlates quite well with the BLEU + metric on a corpus level but does not have its drawbacks for our per + sentence reward objective." + + Note: The initial implementation only allowed a single reference, but now + a list of references is required (which is consistent with + bleu_score.sentence_bleu()). + + The infamous "the the the ... " example + + >>> ref = 'the cat is on the mat'.split() + >>> hyp = 'the the the the the the the'.split() + >>> sentence_gleu([ref], hyp) # doctest: +ELLIPSIS + 0.0909... + + An example to evaluate normal machine translation outputs + + >>> ref1 = str('It is a guide to action that ensures that the military ' + ... 'will forever heed Party commands').split() + >>> hyp1 = str('It is a guide to action which ensures that the military ' + ... 'always obeys the commands of the party').split() + >>> hyp2 = str('It is to insure the troops forever hearing the activity ' + ... 'guidebook that party direct').split() + >>> sentence_gleu([ref1], hyp1) # doctest: +ELLIPSIS + 0.4393... + >>> sentence_gleu([ref1], hyp2) # doctest: +ELLIPSIS + 0.1206... + + :param references: a list of reference sentences + :type references: list(list(str)) + :param hypothesis: a hypothesis sentence + :type hypothesis: list(str) + :param min_len: The minimum order of n-gram this function should extract. + :type min_len: int + :param max_len: The maximum order of n-gram this function should extract. + :type max_len: int + :return: the sentence level GLEU score. + :rtype: float + """ + return corpus_gleu([references], [hypothesis], min_len=min_len, max_len=max_len) + + +def corpus_gleu(list_of_references, hypotheses, min_len=1, max_len=4): + """ + Calculate a single corpus-level GLEU score (aka. system-level GLEU) for all + the hypotheses and their respective references. + + Instead of averaging the sentence level GLEU scores (i.e. macro-average + precision), Wu et al. (2016) sum up the matching tokens and the max of + hypothesis and reference tokens for each sentence, then compute using the + aggregate values. 
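+
+    For example, if one sentence pair has 3 matching n-grams with
+    max(n_all_output, n_all_target) = 6 and another has 3 with a maximum
+    of 4, the corpus score is (3 + 3) / (6 + 4) = 0.6, whereas the mean of
+    the two sentence-level scores would be (0.5 + 0.75) / 2 = 0.625.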
+ + From Mike Schuster (via email): + "For the corpus, we just add up the two statistics n_match and + n_all = max(n_all_output, n_all_target) for all sentences, then + calculate gleu_score = n_match / n_all, so it is not just a mean of + the sentence gleu scores (in our case, longer sentences count more, + which I think makes sense as they are more difficult to translate)." + + >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', + ... 'ensures', 'that', 'the', 'military', 'always', + ... 'obeys', 'the', 'commands', 'of', 'the', 'party'] + >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', + ... 'ensures', 'that', 'the', 'military', 'will', 'forever', + ... 'heed', 'Party', 'commands'] + >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which', + ... 'guarantees', 'the', 'military', 'forces', 'always', + ... 'being', 'under', 'the', 'command', 'of', 'the', 'Party'] + >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the', + ... 'army', 'always', 'to', 'heed', 'the', 'directions', + ... 'of', 'the', 'party'] + + >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was', + ... 'interested', 'in', 'world', 'history'] + >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history', + ... 'because', 'he', 'read', 'the', 'book'] + + >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]] + >>> hypotheses = [hyp1, hyp2] + >>> corpus_gleu(list_of_references, hypotheses) # doctest: +ELLIPSIS + 0.5673... + + The example below show that corpus_gleu() is different from averaging + sentence_gleu() for hypotheses + + >>> score1 = sentence_gleu([ref1a], hyp1) + >>> score2 = sentence_gleu([ref2a], hyp2) + >>> (score1 + score2) / 2 # doctest: +ELLIPSIS + 0.6144... + + :param list_of_references: a list of reference sentences, w.r.t. hypotheses + :type list_of_references: list(list(list(str))) + :param hypotheses: a list of hypothesis sentences + :type hypotheses: list(list(str)) + :param min_len: The minimum order of n-gram this function should extract. + :type min_len: int + :param max_len: The maximum order of n-gram this function should extract. + :type max_len: int + :return: The corpus-level GLEU score. + :rtype: float + """ + # sanity check + assert len(list_of_references) == len( + hypotheses + ), "The number of hypotheses and their reference(s) should be the same" + + # sum matches and max-token-lengths over all sentences + corpus_n_match = 0 + corpus_n_all = 0 + + for references, hypothesis in zip(list_of_references, hypotheses): + hyp_ngrams = Counter(everygrams(hypothesis, min_len, max_len)) + tpfp = sum(hyp_ngrams.values()) # True positives + False positives. + + hyp_counts = [] + for reference in references: + ref_ngrams = Counter(everygrams(reference, min_len, max_len)) + tpfn = sum(ref_ngrams.values()) # True positives + False negatives. + + overlap_ngrams = ref_ngrams & hyp_ngrams + tp = sum(overlap_ngrams.values()) # True positives. 
+ + # While GLEU is defined as the minimum of precision and + # recall, we can reduce the number of division operations by one by + # instead finding the maximum of the denominators for the precision + # and recall formulae, since the numerators are the same: + # precision = tp / tpfp + # recall = tp / tpfn + # gleu_score = min(precision, recall) == tp / max(tpfp, tpfn) + n_all = max(tpfp, tpfn) + + if n_all > 0: + hyp_counts.append((tp, n_all)) + + # use the reference yielding the highest score + if hyp_counts: + n_match, n_all = max(hyp_counts, key=lambda hc: hc[0] / hc[1]) + corpus_n_match += n_match + corpus_n_all += n_all + + # corner case: empty corpus or empty references---don't divide by zero! + if corpus_n_all == 0: + gleu_score = 0.0 + else: + gleu_score = corpus_n_match / corpus_n_all + + return gleu_score diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm1.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm1.py new file mode 100644 index 0000000..ff243fd --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm1.py @@ -0,0 +1,251 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: IBM Model 1 +# +# Copyright (C) 2001-2013 NLTK Project +# Author: Chin Yee Lee +# Hengfeng Li +# Ruxin Hou +# Calvin Tanujaya Lim +# Based on earlier version by: +# Will Zhang +# Guan Gui +# URL: +# For license information, see LICENSE.TXT + +""" +Lexical translation model that ignores word order. + +In IBM Model 1, word order is ignored for simplicity. As long as the +word alignments are equivalent, it doesn't matter where the word occurs +in the source or target sentence. Thus, the following three alignments +are equally likely. + +Source: je mange du jambon +Target: i eat some ham +Alignment: (0,0) (1,1) (2,2) (3,3) + +Source: je mange du jambon +Target: some ham eat i +Alignment: (0,2) (1,3) (2,1) (3,1) + +Source: du jambon je mange +Target: eat i some ham +Alignment: (0,3) (1,2) (2,0) (3,1) + +Note that an alignment is represented here as +(word_index_in_target, word_index_in_source). + +The EM algorithm used in Model 1 is: +E step - In the training data, count how many times a source language + word is translated into a target language word, weighted by + the prior probability of the translation. + +M step - Estimate the new probability of translation based on the + counts from the Expectation step. + + +Notations: +i: Position in the source sentence + Valid values are 0 (for NULL), 1, 2, ..., length of source sentence +j: Position in the target sentence + Valid values are 1, 2, ..., length of target sentence +s: A word in the source language +t: A word in the target language + + +References: +Philipp Koehn. 2010. Statistical Machine Translation. +Cambridge University Press, New York. + +Peter E Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and +Robert L. Mercer. 1993. The Mathematics of Statistical Machine +Translation: Parameter Estimation. Computational Linguistics, 19 (2), +263-311. 
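+
+Note: in the implementation below, one EM iteration increments, for each
+target word t and each source word s (including NULL) in a sentence pair,
+count(t, s) by t(t|s) / sum of t(t|s') over all s' in that source sentence
+(E step), and then sets t(t|s) = count(t, s) / sum of count(t', s) over all
+t' (M step).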
+""" + +from __future__ import division +from collections import defaultdict +from nltk.translate import AlignedSent +from nltk.translate import Alignment +from nltk.translate import IBMModel +from nltk.translate.ibm_model import Counts +import warnings + + +class IBMModel1(IBMModel): + """ + Lexical translation model that ignores word order + + >>> bitext = [] + >>> bitext.append(AlignedSent(['klein', 'ist', 'das', 'haus'], ['the', 'house', 'is', 'small'])) + >>> bitext.append(AlignedSent(['das', 'haus', 'ist', 'ja', 'groß'], ['the', 'house', 'is', 'big'])) + >>> bitext.append(AlignedSent(['das', 'buch', 'ist', 'ja', 'klein'], ['the', 'book', 'is', 'small'])) + >>> bitext.append(AlignedSent(['das', 'haus'], ['the', 'house'])) + >>> bitext.append(AlignedSent(['das', 'buch'], ['the', 'book'])) + >>> bitext.append(AlignedSent(['ein', 'buch'], ['a', 'book'])) + + >>> ibm1 = IBMModel1(bitext, 5) + + >>> print(ibm1.translation_table['buch']['book']) + 0.889... + >>> print(ibm1.translation_table['das']['book']) + 0.061... + >>> print(ibm1.translation_table['buch'][None]) + 0.113... + >>> print(ibm1.translation_table['ja'][None]) + 0.072... + + >>> test_sentence = bitext[2] + >>> test_sentence.words + ['das', 'buch', 'ist', 'ja', 'klein'] + >>> test_sentence.mots + ['the', 'book', 'is', 'small'] + >>> test_sentence.alignment + Alignment([(0, 0), (1, 1), (2, 2), (3, 2), (4, 3)]) + + """ + + def __init__(self, sentence_aligned_corpus, iterations, probability_tables=None): + """ + Train on ``sentence_aligned_corpus`` and create a lexical + translation model. + + Translation direction is from ``AlignedSent.mots`` to + ``AlignedSent.words``. + + :param sentence_aligned_corpus: Sentence-aligned parallel corpus + :type sentence_aligned_corpus: list(AlignedSent) + + :param iterations: Number of iterations to run training algorithm + :type iterations: int + + :param probability_tables: Optional. Use this to pass in custom + probability values. If not specified, probabilities will be + set to a uniform distribution, or some other sensible value. + If specified, the following entry must be present: + ``translation_table``. + See ``IBMModel`` for the type and purpose of this table. + :type probability_tables: dict[str]: object + """ + super(IBMModel1, self).__init__(sentence_aligned_corpus) + + if probability_tables is None: + self.set_uniform_probabilities(sentence_aligned_corpus) + else: + # Set user-defined probabilities + self.translation_table = probability_tables['translation_table'] + + for n in range(0, iterations): + self.train(sentence_aligned_corpus) + + self.align_all(sentence_aligned_corpus) + + def set_uniform_probabilities(self, sentence_aligned_corpus): + initial_prob = 1 / len(self.trg_vocab) + if initial_prob < IBMModel.MIN_PROB: + warnings.warn( + "Target language vocabulary is too large (" + + str(len(self.trg_vocab)) + + " words). " + "Results may be less accurate." 
+ ) + + for t in self.trg_vocab: + self.translation_table[t] = defaultdict(lambda: initial_prob) + + def train(self, parallel_corpus): + counts = Counts() + for aligned_sentence in parallel_corpus: + trg_sentence = aligned_sentence.words + src_sentence = [None] + aligned_sentence.mots + + # E step (a): Compute normalization factors to weigh counts + total_count = self.prob_all_alignments(src_sentence, trg_sentence) + + # E step (b): Collect counts + for t in trg_sentence: + for s in src_sentence: + count = self.prob_alignment_point(s, t) + normalized_count = count / total_count[t] + counts.t_given_s[t][s] += normalized_count + counts.any_t_given_s[s] += normalized_count + + # M step: Update probabilities with maximum likelihood estimate + self.maximize_lexical_translation_probabilities(counts) + + def prob_all_alignments(self, src_sentence, trg_sentence): + """ + Computes the probability of all possible word alignments, + expressed as a marginal distribution over target words t + + Each entry in the return value represents the contribution to + the total alignment probability by the target word t. + + To obtain probability(alignment | src_sentence, trg_sentence), + simply sum the entries in the return value. + + :return: Probability of t for all s in ``src_sentence`` + :rtype: dict(str): float + """ + alignment_prob_for_t = defaultdict(lambda: 0.0) + for t in trg_sentence: + for s in src_sentence: + alignment_prob_for_t[t] += self.prob_alignment_point(s, t) + return alignment_prob_for_t + + def prob_alignment_point(self, s, t): + """ + Probability that word ``t`` in the target sentence is aligned to + word ``s`` in the source sentence + """ + return self.translation_table[t][s] + + def prob_t_a_given_s(self, alignment_info): + """ + Probability of target sentence and an alignment given the + source sentence + """ + prob = 1.0 + + for j, i in enumerate(alignment_info.alignment): + if j == 0: + continue # skip the dummy zeroeth element + trg_word = alignment_info.trg_sentence[j] + src_word = alignment_info.src_sentence[i] + prob *= self.translation_table[trg_word][src_word] + + return max(prob, IBMModel.MIN_PROB) + + def align_all(self, parallel_corpus): + for sentence_pair in parallel_corpus: + self.align(sentence_pair) + + def align(self, sentence_pair): + """ + Determines the best word alignment for one sentence pair from + the corpus that the model was trained on. + + The best alignment will be set in ``sentence_pair`` when the + method returns. In contrast with the internal implementation of + IBM models, the word indices in the ``Alignment`` are zero- + indexed, not one-indexed. 
+ + :param sentence_pair: A sentence in the source language and its + counterpart sentence in the target language + :type sentence_pair: AlignedSent + """ + best_alignment = [] + + for j, trg_word in enumerate(sentence_pair.words): + # Initialize trg_word to align with the NULL token + best_prob = max(self.translation_table[trg_word][None], IBMModel.MIN_PROB) + best_alignment_point = None + for i, src_word in enumerate(sentence_pair.mots): + align_prob = self.translation_table[trg_word][src_word] + if align_prob >= best_prob: # prefer newer word in case of tie + best_prob = align_prob + best_alignment_point = i + + best_alignment.append((j, best_alignment_point)) + + sentence_pair.alignment = Alignment(best_alignment) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm2.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm2.py new file mode 100644 index 0000000..e235f59 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm2.py @@ -0,0 +1,321 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: IBM Model 2 +# +# Copyright (C) 2001-2013 NLTK Project +# Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim +# URL: +# For license information, see LICENSE.TXT + +""" +Lexical translation model that considers word order. + +IBM Model 2 improves on Model 1 by accounting for word order. +An alignment probability is introduced, a(i | j,l,m), which predicts +a source word position, given its aligned target word's position. + +The EM algorithm used in Model 2 is: +E step - In the training data, collect counts, weighted by prior + probabilities. + (a) count how many times a source language word is translated + into a target language word + (b) count how many times a particular position in the source + sentence is aligned to a particular position in the target + sentence + +M step - Estimate new probabilities based on the counts from the E step + + +Notations: +i: Position in the source sentence + Valid values are 0 (for NULL), 1, 2, ..., length of source sentence +j: Position in the target sentence + Valid values are 1, 2, ..., length of target sentence +l: Number of words in the source sentence, excluding NULL +m: Number of words in the target sentence +s: A word in the source language +t: A word in the target language + + +References: +Philipp Koehn. 2010. Statistical Machine Translation. +Cambridge University Press, New York. + +Peter E Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and +Robert L. Mercer. 1993. The Mathematics of Statistical Machine +Translation: Parameter Estimation. Computational Linguistics, 19 (2), +263-311. 
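+
+Note: in the implementation below, the E step weighs each pair (s, t) by
+t(t|s) * a(i | j,l,m), normalized over all source positions i of the
+sentence pair, and the M step re-estimates both the translation table and
+the alignment table from these expected counts.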
+""" + +from __future__ import division + +import warnings +from collections import defaultdict + +from nltk.translate import AlignedSent +from nltk.translate import Alignment +from nltk.translate import IBMModel +from nltk.translate import IBMModel1 +from nltk.translate.ibm_model import Counts + + +class IBMModel2(IBMModel): + """ + Lexical translation model that considers word order + + >>> bitext = [] + >>> bitext.append(AlignedSent(['klein', 'ist', 'das', 'haus'], ['the', 'house', 'is', 'small'])) + >>> bitext.append(AlignedSent(['das', 'haus', 'ist', 'ja', 'groß'], ['the', 'house', 'is', 'big'])) + >>> bitext.append(AlignedSent(['das', 'buch', 'ist', 'ja', 'klein'], ['the', 'book', 'is', 'small'])) + >>> bitext.append(AlignedSent(['das', 'haus'], ['the', 'house'])) + >>> bitext.append(AlignedSent(['das', 'buch'], ['the', 'book'])) + >>> bitext.append(AlignedSent(['ein', 'buch'], ['a', 'book'])) + + >>> ibm2 = IBMModel2(bitext, 5) + + >>> print(round(ibm2.translation_table['buch']['book'], 3)) + 1.0 + >>> print(round(ibm2.translation_table['das']['book'], 3)) + 0.0 + >>> print(round(ibm2.translation_table['buch'][None], 3)) + 0.0 + >>> print(round(ibm2.translation_table['ja'][None], 3)) + 0.0 + + >>> print(ibm2.alignment_table[1][1][2][2]) + 0.938... + >>> print(round(ibm2.alignment_table[1][2][2][2], 3)) + 0.0 + >>> print(round(ibm2.alignment_table[2][2][4][5], 3)) + 1.0 + + >>> test_sentence = bitext[2] + >>> test_sentence.words + ['das', 'buch', 'ist', 'ja', 'klein'] + >>> test_sentence.mots + ['the', 'book', 'is', 'small'] + >>> test_sentence.alignment + Alignment([(0, 0), (1, 1), (2, 2), (3, 2), (4, 3)]) + + """ + + def __init__(self, sentence_aligned_corpus, iterations, probability_tables=None): + """ + Train on ``sentence_aligned_corpus`` and create a lexical + translation model and an alignment model. + + Translation direction is from ``AlignedSent.mots`` to + ``AlignedSent.words``. + + :param sentence_aligned_corpus: Sentence-aligned parallel corpus + :type sentence_aligned_corpus: list(AlignedSent) + + :param iterations: Number of iterations to run training algorithm + :type iterations: int + + :param probability_tables: Optional. Use this to pass in custom + probability values. If not specified, probabilities will be + set to a uniform distribution, or some other sensible value. + If specified, all the following entries must be present: + ``translation_table``, ``alignment_table``. + See ``IBMModel`` for the type and purpose of these tables. 
+ :type probability_tables: dict[str]: object + """ + super(IBMModel2, self).__init__(sentence_aligned_corpus) + + if probability_tables is None: + # Get translation probabilities from IBM Model 1 + # Run more iterations of training for Model 1, since it is + # faster than Model 2 + ibm1 = IBMModel1(sentence_aligned_corpus, 2 * iterations) + self.translation_table = ibm1.translation_table + self.set_uniform_probabilities(sentence_aligned_corpus) + else: + # Set user-defined probabilities + self.translation_table = probability_tables['translation_table'] + self.alignment_table = probability_tables['alignment_table'] + + for n in range(0, iterations): + self.train(sentence_aligned_corpus) + + self.align_all(sentence_aligned_corpus) + + def set_uniform_probabilities(self, sentence_aligned_corpus): + # a(i | j,l,m) = 1 / (l+1) for all i, j, l, m + l_m_combinations = set() + for aligned_sentence in sentence_aligned_corpus: + l = len(aligned_sentence.mots) + m = len(aligned_sentence.words) + if (l, m) not in l_m_combinations: + l_m_combinations.add((l, m)) + initial_prob = 1 / (l + 1) + if initial_prob < IBMModel.MIN_PROB: + warnings.warn( + "A source sentence is too long (" + + str(l) + + " words). Results may be less accurate." + ) + + for i in range(0, l + 1): + for j in range(1, m + 1): + self.alignment_table[i][j][l][m] = initial_prob + + def train(self, parallel_corpus): + counts = Model2Counts() + for aligned_sentence in parallel_corpus: + src_sentence = [None] + aligned_sentence.mots + trg_sentence = ['UNUSED'] + aligned_sentence.words # 1-indexed + l = len(aligned_sentence.mots) + m = len(aligned_sentence.words) + + # E step (a): Compute normalization factors to weigh counts + total_count = self.prob_all_alignments(src_sentence, trg_sentence) + + # E step (b): Collect counts + for j in range(1, m + 1): + t = trg_sentence[j] + for i in range(0, l + 1): + s = src_sentence[i] + count = self.prob_alignment_point(i, j, src_sentence, trg_sentence) + normalized_count = count / total_count[t] + + counts.update_lexical_translation(normalized_count, s, t) + counts.update_alignment(normalized_count, i, j, l, m) + + # M step: Update probabilities with maximum likelihood estimates + self.maximize_lexical_translation_probabilities(counts) + self.maximize_alignment_probabilities(counts) + + def maximize_alignment_probabilities(self, counts): + MIN_PROB = IBMModel.MIN_PROB + for i, j_s in counts.alignment.items(): + for j, src_sentence_lengths in j_s.items(): + for l, trg_sentence_lengths in src_sentence_lengths.items(): + for m in trg_sentence_lengths: + estimate = ( + counts.alignment[i][j][l][m] + / counts.alignment_for_any_i[j][l][m] + ) + self.alignment_table[i][j][l][m] = max(estimate, MIN_PROB) + + def prob_all_alignments(self, src_sentence, trg_sentence): + """ + Computes the probability of all possible word alignments, + expressed as a marginal distribution over target words t + + Each entry in the return value represents the contribution to + the total alignment probability by the target word t. + + To obtain probability(alignment | src_sentence, trg_sentence), + simply sum the entries in the return value. 
+ + :return: Probability of t for all s in ``src_sentence`` + :rtype: dict(str): float + """ + alignment_prob_for_t = defaultdict(lambda: 0.0) + for j in range(1, len(trg_sentence)): + t = trg_sentence[j] + for i in range(0, len(src_sentence)): + alignment_prob_for_t[t] += self.prob_alignment_point( + i, j, src_sentence, trg_sentence + ) + return alignment_prob_for_t + + def prob_alignment_point(self, i, j, src_sentence, trg_sentence): + """ + Probability that position j in ``trg_sentence`` is aligned to + position i in the ``src_sentence`` + """ + l = len(src_sentence) - 1 + m = len(trg_sentence) - 1 + s = src_sentence[i] + t = trg_sentence[j] + return self.translation_table[t][s] * self.alignment_table[i][j][l][m] + + def prob_t_a_given_s(self, alignment_info): + """ + Probability of target sentence and an alignment given the + source sentence + """ + prob = 1.0 + l = len(alignment_info.src_sentence) - 1 + m = len(alignment_info.trg_sentence) - 1 + + for j, i in enumerate(alignment_info.alignment): + if j == 0: + continue # skip the dummy zeroeth element + trg_word = alignment_info.trg_sentence[j] + src_word = alignment_info.src_sentence[i] + prob *= ( + self.translation_table[trg_word][src_word] + * self.alignment_table[i][j][l][m] + ) + + return max(prob, IBMModel.MIN_PROB) + + def align_all(self, parallel_corpus): + for sentence_pair in parallel_corpus: + self.align(sentence_pair) + + def align(self, sentence_pair): + """ + Determines the best word alignment for one sentence pair from + the corpus that the model was trained on. + + The best alignment will be set in ``sentence_pair`` when the + method returns. In contrast with the internal implementation of + IBM models, the word indices in the ``Alignment`` are zero- + indexed, not one-indexed. + + :param sentence_pair: A sentence in the source language and its + counterpart sentence in the target language + :type sentence_pair: AlignedSent + """ + best_alignment = [] + + l = len(sentence_pair.mots) + m = len(sentence_pair.words) + + for j, trg_word in enumerate(sentence_pair.words): + # Initialize trg_word to align with the NULL token + best_prob = ( + self.translation_table[trg_word][None] + * self.alignment_table[0][j + 1][l][m] + ) + best_prob = max(best_prob, IBMModel.MIN_PROB) + best_alignment_point = None + for i, src_word in enumerate(sentence_pair.mots): + align_prob = ( + self.translation_table[trg_word][src_word] + * self.alignment_table[i + 1][j + 1][l][m] + ) + if align_prob >= best_prob: + best_prob = align_prob + best_alignment_point = i + + best_alignment.append((j, best_alignment_point)) + + sentence_pair.alignment = Alignment(best_alignment) + + +class Model2Counts(Counts): + """ + Data object to store counts of various parameters during training. + Includes counts for alignment. 
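+
+    ``alignment[i][j][l][m]`` accumulates the expected count of aligning
+    source position i to target position j in sentence pairs with source
+    length l and target length m; ``alignment_for_any_i`` holds the
+    corresponding totals used for normalization.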
+ """ + + def __init__(self): + super(Model2Counts, self).__init__() + self.alignment = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.0))) + ) + self.alignment_for_any_i = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: 0.0)) + ) + + def update_lexical_translation(self, count, s, t): + self.t_given_s[t][s] += count + self.any_t_given_s[s] += count + + def update_alignment(self, count, i, j, l, m): + self.alignment[i][j][l][m] += count + self.alignment_for_any_i[j][l][m] += count diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm3.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm3.py new file mode 100644 index 0000000..2c7c618 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm3.py @@ -0,0 +1,348 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: IBM Model 3 +# +# Copyright (C) 2001-2013 NLTK Project +# Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim +# URL: +# For license information, see LICENSE.TXT + +""" +Translation model that considers how a word can be aligned to +multiple words in another language. + +IBM Model 3 improves on Model 2 by directly modeling the phenomenon +where a word in one language may be translated into zero or more words +in another. This is expressed by the fertility probability, +n(phi | source word). + +If a source word translates into more than one word, it is possible to +generate sentences that have the same alignment in multiple ways. This +is modeled by a distortion step. The distortion probability, d(j|i,l,m), +predicts a target word position, given its aligned source word's +position. The distortion probability replaces the alignment probability +of Model 2. + +The fertility probability is not applicable for NULL. Target words that +align to NULL are assumed to be distributed uniformly in the target +sentence. The existence of these words is modeled by p1, the probability +that a target word produced by a real source word requires another +target word that is produced by NULL. + +The EM algorithm used in Model 3 is: +E step - In the training data, collect counts, weighted by prior + probabilities. + (a) count how many times a source language word is translated + into a target language word + (b) count how many times a particular position in the target + sentence is aligned to a particular position in the source + sentence + (c) count how many times a source word is aligned to phi number + of target words + (d) count how many times NULL is aligned to a target word + +M step - Estimate new probabilities based on the counts from the E step + +Because there are too many possible alignments, only the most probable +ones are considered. First, the best alignment is determined using prior +probabilities. Then, a hill climbing approach is used to find other good +candidates. + + +Notations: +i: Position in the source sentence + Valid values are 0 (for NULL), 1, 2, ..., length of source sentence +j: Position in the target sentence + Valid values are 1, 2, ..., length of target sentence +l: Number of words in the source sentence, excluding NULL +m: Number of words in the target sentence +s: A word in the source language +t: A word in the target language +phi: Fertility, the number of target words produced by a source word +p1: Probability that a target word produced by a source word is + accompanied by another target word that is aligned to NULL +p0: 1 - p1 + + +References: +Philipp Koehn. 2010. Statistical Machine Translation. 
+Cambridge University Press, New York. + +Peter E Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and +Robert L. Mercer. 1993. The Mathematics of Statistical Machine +Translation: Parameter Estimation. Computational Linguistics, 19 (2), +263-311. +""" + +from __future__ import division + +import warnings +from collections import defaultdict +from math import factorial + +from nltk.translate import AlignedSent +from nltk.translate import Alignment +from nltk.translate import IBMModel +from nltk.translate import IBMModel2 +from nltk.translate.ibm_model import Counts + + +class IBMModel3(IBMModel): + """ + Translation model that considers how a word can be aligned to + multiple words in another language + + >>> bitext = [] + >>> bitext.append(AlignedSent(['klein', 'ist', 'das', 'haus'], ['the', 'house', 'is', 'small'])) + >>> bitext.append(AlignedSent(['das', 'haus', 'war', 'ja', 'groß'], ['the', 'house', 'was', 'big'])) + >>> bitext.append(AlignedSent(['das', 'buch', 'ist', 'ja', 'klein'], ['the', 'book', 'is', 'small'])) + >>> bitext.append(AlignedSent(['ein', 'haus', 'ist', 'klein'], ['a', 'house', 'is', 'small'])) + >>> bitext.append(AlignedSent(['das', 'haus'], ['the', 'house'])) + >>> bitext.append(AlignedSent(['das', 'buch'], ['the', 'book'])) + >>> bitext.append(AlignedSent(['ein', 'buch'], ['a', 'book'])) + >>> bitext.append(AlignedSent(['ich', 'fasse', 'das', 'buch', 'zusammen'], ['i', 'summarize', 'the', 'book'])) + >>> bitext.append(AlignedSent(['fasse', 'zusammen'], ['summarize'])) + + >>> ibm3 = IBMModel3(bitext, 5) + + >>> print(round(ibm3.translation_table['buch']['book'], 3)) + 1.0 + >>> print(round(ibm3.translation_table['das']['book'], 3)) + 0.0 + >>> print(round(ibm3.translation_table['ja'][None], 3)) + 1.0 + + >>> print(round(ibm3.distortion_table[1][1][2][2], 3)) + 1.0 + >>> print(round(ibm3.distortion_table[1][2][2][2], 3)) + 0.0 + >>> print(round(ibm3.distortion_table[2][2][4][5], 3)) + 0.75 + + >>> print(round(ibm3.fertility_table[2]['summarize'], 3)) + 1.0 + >>> print(round(ibm3.fertility_table[1]['book'], 3)) + 1.0 + + >>> print(ibm3.p1) + 0.054... + + >>> test_sentence = bitext[2] + >>> test_sentence.words + ['das', 'buch', 'ist', 'ja', 'klein'] + >>> test_sentence.mots + ['the', 'book', 'is', 'small'] + >>> test_sentence.alignment + Alignment([(0, 0), (1, 1), (2, 2), (3, None), (4, 3)]) + + """ + + def __init__(self, sentence_aligned_corpus, iterations, probability_tables=None): + """ + Train on ``sentence_aligned_corpus`` and create a lexical + translation model, a distortion model, a fertility model, and a + model for generating NULL-aligned words. + + Translation direction is from ``AlignedSent.mots`` to + ``AlignedSent.words``. + + :param sentence_aligned_corpus: Sentence-aligned parallel corpus + :type sentence_aligned_corpus: list(AlignedSent) + + :param iterations: Number of iterations to run training algorithm + :type iterations: int + + :param probability_tables: Optional. Use this to pass in custom + probability values. If not specified, probabilities will be + set to a uniform distribution, or some other sensible value. + If specified, all the following entries must be present: + ``translation_table``, ``alignment_table``, + ``fertility_table``, ``p1``, ``distortion_table``. + See ``IBMModel`` for the type and purpose of these tables. 
+ :type probability_tables: dict[str]: object + """ + super(IBMModel3, self).__init__(sentence_aligned_corpus) + self.reset_probabilities() + + if probability_tables is None: + # Get translation and alignment probabilities from IBM Model 2 + ibm2 = IBMModel2(sentence_aligned_corpus, iterations) + self.translation_table = ibm2.translation_table + self.alignment_table = ibm2.alignment_table + self.set_uniform_probabilities(sentence_aligned_corpus) + else: + # Set user-defined probabilities + self.translation_table = probability_tables['translation_table'] + self.alignment_table = probability_tables['alignment_table'] + self.fertility_table = probability_tables['fertility_table'] + self.p1 = probability_tables['p1'] + self.distortion_table = probability_tables['distortion_table'] + + for n in range(0, iterations): + self.train(sentence_aligned_corpus) + + def reset_probabilities(self): + super(IBMModel3, self).reset_probabilities() + self.distortion_table = defaultdict( + lambda: defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: self.MIN_PROB)) + ) + ) + """ + dict[int][int][int][int]: float. Probability(j | i,l,m). + Values accessed as ``distortion_table[j][i][l][m]``. + """ + + def set_uniform_probabilities(self, sentence_aligned_corpus): + # d(j | i,l,m) = 1 / m for all i, j, l, m + l_m_combinations = set() + for aligned_sentence in sentence_aligned_corpus: + l = len(aligned_sentence.mots) + m = len(aligned_sentence.words) + if (l, m) not in l_m_combinations: + l_m_combinations.add((l, m)) + initial_prob = 1 / m + if initial_prob < IBMModel.MIN_PROB: + warnings.warn( + "A target sentence is too long (" + + str(m) + + " words). Results may be less accurate." + ) + for j in range(1, m + 1): + for i in range(0, l + 1): + self.distortion_table[j][i][l][m] = initial_prob + + # simple initialization, taken from GIZA++ + self.fertility_table[0] = defaultdict(lambda: 0.2) + self.fertility_table[1] = defaultdict(lambda: 0.65) + self.fertility_table[2] = defaultdict(lambda: 0.1) + self.fertility_table[3] = defaultdict(lambda: 0.04) + MAX_FERTILITY = 10 + initial_fert_prob = 0.01 / (MAX_FERTILITY - 4) + for phi in range(4, MAX_FERTILITY): + self.fertility_table[phi] = defaultdict(lambda: initial_fert_prob) + + self.p1 = 0.5 + + def train(self, parallel_corpus): + counts = Model3Counts() + for aligned_sentence in parallel_corpus: + l = len(aligned_sentence.mots) + m = len(aligned_sentence.words) + + # Sample the alignment space + sampled_alignments, best_alignment = self.sample(aligned_sentence) + # Record the most probable alignment + aligned_sentence.alignment = Alignment( + best_alignment.zero_indexed_alignment() + ) + + # E step (a): Compute normalization factors to weigh counts + total_count = self.prob_of_alignments(sampled_alignments) + + # E step (b): Collect counts + for alignment_info in sampled_alignments: + count = self.prob_t_a_given_s(alignment_info) + normalized_count = count / total_count + + for j in range(1, m + 1): + counts.update_lexical_translation( + normalized_count, alignment_info, j + ) + counts.update_distortion(normalized_count, alignment_info, j, l, m) + + counts.update_null_generation(normalized_count, alignment_info) + counts.update_fertility(normalized_count, alignment_info) + + # M step: Update probabilities with maximum likelihood estimates + # If any probability is less than MIN_PROB, clamp it to MIN_PROB + existing_alignment_table = self.alignment_table + self.reset_probabilities() + self.alignment_table = existing_alignment_table # don't retrain + + 
self.maximize_lexical_translation_probabilities(counts) + self.maximize_distortion_probabilities(counts) + self.maximize_fertility_probabilities(counts) + self.maximize_null_generation_probabilities(counts) + + def maximize_distortion_probabilities(self, counts): + MIN_PROB = IBMModel.MIN_PROB + for j, i_s in counts.distortion.items(): + for i, src_sentence_lengths in i_s.items(): + for l, trg_sentence_lengths in src_sentence_lengths.items(): + for m in trg_sentence_lengths: + estimate = ( + counts.distortion[j][i][l][m] + / counts.distortion_for_any_j[i][l][m] + ) + self.distortion_table[j][i][l][m] = max(estimate, MIN_PROB) + + def prob_t_a_given_s(self, alignment_info): + """ + Probability of target sentence and an alignment given the + source sentence + """ + src_sentence = alignment_info.src_sentence + trg_sentence = alignment_info.trg_sentence + l = len(src_sentence) - 1 # exclude NULL + m = len(trg_sentence) - 1 + p1 = self.p1 + p0 = 1 - p1 + + probability = 1.0 + MIN_PROB = IBMModel.MIN_PROB + + # Combine NULL insertion probability + null_fertility = alignment_info.fertility_of_i(0) + probability *= pow(p1, null_fertility) * pow(p0, m - 2 * null_fertility) + if probability < MIN_PROB: + return MIN_PROB + + # Compute combination (m - null_fertility) choose null_fertility + for i in range(1, null_fertility + 1): + probability *= (m - null_fertility - i + 1) / i + if probability < MIN_PROB: + return MIN_PROB + + # Combine fertility probabilities + for i in range(1, l + 1): + fertility = alignment_info.fertility_of_i(i) + probability *= ( + factorial(fertility) * self.fertility_table[fertility][src_sentence[i]] + ) + if probability < MIN_PROB: + return MIN_PROB + + # Combine lexical and distortion probabilities + for j in range(1, m + 1): + t = trg_sentence[j] + i = alignment_info.alignment[j] + s = src_sentence[i] + + probability *= ( + self.translation_table[t][s] * self.distortion_table[j][i][l][m] + ) + if probability < MIN_PROB: + return MIN_PROB + + return probability + + +class Model3Counts(Counts): + """ + Data object to store counts of various parameters during training. + Includes counts for distortion. + """ + + def __init__(self): + super(Model3Counts, self).__init__() + self.distortion = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.0))) + ) + self.distortion_for_any_j = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: 0.0)) + ) + + def update_distortion(self, count, alignment_info, j, l, m): + i = alignment_info.alignment[j] + self.distortion[j][i][l][m] += count + self.distortion_for_any_j[i][l][m] += count diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm4.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm4.py new file mode 100644 index 0000000..323dd4d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm4.py @@ -0,0 +1,490 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: IBM Model 4 +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Tah Wei Hoon +# URL: +# For license information, see LICENSE.TXT + +""" +Translation model that reorders output words based on their type and +distance from other related words in the output sentence. + +IBM Model 4 improves the distortion model of Model 3, motivated by the +observation that certain words tend to be re-ordered in a predictable +way relative to one another. For example, in English +usually has its order flipped as in French. 
+ +Model 4 requires words in the source and target vocabularies to be +categorized into classes. This can be linguistically driven, like parts +of speech (adjective, nouns, prepositions, etc). Word classes can also +be obtained by statistical methods. The original IBM Model 4 uses an +information theoretic approach to group words into 50 classes for each +vocabulary. + +Terminology: +Cept: + A source word with non-zero fertility i.e. aligned to one or more + target words. +Tablet: + The set of target word(s) aligned to a cept. +Head of cept: + The first word of the tablet of that cept. +Center of cept: + The average position of the words in that cept's tablet. If the + value is not an integer, the ceiling is taken. + For example, for a tablet with words in positions 2, 5, 6 in the + target sentence, the center of the corresponding cept is + ceil((2 + 5 + 6) / 3) = 5 +Displacement: + For a head word, defined as (position of head word - position of + previous cept's center). Can be positive or negative. + For a non-head word, defined as (position of non-head word - + position of previous word in the same tablet). Always positive, + because successive words in a tablet are assumed to appear to the + right of the previous word. + +In contrast to Model 3 which reorders words in a tablet independently of +other words, Model 4 distinguishes between three cases. +(1) Words generated by NULL are distributed uniformly. +(2) For a head word t, its position is modeled by the probability + d_head(displacement | word_class_s(s),word_class_t(t)), + where s is the previous cept, and word_class_s and word_class_t maps + s and t to a source and target language word class respectively. +(3) For a non-head word t, its position is modeled by the probability + d_non_head(displacement | word_class_t(t)) + +The EM algorithm used in Model 4 is: +E step - In the training data, collect counts, weighted by prior + probabilities. + (a) count how many times a source language word is translated + into a target language word + (b) for a particular word class, count how many times a head + word is located at a particular displacement from the + previous cept's center + (c) for a particular word class, count how many times a + non-head word is located at a particular displacement from + the previous target word + (d) count how many times a source word is aligned to phi number + of target words + (e) count how many times NULL is aligned to a target word + +M step - Estimate new probabilities based on the counts from the E step + +Like Model 3, there are too many possible alignments to consider. Thus, +a hill climbing approach is used to sample good candidates. + + +Notations: +i: Position in the source sentence + Valid values are 0 (for NULL), 1, 2, ..., length of source sentence +j: Position in the target sentence + Valid values are 1, 2, ..., length of target sentence +l: Number of words in the source sentence, excluding NULL +m: Number of words in the target sentence +s: A word in the source language +t: A word in the target language +phi: Fertility, the number of target words produced by a source word +p1: Probability that a target word produced by a source word is + accompanied by another target word that is aligned to NULL +p0: 1 - p1 +dj: Displacement, Δj + + +References: +Philipp Koehn. 2010. Statistical Machine Translation. +Cambridge University Press, New York. + +Peter E Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and +Robert L. Mercer. 1993. 
The Mathematics of Statistical Machine +Translation: Parameter Estimation. Computational Linguistics, 19 (2), +263-311. +""" + +from __future__ import division + +import warnings +from collections import defaultdict +from math import factorial + +from nltk.translate import AlignedSent +from nltk.translate import Alignment +from nltk.translate import IBMModel +from nltk.translate import IBMModel3 +from nltk.translate.ibm_model import Counts +from nltk.translate.ibm_model import longest_target_sentence_length + + +class IBMModel4(IBMModel): + """ + Translation model that reorders output words based on their type and + their distance from other related words in the output sentence + + >>> bitext = [] + >>> bitext.append(AlignedSent(['klein', 'ist', 'das', 'haus'], ['the', 'house', 'is', 'small'])) + >>> bitext.append(AlignedSent(['das', 'haus', 'war', 'ja', 'groß'], ['the', 'house', 'was', 'big'])) + >>> bitext.append(AlignedSent(['das', 'buch', 'ist', 'ja', 'klein'], ['the', 'book', 'is', 'small'])) + >>> bitext.append(AlignedSent(['ein', 'haus', 'ist', 'klein'], ['a', 'house', 'is', 'small'])) + >>> bitext.append(AlignedSent(['das', 'haus'], ['the', 'house'])) + >>> bitext.append(AlignedSent(['das', 'buch'], ['the', 'book'])) + >>> bitext.append(AlignedSent(['ein', 'buch'], ['a', 'book'])) + >>> bitext.append(AlignedSent(['ich', 'fasse', 'das', 'buch', 'zusammen'], ['i', 'summarize', 'the', 'book'])) + >>> bitext.append(AlignedSent(['fasse', 'zusammen'], ['summarize'])) + >>> src_classes = {'the': 0, 'a': 0, 'small': 1, 'big': 1, 'house': 2, 'book': 2, 'is': 3, 'was': 3, 'i': 4, 'summarize': 5 } + >>> trg_classes = {'das': 0, 'ein': 0, 'haus': 1, 'buch': 1, 'klein': 2, 'groß': 2, 'ist': 3, 'war': 3, 'ja': 4, 'ich': 5, 'fasse': 6, 'zusammen': 6 } + + >>> ibm4 = IBMModel4(bitext, 5, src_classes, trg_classes) + + >>> print(round(ibm4.translation_table['buch']['book'], 3)) + 1.0 + >>> print(round(ibm4.translation_table['das']['book'], 3)) + 0.0 + >>> print(round(ibm4.translation_table['ja'][None], 3)) + 1.0 + + >>> print(round(ibm4.head_distortion_table[1][0][1], 3)) + 1.0 + >>> print(round(ibm4.head_distortion_table[2][0][1], 3)) + 0.0 + >>> print(round(ibm4.non_head_distortion_table[3][6], 3)) + 0.5 + + >>> print(round(ibm4.fertility_table[2]['summarize'], 3)) + 1.0 + >>> print(round(ibm4.fertility_table[1]['book'], 3)) + 1.0 + + >>> print(ibm4.p1) + 0.033... + + >>> test_sentence = bitext[2] + >>> test_sentence.words + ['das', 'buch', 'ist', 'ja', 'klein'] + >>> test_sentence.mots + ['the', 'book', 'is', 'small'] + >>> test_sentence.alignment + Alignment([(0, 0), (1, 1), (2, 2), (3, None), (4, 3)]) + + """ + + def __init__( + self, + sentence_aligned_corpus, + iterations, + source_word_classes, + target_word_classes, + probability_tables=None, + ): + """ + Train on ``sentence_aligned_corpus`` and create a lexical + translation model, distortion models, a fertility model, and a + model for generating NULL-aligned words. + + Translation direction is from ``AlignedSent.mots`` to + ``AlignedSent.words``. 
+ + :param sentence_aligned_corpus: Sentence-aligned parallel corpus + :type sentence_aligned_corpus: list(AlignedSent) + + :param iterations: Number of iterations to run training algorithm + :type iterations: int + + :param source_word_classes: Lookup table that maps a source word + to its word class, the latter represented by an integer id + :type source_word_classes: dict[str]: int + + :param target_word_classes: Lookup table that maps a target word + to its word class, the latter represented by an integer id + :type target_word_classes: dict[str]: int + + :param probability_tables: Optional. Use this to pass in custom + probability values. If not specified, probabilities will be + set to a uniform distribution, or some other sensible value. + If specified, all the following entries must be present: + ``translation_table``, ``alignment_table``, + ``fertility_table``, ``p1``, ``head_distortion_table``, + ``non_head_distortion_table``. See ``IBMModel`` and + ``IBMModel4`` for the type and purpose of these tables. + :type probability_tables: dict[str]: object + """ + super(IBMModel4, self).__init__(sentence_aligned_corpus) + self.reset_probabilities() + self.src_classes = source_word_classes + self.trg_classes = target_word_classes + + if probability_tables is None: + # Get probabilities from IBM model 3 + ibm3 = IBMModel3(sentence_aligned_corpus, iterations) + self.translation_table = ibm3.translation_table + self.alignment_table = ibm3.alignment_table + self.fertility_table = ibm3.fertility_table + self.p1 = ibm3.p1 + self.set_uniform_probabilities(sentence_aligned_corpus) + else: + # Set user-defined probabilities + self.translation_table = probability_tables['translation_table'] + self.alignment_table = probability_tables['alignment_table'] + self.fertility_table = probability_tables['fertility_table'] + self.p1 = probability_tables['p1'] + self.head_distortion_table = probability_tables['head_distortion_table'] + self.non_head_distortion_table = probability_tables[ + 'non_head_distortion_table' + ] + + for n in range(0, iterations): + self.train(sentence_aligned_corpus) + + def reset_probabilities(self): + super(IBMModel4, self).reset_probabilities() + self.head_distortion_table = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: self.MIN_PROB)) + ) + """ + dict[int][int][int]: float. Probability(displacement of head + word | word class of previous cept,target word class). + Values accessed as ``distortion_table[dj][src_class][trg_class]``. + """ + + self.non_head_distortion_table = defaultdict( + lambda: defaultdict(lambda: self.MIN_PROB) + ) + """ + dict[int][int]: float. Probability(displacement of non-head + word | target word class). + Values accessed as ``distortion_table[dj][trg_class]``. + """ + + def set_uniform_probabilities(self, sentence_aligned_corpus): + """ + Set distortion probabilities uniformly to + 1 / cardinality of displacement values + """ + max_m = longest_target_sentence_length(sentence_aligned_corpus) + + # The maximum displacement is m-1, when a word is in the last + # position m of the target sentence and the previously placed + # word is in the first position. + # Conversely, the minimum displacement is -(m-1). + # Thus, the displacement range is (m-1) - (-(m-1)). Note that + # displacement cannot be zero and is not included in the range. 
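+        # Hence there are 2 * (max_m - 1) possible displacement values,
+        # which is why the uniform initial probability below is
+        # 1 / (2 * (max_m - 1)).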
+ if max_m <= 1: + initial_prob = IBMModel.MIN_PROB + else: + initial_prob = 1 / (2 * (max_m - 1)) + if initial_prob < IBMModel.MIN_PROB: + warnings.warn( + "A target sentence is too long (" + + str(max_m) + + " words). Results may be less accurate." + ) + + for dj in range(1, max_m): + self.head_distortion_table[dj] = defaultdict( + lambda: defaultdict(lambda: initial_prob) + ) + self.head_distortion_table[-dj] = defaultdict( + lambda: defaultdict(lambda: initial_prob) + ) + self.non_head_distortion_table[dj] = defaultdict(lambda: initial_prob) + self.non_head_distortion_table[-dj] = defaultdict(lambda: initial_prob) + + def train(self, parallel_corpus): + counts = Model4Counts() + for aligned_sentence in parallel_corpus: + m = len(aligned_sentence.words) + + # Sample the alignment space + sampled_alignments, best_alignment = self.sample(aligned_sentence) + # Record the most probable alignment + aligned_sentence.alignment = Alignment( + best_alignment.zero_indexed_alignment() + ) + + # E step (a): Compute normalization factors to weigh counts + total_count = self.prob_of_alignments(sampled_alignments) + + # E step (b): Collect counts + for alignment_info in sampled_alignments: + count = self.prob_t_a_given_s(alignment_info) + normalized_count = count / total_count + + for j in range(1, m + 1): + counts.update_lexical_translation( + normalized_count, alignment_info, j + ) + counts.update_distortion( + normalized_count, + alignment_info, + j, + self.src_classes, + self.trg_classes, + ) + + counts.update_null_generation(normalized_count, alignment_info) + counts.update_fertility(normalized_count, alignment_info) + + # M step: Update probabilities with maximum likelihood estimates + # If any probability is less than MIN_PROB, clamp it to MIN_PROB + existing_alignment_table = self.alignment_table + self.reset_probabilities() + self.alignment_table = existing_alignment_table # don't retrain + + self.maximize_lexical_translation_probabilities(counts) + self.maximize_distortion_probabilities(counts) + self.maximize_fertility_probabilities(counts) + self.maximize_null_generation_probabilities(counts) + + def maximize_distortion_probabilities(self, counts): + head_d_table = self.head_distortion_table + for dj, src_classes in counts.head_distortion.items(): + for s_cls, trg_classes in src_classes.items(): + for t_cls in trg_classes: + estimate = ( + counts.head_distortion[dj][s_cls][t_cls] + / counts.head_distortion_for_any_dj[s_cls][t_cls] + ) + head_d_table[dj][s_cls][t_cls] = max(estimate, IBMModel.MIN_PROB) + + non_head_d_table = self.non_head_distortion_table + for dj, trg_classes in counts.non_head_distortion.items(): + for t_cls in trg_classes: + estimate = ( + counts.non_head_distortion[dj][t_cls] + / counts.non_head_distortion_for_any_dj[t_cls] + ) + non_head_d_table[dj][t_cls] = max(estimate, IBMModel.MIN_PROB) + + def prob_t_a_given_s(self, alignment_info): + """ + Probability of target sentence and an alignment given the + source sentence + """ + return IBMModel4.model4_prob_t_a_given_s(alignment_info, self) + + @staticmethod # exposed for Model 5 to use + def model4_prob_t_a_given_s(alignment_info, ibm_model): + probability = 1.0 + MIN_PROB = IBMModel.MIN_PROB + + def null_generation_term(): + # Binomial distribution: B(m - null_fertility, p1) + value = 1.0 + p1 = ibm_model.p1 + p0 = 1 - p1 + null_fertility = alignment_info.fertility_of_i(0) + m = len(alignment_info.trg_sentence) - 1 + value *= pow(p1, null_fertility) * pow(p0, m - 2 * null_fertility) + if value < MIN_PROB: + return 
MIN_PROB + + # Combination: (m - null_fertility) choose null_fertility + for i in range(1, null_fertility + 1): + value *= (m - null_fertility - i + 1) / i + return value + + def fertility_term(): + value = 1.0 + src_sentence = alignment_info.src_sentence + for i in range(1, len(src_sentence)): + fertility = alignment_info.fertility_of_i(i) + value *= ( + factorial(fertility) + * ibm_model.fertility_table[fertility][src_sentence[i]] + ) + if value < MIN_PROB: + return MIN_PROB + return value + + def lexical_translation_term(j): + t = alignment_info.trg_sentence[j] + i = alignment_info.alignment[j] + s = alignment_info.src_sentence[i] + return ibm_model.translation_table[t][s] + + def distortion_term(j): + t = alignment_info.trg_sentence[j] + i = alignment_info.alignment[j] + if i == 0: + # case 1: t is aligned to NULL + return 1.0 + if alignment_info.is_head_word(j): + # case 2: t is the first word of a tablet + previous_cept = alignment_info.previous_cept(j) + src_class = None + if previous_cept is not None: + previous_s = alignment_info.src_sentence[previous_cept] + src_class = ibm_model.src_classes[previous_s] + trg_class = ibm_model.trg_classes[t] + dj = j - alignment_info.center_of_cept(previous_cept) + return ibm_model.head_distortion_table[dj][src_class][trg_class] + + # case 3: t is a subsequent word of a tablet + previous_position = alignment_info.previous_in_tablet(j) + trg_class = ibm_model.trg_classes[t] + dj = j - previous_position + return ibm_model.non_head_distortion_table[dj][trg_class] + + # end nested functions + + # Abort computation whenever probability falls below MIN_PROB at + # any point, since MIN_PROB can be considered as zero + probability *= null_generation_term() + if probability < MIN_PROB: + return MIN_PROB + + probability *= fertility_term() + if probability < MIN_PROB: + return MIN_PROB + + for j in range(1, len(alignment_info.trg_sentence)): + probability *= lexical_translation_term(j) + if probability < MIN_PROB: + return MIN_PROB + + probability *= distortion_term(j) + if probability < MIN_PROB: + return MIN_PROB + + return probability + + +class Model4Counts(Counts): + """ + Data object to store counts of various parameters during training. + Includes counts for distortion. 
+ """ + + def __init__(self): + super(Model4Counts, self).__init__() + self.head_distortion = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: 0.0)) + ) + self.head_distortion_for_any_dj = defaultdict(lambda: defaultdict(lambda: 0.0)) + self.non_head_distortion = defaultdict(lambda: defaultdict(lambda: 0.0)) + self.non_head_distortion_for_any_dj = defaultdict(lambda: 0.0) + + def update_distortion(self, count, alignment_info, j, src_classes, trg_classes): + i = alignment_info.alignment[j] + t = alignment_info.trg_sentence[j] + if i == 0: + # case 1: t is aligned to NULL + pass + elif alignment_info.is_head_word(j): + # case 2: t is the first word of a tablet + previous_cept = alignment_info.previous_cept(j) + if previous_cept is not None: + previous_src_word = alignment_info.src_sentence[previous_cept] + src_class = src_classes[previous_src_word] + else: + src_class = None + trg_class = trg_classes[t] + dj = j - alignment_info.center_of_cept(previous_cept) + self.head_distortion[dj][src_class][trg_class] += count + self.head_distortion_for_any_dj[src_class][trg_class] += count + else: + # case 3: t is a subsequent word of a tablet + previous_j = alignment_info.previous_in_tablet(j) + trg_class = trg_classes[t] + dj = j - previous_j + self.non_head_distortion[dj][trg_class] += count + self.non_head_distortion_for_any_dj[trg_class] += count diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm5.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm5.py new file mode 100644 index 0000000..b1b44e7 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm5.py @@ -0,0 +1,664 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: IBM Model 5 +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Tah Wei Hoon +# URL: +# For license information, see LICENSE.TXT + +""" +Translation model that keeps track of vacant positions in the target +sentence to decide where to place translated words. + +Translation can be viewed as a process where each word in the source +sentence is stepped through sequentially, generating translated words +for each source word. The target sentence can be viewed as being made +up of ``m`` empty slots initially, which gradually fill up as generated +words are placed in them. + +Models 3 and 4 use distortion probabilities to decide how to place +translated words. For simplicity, these models ignore the history of +which slots have already been occupied with translated words. +Consider the placement of the last translated word: there is only one +empty slot left in the target sentence, so the distortion probability +should be 1.0 for that position and 0.0 everywhere else. However, the +distortion probabilities for Models 3 and 4 are set up such that all +positions are under consideration. + +IBM Model 5 fixes this deficiency by accounting for occupied slots +during translation. It introduces the vacancy function v(j), the number +of vacancies up to, and including, position j in the target sentence. + +Terminology: +Maximum vacancy: + The number of valid slots that a word can be placed in. + This is not necessarily the same as the number of vacant slots. + For example, if a tablet contains more than one word, the head word + cannot be placed at the last vacant slot because there will be no + space for the other words in the tablet. The number of valid slots + has to take into account the length of the tablet. + Non-head words cannot be placed before the head word, so vacancies + to the left of the head word are ignored. 
+Vacancy difference: + For a head word: (v(j) - v(center of previous cept)) + Can be positive or negative. + For a non-head word: (v(j) - v(position of previously placed word)) + Always positive, because successive words in a tablet are assumed to + appear to the right of the previous word. + +Positioning of target words fall under three cases: +(1) Words generated by NULL are distributed uniformly +(2) For a head word t, its position is modeled by the probability + v_head(dv | max_v,word_class_t(t)) +(3) For a non-head word t, its position is modeled by the probability + v_non_head(dv | max_v,word_class_t(t)) +dv and max_v are defined differently for head and non-head words. + +The EM algorithm used in Model 5 is: +E step - In the training data, collect counts, weighted by prior + probabilities. + (a) count how many times a source language word is translated + into a target language word + (b) for a particular word class and maximum vacancy, count how + many times a head word and the previous cept's center have + a particular difference in number of vacancies + (b) for a particular word class and maximum vacancy, count how + many times a non-head word and the previous target word + have a particular difference in number of vacancies + (d) count how many times a source word is aligned to phi number + of target words + (e) count how many times NULL is aligned to a target word + +M step - Estimate new probabilities based on the counts from the E step + +Like Model 4, there are too many possible alignments to consider. Thus, +a hill climbing approach is used to sample good candidates. In addition, +pruning is used to weed out unlikely alignments based on Model 4 scores. + + +Notations: +i: Position in the source sentence + Valid values are 0 (for NULL), 1, 2, ..., length of source sentence +j: Position in the target sentence + Valid values are 1, 2, ..., length of target sentence +l: Number of words in the source sentence, excluding NULL +m: Number of words in the target sentence +s: A word in the source language +t: A word in the target language +phi: Fertility, the number of target words produced by a source word +p1: Probability that a target word produced by a source word is + accompanied by another target word that is aligned to NULL +p0: 1 - p1 +max_v: Maximum vacancy +dv: Vacancy difference, Δv + +The definition of v_head here differs from GIZA++, section 4.7 of +[Brown et al., 1993], and [Koehn, 2010]. In the latter cases, v_head is +v_head(v(j) | v(center of previous cept),max_v,word_class(t)). + +Here, we follow appendix B of [Brown et al., 1993] and combine v(j) with +v(center of previous cept) to obtain dv: +v_head(v(j) - v(center of previous cept) | max_v,word_class(t)). + + +References: +Philipp Koehn. 2010. Statistical Machine Translation. +Cambridge University Press, New York. + +Peter E Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and +Robert L. Mercer. 1993. The Mathematics of Statistical Machine +Translation: Parameter Estimation. Computational Linguistics, 19 (2), +263-311. 
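+
+For example (illustrative), using the ``Slots`` helper defined below: in a
+five-slot target sentence where positions 2 and 4 are already occupied,
+
+    >>> slots = Slots(5)
+    >>> slots.occupy(2)
+    >>> slots.occupy(4)
+    >>> [slots.vacancies_at(j) for j in range(1, 6)]
+    [1, 1, 2, 2, 3]
+
+so placing a one-word tablet's head word at position 5 when the previous
+cept's center is position 2 gives dv = v(5) - v(2) = 2, with max_v = 3.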
+""" + +from __future__ import division + +import warnings +from collections import defaultdict +from math import factorial + +from nltk.translate import AlignedSent +from nltk.translate import Alignment +from nltk.translate import IBMModel +from nltk.translate import IBMModel4 +from nltk.translate.ibm_model import Counts +from nltk.translate.ibm_model import longest_target_sentence_length + + +class IBMModel5(IBMModel): + """ + Translation model that keeps track of vacant positions in the target + sentence to decide where to place translated words + + >>> bitext = [] + >>> bitext.append(AlignedSent(['klein', 'ist', 'das', 'haus'], ['the', 'house', 'is', 'small'])) + >>> bitext.append(AlignedSent(['das', 'haus', 'war', 'ja', 'groß'], ['the', 'house', 'was', 'big'])) + >>> bitext.append(AlignedSent(['das', 'buch', 'ist', 'ja', 'klein'], ['the', 'book', 'is', 'small'])) + >>> bitext.append(AlignedSent(['ein', 'haus', 'ist', 'klein'], ['a', 'house', 'is', 'small'])) + >>> bitext.append(AlignedSent(['das', 'haus'], ['the', 'house'])) + >>> bitext.append(AlignedSent(['das', 'buch'], ['the', 'book'])) + >>> bitext.append(AlignedSent(['ein', 'buch'], ['a', 'book'])) + >>> bitext.append(AlignedSent(['ich', 'fasse', 'das', 'buch', 'zusammen'], ['i', 'summarize', 'the', 'book'])) + >>> bitext.append(AlignedSent(['fasse', 'zusammen'], ['summarize'])) + >>> src_classes = {'the': 0, 'a': 0, 'small': 1, 'big': 1, 'house': 2, 'book': 2, 'is': 3, 'was': 3, 'i': 4, 'summarize': 5 } + >>> trg_classes = {'das': 0, 'ein': 0, 'haus': 1, 'buch': 1, 'klein': 2, 'groß': 2, 'ist': 3, 'war': 3, 'ja': 4, 'ich': 5, 'fasse': 6, 'zusammen': 6 } + + >>> ibm5 = IBMModel5(bitext, 5, src_classes, trg_classes) + + >>> print(round(ibm5.head_vacancy_table[1][1][1], 3)) + 1.0 + >>> print(round(ibm5.head_vacancy_table[2][1][1], 3)) + 0.0 + >>> print(round(ibm5.non_head_vacancy_table[3][3][6], 3)) + 1.0 + + >>> print(round(ibm5.fertility_table[2]['summarize'], 3)) + 1.0 + >>> print(round(ibm5.fertility_table[1]['book'], 3)) + 1.0 + + >>> print(ibm5.p1) + 0.033... + + >>> test_sentence = bitext[2] + >>> test_sentence.words + ['das', 'buch', 'ist', 'ja', 'klein'] + >>> test_sentence.mots + ['the', 'book', 'is', 'small'] + >>> test_sentence.alignment + Alignment([(0, 0), (1, 1), (2, 2), (3, None), (4, 3)]) + + """ + + MIN_SCORE_FACTOR = 0.2 + """ + Alignments with scores below this factor are pruned during sampling + """ + + def __init__( + self, + sentence_aligned_corpus, + iterations, + source_word_classes, + target_word_classes, + probability_tables=None, + ): + """ + Train on ``sentence_aligned_corpus`` and create a lexical + translation model, vacancy models, a fertility model, and a + model for generating NULL-aligned words. + + Translation direction is from ``AlignedSent.mots`` to + ``AlignedSent.words``. + + :param sentence_aligned_corpus: Sentence-aligned parallel corpus + :type sentence_aligned_corpus: list(AlignedSent) + + :param iterations: Number of iterations to run training algorithm + :type iterations: int + + :param source_word_classes: Lookup table that maps a source word + to its word class, the latter represented by an integer id + :type source_word_classes: dict[str]: int + + :param target_word_classes: Lookup table that maps a target word + to its word class, the latter represented by an integer id + :type target_word_classes: dict[str]: int + + :param probability_tables: Optional. Use this to pass in custom + probability values. 
If not specified, probabilities will be + set to a uniform distribution, or some other sensible value. + If specified, all the following entries must be present: + ``translation_table``, ``alignment_table``, + ``fertility_table``, ``p1``, ``head_distortion_table``, + ``non_head_distortion_table``, ``head_vacancy_table``, + ``non_head_vacancy_table``. See ``IBMModel``, ``IBMModel4``, + and ``IBMModel5`` for the type and purpose of these tables. + :type probability_tables: dict[str]: object + """ + super(IBMModel5, self).__init__(sentence_aligned_corpus) + self.reset_probabilities() + self.src_classes = source_word_classes + self.trg_classes = target_word_classes + + if probability_tables is None: + # Get probabilities from IBM model 4 + ibm4 = IBMModel4( + sentence_aligned_corpus, + iterations, + source_word_classes, + target_word_classes, + ) + self.translation_table = ibm4.translation_table + self.alignment_table = ibm4.alignment_table + self.fertility_table = ibm4.fertility_table + self.p1 = ibm4.p1 + self.head_distortion_table = ibm4.head_distortion_table + self.non_head_distortion_table = ibm4.non_head_distortion_table + self.set_uniform_probabilities(sentence_aligned_corpus) + else: + # Set user-defined probabilities + self.translation_table = probability_tables['translation_table'] + self.alignment_table = probability_tables['alignment_table'] + self.fertility_table = probability_tables['fertility_table'] + self.p1 = probability_tables['p1'] + self.head_distortion_table = probability_tables['head_distortion_table'] + self.non_head_distortion_table = probability_tables[ + 'non_head_distortion_table' + ] + self.head_vacancy_table = probability_tables['head_vacancy_table'] + self.non_head_vacancy_table = probability_tables['non_head_vacancy_table'] + + for n in range(0, iterations): + self.train(sentence_aligned_corpus) + + def reset_probabilities(self): + super(IBMModel5, self).reset_probabilities() + self.head_vacancy_table = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: self.MIN_PROB)) + ) + """ + dict[int][int][int]: float. Probability(vacancy difference | + number of remaining valid positions,target word class). + Values accessed as ``head_vacancy_table[dv][v_max][trg_class]``. + """ + + self.non_head_vacancy_table = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: self.MIN_PROB)) + ) + """ + dict[int][int][int]: float. Probability(vacancy difference | + number of remaining valid positions,target word class). + Values accessed as ``non_head_vacancy_table[dv][v_max][trg_class]``. + """ + + def set_uniform_probabilities(self, sentence_aligned_corpus): + """ + Set vacancy probabilities uniformly to + 1 / cardinality of vacancy difference values + """ + max_m = longest_target_sentence_length(sentence_aligned_corpus) + + # The maximum vacancy difference occurs when a word is placed in + # the last available position m of the target sentence and the + # previous word position has no vacancies. + # The minimum is 1-max_v, when a word is placed in the first + # available position and the previous word is placed beyond the + # last available position. + # Thus, the number of possible vacancy difference values is + # (max_v) - (1-max_v) + 1 = 2 * max_v. + if max_m > 0 and (1 / (2 * max_m)) < IBMModel.MIN_PROB: + warnings.warn( + "A target sentence is too long (" + + str(max_m) + + " words). Results may be less accurate." 
+ ) + + for max_v in range(1, max_m + 1): + for dv in range(1, max_m + 1): + initial_prob = 1 / (2 * max_v) + self.head_vacancy_table[dv][max_v] = defaultdict(lambda: initial_prob) + self.head_vacancy_table[-(dv - 1)][max_v] = defaultdict( + lambda: initial_prob + ) + self.non_head_vacancy_table[dv][max_v] = defaultdict( + lambda: initial_prob + ) + self.non_head_vacancy_table[-(dv - 1)][max_v] = defaultdict( + lambda: initial_prob + ) + + def train(self, parallel_corpus): + counts = Model5Counts() + for aligned_sentence in parallel_corpus: + l = len(aligned_sentence.mots) + m = len(aligned_sentence.words) + + # Sample the alignment space + sampled_alignments, best_alignment = self.sample(aligned_sentence) + # Record the most probable alignment + aligned_sentence.alignment = Alignment( + best_alignment.zero_indexed_alignment() + ) + + # E step (a): Compute normalization factors to weigh counts + total_count = self.prob_of_alignments(sampled_alignments) + + # E step (b): Collect counts + for alignment_info in sampled_alignments: + count = self.prob_t_a_given_s(alignment_info) + normalized_count = count / total_count + + for j in range(1, m + 1): + counts.update_lexical_translation( + normalized_count, alignment_info, j + ) + + slots = Slots(m) + for i in range(1, l + 1): + counts.update_vacancy( + normalized_count, alignment_info, i, self.trg_classes, slots + ) + + counts.update_null_generation(normalized_count, alignment_info) + counts.update_fertility(normalized_count, alignment_info) + + # M step: Update probabilities with maximum likelihood estimates + # If any probability is less than MIN_PROB, clamp it to MIN_PROB + existing_alignment_table = self.alignment_table + self.reset_probabilities() + self.alignment_table = existing_alignment_table # don't retrain + + self.maximize_lexical_translation_probabilities(counts) + self.maximize_vacancy_probabilities(counts) + self.maximize_fertility_probabilities(counts) + self.maximize_null_generation_probabilities(counts) + + def sample(self, sentence_pair): + """ + Sample the most probable alignments from the entire alignment + space according to Model 4 + + Note that Model 4 scoring is used instead of Model 5 because the + latter is too expensive to compute. + + First, determine the best alignment according to IBM Model 2. + With this initial alignment, use hill climbing to determine the + best alignment according to a IBM Model 4. Add this + alignment and its neighbors to the sample set. Repeat this + process with other initial alignments obtained by pegging an + alignment point. Finally, prune alignments that have + substantially lower Model 4 scores than the best alignment. 
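+
+        For example (illustrative): if the best Model 4 score among the
+        sampled alignments is 1e-6, then with ``MIN_SCORE_FACTOR = 0.2``
+        every alignment scoring 2e-7 or less is discarded.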
+ + :param sentence_pair: Source and target language sentence pair + to generate a sample of alignments from + :type sentence_pair: AlignedSent + + :return: A set of best alignments represented by their ``AlignmentInfo`` + and the best alignment of the set for convenience + :rtype: set(AlignmentInfo), AlignmentInfo + """ + sampled_alignments, best_alignment = super(IBMModel5, self).sample( + sentence_pair + ) + return self.prune(sampled_alignments), best_alignment + + def prune(self, alignment_infos): + """ + Removes alignments from ``alignment_infos`` that have + substantially lower Model 4 scores than the best alignment + + :return: Pruned alignments + :rtype: set(AlignmentInfo) + """ + alignments = [] + best_score = 0 + + for alignment_info in alignment_infos: + score = IBMModel4.model4_prob_t_a_given_s(alignment_info, self) + best_score = max(score, best_score) + alignments.append((alignment_info, score)) + + threshold = IBMModel5.MIN_SCORE_FACTOR * best_score + alignments = [a[0] for a in alignments if a[1] > threshold] + return set(alignments) + + def hillclimb(self, alignment_info, j_pegged=None): + """ + Starting from the alignment in ``alignment_info``, look at + neighboring alignments iteratively for the best one, according + to Model 4 + + Note that Model 4 scoring is used instead of Model 5 because the + latter is too expensive to compute. + + There is no guarantee that the best alignment in the alignment + space will be found, because the algorithm might be stuck in a + local maximum. + + :param j_pegged: If specified, the search will be constrained to + alignments where ``j_pegged`` remains unchanged + :type j_pegged: int + + :return: The best alignment found from hill climbing + :rtype: AlignmentInfo + """ + alignment = alignment_info # alias with shorter name + max_probability = IBMModel4.model4_prob_t_a_given_s(alignment, self) + + while True: + old_alignment = alignment + for neighbor_alignment in self.neighboring(alignment, j_pegged): + neighbor_probability = IBMModel4.model4_prob_t_a_given_s( + neighbor_alignment, self + ) + + if neighbor_probability > max_probability: + alignment = neighbor_alignment + max_probability = neighbor_probability + + if alignment == old_alignment: + # Until there are no better alignments + break + + alignment.score = max_probability + return alignment + + def prob_t_a_given_s(self, alignment_info): + """ + Probability of target sentence and an alignment given the + source sentence + """ + probability = 1.0 + MIN_PROB = IBMModel.MIN_PROB + slots = Slots(len(alignment_info.trg_sentence) - 1) + + def null_generation_term(): + # Binomial distribution: B(m - null_fertility, p1) + value = 1.0 + p1 = self.p1 + p0 = 1 - p1 + null_fertility = alignment_info.fertility_of_i(0) + m = len(alignment_info.trg_sentence) - 1 + value *= pow(p1, null_fertility) * pow(p0, m - 2 * null_fertility) + if value < MIN_PROB: + return MIN_PROB + + # Combination: (m - null_fertility) choose null_fertility + for i in range(1, null_fertility + 1): + value *= (m - null_fertility - i + 1) / i + return value + + def fertility_term(): + value = 1.0 + src_sentence = alignment_info.src_sentence + for i in range(1, len(src_sentence)): + fertility = alignment_info.fertility_of_i(i) + value *= ( + factorial(fertility) + * self.fertility_table[fertility][src_sentence[i]] + ) + if value < MIN_PROB: + return MIN_PROB + return value + + def lexical_translation_term(j): + t = alignment_info.trg_sentence[j] + i = alignment_info.alignment[j] + s = alignment_info.src_sentence[i] + return 
self.translation_table[t][s] + + def vacancy_term(i): + value = 1.0 + tablet = alignment_info.cepts[i] + tablet_length = len(tablet) + total_vacancies = slots.vacancies_at(len(slots)) + + # case 1: NULL-aligned words + if tablet_length == 0: + return value + + # case 2: head word + j = tablet[0] + previous_cept = alignment_info.previous_cept(j) + previous_center = alignment_info.center_of_cept(previous_cept) + dv = slots.vacancies_at(j) - slots.vacancies_at(previous_center) + max_v = total_vacancies - tablet_length + 1 + trg_class = self.trg_classes[alignment_info.trg_sentence[j]] + value *= self.head_vacancy_table[dv][max_v][trg_class] + slots.occupy(j) # mark position as occupied + total_vacancies -= 1 + if value < MIN_PROB: + return MIN_PROB + + # case 3: non-head words + for k in range(1, tablet_length): + previous_position = tablet[k - 1] + previous_vacancies = slots.vacancies_at(previous_position) + j = tablet[k] + dv = slots.vacancies_at(j) - previous_vacancies + max_v = total_vacancies - tablet_length + k + 1 - previous_vacancies + trg_class = self.trg_classes[alignment_info.trg_sentence[j]] + value *= self.non_head_vacancy_table[dv][max_v][trg_class] + slots.occupy(j) # mark position as occupied + total_vacancies -= 1 + if value < MIN_PROB: + return MIN_PROB + + return value + + # end nested functions + + # Abort computation whenever probability falls below MIN_PROB at + # any point, since MIN_PROB can be considered as zero + probability *= null_generation_term() + if probability < MIN_PROB: + return MIN_PROB + + probability *= fertility_term() + if probability < MIN_PROB: + return MIN_PROB + + for j in range(1, len(alignment_info.trg_sentence)): + probability *= lexical_translation_term(j) + if probability < MIN_PROB: + return MIN_PROB + + for i in range(1, len(alignment_info.src_sentence)): + probability *= vacancy_term(i) + if probability < MIN_PROB: + return MIN_PROB + + return probability + + def maximize_vacancy_probabilities(self, counts): + MIN_PROB = IBMModel.MIN_PROB + head_vacancy_table = self.head_vacancy_table + for dv, max_vs in counts.head_vacancy.items(): + for max_v, trg_classes in max_vs.items(): + for t_cls in trg_classes: + estimate = ( + counts.head_vacancy[dv][max_v][t_cls] + / counts.head_vacancy_for_any_dv[max_v][t_cls] + ) + head_vacancy_table[dv][max_v][t_cls] = max(estimate, MIN_PROB) + + non_head_vacancy_table = self.non_head_vacancy_table + for dv, max_vs in counts.non_head_vacancy.items(): + for max_v, trg_classes in max_vs.items(): + for t_cls in trg_classes: + estimate = ( + counts.non_head_vacancy[dv][max_v][t_cls] + / counts.non_head_vacancy_for_any_dv[max_v][t_cls] + ) + non_head_vacancy_table[dv][max_v][t_cls] = max(estimate, MIN_PROB) + + +class Model5Counts(Counts): + """ + Data object to store counts of various parameters during training. + Includes counts for vacancies. 
+ """ + + def __init__(self): + super(Model5Counts, self).__init__() + self.head_vacancy = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: 0.0)) + ) + self.head_vacancy_for_any_dv = defaultdict(lambda: defaultdict(lambda: 0.0)) + self.non_head_vacancy = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: 0.0)) + ) + self.non_head_vacancy_for_any_dv = defaultdict(lambda: defaultdict(lambda: 0.0)) + + def update_vacancy(self, count, alignment_info, i, trg_classes, slots): + """ + :param count: Value to add to the vacancy counts + :param alignment_info: Alignment under consideration + :param i: Source word position under consideration + :param trg_classes: Target word classes + :param slots: Vacancy states of the slots in the target sentence. + Output parameter that will be modified as new words are placed + in the target sentence. + """ + tablet = alignment_info.cepts[i] + tablet_length = len(tablet) + total_vacancies = slots.vacancies_at(len(slots)) + + # case 1: NULL aligned words + if tablet_length == 0: + return # ignore zero fertility words + + # case 2: head word + j = tablet[0] + previous_cept = alignment_info.previous_cept(j) + previous_center = alignment_info.center_of_cept(previous_cept) + dv = slots.vacancies_at(j) - slots.vacancies_at(previous_center) + max_v = total_vacancies - tablet_length + 1 + trg_class = trg_classes[alignment_info.trg_sentence[j]] + self.head_vacancy[dv][max_v][trg_class] += count + self.head_vacancy_for_any_dv[max_v][trg_class] += count + slots.occupy(j) # mark position as occupied + total_vacancies -= 1 + + # case 3: non-head words + for k in range(1, tablet_length): + previous_position = tablet[k - 1] + previous_vacancies = slots.vacancies_at(previous_position) + j = tablet[k] + dv = slots.vacancies_at(j) - previous_vacancies + max_v = total_vacancies - tablet_length + k + 1 - previous_vacancies + trg_class = trg_classes[alignment_info.trg_sentence[j]] + self.non_head_vacancy[dv][max_v][trg_class] += count + self.non_head_vacancy_for_any_dv[max_v][trg_class] += count + slots.occupy(j) # mark position as occupied + total_vacancies -= 1 + + +class Slots(object): + """ + Represents positions in a target sentence. Used to keep track of + which slot (position) is occupied. + """ + + def __init__(self, target_sentence_length): + self._slots = [False] * (target_sentence_length + 1) # 1-indexed + + def occupy(self, position): + """ + :return: Mark slot at ``position`` as occupied + """ + self._slots[position] = True + + def vacancies_at(self, position): + """ + :return: Number of vacant slots up to, and including, ``position`` + """ + vacancies = 0 + for k in range(1, position + 1): + if not self._slots[k]: + vacancies += 1 + return vacancies + + def __len__(self): + return len(self._slots) - 1 # exclude dummy zeroeth element diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm_model.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm_model.py new file mode 100644 index 0000000..24f6928 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/ibm_model.py @@ -0,0 +1,550 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: IBM Model Core +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Tah Wei Hoon +# URL: +# For license information, see LICENSE.TXT + +""" +Common methods and classes for all IBM models. See ``IBMModel1``, +``IBMModel2``, ``IBMModel3``, ``IBMModel4``, and ``IBMModel5`` +for specific implementations. 
+ +The IBM models are a series of generative models that learn lexical +translation probabilities, p(target language word|source language word), +given a sentence-aligned parallel corpus. + +The models increase in sophistication from model 1 to 5. Typically, the +output of lower models is used to seed the higher models. All models +use the Expectation-Maximization (EM) algorithm to learn various +probability tables. + +Words in a sentence are one-indexed. The first word of a sentence has +position 1, not 0. Index 0 is reserved in the source sentence for the +NULL token. The concept of position does not apply to NULL, but it is +indexed at 0 by convention. + +Each target word is aligned to exactly one source word or the NULL +token. + +References: +Philipp Koehn. 2010. Statistical Machine Translation. +Cambridge University Press, New York. + +Peter E Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and +Robert L. Mercer. 1993. The Mathematics of Statistical Machine +Translation: Parameter Estimation. Computational Linguistics, 19 (2), +263-311. +""" +from __future__ import division +from bisect import insort_left +from collections import defaultdict +from copy import deepcopy +from math import ceil + + +def longest_target_sentence_length(sentence_aligned_corpus): + """ + :param sentence_aligned_corpus: Parallel corpus under consideration + :type sentence_aligned_corpus: list(AlignedSent) + :return: Number of words in the longest target language sentence + of ``sentence_aligned_corpus`` + """ + max_m = 0 + for aligned_sentence in sentence_aligned_corpus: + m = len(aligned_sentence.words) + max_m = max(m, max_m) + return max_m + + +class IBMModel(object): + """ + Abstract base class for all IBM models + """ + + # Avoid division by zero and precision errors by imposing a minimum + # value for probabilities. Note that this approach is theoretically + # incorrect, since it may create probabilities that sum to more + # than 1. In practice, the contribution of probabilities with MIN_PROB + # is tiny enough that the value of MIN_PROB can be treated as zero. + MIN_PROB = 1.0e-12 # GIZA++ is more liberal and uses 1.0e-7 + + def __init__(self, sentence_aligned_corpus): + self.init_vocab(sentence_aligned_corpus) + self.reset_probabilities() + + def reset_probabilities(self): + self.translation_table = defaultdict( + lambda: defaultdict(lambda: IBMModel.MIN_PROB) + ) + """ + dict[str][str]: float. Probability(target word | source word). + Values accessed as ``translation_table[target_word][source_word]``. + """ + + self.alignment_table = defaultdict( + lambda: defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: IBMModel.MIN_PROB)) + ) + ) + """ + dict[int][int][int][int]: float. Probability(i | j,l,m). + Values accessed as ``alignment_table[i][j][l][m]``. + Used in model 2 and hill climbing in models 3 and above + """ + + self.fertility_table = defaultdict(lambda: defaultdict(lambda: self.MIN_PROB)) + """ + dict[int][str]: float. Probability(fertility | source word). + Values accessed as ``fertility_table[fertility][source_word]``. + Used in model 3 and higher. + """ + + self.p1 = 0.5 + """ + Probability that a generated word requires another target word + that is aligned to NULL. + Used in model 3 and higher. + """ + + def set_uniform_probabilities(self, sentence_aligned_corpus): + """ + Initialize probability tables to a uniform distribution + + Derived classes should implement this accordingly. 
+ """ + pass + + def init_vocab(self, sentence_aligned_corpus): + src_vocab = set() + trg_vocab = set() + for aligned_sentence in sentence_aligned_corpus: + trg_vocab.update(aligned_sentence.words) + src_vocab.update(aligned_sentence.mots) + # Add the NULL token + src_vocab.add(None) + + self.src_vocab = src_vocab + """ + set(str): All source language words used in training + """ + + self.trg_vocab = trg_vocab + """ + set(str): All target language words used in training + """ + + def sample(self, sentence_pair): + """ + Sample the most probable alignments from the entire alignment + space + + First, determine the best alignment according to IBM Model 2. + With this initial alignment, use hill climbing to determine the + best alignment according to a higher IBM Model. Add this + alignment and its neighbors to the sample set. Repeat this + process with other initial alignments obtained by pegging an + alignment point. + + Hill climbing may be stuck in a local maxima, hence the pegging + and trying out of different alignments. + + :param sentence_pair: Source and target language sentence pair + to generate a sample of alignments from + :type sentence_pair: AlignedSent + + :return: A set of best alignments represented by their ``AlignmentInfo`` + and the best alignment of the set for convenience + :rtype: set(AlignmentInfo), AlignmentInfo + """ + sampled_alignments = set() + l = len(sentence_pair.mots) + m = len(sentence_pair.words) + + # Start from the best model 2 alignment + initial_alignment = self.best_model2_alignment(sentence_pair) + potential_alignment = self.hillclimb(initial_alignment) + sampled_alignments.update(self.neighboring(potential_alignment)) + best_alignment = potential_alignment + + # Start from other model 2 alignments, + # with the constraint that j is aligned (pegged) to i + for j in range(1, m + 1): + for i in range(0, l + 1): + initial_alignment = self.best_model2_alignment(sentence_pair, j, i) + potential_alignment = self.hillclimb(initial_alignment, j) + neighbors = self.neighboring(potential_alignment, j) + sampled_alignments.update(neighbors) + if potential_alignment.score > best_alignment.score: + best_alignment = potential_alignment + + return sampled_alignments, best_alignment + + def best_model2_alignment(self, sentence_pair, j_pegged=None, i_pegged=0): + """ + Finds the best alignment according to IBM Model 2 + + Used as a starting point for hill climbing in Models 3 and + above, because it is easier to compute than the best alignments + in higher models + + :param sentence_pair: Source and target language sentence pair + to be word-aligned + :type sentence_pair: AlignedSent + + :param j_pegged: If specified, the alignment point of j_pegged + will be fixed to i_pegged + :type j_pegged: int + + :param i_pegged: Alignment point to j_pegged + :type i_pegged: int + """ + src_sentence = [None] + sentence_pair.mots + trg_sentence = ['UNUSED'] + sentence_pair.words # 1-indexed + + l = len(src_sentence) - 1 # exclude NULL + m = len(trg_sentence) - 1 + + alignment = [0] * (m + 1) # init all alignments to NULL + cepts = [[] for i in range((l + 1))] # init all cepts to empty list + + for j in range(1, m + 1): + if j == j_pegged: + # use the pegged alignment instead of searching for best one + best_i = i_pegged + else: + best_i = 0 + max_alignment_prob = IBMModel.MIN_PROB + t = trg_sentence[j] + + for i in range(0, l + 1): + s = src_sentence[i] + alignment_prob = ( + self.translation_table[t][s] * self.alignment_table[i][j][l][m] + ) + + if alignment_prob >= 
max_alignment_prob: + max_alignment_prob = alignment_prob + best_i = i + + alignment[j] = best_i + cepts[best_i].append(j) + + return AlignmentInfo( + tuple(alignment), tuple(src_sentence), tuple(trg_sentence), cepts + ) + + def hillclimb(self, alignment_info, j_pegged=None): + """ + Starting from the alignment in ``alignment_info``, look at + neighboring alignments iteratively for the best one + + There is no guarantee that the best alignment in the alignment + space will be found, because the algorithm might be stuck in a + local maximum. + + :param j_pegged: If specified, the search will be constrained to + alignments where ``j_pegged`` remains unchanged + :type j_pegged: int + + :return: The best alignment found from hill climbing + :rtype: AlignmentInfo + """ + alignment = alignment_info # alias with shorter name + max_probability = self.prob_t_a_given_s(alignment) + + while True: + old_alignment = alignment + for neighbor_alignment in self.neighboring(alignment, j_pegged): + neighbor_probability = self.prob_t_a_given_s(neighbor_alignment) + + if neighbor_probability > max_probability: + alignment = neighbor_alignment + max_probability = neighbor_probability + + if alignment == old_alignment: + # Until there are no better alignments + break + + alignment.score = max_probability + return alignment + + def neighboring(self, alignment_info, j_pegged=None): + """ + Determine the neighbors of ``alignment_info``, obtained by + moving or swapping one alignment point + + :param j_pegged: If specified, neighbors that have a different + alignment point from j_pegged will not be considered + :type j_pegged: int + + :return: A set neighboring alignments represented by their + ``AlignmentInfo`` + :rtype: set(AlignmentInfo) + """ + neighbors = set() + + l = len(alignment_info.src_sentence) - 1 # exclude NULL + m = len(alignment_info.trg_sentence) - 1 + original_alignment = alignment_info.alignment + original_cepts = alignment_info.cepts + + for j in range(1, m + 1): + if j != j_pegged: + # Add alignments that differ by one alignment point + for i in range(0, l + 1): + new_alignment = list(original_alignment) + new_cepts = deepcopy(original_cepts) + old_i = original_alignment[j] + + # update alignment + new_alignment[j] = i + + # update cepts + insort_left(new_cepts[i], j) + new_cepts[old_i].remove(j) + + new_alignment_info = AlignmentInfo( + tuple(new_alignment), + alignment_info.src_sentence, + alignment_info.trg_sentence, + new_cepts, + ) + neighbors.add(new_alignment_info) + + for j in range(1, m + 1): + if j != j_pegged: + # Add alignments that have two alignment points swapped + for other_j in range(1, m + 1): + if other_j != j_pegged and other_j != j: + new_alignment = list(original_alignment) + new_cepts = deepcopy(original_cepts) + other_i = original_alignment[other_j] + i = original_alignment[j] + + # update alignments + new_alignment[j] = other_i + new_alignment[other_j] = i + + # update cepts + new_cepts[other_i].remove(other_j) + insort_left(new_cepts[other_i], j) + new_cepts[i].remove(j) + insort_left(new_cepts[i], other_j) + + new_alignment_info = AlignmentInfo( + tuple(new_alignment), + alignment_info.src_sentence, + alignment_info.trg_sentence, + new_cepts, + ) + neighbors.add(new_alignment_info) + + return neighbors + + def maximize_lexical_translation_probabilities(self, counts): + for t, src_words in counts.t_given_s.items(): + for s in src_words: + estimate = counts.t_given_s[t][s] / counts.any_t_given_s[s] + self.translation_table[t][s] = max(estimate, IBMModel.MIN_PROB) + + 
def maximize_fertility_probabilities(self, counts): + for phi, src_words in counts.fertility.items(): + for s in src_words: + estimate = counts.fertility[phi][s] / counts.fertility_for_any_phi[s] + self.fertility_table[phi][s] = max(estimate, IBMModel.MIN_PROB) + + def maximize_null_generation_probabilities(self, counts): + p1_estimate = counts.p1 / (counts.p1 + counts.p0) + p1_estimate = max(p1_estimate, IBMModel.MIN_PROB) + # Clip p1 if it is too large, because p0 = 1 - p1 should not be + # smaller than MIN_PROB + self.p1 = min(p1_estimate, 1 - IBMModel.MIN_PROB) + + def prob_of_alignments(self, alignments): + probability = 0 + for alignment_info in alignments: + probability += self.prob_t_a_given_s(alignment_info) + return probability + + def prob_t_a_given_s(self, alignment_info): + """ + Probability of target sentence and an alignment given the + source sentence + + All required information is assumed to be in ``alignment_info`` + and self. + + Derived classes should override this method + """ + return 0.0 + + +class AlignmentInfo(object): + """ + Helper data object for training IBM Models 3 and up + + Read-only. For a source sentence and its counterpart in the target + language, this class holds information about the sentence pair's + alignment, cepts, and fertility. + + Warning: Alignments are one-indexed here, in contrast to + nltk.translate.Alignment and AlignedSent, which are zero-indexed + This class is not meant to be used outside of IBM models. + """ + + def __init__(self, alignment, src_sentence, trg_sentence, cepts): + if not isinstance(alignment, tuple): + raise TypeError( + "The alignment must be a tuple because it is used " + "to uniquely identify AlignmentInfo objects." + ) + + self.alignment = alignment + """ + tuple(int): Alignment function. ``alignment[j]`` is the position + in the source sentence that is aligned to the position j in the + target sentence. + """ + + self.src_sentence = src_sentence + """ + tuple(str): Source sentence referred to by this object. + Should include NULL token (None) in index 0. + """ + + self.trg_sentence = trg_sentence + """ + tuple(str): Target sentence referred to by this object. + Should have a dummy element in index 0 so that the first word + starts from index 1. + """ + + self.cepts = cepts + """ + list(list(int)): The positions of the target words, in + ascending order, aligned to a source word position. For example, + cepts[4] = (2, 3, 7) means that words in positions 2, 3 and 7 + of the target sentence are aligned to the word in position 4 of + the source sentence + """ + + self.score = None + """ + float: Optional. 
Probability of alignment, as defined by the + IBM model that assesses this alignment + """ + + def fertility_of_i(self, i): + """ + Fertility of word in position ``i`` of the source sentence + """ + return len(self.cepts[i]) + + def is_head_word(self, j): + """ + :return: Whether the word in position ``j`` of the target + sentence is a head word + """ + i = self.alignment[j] + return self.cepts[i][0] == j + + def center_of_cept(self, i): + """ + :return: The ceiling of the average positions of the words in + the tablet of cept ``i``, or 0 if ``i`` is None + """ + if i is None: + return 0 + + average_position = sum(self.cepts[i]) / len(self.cepts[i]) + return int(ceil(average_position)) + + def previous_cept(self, j): + """ + :return: The previous cept of ``j``, or None if ``j`` belongs to + the first cept + """ + i = self.alignment[j] + if i == 0: + raise ValueError( + "Words aligned to NULL cannot have a previous " + "cept because NULL has no position" + ) + previous_cept = i - 1 + while previous_cept > 0 and self.fertility_of_i(previous_cept) == 0: + previous_cept -= 1 + + if previous_cept <= 0: + previous_cept = None + return previous_cept + + def previous_in_tablet(self, j): + """ + :return: The position of the previous word that is in the same + tablet as ``j``, or None if ``j`` is the first word of the + tablet + """ + i = self.alignment[j] + tablet_position = self.cepts[i].index(j) + if tablet_position == 0: + return None + return self.cepts[i][tablet_position - 1] + + def zero_indexed_alignment(self): + """ + :return: Zero-indexed alignment, suitable for use in external + ``nltk.translate`` modules like ``nltk.translate.Alignment`` + :rtype: list(tuple) + """ + zero_indexed_alignment = [] + for j in range(1, len(self.trg_sentence)): + i = self.alignment[j] - 1 + if i < 0: + i = None # alignment to NULL token + zero_indexed_alignment.append((j - 1, i)) + return zero_indexed_alignment + + def __eq__(self, other): + return self.alignment == other.alignment + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self.alignment) + + +class Counts(object): + """ + Data object to store counts of various parameters during training + """ + + def __init__(self): + self.t_given_s = defaultdict(lambda: defaultdict(lambda: 0.0)) + self.any_t_given_s = defaultdict(lambda: 0.0) + self.p0 = 0.0 + self.p1 = 0.0 + self.fertility = defaultdict(lambda: defaultdict(lambda: 0.0)) + self.fertility_for_any_phi = defaultdict(lambda: 0.0) + + def update_lexical_translation(self, count, alignment_info, j): + i = alignment_info.alignment[j] + t = alignment_info.trg_sentence[j] + s = alignment_info.src_sentence[i] + self.t_given_s[t][s] += count + self.any_t_given_s[s] += count + + def update_null_generation(self, count, alignment_info): + m = len(alignment_info.trg_sentence) - 1 + fertility_of_null = alignment_info.fertility_of_i(0) + self.p1 += fertility_of_null * count + self.p0 += (m - 2 * fertility_of_null) * count + + def update_fertility(self, count, alignment_info): + for i in range(0, len(alignment_info.src_sentence)): + s = alignment_info.src_sentence[i] + phi = alignment_info.fertility_of_i(i) + self.fertility[phi][s] += count + self.fertility_for_any_phi[s] += count diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/meteor_score.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/meteor_score.py new file mode 100644 index 0000000..719d128 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/meteor_score.py @@ -0,0 +1,378 @@ 
+# -*- coding: utf-8 -*- +# Natural Language Toolkit: Machine Translation +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Uday Krishna +# URL: +# For license information, see LICENSE.TXT + + +from nltk.stem.porter import PorterStemmer +from nltk.corpus import wordnet +from itertools import chain, product + +def _generate_enums(hypothesis, reference, preprocess=str.lower): + """ + Takes in string inputs for hypothesis and reference and returns + enumerated word lists for each of them + + :param hypothesis: hypothesis string + :type hypothesis: str + :param reference: reference string + :type reference: str + :preprocess: preprocessing method (default str.lower) + :type preprocess: method + :return: enumerated words list + :rtype: list of 2D tuples, list of 2D tuples + """ + hypothesis_list = list(enumerate(preprocess(hypothesis).split())) + reference_list = list(enumerate(preprocess(reference).split())) + return hypothesis_list, reference_list + +def exact_match(hypothesis, reference): + """ + matches exact words in hypothesis and reference + and returns a word mapping based on the enumerated + word id between hypothesis and reference + + :param hypothesis: hypothesis string + :type hypothesis: str + :param reference: reference string + :type reference: str + :return: enumerated matched tuples, enumerated unmatched hypothesis tuples, + enumerated unmatched reference tuples + :rtype: list of 2D tuples, list of 2D tuples, list of 2D tuples + """ + hypothesis_list, reference_list = _generate_enums(hypothesis, reference) + return _match_enums(hypothesis_list, reference_list) + +def _match_enums(enum_hypothesis_list, enum_reference_list): + """ + matches exact words in hypothesis and reference and returns + a word mapping between enum_hypothesis_list and enum_reference_list + based on the enumerated word id. + + :param enum_hypothesis_list: enumerated hypothesis list + :type enum_hypothesis_list: list of tuples + :param enum_reference_list: enumerated reference list + :type enum_reference_list: list of 2D tuples + :return: enumerated matched tuples, enumerated unmatched hypothesis tuples, + enumerated unmatched reference tuples + :rtype: list of 2D tuples, list of 2D tuples, list of 2D tuples + """ + word_match = [] + for i in range(len(enum_hypothesis_list))[::-1]: + for j in range(len(enum_reference_list))[::-1]: + if enum_hypothesis_list[i][1] == enum_reference_list[j][1]: + word_match.append((enum_hypothesis_list[i][0],enum_reference_list[j][0])) + (enum_hypothesis_list.pop(i)[1],enum_reference_list.pop(j)[1]) + break + return word_match, enum_hypothesis_list, enum_reference_list + +def _enum_stem_match(enum_hypothesis_list, enum_reference_list, stemmer = PorterStemmer()): + """ + Stems each word and matches them in hypothesis and reference + and returns a word mapping between enum_hypothesis_list and + enum_reference_list based on the enumerated word id. The function also + returns a enumerated list of unmatched words for hypothesis and reference. 
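+
+    For example (illustrative): 'runs' and 'running' do not match exactly,
+    but both reduce to 'run' under the Porter stemmer, so they can still be
+    matched at this stage.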
+ + :param enum_hypothesis_list: + :type enum_hypothesis_list: + :param enum_reference_list: + :type enum_reference_list: + :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer()) + :type stemmer: nltk.stem.api.StemmerI or any class that implements a stem method + :return: enumerated matched tuples, enumerated unmatched hypothesis tuples, + enumerated unmatched reference tuples + :rtype: list of 2D tuples, list of 2D tuples, list of 2D tuples + """ + stemmed_enum_list1 = [(word_pair[0],stemmer.stem(word_pair[1])) \ + for word_pair in enum_hypothesis_list] + + stemmed_enum_list2 = [(word_pair[0],stemmer.stem(word_pair[1])) \ + for word_pair in enum_reference_list] + + word_match, enum_unmat_hypo_list, enum_unmat_ref_list = \ + _match_enums(stemmed_enum_list1, stemmed_enum_list2) + + enum_unmat_hypo_list = list(zip(*enum_unmat_hypo_list)) if len(enum_unmat_hypo_list)>0 else [] + + enum_unmat_ref_list = list(zip(*enum_unmat_ref_list)) if len(enum_unmat_ref_list)>0 else [] + + enum_hypothesis_list = list(filter(lambda x:x[0] not in enum_unmat_hypo_list, + enum_hypothesis_list)) + + enum_reference_list = list(filter(lambda x:x[0] not in enum_unmat_ref_list, + enum_reference_list)) + + return word_match, enum_hypothesis_list, enum_reference_list + +def stem_match(hypothesis, reference, stemmer = PorterStemmer()): + """ + Stems each word and matches them in hypothesis and reference + and returns a word mapping between hypothesis and reference + + :param hypothesis: + :type hypothesis: + :param reference: + :type reference: + :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer()) + :type stemmer: nltk.stem.api.StemmerI or any class that + implements a stem method + :return: enumerated matched tuples, enumerated unmatched hypothesis tuples, + enumerated unmatched reference tuples + :rtype: list of 2D tuples, list of 2D tuples, list of 2D tuples + """ + enum_hypothesis_list, enum_reference_list = _generate_enums(hypothesis, reference) + return _enum_stem_match(enum_hypothesis_list, enum_reference_list, stemmer = stemmer) + +def _enum_wordnetsyn_match(enum_hypothesis_list, enum_reference_list, wordnet = wordnet): + """ + Matches each word in reference to a word in hypothesis + if any synonym of a hypothesis word is the exact match + to the reference word. + + :param enum_hypothesis_list: enumerated hypothesis list + :param enum_reference_list: enumerated reference list + :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet) + :type wordnet: WordNetCorpusReader + :return: list of matched tuples, unmatched hypothesis list, unmatched reference list + :rtype: list of tuples, list of tuples, list of tuples + + """ + word_match = [] + for i in range(len(enum_hypothesis_list))[::-1]: + hypothesis_syns = set(chain(*[[lemma.name() for lemma in synset.lemmas() + if lemma.name().find('_')<0] + for synset in \ + wordnet.synsets( + enum_hypothesis_list[i][1])] + )).union({enum_hypothesis_list[i][1]}) + for j in range(len(enum_reference_list))[::-1]: + if enum_reference_list[j][1] in hypothesis_syns: + word_match.append((enum_hypothesis_list[i][0],enum_reference_list[j][0])) + enum_hypothesis_list.pop(i),enum_reference_list.pop(j) + break + return word_match, enum_hypothesis_list, enum_reference_list + +def wordnetsyn_match(hypothesis, reference, wordnet = wordnet): + """ + Matches each word in reference to a word in hypothesis if any synonym + of a hypothesis word is the exact match to the reference word. 
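+
+    For example (illustrative): if the hypothesis contains 'big', whose
+    WordNet synsets include the lemma 'large', then a reference word
+    'large' is matched here even though exact and stemmed matching fail.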
+ + :param hypothesis: hypothesis string + :param reference: reference string + :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet) + :type wordnet: WordNetCorpusReader + :return: list of mapped tuples + :rtype: list of tuples + """ + enum_hypothesis_list, enum_reference_list = _generate_enums(hypothesis, reference) + return _enum_wordnetsyn_match(enum_hypothesis_list, enum_reference_list, wordnet = wordnet) + +def _enum_allign_words(enum_hypothesis_list, enum_reference_list, + stemmer=PorterStemmer(), wordnet = wordnet): + """ + Aligns/matches words in the hypothesis to reference by sequentially + applying exact match, stemmed match and wordnet based synonym match. + in case there are multiple matches the match which has the least number + of crossing is chosen. Takes enumerated list as input instead of + string input + + :param enum_hypothesis_list: enumerated hypothesis list + :param enum_reference_list: enumerated reference list + :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer()) + :type stemmer: nltk.stem.api.StemmerI or any class that implements a stem method + :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet) + :type wordnet: WordNetCorpusReader + :return: sorted list of matched tuples, unmatched hypothesis list, + unmatched reference list + :rtype: list of tuples, list of tuples, list of tuples + """ + exact_matches, enum_hypothesis_list, enum_reference_list = \ + _match_enums(enum_hypothesis_list, enum_reference_list) + + stem_matches, enum_hypothesis_list, enum_reference_list = \ + _enum_stem_match(enum_hypothesis_list, enum_reference_list, + stemmer = stemmer) + + wns_matches, enum_hypothesis_list, enum_reference_list = \ + _enum_wordnetsyn_match(enum_hypothesis_list, enum_reference_list, + wordnet = wordnet) + + return (sorted(exact_matches + stem_matches + wns_matches, key=lambda wordpair:wordpair[0]), + enum_hypothesis_list, enum_reference_list) + +def allign_words(hypothesis, reference, stemmer = PorterStemmer(), wordnet = wordnet): + """ + Aligns/matches words in the hypothesis to reference by sequentially + applying exact match, stemmed match and wordnet based synonym match. + In case there are multiple matches the match which has the least number + of crossing is chosen. + + :param hypothesis: hypothesis string + :param reference: reference string + :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer()) + :type stemmer: nltk.stem.api.StemmerI or any class that implements a stem method + :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet) + :type wordnet: WordNetCorpusReader + :return: sorted list of matched tuples, unmatched hypothesis list, unmatched reference list + :rtype: list of tuples, list of tuples, list of tuples + """ + enum_hypothesis_list, enum_reference_list = _generate_enums(hypothesis, reference) + return _enum_allign_words(enum_hypothesis_list, enum_reference_list, stemmer= stemmer, + wordnet= wordnet) + +def _count_chunks(matches): + """ + Counts the fewest possible number of chunks such that matched unigrams + of each chunk are adjacent to each other. This is used to caluclate the + fragmentation part of the metric. 
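+
+    For example (illustrative): the mapping [(0, 0), (1, 1), (3, 4)] forms
+    two chunks, because (0, 0) and (1, 1) are adjacent in both sentences
+    while (3, 4) is not:
+
+    >>> _count_chunks([(0, 0), (1, 1), (3, 4)])
+    2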
+
+    :param matches: list containing a mapping of matched words (output of allign_words)
+    :return: Number of chunks a sentence is divided into post alignment
+    :rtype: int
+    """
+    i = 0
+    chunks = 1
+    while i < len(matches) - 1:
+        if (matches[i + 1][0] == matches[i][0] + 1 and
+                matches[i + 1][1] == matches[i][1] + 1):
+            i += 1
+            continue
+        i += 1
+        chunks += 1
+    return chunks
+
+def single_meteor_score(reference,
+                        hypothesis,
+                        preprocess = str.lower,
+                        stemmer = PorterStemmer(),
+                        wordnet = wordnet,
+                        alpha=0.9,
+                        beta=3,
+                        gamma=0.5):
+    """
+    Calculates METEOR score for a single hypothesis and reference as per
+    "Meteor: An Automatic Metric for MT Evaluation with High Levels of
+    Correlation with Human Judgments" by Alon Lavie and Abhaya Agarwal,
+    in Proceedings of ACL.
+    http://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie-Agarwal-2007-METEOR.pdf
+
+
+    >>> hypothesis1 = 'It is a guide to action which ensures that the military always obeys the commands of the party'
+
+    >>> reference1 = 'It is a guide to action that ensures that the military will forever heed Party commands'
+
+
+    >>> round(single_meteor_score(reference1, hypothesis1),4)
+    0.7398
+
+    If there is no words match during the alignment the method returns the
+    score as 0. We can safely return a zero instead of raising a
+    division by zero error as no match usually implies a bad translation.
+
+    >>> round(meteor_score('this is a cat', 'non matching hypothesis'),4)
+    0.0
+
+    :param reference: a reference sentence
+    :type reference: str
+    :param hypothesis: a hypothesis sentence
+    :type hypothesis: str
+    :param preprocess: preprocessing function (default str.lower)
+    :type preprocess: method
+    :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer())
+    :type stemmer: nltk.stem.api.StemmerI or any class that implements a stem method
+    :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet)
+    :type wordnet: WordNetCorpusReader
+    :param alpha: parameter for controlling relative weights of precision and recall.
+    :type alpha: float
+    :param beta: parameter for controlling shape of penalty as a
+                 function of fragmentation.
+    :type beta: float
+    :param gamma: relative weight assigned to fragmentation penalty.
+    :type gamma: float
+    :return: The sentence-level METEOR score.
+    :rtype: float
+    """
+    enum_hypothesis, enum_reference = _generate_enums(hypothesis,
+                                                      reference,
+                                                      preprocess = preprocess)
+    translation_length = len(enum_hypothesis)
+    reference_length = len(enum_reference)
+    matches, _, _ = _enum_allign_words(enum_hypothesis, enum_reference)
+    matches_count = len(matches)
+    try:
+        precision = float(matches_count)/translation_length
+        recall = float(matches_count)/reference_length
+        fmean = (precision*recall)/(alpha*precision+(1-alpha)*recall)
+        chunk_count = float(_count_chunks(matches))
+        frag_frac = chunk_count/matches_count
+    except ZeroDivisionError:
+        return 0.0
+    penalty = gamma*frag_frac**beta
+    return (1-penalty)*fmean
+
+def meteor_score(references,
+                 hypothesis,
+                 preprocess = str.lower,
+                 stemmer = PorterStemmer(),
+                 wordnet = wordnet,
+                 alpha=0.9,
+                 beta=3,
+                 gamma=0.5):
+    """
+    Calculates METEOR score for hypothesis with multiple references as
+    described in "Meteor: An Automatic Metric for MT Evaluation with
+    High Levels of Correlation with Human Judgments" by Alon Lavie and
+    Abhaya Agarwal, in Proceedings of ACL.
+    http://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie-Agarwal-2007-METEOR.pdf
+
+
+    In case of multiple references the best score is chosen.
This method + iterates over single_meteor_score and picks the best pair among all + the references for a given hypothesis + + >>> hypothesis1 = 'It is a guide to action which ensures that the military always obeys the commands of the party' + >>> hypothesis2 = 'It is to insure the troops forever hearing the activity guidebook that party direct' + + >>> reference1 = 'It is a guide to action that ensures that the military will forever heed Party commands' + >>> reference2 = 'It is the guiding principle which guarantees the military forces always being under the command of the Party' + >>> reference3 = 'It is the practical guide for the army always to heed the directions of the party' + + >>> round(meteor_score([reference1, reference2, reference3], hypothesis1),4) + 0.7398 + + If there is no words match during the alignment the method returns the + score as 0. We can safely return a zero instead of raising a + division by zero error as no match usually implies a bad translation. + + >>> round(meteor_score(['this is a cat'], 'non matching hypothesis'),4) + 0.0 + + :param references: reference sentences + :type references: list(str) + :param hypothesis: a hypothesis sentence + :type hypothesis: str + :param preprocess: preprocessing function (default str.lower) + :type preprocess: method + :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer()) + :type stemmer: nltk.stem.api.StemmerI or any class that implements a stem method + :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet) + :type wordnet: WordNetCorpusReader + :param alpha: parameter for controlling relative weights of precision and recall. + :type alpha: float + :param beta: parameter for controlling shape of penalty as a function + of as a function of fragmentation. + :type beta: float + :param gamma: relative weight assigned to fragmentation penality. + :type gamma: float + :return: The sentence-level METEOR score. + :rtype: float + """ + return max([single_meteor_score(reference, + hypothesis, + stemmer = stemmer, + wordnet = wordnet, + alpha = alpha, + beta = beta, + gamma = gamma) for reference in references]) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/metrics.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/metrics.py new file mode 100644 index 0000000..a984f96 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/metrics.py @@ -0,0 +1,42 @@ +# Natural Language Toolkit: Translation metrics +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Will Zhang +# Guan Gui +# Steven Bird +# URL: +# For license information, see LICENSE.TXT +from __future__ import division + + +def alignment_error_rate(reference, hypothesis, possible=None): + """ + Return the Alignment Error Rate (AER) of an alignment + with respect to a "gold standard" reference alignment. + Return an error rate between 0.0 (perfect alignment) and 1.0 (no + alignment). + + >>> from nltk.translate import Alignment + >>> ref = Alignment([(0, 0), (1, 1), (2, 2)]) + >>> test = Alignment([(0, 0), (1, 2), (2, 1)]) + >>> alignment_error_rate(ref, test) # doctest: +ELLIPSIS + 0.6666666666666667 + + :type reference: Alignment + :param reference: A gold standard alignment (sure alignments) + :type hypothesis: Alignment + :param hypothesis: A hypothesis alignment (aka. 
candidate alignments) + :type possible: Alignment or None + :param possible: A gold standard reference of possible alignments + (defaults to *reference* if None) + :rtype: float or None + """ + + if possible is None: + possible = reference + else: + assert reference.issubset(possible) # sanity check + + return 1.0 - (len(hypothesis & reference) + len(hypothesis & possible)) / float( + len(hypothesis) + len(reference) + ) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/nist_score.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/nist_score.py new file mode 100644 index 0000000..57b2074 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/nist_score.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: NIST Score +# +# Copyright (C) 2001-2019 NLTK Project +# Authors: +# Contributors: +# URL: +# For license information, see LICENSE.TXT + +"""NIST score implementation.""" +from __future__ import division + +import math +import fractions +from collections import Counter + +from nltk.util import ngrams + + +def sentence_nist(references, hypothesis, n=5): + """ + Calculate NIST score from + George Doddington. 2002. "Automatic evaluation of machine translation quality + using n-gram co-occurrence statistics." Proceedings of HLT. + Morgan Kaufmann Publishers Inc. http://dl.acm.org/citation.cfm?id=1289189.1289273 + + DARPA commissioned NIST to develop an MT evaluation facility based on the BLEU + score. The official script used by NIST to compute BLEU and NIST score is + mteval-14.pl. The main differences are: + + - BLEU uses geometric mean of the ngram overlaps, NIST uses arithmetic mean. + - NIST has a different brevity penalty + - NIST score from mteval-14.pl has a self-contained tokenizer + + Note: The mteval-14.pl includes a smoothing function for BLEU score that is NOT + used in the NIST score computation. + + >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', + ... 'ensures', 'that', 'the', 'military', 'always', + ... 'obeys', 'the', 'commands', 'of', 'the', 'party'] + + >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops', + ... 'forever', 'hearing', 'the', 'activity', 'guidebook', + ... 'that', 'party', 'direct'] + + >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', + ... 'ensures', 'that', 'the', 'military', 'will', 'forever', + ... 'heed', 'Party', 'commands'] + + >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which', + ... 'guarantees', 'the', 'military', 'forces', 'always', + ... 'being', 'under', 'the', 'command', 'of', 'the', + ... 'Party'] + + >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the', + ... 'army', 'always', 'to', 'heed', 'the', 'directions', + ... 'of', 'the', 'party'] + + >>> sentence_nist([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS + 3.3709... + + >>> sentence_nist([reference1, reference2, reference3], hypothesis2) # doctest: +ELLIPSIS + 1.4619... + + :param references: reference sentences + :type references: list(list(str)) + :param hypothesis: a hypothesis sentence + :type hypothesis: list(str) + :param n: highest n-gram order + :type n: int + """ + return corpus_nist([references], [hypothesis], n) + + +def corpus_nist(list_of_references, hypotheses, n=5): + """ + Calculate a single corpus-level NIST score (aka. system-level BLEU) for all + the hypotheses and their respective references. + + :param references: a corpus of lists of reference sentences, w.r.t. 
hypotheses + :type references: list(list(list(str))) + :param hypotheses: a list of hypothesis sentences + :type hypotheses: list(list(str)) + :param n: highest n-gram order + :type n: int + """ + # Before proceeding to compute NIST, perform sanity checks. + assert len(list_of_references) == len( + hypotheses + ), "The number of hypotheses and their reference(s) should be the same" + + # Collect the ngram coounts from the reference sentences. + ngram_freq = Counter() + total_reference_words = 0 + for ( + references + ) in list_of_references: # For each source sent, there's a list of reference sents. + for reference in references: + # For each order of ngram, count the ngram occurrences. + for i in range(1, n + 1): + ngram_freq.update(ngrams(reference, i)) + total_reference_words += len(reference) + + # Compute the information weights based on the reference sentences. + # Eqn 2 in Doddington (2002): + # Info(w_1 ... w_n) = log_2 [ (# of occurrences of w_1 ... w_n-1) / (# of occurrences of w_1 ... w_n) ] + information_weights = {} + for _ngram in ngram_freq: # w_1 ... w_n + _mgram = _ngram[:-1] # w_1 ... w_n-1 + # From https://github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/mteval-v13a.pl#L546 + # it's computed as such: + # denominator = ngram_freq[_mgram] if _mgram and _mgram in ngram_freq else denominator = total_reference_words + # information_weights[_ngram] = -1 * math.log(ngram_freq[_ngram]/denominator) / math.log(2) + # + # Mathematically, it's equivalent to the our implementation: + if _mgram and _mgram in ngram_freq: + numerator = ngram_freq[_mgram] + else: + numerator = total_reference_words + information_weights[_ngram] = math.log(numerator / ngram_freq[_ngram], 2) + + # Micro-average. + nist_precision_numerator_per_ngram = Counter() + nist_precision_denominator_per_ngram = Counter() + l_ref, l_sys = 0, 0 + # For each order of ngram. + for i in range(1, n + 1): + # Iterate through each hypothesis and their corresponding references. + for references, hypothesis in zip(list_of_references, hypotheses): + hyp_len = len(hypothesis) + + # Find reference with the best NIST score. + nist_score_per_ref = [] + for reference in references: + _ref_len = len(reference) + # Counter of ngrams in hypothesis. + hyp_ngrams = ( + Counter(ngrams(hypothesis, i)) + if len(hypothesis) >= i + else Counter() + ) + ref_ngrams = ( + Counter(ngrams(reference, i)) if len(reference) >= i else Counter() + ) + ngram_overlaps = hyp_ngrams & ref_ngrams + # Precision part of the score in Eqn 3 + _numerator = sum( + information_weights[_ngram] * count + for _ngram, count in ngram_overlaps.items() + ) + _denominator = sum(hyp_ngrams.values()) + _precision = 0 if _denominator == 0 else _numerator / _denominator + nist_score_per_ref.append( + (_precision, _numerator, _denominator, _ref_len) + ) + # Best reference. + precision, numerator, denominator, ref_len = max(nist_score_per_ref) + nist_precision_numerator_per_ngram[i] += numerator + nist_precision_denominator_per_ngram[i] += denominator + l_ref += ref_len + l_sys += hyp_len + + # Final NIST micro-average mean aggregation. + nist_precision = 0 + for i in nist_precision_numerator_per_ngram: + precision = ( + nist_precision_numerator_per_ngram[i] + / nist_precision_denominator_per_ngram[i] + ) + nist_precision += precision + # Eqn 3 in Doddington(2002) + return nist_precision * nist_length_penalty(l_ref, l_sys) + + +def nist_length_penalty(ref_len, hyp_len): + """ + Calculates the NIST length penalty, from Eq. 
3 in Doddington (2002) + + penalty = exp( beta * log( min( len(hyp)/len(ref) , 1.0 ))) + + where, + + `beta` is chosen to make the brevity penalty factor = 0.5 when the + no. of words in the system output (hyp) is 2/3 of the average + no. of words in the reference translation (ref) + + The NIST penalty is different from BLEU's such that it minimize the impact + of the score of small variations in the length of a translation. + See Fig. 4 in Doddington (2002) + """ + ratio = hyp_len / ref_len + if 0 < ratio < 1: + ratio_x, score_x = 1.5, 0.5 + beta = math.log(score_x) / math.log(ratio_x) ** 2 + return math.exp(beta * math.log(ratio) ** 2) + else: # ratio <= 0 or ratio >= 1 + return max(min(ratio, 1.0), 0.0) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/phrase_based.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/phrase_based.py new file mode 100644 index 0000000..b8ada5c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/phrase_based.py @@ -0,0 +1,196 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Phrase Extraction Algorithm +# +# Copyright (C) 2001-2019 NLTK Project +# Authors: Liling Tan, Fredrik Hedman, Petra Barancikova +# URL: +# For license information, see LICENSE.TXT + + +def extract( + f_start, + f_end, + e_start, + e_end, + alignment, + f_aligned, + srctext, + trgtext, + srclen, + trglen, + max_phrase_length, +): + """ + This function checks for alignment point consistency and extracts + phrases using the chunk of consistent phrases. + + A phrase pair (e, f ) is consistent with an alignment A if and only if: + + (i) No English words in the phrase pair are aligned to words outside it. + + ∀e i ∈ e, (e i , f j ) ∈ A ⇒ f j ∈ f + + (ii) No Foreign words in the phrase pair are aligned to words outside it. + + ∀f j ∈ f , (e i , f j ) ∈ A ⇒ e i ∈ e + + (iii) The phrase pair contains at least one alignment point. + + ∃e i ∈ e ̄ , f j ∈ f ̄ s.t. (e i , f j ) ∈ A + + :type f_start: int + :param f_start: Starting index of the possible foreign language phrases + :type f_end: int + :param f_end: Starting index of the possible foreign language phrases + :type e_start: int + :param e_start: Starting index of the possible source language phrases + :type e_end: int + :param e_end: Starting index of the possible source language phrases + :type srctext: list + :param srctext: The source language tokens, a list of string. + :type trgtext: list + :param trgtext: The target language tokens, a list of string. + :type srclen: int + :param srclen: The number of tokens in the source language tokens. + :type trglen: int + :param trglen: The number of tokens in the target language tokens. + """ + + if f_end < 0: # 0-based indexing. + return {} + # Check if alignment points are consistent. + for e, f in alignment: + if (f_start <= f <= f_end) and (e < e_start or e > e_end): + return {} + + # Add phrase pairs (incl. additional unaligned f) + phrases = set() + fs = f_start + while True: + fe = min(f_end, f_start + max_phrase_length - 1) + while True: + # add phrase pair ([e_start, e_end], [fs, fe]) to set E + # Need to +1 in range to include the end-point. + src_phrase = " ".join(srctext[e_start : e_end + 1]) + trg_phrase = " ".join(trgtext[fs : fe + 1]) + # Include more data for later ordering. 
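+            # Illustration: spans are stored with exclusive end indices, so a
+            # pair such as ((0, 2), (0, 4), ...) covers source tokens 0-1 and
+            # target tokens 0-3; the target string itself may include extra
+            # unaligned boundary words (see the phrase_extraction doctest,
+            # where the span (0, 4) appears both with and without the
+            # trailing comma).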
+ phrases.add( + ((e_start, e_end + 1), (f_start, f_end + 1), src_phrase, trg_phrase) + ) + fe += 1 + if fe in f_aligned or fe >= trglen: + break + fs -= 1 + if fs in f_aligned or fs < 0: + break + return phrases + + +def phrase_extraction(srctext, trgtext, alignment, max_phrase_length=0): + """ + Phrase extraction algorithm extracts all consistent phrase pairs from + a word-aligned sentence pair. + + The idea is to loop over all possible source language (e) phrases and find + the minimal foreign phrase (f) that matches each of them. Matching is done + by identifying all alignment points for the source phrase and finding the + shortest foreign phrase that includes all the foreign counterparts for the + source words. + + In short, a phrase alignment has to + (a) contain all alignment points for all covered words + (b) contain at least one alignment point + + >>> srctext = "michael assumes that he will stay in the house" + >>> trgtext = "michael geht davon aus , dass er im haus bleibt" + >>> alignment = [(0,0), (1,1), (1,2), (1,3), (2,5), (3,6), (4,9), + ... (5,9), (6,7), (7,7), (8,8)] + >>> phrases = phrase_extraction(srctext, trgtext, alignment) + >>> for i in sorted(phrases): + ... print(i) + ... + ((0, 1), (0, 1), 'michael', 'michael') + ((0, 2), (0, 4), 'michael assumes', 'michael geht davon aus') + ((0, 2), (0, 4), 'michael assumes', 'michael geht davon aus ,') + ((0, 3), (0, 6), 'michael assumes that', 'michael geht davon aus , dass') + ((0, 4), (0, 7), 'michael assumes that he', 'michael geht davon aus , dass er') + ((0, 9), (0, 10), 'michael assumes that he will stay in the house', 'michael geht davon aus , dass er im haus bleibt') + ((1, 2), (1, 4), 'assumes', 'geht davon aus') + ((1, 2), (1, 4), 'assumes', 'geht davon aus ,') + ((1, 3), (1, 6), 'assumes that', 'geht davon aus , dass') + ((1, 4), (1, 7), 'assumes that he', 'geht davon aus , dass er') + ((1, 9), (1, 10), 'assumes that he will stay in the house', 'geht davon aus , dass er im haus bleibt') + ((2, 3), (5, 6), 'that', ', dass') + ((2, 3), (5, 6), 'that', 'dass') + ((2, 4), (5, 7), 'that he', ', dass er') + ((2, 4), (5, 7), 'that he', 'dass er') + ((2, 9), (5, 10), 'that he will stay in the house', ', dass er im haus bleibt') + ((2, 9), (5, 10), 'that he will stay in the house', 'dass er im haus bleibt') + ((3, 4), (6, 7), 'he', 'er') + ((3, 9), (6, 10), 'he will stay in the house', 'er im haus bleibt') + ((4, 6), (9, 10), 'will stay', 'bleibt') + ((4, 9), (7, 10), 'will stay in the house', 'im haus bleibt') + ((6, 8), (7, 8), 'in the', 'im') + ((6, 9), (7, 9), 'in the house', 'im haus') + ((8, 9), (8, 9), 'house', 'haus') + + :type srctext: str + :param srctext: The sentence string from the source language. + :type trgtext: str + :param trgtext: The sentence string from the target language. + :type alignment: list(tuple) + :param alignment: The word alignment outputs as list of tuples, where + the first elements of tuples are the source words' indices and + second elements are the target words' indices. This is also the output + format of nltk.translate.ibm1 + :rtype: list(tuple) + :return: A list of tuples, each element in a list is a phrase and each + phrase is a tuple made up of (i) its source location, (ii) its target + location, (iii) the source phrase and (iii) the target phrase. The phrase + list of tuples represents all the possible phrases extracted from the + word alignments. 
+ :type max_phrase_length: int + :param max_phrase_length: maximal phrase length, if 0 or not specified + it is set to a length of the longer sentence (srctext or trgtext). + """ + + srctext = srctext.split() # e + trgtext = trgtext.split() # f + srclen = len(srctext) # len(e) + trglen = len(trgtext) # len(f) + # Keeps an index of which source/target words that are aligned. + f_aligned = [j for _, j in alignment] + max_phrase_length = max_phrase_length or max(srclen, trglen) + + # set of phrase pairs BP + bp = set() + + for e_start in range(srclen): + max_idx = min(srclen, e_start + max_phrase_length) + for e_end in range(e_start, max_idx): + # // find the minimally matching foreign phrase + # (f start , f end ) = ( length(f), 0 ) + # f_start ∈ [0, len(f) - 1]; f_end ∈ [0, len(f) - 1] + f_start, f_end = trglen - 1, -1 # 0-based indexing + + for e, f in alignment: + if e_start <= e <= e_end: + f_start = min(f, f_start) + f_end = max(f, f_end) + # add extract (f start , f end , e start , e end ) to set BP + phrases = extract( + f_start, + f_end, + e_start, + e_end, + alignment, + f_aligned, + srctext, + trgtext, + srclen, + trglen, + max_phrase_length, + ) + if phrases: + bp.update(phrases) + return bp diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/ribes_score.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/ribes_score.py new file mode 100644 index 0000000..fac42b0 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/ribes_score.py @@ -0,0 +1,325 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: RIBES Score +# +# Copyright (C) 2001-2019 NLTK Project +# Contributors: Katsuhito Sudoh, Liling Tan, Kasramvd, J.F.Sebastian +# Mark Byers, ekhumoro, P. Ortiz +# URL: +# For license information, see LICENSE.TXT +""" RIBES score implementation """ +from __future__ import division +from itertools import islice +import math + +from nltk.util import ngrams, choose + + +def sentence_ribes(references, hypothesis, alpha=0.25, beta=0.10): + """ + The RIBES (Rank-based Intuitive Bilingual Evaluation Score) from + Hideki Isozaki, Tsutomu Hirao, Kevin Duh, Katsuhito Sudoh and + Hajime Tsukada. 2010. "Automatic Evaluation of Translation Quality for + Distant Language Pairs". In Proceedings of EMNLP. + http://www.aclweb.org/anthology/D/D10/D10-1092.pdf + + The generic RIBES scores used in shared task, e.g. Workshop for + Asian Translation (WAT) uses the following RIBES calculations: + + RIBES = kendall_tau * (alpha**p1) * (beta**bp) + + Please note that this re-implementation differs from the official + RIBES implementation and though it emulates the results as describe + in the original paper, there are further optimization implemented + in the official RIBES script. + + Users are encouraged to use the official RIBES script instead of this + implementation when evaluating your machine translation system. Refer + to http://www.kecl.ntt.co.jp/icl/lirg/ribes/ for the official script. + + :param references: a list of reference sentences + :type reference: list(list(str)) + :param hypothesis: a hypothesis sentence + :type hypothesis: list(str) + :param alpha: hyperparameter used as a prior for the unigram precision. + :type alpha: float + :param beta: hyperparameter used as a prior for the brevity penalty. + :type beta: float + :return: The best ribes score from one of the references. + :rtype: float + """ + best_ribes = -1.0 + # Calculates RIBES for each reference and returns the best score. 
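+    # A rough walk-through with made-up numbers: with the defaults
+    # alpha=0.25 and beta=0.10, a reference yielding nkt=0.8, p1=0.9 and
+    # bp=1.0 scores about 0.8 * 0.9**0.25 * 1.0**0.10 ~= 0.78.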
+ for reference in references: + # Collects the *worder* from the ranked correlation alignments. + worder = word_rank_alignment(reference, hypothesis) + nkt = kendall_tau(worder) + + # Calculates the brevity penalty + bp = min(1.0, math.exp(1.0 - len(reference) / len(hypothesis))) + + # Calculates the unigram precision, *p1* + p1 = len(worder) / len(hypothesis) + + _ribes = nkt * (p1 ** alpha) * (bp ** beta) + + if _ribes > best_ribes: # Keeps the best score. + best_ribes = _ribes + + return best_ribes + + +def corpus_ribes(list_of_references, hypotheses, alpha=0.25, beta=0.10): + """ + This function "calculates RIBES for a system output (hypothesis) with + multiple references, and returns "best" score among multi-references and + individual scores. The scores are corpus-wise, i.e., averaged by the number + of sentences." (c.f. RIBES version 1.03.1 code). + + Different from BLEU's micro-average precision, RIBES calculates the + macro-average precision by averaging the best RIBES score for each pair of + hypothesis and its corresponding references + + >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', + ... 'ensures', 'that', 'the', 'military', 'always', + ... 'obeys', 'the', 'commands', 'of', 'the', 'party'] + >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', + ... 'ensures', 'that', 'the', 'military', 'will', 'forever', + ... 'heed', 'Party', 'commands'] + >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which', + ... 'guarantees', 'the', 'military', 'forces', 'always', + ... 'being', 'under', 'the', 'command', 'of', 'the', 'Party'] + >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the', + ... 'army', 'always', 'to', 'heed', 'the', 'directions', + ... 'of', 'the', 'party'] + + >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was', + ... 'interested', 'in', 'world', 'history'] + >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history', + ... 'because', 'he', 'read', 'the', 'book'] + + >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]] + >>> hypotheses = [hyp1, hyp2] + >>> round(corpus_ribes(list_of_references, hypotheses),4) + 0.3597 + + :param references: a corpus of lists of reference sentences, w.r.t. hypotheses + :type references: list(list(list(str))) + :param hypotheses: a list of hypothesis sentences + :type hypotheses: list(list(str)) + :param alpha: hyperparameter used as a prior for the unigram precision. + :type alpha: float + :param beta: hyperparameter used as a prior for the brevity penalty. + :type beta: float + :return: The best ribes score from one of the references. + :rtype: float + """ + corpus_best_ribes = 0.0 + # Iterate through each hypothesis and their corresponding references. + for references, hypothesis in zip(list_of_references, hypotheses): + corpus_best_ribes += sentence_ribes(references, hypothesis, alpha, beta) + return corpus_best_ribes / len(hypotheses) + + +def position_of_ngram(ngram, sentence): + """ + This function returns the position of the first instance of the ngram + appearing in a sentence. + + Note that one could also use string as follows but the code is a little + convoluted with type casting back and forth: + + char_pos = ' '.join(sent)[:' '.join(sent).index(' '.join(ngram))] + word_pos = char_pos.count(' ') + + Another way to conceive this is: + + return next(i for i, ng in enumerate(ngrams(sentence, len(ngram))) + if ng == ngram) + + :param ngram: The ngram that needs to be searched + :type ngram: tuple + :param sentence: The list of tokens to search from. 
+ :type sentence: list(str) + """ + # Iterates through the ngrams in sentence. + for i, sublist in enumerate(ngrams(sentence, len(ngram))): + # Returns the index of the word when ngram matches. + if ngram == sublist: + return i + + +def word_rank_alignment(reference, hypothesis, character_based=False): + """ + This is the word rank alignment algorithm described in the paper to produce + the *worder* list, i.e. a list of word indices of the hypothesis word orders + w.r.t. the list of reference words. + + Below is (H0, R0) example from the Isozaki et al. 2010 paper, + note the examples are indexed from 1 but the results here are indexed from 0: + + >>> ref = str('he was interested in world history because he ' + ... 'read the book').split() + >>> hyp = str('he read the book because he was interested in world ' + ... 'history').split() + >>> word_rank_alignment(ref, hyp) + [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5] + + The (H1, R1) example from the paper, note the 0th index: + + >>> ref = 'John hit Bob yesterday'.split() + >>> hyp = 'Bob hit John yesterday'.split() + >>> word_rank_alignment(ref, hyp) + [2, 1, 0, 3] + + Here is the (H2, R2) example from the paper, note the 0th index here too: + + >>> ref = 'the boy read the book'.split() + >>> hyp = 'the book was read by the boy'.split() + >>> word_rank_alignment(ref, hyp) + [3, 4, 2, 0, 1] + + :param reference: a reference sentence + :type reference: list(str) + :param hypothesis: a hypothesis sentence + :type hypothesis: list(str) + """ + worder = [] + hyp_len = len(hypothesis) + # Stores a list of possible ngrams from the reference sentence. + # This is used for matching context window later in the algorithm. + ref_ngrams = [] + hyp_ngrams = [] + for n in range(1, len(reference) + 1): + for ng in ngrams(reference, n): + ref_ngrams.append(ng) + for ng in ngrams(hypothesis, n): + hyp_ngrams.append(ng) + for i, h_word in enumerate(hypothesis): + # If word is not in the reference, continue. + if h_word not in reference: + continue + # If we can determine one-to-one word correspondence for unigrams that + # only appear once in both the reference and hypothesis. + elif hypothesis.count(h_word) == reference.count(h_word) == 1: + worder.append(reference.index(h_word)) + else: + max_window_size = max(i, hyp_len - i + 1) + for window in range(1, max_window_size): + if i + window < hyp_len: # If searching the right context is possible. + # Retrieve the right context window. + right_context_ngram = tuple(islice(hypothesis, i, i + window + 1)) + num_times_in_ref = ref_ngrams.count(right_context_ngram) + num_times_in_hyp = hyp_ngrams.count(right_context_ngram) + # If ngram appears only once in both ref and hyp. + if num_times_in_ref == num_times_in_hyp == 1: + # Find the position of ngram that matched the reference. + pos = position_of_ngram(right_context_ngram, reference) + worder.append(pos) # Add the positions of the ngram. + break + if window <= i: # If searching the left context is possible. + # Retrieve the left context window. + left_context_ngram = tuple(islice(hypothesis, i - window, i + 1)) + num_times_in_ref = ref_ngrams.count(left_context_ngram) + num_times_in_hyp = hyp_ngrams.count(left_context_ngram) + if num_times_in_ref == num_times_in_hyp == 1: + # Find the position of ngram that matched the reference. + pos = position_of_ngram(left_context_ngram, reference) + # Add the positions of the ngram. 
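+                        # The left-context ngram ends at the current word, so
+                        # its start position in the reference is shifted by
+                        # len(ngram) - 1 to point at that word's counterpart.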
+ worder.append(pos + len(left_context_ngram) - 1) + break + return worder + + +def find_increasing_sequences(worder): + """ + Given the *worder* list, this function groups monotonic +1 sequences. + + >>> worder = [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5] + >>> list(find_increasing_sequences(worder)) + [(7, 8, 9, 10), (0, 1, 2, 3, 4, 5)] + + :param worder: The worder list output from word_rank_alignment + :param type: list(int) + """ + items = iter(worder) + a, b = None, next(items, None) + result = [b] + while b is not None: + a, b = b, next(items, None) + if b is not None and a + 1 == b: + result.append(b) + else: + if len(result) > 1: + yield tuple(result) + result = [b] + + +def kendall_tau(worder, normalize=True): + """ + Calculates the Kendall's Tau correlation coefficient given the *worder* + list of word alignments from word_rank_alignment(), using the formula: + + tau = 2 * num_increasing_pairs / num_possible pairs -1 + + Note that the no. of increasing pairs can be discontinuous in the *worder* + list and each each increasing sequence can be tabulated as choose(len(seq), 2) + no. of increasing pairs, e.g. + + >>> worder = [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5] + >>> number_possible_pairs = choose(len(worder), 2) + >>> round(kendall_tau(worder, normalize=False),3) + -0.236 + >>> round(kendall_tau(worder),3) + 0.382 + + :param worder: The worder list output from word_rank_alignment + :type worder: list(int) + :param normalize: Flag to indicate normalization + :type normalize: boolean + :return: The Kendall's Tau correlation coefficient. + :rtype: float + """ + worder_len = len(worder) + # Extract the groups of increasing/monotonic sequences. + increasing_sequences = find_increasing_sequences(worder) + # Calculate no. of increasing_pairs in *worder* list. + num_increasing_pairs = sum(choose(len(seq), 2) for seq in increasing_sequences) + # Calculate no. of possible pairs. + num_possible_pairs = choose(worder_len, 2) + # Kendall's Tau computation. + tau = 2 * num_increasing_pairs / num_possible_pairs - 1 + if normalize: # If normalized, the tau output falls between 0.0 to 1.0 + return (tau + 1) / 2 + else: # Otherwise, the tau outputs falls between -1.0 to +1.0 + return tau + + +def spearman_rho(worder, normalize=True): + """ + Calculates the Spearman's Rho correlation coefficient given the *worder* + list of word alignment from word_rank_alignment(), using the formula: + + rho = 1 - sum(d**2) / choose(len(worder)+1, 3) + + Given that d is the sum of difference between the *worder* list of indices + and the original word indices from the reference sentence. 
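+
+    Note that choose(len(worder)+1, 3) equals n*(n**2 - 1)/6 for n = len(worder),
+    so this is the usual Spearman formula rho = 1 - 6*sum(d**2) / (n*(n**2 - 1)).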
+ + Using the (H0,R0) and (H5, R5) example from the paper + + >>> worder = [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5] + >>> round(spearman_rho(worder, normalize=False), 3) + -0.591 + >>> round(spearman_rho(worder), 3) + 0.205 + + :param worder: The worder list output from word_rank_alignment + :param type: list(int) + """ + worder_len = len(worder) + sum_d_square = sum((wi - i) ** 2 for wi, i in zip(worder, range(worder_len))) + rho = 1 - sum_d_square / choose(worder_len + 1, 3) + + if normalize: # If normalized, the rho output falls between 0.0 to 1.0 + return (rho + 1) / 2 + else: # Otherwise, the rho outputs falls between -1.0 to +1.0 + return rho diff --git a/venv.bak/lib/python3.7/site-packages/nltk/translate/stack_decoder.py b/venv.bak/lib/python3.7/site-packages/nltk/translate/stack_decoder.py new file mode 100644 index 0000000..2b4194a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/translate/stack_decoder.py @@ -0,0 +1,516 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Stack decoder +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Tah Wei Hoon +# URL: +# For license information, see LICENSE.TXT + +""" +A decoder that uses stacks to implement phrase-based translation. + +In phrase-based translation, the source sentence is segmented into +phrases of one or more words, and translations for those phrases are +used to build the target sentence. + +Hypothesis data structures are used to keep track of the source words +translated so far and the partial output. A hypothesis can be expanded +by selecting an untranslated phrase, looking up its translation in a +phrase table, and appending that translation to the partial output. +Translation is complete when a hypothesis covers all source words. + +The search space is huge because the source sentence can be segmented +in different ways, the source phrases can be selected in any order, +and there could be multiple translations for the same source phrase in +the phrase table. To make decoding tractable, stacks are used to limit +the number of candidate hypotheses by doing histogram and/or threshold +pruning. + +Hypotheses with the same number of words translated are placed in the +same stack. In histogram pruning, each stack has a size limit, and +the hypothesis with the lowest score is removed when the stack is full. +In threshold pruning, hypotheses that score below a certain threshold +of the best hypothesis in that stack are removed. + +Hypothesis scoring can include various factors such as phrase +translation probability, language model probability, length of +translation, cost of remaining words to be translated, and so on. + + +References: +Philipp Koehn. 2010. Statistical Machine Translation. +Cambridge University Press, New York. 
+""" + +import warnings +from collections import defaultdict +from math import log + + +class StackDecoder(object): + """ + Phrase-based stack decoder for machine translation + + >>> from nltk.translate import PhraseTable + >>> phrase_table = PhraseTable() + >>> phrase_table.add(('niemand',), ('nobody',), log(0.8)) + >>> phrase_table.add(('niemand',), ('no', 'one'), log(0.2)) + >>> phrase_table.add(('erwartet',), ('expects',), log(0.8)) + >>> phrase_table.add(('erwartet',), ('expecting',), log(0.2)) + >>> phrase_table.add(('niemand', 'erwartet'), ('one', 'does', 'not', 'expect'), log(0.1)) + >>> phrase_table.add(('die', 'spanische', 'inquisition'), ('the', 'spanish', 'inquisition'), log(0.8)) + >>> phrase_table.add(('!',), ('!',), log(0.8)) + + >>> # nltk.model should be used here once it is implemented + >>> from collections import defaultdict + >>> language_prob = defaultdict(lambda: -999.0) + >>> language_prob[('nobody',)] = log(0.5) + >>> language_prob[('expects',)] = log(0.4) + >>> language_prob[('the', 'spanish', 'inquisition')] = log(0.2) + >>> language_prob[('!',)] = log(0.1) + >>> language_model = type('',(object,),{'probability_change': lambda self, context, phrase: language_prob[phrase], 'probability': lambda self, phrase: language_prob[phrase]})() + + >>> stack_decoder = StackDecoder(phrase_table, language_model) + + >>> stack_decoder.translate(['niemand', 'erwartet', 'die', 'spanische', 'inquisition', '!']) + ['nobody', 'expects', 'the', 'spanish', 'inquisition', '!'] + + """ + + def __init__(self, phrase_table, language_model): + """ + :param phrase_table: Table of translations for source language + phrases and the log probabilities for those translations. + :type phrase_table: PhraseTable + + :param language_model: Target language model. Must define a + ``probability_change`` method that calculates the change in + log probability of a sentence, if a given string is appended + to it. + This interface is experimental and will likely be replaced + with nltk.model once it is implemented. + :type language_model: object + """ + self.phrase_table = phrase_table + self.language_model = language_model + + self.word_penalty = 0.0 + """ + float: Influences the translation length exponentially. + If positive, shorter translations are preferred. + If negative, longer translations are preferred. + If zero, no penalty is applied. + """ + + self.beam_threshold = 0.0 + """ + float: Hypotheses that score below this factor of the best + hypothesis in a stack are dropped from consideration. + Value between 0.0 and 1.0. + """ + + self.stack_size = 100 + """ + int: Maximum number of hypotheses to consider in a stack. + Higher values increase the likelihood of a good translation, + but increases processing time. + """ + + self.__distortion_factor = 0.5 + self.__compute_log_distortion() + + @property + def distortion_factor(self): + """ + float: Amount of reordering of source phrases. + Lower values favour monotone translation, suitable when + word order is similar for both source and target languages. + Value between 0.0 and 1.0. Default 0.5. 
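+
+        With the default of 0.5, each unit of reordering distance adds
+        log(0.5) (about -0.69) to a hypothesis score via ``distortion_score``.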
+ """ + return self.__distortion_factor + + @distortion_factor.setter + def distortion_factor(self, d): + self.__distortion_factor = d + self.__compute_log_distortion() + + def __compute_log_distortion(self): + # cache log(distortion_factor) so we don't have to recompute it + # when scoring hypotheses + if self.__distortion_factor == 0.0: + self.__log_distortion_factor = log(1e-9) # 1e-9 is almost zero + else: + self.__log_distortion_factor = log(self.__distortion_factor) + + def translate(self, src_sentence): + """ + :param src_sentence: Sentence to be translated + :type src_sentence: list(str) + + :return: Translated sentence + :rtype: list(str) + """ + sentence = tuple(src_sentence) # prevent accidental modification + sentence_length = len(sentence) + stacks = [ + _Stack(self.stack_size, self.beam_threshold) + for _ in range(0, sentence_length + 1) + ] + empty_hypothesis = _Hypothesis() + stacks[0].push(empty_hypothesis) + + all_phrases = self.find_all_src_phrases(sentence) + future_score_table = self.compute_future_scores(sentence) + for stack in stacks: + for hypothesis in stack: + possible_expansions = StackDecoder.valid_phrases( + all_phrases, hypothesis + ) + for src_phrase_span in possible_expansions: + src_phrase = sentence[src_phrase_span[0] : src_phrase_span[1]] + for translation_option in self.phrase_table.translations_for( + src_phrase + ): + raw_score = self.expansion_score( + hypothesis, translation_option, src_phrase_span + ) + new_hypothesis = _Hypothesis( + raw_score=raw_score, + src_phrase_span=src_phrase_span, + trg_phrase=translation_option.trg_phrase, + previous=hypothesis, + ) + new_hypothesis.future_score = self.future_score( + new_hypothesis, future_score_table, sentence_length + ) + total_words = new_hypothesis.total_translated_words() + stacks[total_words].push(new_hypothesis) + + if not stacks[sentence_length]: + warnings.warn( + 'Unable to translate all words. ' + 'The source sentence contains words not in ' + 'the phrase table' + ) + # Instead of returning empty output, perhaps a partial + # translation could be returned + return [] + + best_hypothesis = stacks[sentence_length].best() + return best_hypothesis.translation_so_far() + + def find_all_src_phrases(self, src_sentence): + """ + Finds all subsequences in src_sentence that have a phrase + translation in the translation table + + :type src_sentence: tuple(str) + + :return: Subsequences that have a phrase translation, + represented as a table of lists of end positions. + For example, if result[2] is [5, 6, 9], then there are + three phrases starting from position 2 in ``src_sentence``, + ending at positions 5, 6, and 9 exclusive. The list of + ending positions are in ascending order. + :rtype: list(list(int)) + """ + sentence_length = len(src_sentence) + phrase_indices = [[] for _ in src_sentence] + for start in range(0, sentence_length): + for end in range(start + 1, sentence_length + 1): + potential_phrase = src_sentence[start:end] + if potential_phrase in self.phrase_table: + phrase_indices[start].append(end) + return phrase_indices + + def compute_future_scores(self, src_sentence): + """ + Determines the approximate scores for translating every + subsequence in ``src_sentence`` + + Future scores can be used a look-ahead to determine the + difficulty of translating the remaining parts of a src_sentence. + + :type src_sentence: tuple(str) + + :return: Scores of subsequences referenced by their start and + end positions. 
For example, result[2][5] is the score of the + subsequence covering positions 2, 3, and 4. + :rtype: dict(int: (dict(int): float)) + """ + scores = defaultdict(lambda: defaultdict(lambda: float('-inf'))) + for seq_length in range(1, len(src_sentence) + 1): + for start in range(0, len(src_sentence) - seq_length + 1): + end = start + seq_length + phrase = src_sentence[start:end] + if phrase in self.phrase_table: + score = self.phrase_table.translations_for(phrase)[ + 0 + ].log_prob # pick best (first) translation + # Warning: API of language_model is subject to change + score += self.language_model.probability(phrase) + scores[start][end] = score + + # check if a better score can be obtained by combining + # two child subsequences + for mid in range(start + 1, end): + combined_score = scores[start][mid] + scores[mid][end] + if combined_score > scores[start][end]: + scores[start][end] = combined_score + return scores + + def future_score(self, hypothesis, future_score_table, sentence_length): + """ + Determines the approximate score for translating the + untranslated words in ``hypothesis`` + """ + score = 0.0 + for span in hypothesis.untranslated_spans(sentence_length): + score += future_score_table[span[0]][span[1]] + return score + + def expansion_score(self, hypothesis, translation_option, src_phrase_span): + """ + Calculate the score of expanding ``hypothesis`` with + ``translation_option`` + + :param hypothesis: Hypothesis being expanded + :type hypothesis: _Hypothesis + + :param translation_option: Information about the proposed expansion + :type translation_option: PhraseTableEntry + + :param src_phrase_span: Word position span of the source phrase + :type src_phrase_span: tuple(int, int) + """ + score = hypothesis.raw_score + score += translation_option.log_prob + # The API of language_model is subject to change; it could accept + # a string, a list of words, and/or some other type + score += self.language_model.probability_change( + hypothesis, translation_option.trg_phrase + ) + score += self.distortion_score(hypothesis, src_phrase_span) + score -= self.word_penalty * len(translation_option.trg_phrase) + return score + + def distortion_score(self, hypothesis, next_src_phrase_span): + if not hypothesis.src_phrase_span: + return 0.0 + next_src_phrase_start = next_src_phrase_span[0] + prev_src_phrase_end = hypothesis.src_phrase_span[1] + distortion_distance = next_src_phrase_start - prev_src_phrase_end + return abs(distortion_distance) * self.__log_distortion_factor + + @staticmethod + def valid_phrases(all_phrases_from, hypothesis): + """ + Extract phrases from ``all_phrases_from`` that contains words + that have not been translated by ``hypothesis`` + + :param all_phrases_from: Phrases represented by their spans, in + the same format as the return value of + ``find_all_src_phrases`` + :type all_phrases_from: list(list(int)) + + :type hypothesis: _Hypothesis + + :return: A list of phrases, represented by their spans, that + cover untranslated positions. 
+ :rtype: list(tuple(int, int)) + """ + untranslated_spans = hypothesis.untranslated_spans(len(all_phrases_from)) + valid_phrases = [] + for available_span in untranslated_spans: + start = available_span[0] + available_end = available_span[1] + while start < available_end: + for phrase_end in all_phrases_from[start]: + if phrase_end > available_end: + # Subsequent elements in all_phrases_from[start] + # will also be > available_end, since the + # elements are in ascending order + break + valid_phrases.append((start, phrase_end)) + start += 1 + return valid_phrases + + +class _Hypothesis(object): + """ + Partial solution to a translation. + + Records the word positions of the phrase being translated, its + translation, raw score, and the cost of the untranslated parts of + the sentence. When the next phrase is selected to build upon the + partial solution, a new _Hypothesis object is created, with a back + pointer to the previous hypothesis. + + To find out which words have been translated so far, look at the + ``src_phrase_span`` in the hypothesis chain. Similarly, the + translation output can be found by traversing up the chain. + """ + + def __init__( + self, + raw_score=0.0, + src_phrase_span=(), + trg_phrase=(), + previous=None, + future_score=0.0, + ): + """ + :param raw_score: Likelihood of hypothesis so far. + Higher is better. Does not account for untranslated words. + :type raw_score: float + + :param src_phrase_span: Span of word positions covered by the + source phrase in this hypothesis expansion. For example, + (2, 5) means that the phrase is from the second word up to, + but not including the fifth word in the source sentence. + :type src_phrase_span: tuple(int) + + :param trg_phrase: Translation of the source phrase in this + hypothesis expansion + :type trg_phrase: tuple(str) + + :param previous: Previous hypothesis before expansion to this one + :type previous: _Hypothesis + + :param future_score: Approximate score for translating the + remaining words not covered by this hypothesis. Higher means + that the remaining words are easier to translate. + :type future_score: float + """ + self.raw_score = raw_score + self.src_phrase_span = src_phrase_span + self.trg_phrase = trg_phrase + self.previous = previous + self.future_score = future_score + + def score(self): + """ + Overall score of hypothesis after accounting for local and + global features + """ + return self.raw_score + self.future_score + + def untranslated_spans(self, sentence_length): + """ + Starting from each untranslated word, find the longest + continuous span of untranslated positions + + :param sentence_length: Length of source sentence being + translated by the hypothesis + :type sentence_length: int + + :rtype: list(tuple(int, int)) + """ + translated_positions = self.translated_positions() + translated_positions.sort() + translated_positions.append(sentence_length) # add sentinel position + + untranslated_spans = [] + start = 0 + # each untranslated span must end in one of the translated_positions + for end in translated_positions: + if start < end: + untranslated_spans.append((start, end)) + start = end + 1 + + return untranslated_spans + + def translated_positions(self): + """ + List of positions in the source sentence of words already + translated. The list is not sorted. 
+ + :rtype: list(int) + """ + translated_positions = [] + current_hypothesis = self + while current_hypothesis.previous is not None: + translated_span = current_hypothesis.src_phrase_span + translated_positions.extend(range(translated_span[0], translated_span[1])) + current_hypothesis = current_hypothesis.previous + return translated_positions + + def total_translated_words(self): + return len(self.translated_positions()) + + def translation_so_far(self): + translation = [] + self.__build_translation(self, translation) + return translation + + def __build_translation(self, hypothesis, output): + if hypothesis.previous is None: + return + self.__build_translation(hypothesis.previous, output) + output.extend(hypothesis.trg_phrase) + + +class _Stack(object): + """ + Collection of _Hypothesis objects + """ + + def __init__(self, max_size=100, beam_threshold=0.0): + """ + :param beam_threshold: Hypotheses that score less than this + factor of the best hypothesis are discarded from the stack. + Value must be between 0.0 and 1.0. + :type beam_threshold: float + """ + self.max_size = max_size + self.items = [] + + if beam_threshold == 0.0: + self.__log_beam_threshold = float('-inf') + else: + self.__log_beam_threshold = log(beam_threshold) + + def push(self, hypothesis): + """ + Add ``hypothesis`` to the stack. + Removes lowest scoring hypothesis if the stack is full. + After insertion, hypotheses that score less than + ``beam_threshold`` times the score of the best hypothesis + are removed. + """ + self.items.append(hypothesis) + self.items.sort(key=lambda h: h.score(), reverse=True) + while len(self.items) > self.max_size: + self.items.pop() + self.threshold_prune() + + def threshold_prune(self): + if not self.items: + return + # log(score * beam_threshold) = log(score) + log(beam_threshold) + threshold = self.items[0].score() + self.__log_beam_threshold + for hypothesis in reversed(self.items): + if hypothesis.score() < threshold: + self.items.pop() + else: + break + + def best(self): + """ + :return: Hypothesis with the highest score in the stack + :rtype: _Hypothesis + """ + if self.items: + return self.items[0] + return None + + def __iter__(self): + return iter(self.items) + + def __contains__(self, hypothesis): + return hypothesis in self.items + + def __bool__(self): + return len(self.items) != 0 + + __nonzero__ = __bool__ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/tree.py b/venv.bak/lib/python3.7/site-packages/nltk/tree.py new file mode 100644 index 0000000..1b5e355 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/tree.py @@ -0,0 +1,1792 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Text Trees +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Edward Loper +# Steven Bird +# Peter Ljunglöf +# Nathan Bodenstab (tree transforms) +# URL: +# For license information, see LICENSE.TXT + +""" +Class for representing hierarchical language structures, such as +syntax trees and morphological trees. 
+""" +from __future__ import print_function, unicode_literals + +import re +import sys +from abc import ABCMeta, abstractmethod + +from six import string_types, add_metaclass + +from nltk.grammar import Production, Nonterminal +from nltk.probability import ProbabilisticMixIn +from nltk.util import slice_bounds +from nltk.compat import python_2_unicode_compatible, unicode_repr +from nltk.internals import raise_unorderable_types + +# TODO: add LabelledTree (can be used for dependency trees) + +###################################################################### +## Trees +###################################################################### + + +@python_2_unicode_compatible +class Tree(list): + """ + A Tree represents a hierarchical grouping of leaves and subtrees. + For example, each constituent in a syntax tree is represented by a single Tree. + + A tree's children are encoded as a list of leaves and subtrees, + where a leaf is a basic (non-tree) value; and a subtree is a + nested Tree. + + >>> from nltk.tree import Tree + >>> print(Tree(1, [2, Tree(3, [4]), 5])) + (1 2 (3 4) 5) + >>> vp = Tree('VP', [Tree('V', ['saw']), + ... Tree('NP', ['him'])]) + >>> s = Tree('S', [Tree('NP', ['I']), vp]) + >>> print(s) + (S (NP I) (VP (V saw) (NP him))) + >>> print(s[1]) + (VP (V saw) (NP him)) + >>> print(s[1,1]) + (NP him) + >>> t = Tree.fromstring("(S (NP I) (VP (V saw) (NP him)))") + >>> s == t + True + >>> t[1][1].set_label('X') + >>> t[1][1].label() + 'X' + >>> print(t) + (S (NP I) (VP (V saw) (X him))) + >>> t[0], t[1,1] = t[1,1], t[0] + >>> print(t) + (S (X him) (VP (V saw) (NP I))) + + The length of a tree is the number of children it has. + + >>> len(t) + 2 + + The set_label() and label() methods allow individual constituents + to be labeled. For example, syntax trees use this label to specify + phrase tags, such as "NP" and "VP". + + Several Tree methods use "tree positions" to specify + children or descendants of a tree. Tree positions are defined as + follows: + + - The tree position *i* specifies a Tree's *i*\ th child. + - The tree position ``()`` specifies the Tree itself. + - If *p* is the tree position of descendant *d*, then + *p+i* specifies the *i*\ th child of *d*. + + I.e., every tree position is either a single index *i*, + specifying ``tree[i]``; or a sequence *i1, i2, ..., iN*, + specifying ``tree[i1][i2]...[iN]``. + + Construct a new tree. This constructor can be called in one + of two ways: + + - ``Tree(label, children)`` constructs a new tree with the + specified label and list of children. + + - ``Tree.fromstring(s)`` constructs a new tree by parsing the string ``s``. 
+ """ + + def __init__(self, node, children=None): + if children is None: + raise TypeError( + "%s: Expected a node value and child list " % type(self).__name__ + ) + elif isinstance(children, string_types): + raise TypeError( + "%s() argument 2 should be a list, not a " + "string" % type(self).__name__ + ) + else: + list.__init__(self, children) + self._label = node + + # //////////////////////////////////////////////////////////// + # Comparison operators + # //////////////////////////////////////////////////////////// + + def __eq__(self, other): + return self.__class__ is other.__class__ and (self._label, list(self)) == ( + other._label, + list(other), + ) + + def __lt__(self, other): + if not isinstance(other, Tree): + # raise_unorderable_types("<", self, other) + # Sometimes children can be pure strings, + # so we need to be able to compare with non-trees: + return self.__class__.__name__ < other.__class__.__name__ + elif self.__class__ is other.__class__: + return (self._label, list(self)) < (other._label, list(other)) + else: + return self.__class__.__name__ < other.__class__.__name__ + + # @total_ordering doesn't work here, since the class inherits from a builtin class + __ne__ = lambda self, other: not self == other + __gt__ = lambda self, other: not (self < other or self == other) + __le__ = lambda self, other: self < other or self == other + __ge__ = lambda self, other: not self < other + + # //////////////////////////////////////////////////////////// + # Disabled list operations + # //////////////////////////////////////////////////////////// + + def __mul__(self, v): + raise TypeError('Tree does not support multiplication') + + def __rmul__(self, v): + raise TypeError('Tree does not support multiplication') + + def __add__(self, v): + raise TypeError('Tree does not support addition') + + def __radd__(self, v): + raise TypeError('Tree does not support addition') + + # //////////////////////////////////////////////////////////// + # Indexing (with support for tree positions) + # //////////////////////////////////////////////////////////// + + def __getitem__(self, index): + if isinstance(index, (int, slice)): + return list.__getitem__(self, index) + elif isinstance(index, (list, tuple)): + if len(index) == 0: + return self + elif len(index) == 1: + return self[index[0]] + else: + return self[index[0]][index[1:]] + else: + raise TypeError( + "%s indices must be integers, not %s" + % (type(self).__name__, type(index).__name__) + ) + + def __setitem__(self, index, value): + if isinstance(index, (int, slice)): + return list.__setitem__(self, index, value) + elif isinstance(index, (list, tuple)): + if len(index) == 0: + raise IndexError('The tree position () may not be ' 'assigned to.') + elif len(index) == 1: + self[index[0]] = value + else: + self[index[0]][index[1:]] = value + else: + raise TypeError( + "%s indices must be integers, not %s" + % (type(self).__name__, type(index).__name__) + ) + + def __delitem__(self, index): + if isinstance(index, (int, slice)): + return list.__delitem__(self, index) + elif isinstance(index, (list, tuple)): + if len(index) == 0: + raise IndexError('The tree position () may not be deleted.') + elif len(index) == 1: + del self[index[0]] + else: + del self[index[0]][index[1:]] + else: + raise TypeError( + "%s indices must be integers, not %s" + % (type(self).__name__, type(index).__name__) + ) + + # //////////////////////////////////////////////////////////// + # Basic tree operations + # //////////////////////////////////////////////////////////// 
+ + def _get_node(self): + """Outdated method to access the node value; use the label() method instead.""" + raise NotImplementedError("Use label() to access a node label.") + + def _set_node(self, value): + """Outdated method to set the node value; use the set_label() method instead.""" + raise NotImplementedError("Use set_label() method to set a node label.") + + node = property(_get_node, _set_node) + + def label(self): + """ + Return the node label of the tree. + + >>> t = Tree.fromstring('(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))') + >>> t.label() + 'S' + + :return: the node label (typically a string) + :rtype: any + """ + return self._label + + def set_label(self, label): + """ + Set the node label of the tree. + + >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))") + >>> t.set_label("T") + >>> print(t) + (T (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat)))) + + :param label: the node label (typically a string) + :type label: any + """ + self._label = label + + def leaves(self): + """ + Return the leaves of the tree. + + >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))") + >>> t.leaves() + ['the', 'dog', 'chased', 'the', 'cat'] + + :return: a list containing this tree's leaves. + The order reflects the order of the + leaves in the tree's hierarchical structure. + :rtype: list + """ + leaves = [] + for child in self: + if isinstance(child, Tree): + leaves.extend(child.leaves()) + else: + leaves.append(child) + return leaves + + def flatten(self): + """ + Return a flat version of the tree, with all non-root non-terminals removed. + + >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))") + >>> print(t.flatten()) + (S the dog chased the cat) + + :return: a tree consisting of this tree's root connected directly to + its leaves, omitting all intervening non-terminal nodes. + :rtype: Tree + """ + return Tree(self.label(), self.leaves()) + + def height(self): + """ + Return the height of the tree. + + >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))") + >>> t.height() + 5 + >>> print(t[0,0]) + (D the) + >>> t[0,0].height() + 2 + + :return: The height of this tree. The height of a tree + containing no children is 1; the height of a tree + containing only leaves is 2; and the height of any other + tree is one plus the maximum of its children's + heights. + :rtype: int + """ + max_child_height = 0 + for child in self: + if isinstance(child, Tree): + max_child_height = max(max_child_height, child.height()) + else: + max_child_height = max(max_child_height, 1) + return 1 + max_child_height + + def treepositions(self, order='preorder'): + """ + >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))") + >>> t.treepositions() # doctest: +ELLIPSIS + [(), (0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1,), (1, 0), (1, 0, 0), ...] + >>> for pos in t.treepositions('leaves'): + ... t[pos] = t[pos][::-1].upper() + >>> print(t) + (S (NP (D EHT) (N GOD)) (VP (V DESAHC) (NP (D EHT) (N TAC)))) + + :param order: One of: ``preorder``, ``postorder``, ``bothorder``, + ``leaves``. 
+ """ + positions = [] + if order in ('preorder', 'bothorder'): + positions.append(()) + for i, child in enumerate(self): + if isinstance(child, Tree): + childpos = child.treepositions(order) + positions.extend((i,) + p for p in childpos) + else: + positions.append((i,)) + if order in ('postorder', 'bothorder'): + positions.append(()) + return positions + + def subtrees(self, filter=None): + """ + Generate all the subtrees of this tree, optionally restricted + to trees matching the filter function. + + >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))") + >>> for s in t.subtrees(lambda t: t.height() == 2): + ... print(s) + (D the) + (N dog) + (V chased) + (D the) + (N cat) + + :type filter: function + :param filter: the function to filter all local trees + """ + if not filter or filter(self): + yield self + for child in self: + if isinstance(child, Tree): + for subtree in child.subtrees(filter): + yield subtree + + def productions(self): + """ + Generate the productions that correspond to the non-terminal nodes of the tree. + For each subtree of the form (P: C1 C2 ... Cn) this produces a production of the + form P -> C1 C2 ... Cn. + + >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))") + >>> t.productions() + [S -> NP VP, NP -> D N, D -> 'the', N -> 'dog', VP -> V NP, V -> 'chased', + NP -> D N, D -> 'the', N -> 'cat'] + + :rtype: list(Production) + """ + + if not isinstance(self._label, string_types): + raise TypeError( + 'Productions can only be generated from trees having node labels that are strings' + ) + + prods = [Production(Nonterminal(self._label), _child_names(self))] + for child in self: + if isinstance(child, Tree): + prods += child.productions() + return prods + + def pos(self): + """ + Return a sequence of pos-tagged words extracted from the tree. + + >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))") + >>> t.pos() + [('the', 'D'), ('dog', 'N'), ('chased', 'V'), ('the', 'D'), ('cat', 'N')] + + :return: a list of tuples containing leaves and pre-terminals (part-of-speech tags). + The order reflects the order of the leaves in the tree's hierarchical structure. + :rtype: list(tuple) + """ + pos = [] + for child in self: + if isinstance(child, Tree): + pos.extend(child.pos()) + else: + pos.append((child, self._label)) + return pos + + def leaf_treeposition(self, index): + """ + :return: The tree position of the ``index``-th leaf in this + tree. I.e., if ``tp=self.leaf_treeposition(i)``, then + ``self[tp]==self.leaves()[i]``. + + :raise IndexError: If this tree contains fewer than ``index+1`` + leaves, or if ``index<0``. + """ + if index < 0: + raise IndexError('index must be non-negative') + + stack = [(self, ())] + while stack: + value, treepos = stack.pop() + if not isinstance(value, Tree): + if index == 0: + return treepos + else: + index -= 1 + else: + for i in range(len(value) - 1, -1, -1): + stack.append((value[i], treepos + (i,))) + + raise IndexError('index must be less than or equal to len(self)') + + def treeposition_spanning_leaves(self, start, end): + """ + :return: The tree position of the lowest descendant of this + tree that dominates ``self.leaves()[start:end]``. + :raise ValueError: if ``end <= start`` + """ + if end <= start: + raise ValueError('end must be greater than start') + # Find the tree positions of the start & end leaves, and + # take the longest common subsequence. 
+ start_treepos = self.leaf_treeposition(start) + end_treepos = self.leaf_treeposition(end - 1) + # Find the first index where they mismatch: + for i in range(len(start_treepos)): + if i == len(end_treepos) or start_treepos[i] != end_treepos[i]: + return start_treepos[:i] + return start_treepos + + # //////////////////////////////////////////////////////////// + # Transforms + # //////////////////////////////////////////////////////////// + + def chomsky_normal_form( + self, + factor="right", + horzMarkov=None, + vertMarkov=0, + childChar="|", + parentChar="^", + ): + """ + This method can modify a tree in three ways: + + 1. Convert a tree into its Chomsky Normal Form (CNF) + equivalent -- Every subtree has either two non-terminals + or one terminal as its children. This process requires + the creation of more"artificial" non-terminal nodes. + 2. Markov (vertical) smoothing of children in new artificial + nodes + 3. Horizontal (parent) annotation of nodes + + :param factor: Right or left factoring method (default = "right") + :type factor: str = [left|right] + :param horzMarkov: Markov order for sibling smoothing in artificial nodes (None (default) = include all siblings) + :type horzMarkov: int | None + :param vertMarkov: Markov order for parent smoothing (0 (default) = no vertical annotation) + :type vertMarkov: int | None + :param childChar: A string used in construction of the artificial nodes, separating the head of the + original subtree from the child nodes that have yet to be expanded (default = "|") + :type childChar: str + :param parentChar: A string used to separate the node representation from its vertical annotation + :type parentChar: str + """ + from nltk.treetransforms import chomsky_normal_form + + chomsky_normal_form(self, factor, horzMarkov, vertMarkov, childChar, parentChar) + + def un_chomsky_normal_form( + self, expandUnary=True, childChar="|", parentChar="^", unaryChar="+" + ): + """ + This method modifies the tree in three ways: + + 1. Transforms a tree in Chomsky Normal Form back to its + original structure (branching greater than two) + 2. Removes any parent annotation (if it exists) + 3. (optional) expands unary subtrees (if previously + collapsed with collapseUnary(...) ) + + :param expandUnary: Flag to expand unary or not (default = True) + :type expandUnary: bool + :param childChar: A string separating the head node from its children in an artificial node (default = "|") + :type childChar: str + :param parentChar: A sting separating the node label from its parent annotation (default = "^") + :type parentChar: str + :param unaryChar: A string joining two non-terminals in a unary production (default = "+") + :type unaryChar: str + """ + from nltk.treetransforms import un_chomsky_normal_form + + un_chomsky_normal_form(self, expandUnary, childChar, parentChar, unaryChar) + + def collapse_unary(self, collapsePOS=False, collapseRoot=False, joinChar="+"): + """ + Collapse subtrees with a single child (ie. unary productions) + into a new non-terminal (Tree node) joined by 'joinChar'. + This is useful when working with algorithms that do not allow + unary productions, and completely removing the unary productions + would require loss of useful information. The Tree is modified + directly (since it is passed by reference) and no value is returned. + + :param collapsePOS: 'False' (default) will not collapse the parent of leaf nodes (ie. 
+ Part-of-Speech tags) since they are always unary productions + :type collapsePOS: bool + :param collapseRoot: 'False' (default) will not modify the root production + if it is unary. For the Penn WSJ treebank corpus, this corresponds + to the TOP -> productions. + :type collapseRoot: bool + :param joinChar: A string used to connect collapsed node values (default = "+") + :type joinChar: str + """ + from nltk.treetransforms import collapse_unary + + collapse_unary(self, collapsePOS, collapseRoot, joinChar) + + # //////////////////////////////////////////////////////////// + # Convert, copy + # //////////////////////////////////////////////////////////// + + @classmethod + def convert(cls, tree): + """ + Convert a tree between different subtypes of Tree. ``cls`` determines + which class will be used to encode the new tree. + + :type tree: Tree + :param tree: The tree that should be converted. + :return: The new Tree. + """ + if isinstance(tree, Tree): + children = [cls.convert(child) for child in tree] + return cls(tree._label, children) + else: + return tree + + def __copy__(self): + return self.copy() + + def __deepcopy__(self, memo): + return self.copy(deep=True) + + def copy(self, deep=False): + if not deep: + return type(self)(self._label, self) + else: + return type(self).convert(self) + + + def _frozen_class(self): + return ImmutableTree + + def freeze(self, leaf_freezer=None): + frozen_class = self._frozen_class() + if leaf_freezer is None: + newcopy = frozen_class.convert(self) + else: + newcopy = self.copy(deep=True) + for pos in newcopy.treepositions('leaves'): + newcopy[pos] = leaf_freezer(newcopy[pos]) + newcopy = frozen_class.convert(newcopy) + hash(newcopy) # Make sure the leaves are hashable. + return newcopy + + # //////////////////////////////////////////////////////////// + # Parsing + # //////////////////////////////////////////////////////////// + + @classmethod + def fromstring( + cls, + s, + brackets='()', + read_node=None, + read_leaf=None, + node_pattern=None, + leaf_pattern=None, + remove_empty_top_bracketing=False, + ): + """ + Read a bracketed tree string and return the resulting tree. + Trees are represented as nested brackettings, such as:: + + (S (NP (NNP John)) (VP (V runs))) + + :type s: str + :param s: The string to read + + :type brackets: str (length=2) + :param brackets: The bracket characters used to mark the + beginning and end of trees and subtrees. + + :type read_node: function + :type read_leaf: function + :param read_node, read_leaf: If specified, these functions + are applied to the substrings of ``s`` corresponding to + nodes and leaves (respectively) to obtain the values for + those nodes and leaves. They should have the following + signature: + + read_node(str) -> value + + For example, these functions could be used to process nodes + and leaves whose values should be some type other than + string (such as ``FeatStruct``). + Note that by default, node strings and leaf strings are + delimited by whitespace and brackets; to override this + default, use the ``node_pattern`` and ``leaf_pattern`` + arguments. + + :type node_pattern: str + :type leaf_pattern: str + :param node_pattern, leaf_pattern: Regular expression patterns + used to find node and leaf substrings in ``s``. By + default, both nodes patterns are defined to match any + sequence of non-whitespace non-bracket characters. 
+ + :type remove_empty_top_bracketing: bool + :param remove_empty_top_bracketing: If the resulting tree has + an empty node label, and is length one, then return its + single child instead. This is useful for treebank trees, + which sometimes contain an extra level of bracketing. + + :return: A tree corresponding to the string representation ``s``. + If this class method is called using a subclass of Tree, + then it will return a tree of that type. + :rtype: Tree + """ + if not isinstance(brackets, string_types) or len(brackets) != 2: + raise TypeError('brackets must be a length-2 string') + if re.search('\s', brackets): + raise TypeError('whitespace brackets not allowed') + # Construct a regexp that will tokenize the string. + open_b, close_b = brackets + open_pattern, close_pattern = (re.escape(open_b), re.escape(close_b)) + if node_pattern is None: + node_pattern = '[^\s%s%s]+' % (open_pattern, close_pattern) + if leaf_pattern is None: + leaf_pattern = '[^\s%s%s]+' % (open_pattern, close_pattern) + token_re = re.compile( + '%s\s*(%s)?|%s|(%s)' + % (open_pattern, node_pattern, close_pattern, leaf_pattern) + ) + # Walk through each token, updating a stack of trees. + stack = [(None, [])] # list of (node, children) tuples + for match in token_re.finditer(s): + token = match.group() + # Beginning of a tree/subtree + if token[0] == open_b: + if len(stack) == 1 and len(stack[0][1]) > 0: + cls._parse_error(s, match, 'end-of-string') + label = token[1:].lstrip() + if read_node is not None: + label = read_node(label) + stack.append((label, [])) + # End of a tree/subtree + elif token == close_b: + if len(stack) == 1: + if len(stack[0][1]) == 0: + cls._parse_error(s, match, open_b) + else: + cls._parse_error(s, match, 'end-of-string') + label, children = stack.pop() + stack[-1][1].append(cls(label, children)) + # Leaf node + else: + if len(stack) == 1: + cls._parse_error(s, match, open_b) + if read_leaf is not None: + token = read_leaf(token) + stack[-1][1].append(token) + + # check that we got exactly one complete tree. + if len(stack) > 1: + cls._parse_error(s, 'end-of-string', close_b) + elif len(stack[0][1]) == 0: + cls._parse_error(s, 'end-of-string', open_b) + else: + assert stack[0][0] is None + assert len(stack[0][1]) == 1 + tree = stack[0][1][0] + + # If the tree has an extra level with node='', then get rid of + # it. E.g.: "((S (NP ...) (VP ...)))" + if remove_empty_top_bracketing and tree._label == '' and len(tree) == 1: + tree = tree[0] + # return the tree. + return tree + + @classmethod + def _parse_error(cls, s, match, expecting): + """ + Display a friendly error message when parsing a tree string fails. + :param s: The string we're parsing. + :param match: regexp match of the problem token. + :param expecting: what we expected to see instead. + """ + # Construct a basic error message + if match == 'end-of-string': + pos, token = len(s), 'end-of-string' + else: + pos, token = match.start(), match.group() + msg = '%s.read(): expected %r but got %r\n%sat index %d.' % ( + cls.__name__, + expecting, + token, + ' ' * 12, + pos, + ) + # Add a display showing the error token itsels: + s = s.replace('\n', ' ').replace('\t', ' ') + offset = pos + if len(s) > pos + 10: + s = s[: pos + 10] + '...' + if pos > 10: + s = '...' 
+ s[pos - 10 :] + offset = 13 + msg += '\n%s"%s"\n%s^' % (' ' * 16, s, ' ' * (17 + offset)) + raise ValueError(msg) + + # //////////////////////////////////////////////////////////// + # Visualization & String Representation + # //////////////////////////////////////////////////////////// + + def draw(self): + """ + Open a new window containing a graphical diagram of this tree. + """ + from nltk.draw.tree import draw_trees + + draw_trees(self) + + def pretty_print(self, sentence=None, highlight=(), stream=None, **kwargs): + """ + Pretty-print this tree as ASCII or Unicode art. + For explanation of the arguments, see the documentation for + `nltk.treeprettyprinter.TreePrettyPrinter`. + """ + from nltk.treeprettyprinter import TreePrettyPrinter + + print(TreePrettyPrinter(self, sentence, highlight).text(**kwargs), file=stream) + + def __repr__(self): + childstr = ", ".join(unicode_repr(c) for c in self) + return '%s(%s, [%s])' % ( + type(self).__name__, + unicode_repr(self._label), + childstr, + ) + + def _repr_png_(self): + """ + Draws and outputs in PNG for ipython. + PNG is used instead of PDF, since it can be displayed in the qt console and + has wider browser support. + """ + import os + import base64 + import subprocess + import tempfile + from nltk.draw.tree import tree_to_treesegment + from nltk.draw.util import CanvasFrame + from nltk.internals import find_binary + + _canvas_frame = CanvasFrame() + widget = tree_to_treesegment(_canvas_frame.canvas(), self) + _canvas_frame.add_widget(widget) + x, y, w, h = widget.bbox() + # print_to_file uses scrollregion to set the width and height of the pdf. + _canvas_frame.canvas()['scrollregion'] = (0, 0, w, h) + with tempfile.NamedTemporaryFile() as file: + in_path = '{0:}.ps'.format(file.name) + out_path = '{0:}.png'.format(file.name) + _canvas_frame.print_to_file(in_path) + _canvas_frame.destroy_widget(widget) + try: + subprocess.call( + [ + find_binary( + 'gs', + binary_names=['gswin32c.exe', 'gswin64c.exe'], + env_vars=['PATH'], + verbose=False, + ) + ] + + '-q -dEPSCrop -sDEVICE=png16m -r90 -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -dSAFER -dBATCH -dNOPAUSE -sOutputFile={0:} {1:}'.format( + out_path, in_path + ).split() + ) + except LookupError: + pre_error_message = str("The Ghostscript executable isn't found.\n" + "See http://web.mit.edu/ghostscript/www/Install.htm\n" + "If you're using a Mac, you can try installing\n" + "https://docs.brew.sh/Installation then `brew install ghostscript`") + print(pre_error_message, file=sys.stderr) + raise LookupError + + with open(out_path, 'rb') as sr: + res = sr.read() + os.remove(in_path) + os.remove(out_path) + return base64.b64encode(res).decode() + + def __str__(self): + return self.pformat() + + def pprint(self, **kwargs): + """ + Print a string representation of this Tree to 'stream' + """ + + if "stream" in kwargs: + stream = kwargs["stream"] + del kwargs["stream"] + else: + stream = None + print(self.pformat(**kwargs), file=stream) + + def pformat(self, margin=70, indent=0, nodesep='', parens='()', quotes=False): + """ + :return: A pretty-printed string representation of this tree. + :rtype: str + :param margin: The right margin at which to do line-wrapping. + :type margin: int + :param indent: The indentation level at which printing + begins. This number is used to decide how far to indent + subsequent lines. + :type indent: int + :param nodesep: A string that is used to separate the node + from the children. 
E.g., the default value ``':'`` gives + trees like ``(S: (NP: I) (VP: (V: saw) (NP: it)))``. + """ + + # Try writing it on one line. + s = self._pformat_flat(nodesep, parens, quotes) + if len(s) + indent < margin: + return s + + # If it doesn't fit on one line, then write it on multi-lines. + if isinstance(self._label, string_types): + s = '%s%s%s' % (parens[0], self._label, nodesep) + else: + s = '%s%s%s' % (parens[0], unicode_repr(self._label), nodesep) + for child in self: + if isinstance(child, Tree): + s += ( + '\n' + + ' ' * (indent + 2) + + child.pformat(margin, indent + 2, nodesep, parens, quotes) + ) + elif isinstance(child, tuple): + s += '\n' + ' ' * (indent + 2) + "/".join(child) + elif isinstance(child, string_types) and not quotes: + s += '\n' + ' ' * (indent + 2) + '%s' % child + else: + s += '\n' + ' ' * (indent + 2) + unicode_repr(child) + return s + parens[1] + + def pformat_latex_qtree(self): + r""" + Returns a representation of the tree compatible with the + LaTeX qtree package. This consists of the string ``\Tree`` + followed by the tree represented in bracketed notation. + + For example, the following result was generated from a parse tree of + the sentence ``The announcement astounded us``:: + + \Tree [.I'' [.N'' [.D The ] [.N' [.N announcement ] ] ] + [.I' [.V'' [.V' [.V astounded ] [.N'' [.N' [.N us ] ] ] ] ] ] ] + + See http://www.ling.upenn.edu/advice/latex.html for the LaTeX + style file for the qtree package. + + :return: A latex qtree representation of this tree. + :rtype: str + """ + reserved_chars = re.compile('([#\$%&~_\{\}])') + + pformat = self.pformat(indent=6, nodesep='', parens=('[.', ' ]')) + return r'\Tree ' + re.sub(reserved_chars, r'\\\1', pformat) + + def _pformat_flat(self, nodesep, parens, quotes): + childstrs = [] + for child in self: + if isinstance(child, Tree): + childstrs.append(child._pformat_flat(nodesep, parens, quotes)) + elif isinstance(child, tuple): + childstrs.append("/".join(child)) + elif isinstance(child, string_types) and not quotes: + childstrs.append('%s' % child) + else: + childstrs.append(unicode_repr(child)) + if isinstance(self._label, string_types): + return '%s%s%s %s%s' % ( + parens[0], + self._label, + nodesep, + " ".join(childstrs), + parens[1], + ) + else: + return '%s%s%s %s%s' % ( + parens[0], + unicode_repr(self._label), + nodesep, + " ".join(childstrs), + parens[1], + ) + + +class ImmutableTree(Tree): + def __init__(self, node, children=None): + super(ImmutableTree, self).__init__(node, children) + # Precompute our hash value. This ensures that we're really + # immutable. It also means we only have to calculate it once. 
+ try: + self._hash = hash((self._label, tuple(self))) + except (TypeError, ValueError): + raise ValueError( + "%s: node value and children " "must be immutable" % type(self).__name__ + ) + + def __setitem__(self, index, value): + raise ValueError('%s may not be modified' % type(self).__name__) + + def __setslice__(self, i, j, value): + raise ValueError('%s may not be modified' % type(self).__name__) + + def __delitem__(self, index): + raise ValueError('%s may not be modified' % type(self).__name__) + + def __delslice__(self, i, j): + raise ValueError('%s may not be modified' % type(self).__name__) + + def __iadd__(self, other): + raise ValueError('%s may not be modified' % type(self).__name__) + + def __imul__(self, other): + raise ValueError('%s may not be modified' % type(self).__name__) + + def append(self, v): + raise ValueError('%s may not be modified' % type(self).__name__) + + def extend(self, v): + raise ValueError('%s may not be modified' % type(self).__name__) + + def pop(self, v=None): + raise ValueError('%s may not be modified' % type(self).__name__) + + def remove(self, v): + raise ValueError('%s may not be modified' % type(self).__name__) + + def reverse(self): + raise ValueError('%s may not be modified' % type(self).__name__) + + def sort(self): + raise ValueError('%s may not be modified' % type(self).__name__) + + def __hash__(self): + return self._hash + + def set_label(self, value): + """ + Set the node label. This will only succeed the first time the + node label is set, which should occur in ImmutableTree.__init__(). + """ + if hasattr(self, '_label'): + raise ValueError('%s may not be modified' % type(self).__name__) + self._label = value + + +###################################################################### +## Parented trees +###################################################################### +@add_metaclass(ABCMeta) +class AbstractParentedTree(Tree): + """ + An abstract base class for a ``Tree`` that automatically maintains + pointers to parent nodes. These parent pointers are updated + whenever any change is made to a tree's structure. Two subclasses + are currently defined: + + - ``ParentedTree`` is used for tree structures where each subtree + has at most one parent. This class should be used in cases + where there is no"sharing" of subtrees. + + - ``MultiParentedTree`` is used for tree structures where a + subtree may have zero or more parents. This class should be + used in cases where subtrees may be shared. + + Subclassing + =========== + The ``AbstractParentedTree`` class redefines all operations that + modify a tree's structure to call two methods, which are used by + subclasses to update parent information: + + - ``_setparent()`` is called whenever a new child is added. + - ``_delparent()`` is called whenever a child is removed. + """ + + def __init__(self, node, children=None): + super(AbstractParentedTree, self).__init__(node, children) + # If children is None, the tree is read from node, and + # all parents will be set during parsing. + if children is not None: + # Otherwise we have to set the parent of the children. + # Iterate over self, and *not* children, because children + # might be an iterator. 
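+            # Two passes are made over the children: a dry run that only
+            # checks for error conditions (e.g. a child that already has a
+            # parent), followed by a pass that actually sets the parent
+            # pointers, so a failure cannot leave the tree half-initialised.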
+ for i, child in enumerate(self): + if isinstance(child, Tree): + self._setparent(child, i, dry_run=True) + for i, child in enumerate(self): + if isinstance(child, Tree): + self._setparent(child, i) + + # //////////////////////////////////////////////////////////// + # Parent management + # //////////////////////////////////////////////////////////// + @abstractmethod + def _setparent(self, child, index, dry_run=False): + """ + Update the parent pointer of ``child`` to point to ``self``. This + method is only called if the type of ``child`` is ``Tree``; + i.e., it is not called when adding a leaf to a tree. This method + is always called before the child is actually added to the + child list of ``self``. + + :type child: Tree + :type index: int + :param index: The index of ``child`` in ``self``. + :raise TypeError: If ``child`` is a tree with an impropriate + type. Typically, if ``child`` is a tree, then its type needs + to match the type of ``self``. This prevents mixing of + different tree types (single-parented, multi-parented, and + non-parented). + :param dry_run: If true, the don't actually set the child's + parent pointer; just check for any error conditions, and + raise an exception if one is found. + """ + + @abstractmethod + def _delparent(self, child, index): + """ + Update the parent pointer of ``child`` to not point to self. This + method is only called if the type of ``child`` is ``Tree``; i.e., it + is not called when removing a leaf from a tree. This method + is always called before the child is actually removed from the + child list of ``self``. + + :type child: Tree + :type index: int + :param index: The index of ``child`` in ``self``. + """ + + # //////////////////////////////////////////////////////////// + # Methods that add/remove children + # //////////////////////////////////////////////////////////// + # Every method that adds or removes a child must make + # appropriate calls to _setparent() and _delparent(). + + def __delitem__(self, index): + # del ptree[start:stop] + if isinstance(index, slice): + start, stop, step = slice_bounds(self, index, allow_step=True) + # Clear all the children pointers. + for i in range(start, stop, step): + if isinstance(self[i], Tree): + self._delparent(self[i], i) + # Delete the children from our child list. + super(AbstractParentedTree, self).__delitem__(index) + + # del ptree[i] + elif isinstance(index, int): + if index < 0: + index += len(self) + if index < 0: + raise IndexError('index out of range') + # Clear the child's parent pointer. + if isinstance(self[index], Tree): + self._delparent(self[index], index) + # Remove the child from our child list. + super(AbstractParentedTree, self).__delitem__(index) + + elif isinstance(index, (list, tuple)): + # del ptree[()] + if len(index) == 0: + raise IndexError('The tree position () may not be deleted.') + # del ptree[(i,)] + elif len(index) == 1: + del self[index[0]] + # del ptree[i1, i2, i3] + else: + del self[index[0]][index[1:]] + + else: + raise TypeError( + "%s indices must be integers, not %s" + % (type(self).__name__, type(index).__name__) + ) + + def __setitem__(self, index, value): + # ptree[start:stop] = value + if isinstance(index, slice): + start, stop, step = slice_bounds(self, index, allow_step=True) + # make a copy of value, in case it's an iterator + if not isinstance(value, (list, tuple)): + value = list(value) + # Check for any error conditions, so we can avoid ending + # up in an inconsistent state if an error does occur. 
+ for i, child in enumerate(value): + if isinstance(child, Tree): + self._setparent(child, start + i * step, dry_run=True) + # clear the child pointers of all parents we're removing + for i in range(start, stop, step): + if isinstance(self[i], Tree): + self._delparent(self[i], i) + # set the child pointers of the new children. We do this + # after clearing *all* child pointers, in case we're e.g. + # reversing the elements in a tree. + for i, child in enumerate(value): + if isinstance(child, Tree): + self._setparent(child, start + i * step) + # finally, update the content of the child list itself. + super(AbstractParentedTree, self).__setitem__(index, value) + + # ptree[i] = value + elif isinstance(index, int): + if index < 0: + index += len(self) + if index < 0: + raise IndexError('index out of range') + # if the value is not changing, do nothing. + if value is self[index]: + return + # Set the new child's parent pointer. + if isinstance(value, Tree): + self._setparent(value, index) + # Remove the old child's parent pointer + if isinstance(self[index], Tree): + self._delparent(self[index], index) + # Update our child list. + super(AbstractParentedTree, self).__setitem__(index, value) + + elif isinstance(index, (list, tuple)): + # ptree[()] = value + if len(index) == 0: + raise IndexError('The tree position () may not be assigned to.') + # ptree[(i,)] = value + elif len(index) == 1: + self[index[0]] = value + # ptree[i1, i2, i3] = value + else: + self[index[0]][index[1:]] = value + + else: + raise TypeError( + "%s indices must be integers, not %s" + % (type(self).__name__, type(index).__name__) + ) + + def append(self, child): + if isinstance(child, Tree): + self._setparent(child, len(self)) + super(AbstractParentedTree, self).append(child) + + def extend(self, children): + for child in children: + if isinstance(child, Tree): + self._setparent(child, len(self)) + super(AbstractParentedTree, self).append(child) + + def insert(self, index, child): + # Handle negative indexes. Note that if index < -len(self), + # we do *not* raise an IndexError, unlike __getitem__. This + # is done for consistency with list.__getitem__ and list.index. + if index < 0: + index += len(self) + if index < 0: + index = 0 + # Set the child's parent, and update our child list. + if isinstance(child, Tree): + self._setparent(child, index) + super(AbstractParentedTree, self).insert(index, child) + + def pop(self, index=-1): + if index < 0: + index += len(self) + if index < 0: + raise IndexError('index out of range') + if isinstance(self[index], Tree): + self._delparent(self[index], index) + return super(AbstractParentedTree, self).pop(index) + + # n.b.: like `list`, this is done by equality, not identity! + # To remove a specific child, use del ptree[i]. + def remove(self, child): + index = self.index(child) + if isinstance(self[index], Tree): + self._delparent(self[index], index) + super(AbstractParentedTree, self).remove(child) + + # We need to implement __getslice__ and friends, even though + # they're deprecated, because otherwise list.__getslice__ will get + # called (since we're subclassing from list). Just delegate to + # __getitem__ etc., but use max(0, start) and max(0, stop) because + # because negative indices are already handled *before* + # __getslice__ is called; and we don't want to double-count them. 
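+    # On Python 3, list no longer defines __getslice__, so the hasattr()
+    # check below is False there and these compatibility shims are only
+    # created when running under Python 2.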
+ if hasattr(list, '__getslice__'): + + def __getslice__(self, start, stop): + return self.__getitem__(slice(max(0, start), max(0, stop))) + + def __delslice__(self, start, stop): + return self.__delitem__(slice(max(0, start), max(0, stop))) + + def __setslice__(self, start, stop, value): + return self.__setitem__(slice(max(0, start), max(0, stop)), value) + + +class ParentedTree(AbstractParentedTree): + """ + A ``Tree`` that automatically maintains parent pointers for + single-parented trees. The following are methods for querying + the structure of a parented tree: ``parent``, ``parent_index``, + ``left_sibling``, ``right_sibling``, ``root``, ``treeposition``. + + Each ``ParentedTree`` may have at most one parent. In + particular, subtrees may not be shared. Any attempt to reuse a + single ``ParentedTree`` as a child of more than one parent (or + as multiple children of the same parent) will cause a + ``ValueError`` exception to be raised. + + ``ParentedTrees`` should never be used in the same tree as ``Trees`` + or ``MultiParentedTrees``. Mixing tree implementations may result + in incorrect parent pointers and in ``TypeError`` exceptions. + """ + + def __init__(self, node, children=None): + self._parent = None + """The parent of this Tree, or None if it has no parent.""" + super(ParentedTree, self).__init__(node, children) + if children is None: + # If children is None, the tree is read from node. + # After parsing, the parent of the immediate children + # will point to an intermediate tree, not self. + # We fix this by brute force: + for i, child in enumerate(self): + if isinstance(child, Tree): + child._parent = None + self._setparent(child, i) + + def _frozen_class(self): + return ImmutableParentedTree + + # ///////////////////////////////////////////////////////////////// + # Methods + # ///////////////////////////////////////////////////////////////// + + def parent(self): + """The parent of this tree, or None if it has no parent.""" + return self._parent + + def parent_index(self): + """ + The index of this tree in its parent. I.e., + ``ptree.parent()[ptree.parent_index()] is ptree``. Note that + ``ptree.parent_index()`` is not necessarily equal to + ``ptree.parent.index(ptree)``, since the ``index()`` method + returns the first child that is equal to its argument. + """ + if self._parent is None: + return None + for i, child in enumerate(self._parent): + if child is self: + return i + assert False, 'expected to find self in self._parent!' + + def left_sibling(self): + """The left sibling of this tree, or None if it has none.""" + parent_index = self.parent_index() + if self._parent and parent_index > 0: + return self._parent[parent_index - 1] + return None # no left sibling + + def right_sibling(self): + """The right sibling of this tree, or None if it has none.""" + parent_index = self.parent_index() + if self._parent and parent_index < (len(self._parent) - 1): + return self._parent[parent_index + 1] + return None # no right sibling + + def root(self): + """ + The root of this tree. I.e., the unique ancestor of this tree + whose parent is None. If ``ptree.parent()`` is None, then + ``ptree`` is its own root. + """ + root = self + while root.parent() is not None: + root = root.parent() + return root + + def treeposition(self): + """ + The tree position of this tree, relative to the root of the + tree. I.e., ``ptree.root[ptree.treeposition] is ptree``. 
+ """ + if self.parent() is None: + return () + else: + return self.parent().treeposition() + (self.parent_index(),) + + # ///////////////////////////////////////////////////////////////// + # Parent Management + # ///////////////////////////////////////////////////////////////// + + def _delparent(self, child, index): + # Sanity checks + assert isinstance(child, ParentedTree) + assert self[index] is child + assert child._parent is self + + # Delete child's parent pointer. + child._parent = None + + def _setparent(self, child, index, dry_run=False): + # If the child's type is incorrect, then complain. + if not isinstance(child, ParentedTree): + raise TypeError( + 'Can not insert a non-ParentedTree ' + 'into a ParentedTree' + ) + + # If child already has a parent, then complain. + if child._parent is not None: + raise ValueError('Can not insert a subtree that already ' 'has a parent.') + + # Set child's parent pointer & index. + if not dry_run: + child._parent = self + + +class MultiParentedTree(AbstractParentedTree): + """ + A ``Tree`` that automatically maintains parent pointers for + multi-parented trees. The following are methods for querying the + structure of a multi-parented tree: ``parents()``, ``parent_indices()``, + ``left_siblings()``, ``right_siblings()``, ``roots``, ``treepositions``. + + Each ``MultiParentedTree`` may have zero or more parents. In + particular, subtrees may be shared. If a single + ``MultiParentedTree`` is used as multiple children of the same + parent, then that parent will appear multiple times in its + ``parents()`` method. + + ``MultiParentedTrees`` should never be used in the same tree as + ``Trees`` or ``ParentedTrees``. Mixing tree implementations may + result in incorrect parent pointers and in ``TypeError`` exceptions. + """ + + def __init__(self, node, children=None): + self._parents = [] + """A list of this tree's parents. This list should not + contain duplicates, even if a parent contains this tree + multiple times.""" + super(MultiParentedTree, self).__init__(node, children) + if children is None: + # If children is None, the tree is read from node. + # After parsing, the parent(s) of the immediate children + # will point to an intermediate tree, not self. + # We fix this by brute force: + for i, child in enumerate(self): + if isinstance(child, Tree): + child._parents = [] + self._setparent(child, i) + + def _frozen_class(self): + return ImmutableMultiParentedTree + + # ///////////////////////////////////////////////////////////////// + # Methods + # ///////////////////////////////////////////////////////////////// + + def parents(self): + """ + The set of parents of this tree. If this tree has no parents, + then ``parents`` is the empty set. To check if a tree is used + as multiple children of the same parent, use the + ``parent_indices()`` method. + + :type: list(MultiParentedTree) + """ + return list(self._parents) + + def left_siblings(self): + """ + A list of all left siblings of this tree, in any of its parent + trees. A tree may be its own left sibling if it is used as + multiple contiguous children of the same parent. A tree may + appear multiple times in this list if it is the left sibling + of this tree with respect to multiple parents. + + :type: list(MultiParentedTree) + """ + return [ + parent[index - 1] + for (parent, index) in self._get_parent_indices() + if index > 0 + ] + + def right_siblings(self): + """ + A list of all right siblings of this tree, in any of its parent + trees. 
A tree may be its own right sibling if it is used as + multiple contiguous children of the same parent. A tree may + appear multiple times in this list if it is the right sibling + of this tree with respect to multiple parents. + + :type: list(MultiParentedTree) + """ + return [ + parent[index + 1] + for (parent, index) in self._get_parent_indices() + if index < (len(parent) - 1) + ] + + def _get_parent_indices(self): + return [ + (parent, index) + for parent in self._parents + for index, child in enumerate(parent) + if child is self + ] + + def roots(self): + """ + The set of all roots of this tree. This set is formed by + tracing all possible parent paths until trees with no parents + are found. + + :type: list(MultiParentedTree) + """ + return list(self._get_roots_helper({}).values()) + + def _get_roots_helper(self, result): + if self._parents: + for parent in self._parents: + parent._get_roots_helper(result) + else: + result[id(self)] = self + return result + + def parent_indices(self, parent): + """ + Return a list of the indices where this tree occurs as a child + of ``parent``. If this child does not occur as a child of + ``parent``, then the empty list is returned. The following is + always true:: + + for parent_index in ptree.parent_indices(parent): + parent[parent_index] is ptree + """ + if parent not in self._parents: + return [] + else: + return [index for (index, child) in enumerate(parent) if child is self] + + def treepositions(self, root): + """ + Return a list of all tree positions that can be used to reach + this multi-parented tree starting from ``root``. I.e., the + following is always true:: + + for treepos in ptree.treepositions(root): + root[treepos] is ptree + """ + if self is root: + return [()] + else: + return [ + treepos + (index,) + for parent in self._parents + for treepos in parent.treepositions(root) + for (index, child) in enumerate(parent) + if child is self + ] + + # ///////////////////////////////////////////////////////////////// + # Parent Management + # ///////////////////////////////////////////////////////////////// + + def _delparent(self, child, index): + # Sanity checks + assert isinstance(child, MultiParentedTree) + assert self[index] is child + assert len([p for p in child._parents if p is self]) == 1 + + # If the only copy of child in self is at index, then delete + # self from child's parent list. + for i, c in enumerate(self): + if c is child and i != index: + break + else: + child._parents.remove(self) + + def _setparent(self, child, index, dry_run=False): + # If the child's type is incorrect, then complain. + if not isinstance(child, MultiParentedTree): + raise TypeError( + 'Can not insert a non-MultiParentedTree ' + 'into a MultiParentedTree' + ) + + # Add self as a parent pointer if it's not already listed. 
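+        # (The for/else below appends self to child._parents only when no
+        # existing entry is the same object, so the parent list stays free
+        # of duplicates even if the child occurs several times under self.)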
+ if not dry_run: + for parent in child._parents: + if parent is self: + break + else: + child._parents.append(self) + + +class ImmutableParentedTree(ImmutableTree, ParentedTree): + pass + + +class ImmutableMultiParentedTree(ImmutableTree, MultiParentedTree): + pass + + +###################################################################### +## Probabilistic trees +###################################################################### + + +@python_2_unicode_compatible +class ProbabilisticTree(Tree, ProbabilisticMixIn): + def __init__(self, node, children=None, **prob_kwargs): + Tree.__init__(self, node, children) + ProbabilisticMixIn.__init__(self, **prob_kwargs) + + # We have to patch up these methods to make them work right: + def _frozen_class(self): + return ImmutableProbabilisticTree + + def __repr__(self): + return '%s (p=%r)' % (Tree.unicode_repr(self), self.prob()) + + def __str__(self): + return '%s (p=%.6g)' % (self.pformat(margin=60), self.prob()) + + def copy(self, deep=False): + if not deep: + return type(self)(self._label, self, prob=self.prob()) + else: + return type(self).convert(self) + + @classmethod + def convert(cls, val): + if isinstance(val, Tree): + children = [cls.convert(child) for child in val] + if isinstance(val, ProbabilisticMixIn): + return cls(val._label, children, prob=val.prob()) + else: + return cls(val._label, children, prob=1.0) + else: + return val + + def __eq__(self, other): + return self.__class__ is other.__class__ and ( + self._label, + list(self), + self.prob(), + ) == (other._label, list(other), other.prob()) + + def __lt__(self, other): + if not isinstance(other, Tree): + raise_unorderable_types("<", self, other) + if self.__class__ is other.__class__: + return (self._label, list(self), self.prob()) < ( + other._label, + list(other), + other.prob(), + ) + else: + return self.__class__.__name__ < other.__class__.__name__ + + +@python_2_unicode_compatible +class ImmutableProbabilisticTree(ImmutableTree, ProbabilisticMixIn): + def __init__(self, node, children=None, **prob_kwargs): + ImmutableTree.__init__(self, node, children) + ProbabilisticMixIn.__init__(self, **prob_kwargs) + self._hash = hash((self._label, tuple(self), self.prob())) + + # We have to patch up these methods to make them work right: + def _frozen_class(self): + return ImmutableProbabilisticTree + + def __repr__(self): + return '%s [%s]' % (Tree.unicode_repr(self), self.prob()) + + def __str__(self): + return '%s [%s]' % (self.pformat(margin=60), self.prob()) + + def copy(self, deep=False): + if not deep: + return type(self)(self._label, self, prob=self.prob()) + else: + return type(self).convert(self) + + @classmethod + def convert(cls, val): + if isinstance(val, Tree): + children = [cls.convert(child) for child in val] + if isinstance(val, ProbabilisticMixIn): + return cls(val._label, children, prob=val.prob()) + else: + return cls(val._label, children, prob=1.0) + else: + return val + + +def _child_names(tree): + names = [] + for child in tree: + if isinstance(child, Tree): + names.append(Nonterminal(child._label)) + else: + names.append(child) + return names + + +###################################################################### +## Parsing +###################################################################### + + +def bracket_parse(s): + """ + Use Tree.read(s, remove_empty_top_bracketing=True) instead. + """ + raise NameError("Use Tree.read(s, remove_empty_top_bracketing=True) instead.") + + +def sinica_parse(s): + """ + Parse a Sinica Treebank string and return a tree. 
Trees are represented as nested brackettings, + as shown in the following example (X represents a Chinese character): + S(goal:NP(Head:Nep:XX)|theme:NP(Head:Nhaa:X)|quantity:Dab:X|Head:VL2:X)#0(PERIODCATEGORY) + + :return: A tree corresponding to the string representation. + :rtype: Tree + :param s: The string to be converted + :type s: str + """ + tokens = re.split(r'([()| ])', s) + for i in range(len(tokens)): + if tokens[i] == '(': + tokens[i - 1], tokens[i] = ( + tokens[i], + tokens[i - 1], + ) # pull nonterminal inside parens + elif ':' in tokens[i]: + fields = tokens[i].split(':') + if len(fields) == 2: # non-terminal + tokens[i] = fields[1] + else: + tokens[i] = "(" + fields[-2] + " " + fields[-1] + ")" + elif tokens[i] == '|': + tokens[i] = '' + + treebank_string = " ".join(tokens) + return Tree.fromstring(treebank_string, remove_empty_top_bracketing=True) + + +# s = re.sub(r'^#[^\s]*\s', '', s) # remove leading identifier +# s = re.sub(r'\w+:', '', s) # remove role tags + +# return s + +###################################################################### +## Demonstration +###################################################################### + + +def demo(): + """ + A demonstration showing how Trees and Trees can be + used. This demonstration creates a Tree, and loads a + Tree from the Treebank corpus, + and shows the results of calling several of their methods. + """ + + from nltk import Tree, ProbabilisticTree + + # Demonstrate tree parsing. + s = '(S (NP (DT the) (NN cat)) (VP (VBD ate) (NP (DT a) (NN cookie))))' + t = Tree.fromstring(s) + print("Convert bracketed string into tree:") + print(t) + print(t.__repr__()) + + print("Display tree properties:") + print(t.label()) # tree's constituent type + print(t[0]) # tree's first child + print(t[1]) # tree's second child + print(t.height()) + print(t.leaves()) + print(t[1]) + print(t[1, 1]) + print(t[1, 1, 0]) + + # Demonstrate tree modification. + the_cat = t[0] + the_cat.insert(1, Tree.fromstring('(JJ big)')) + print("Tree modification:") + print(t) + t[1, 1, 1] = Tree.fromstring('(NN cake)') + print(t) + print() + + # Tree transforms + print("Collapse unary:") + t.collapse_unary() + print(t) + print("Chomsky normal form:") + t.chomsky_normal_form() + print(t) + print() + + # Demonstrate probabilistic trees. + pt = ProbabilisticTree('x', ['y', 'z'], prob=0.5) + print("Probabilistic Tree:") + print(pt) + print() + + # Demonstrate parsing of treebank output format. 
+ t = Tree.fromstring(t.pformat()) + print("Convert tree to bracketed string and back again:") + print(t) + print() + + # Demonstrate LaTeX output + print("LaTeX output:") + print(t.pformat_latex_qtree()) + print() + + # Demonstrate Productions + print("Production output:") + print(t.productions()) + print() + + # Demonstrate tree nodes containing objects other than strings + t.set_label(('test', 3)) + print(t) + + +__all__ = [ + 'ImmutableProbabilisticTree', + 'ImmutableTree', + 'ProbabilisticMixIn', + 'ProbabilisticTree', + 'Tree', + 'bracket_parse', + 'sinica_parse', + 'ParentedTree', + 'MultiParentedTree', + 'ImmutableParentedTree', + 'ImmutableMultiParentedTree', +] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/treeprettyprinter.py b/venv.bak/lib/python3.7/site-packages/nltk/treeprettyprinter.py new file mode 100644 index 0000000..260f431 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/treeprettyprinter.py @@ -0,0 +1,626 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: ASCII visualization of NLTK trees +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Andreas van Cranenburgh +# Peter Ljunglöf +# URL: +# For license information, see LICENSE.TXT + +""" +Pretty-printing of discontinuous trees. +Adapted from the disco-dop project, by Andreas van Cranenburgh. +https://github.com/andreasvc/disco-dop + +Interesting reference (not used for this code): +T. Eschbach et al., Orth. Hypergraph Drawing, Journal of +Graph Algorithms and Applications, 10(2) 141--157 (2006)149. +http://jgaa.info/accepted/2006/EschbachGuentherBecker2006.10.2.pdf +""" + +from __future__ import division, print_function, unicode_literals + +import re +from cgi import escape +from collections import defaultdict +from operator import itemgetter + +from nltk.util import OrderedDict +from nltk.compat import python_2_unicode_compatible +from nltk.tree import Tree + +ANSICOLOR = { + 'black': 30, + 'red': 31, + 'green': 32, + 'yellow': 33, + 'blue': 34, + 'magenta': 35, + 'cyan': 36, + 'white': 37, +} + + +@python_2_unicode_compatible +class TreePrettyPrinter(object): + """ + Pretty-print a tree in text format, either as ASCII or Unicode. + The tree can be a normal tree, or discontinuous. + + ``TreePrettyPrinter(tree, sentence=None, highlight=())`` + creates an object from which different visualizations can be created. + + :param tree: a Tree object. + :param sentence: a list of words (strings). If `sentence` is given, + `tree` must contain integers as leaves, which are taken as indices + in `sentence`. Using this you can display a discontinuous tree. + :param highlight: Optionally, a sequence of Tree objects in `tree` which + should be highlighted. Has the effect of only applying colors to nodes + in this sequence (nodes should be given as Tree objects, terminals as + indices). + + >>> from nltk.tree import Tree + >>> tree = Tree.fromstring('(S (NP Mary) (VP walks))') + >>> print(TreePrettyPrinter(tree).text()) + ... # doctest: +NORMALIZE_WHITESPACE + S + ____|____ + NP VP + | | + Mary walks + """ + + def __init__(self, tree, sentence=None, highlight=()): + if sentence is None: + leaves = tree.leaves() + if ( + leaves + and not any(len(a) == 0 for a in tree.subtrees()) + and all(isinstance(a, int) for a in leaves) + ): + sentence = [str(a) for a in leaves] + else: + # this deals with empty nodes (frontier non-terminals) + # and multiple/mixed terminals under non-terminals. 
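+                # Roughly: the copied tree is rewritten so that every
+                # frontier non-terminal gets a fresh dummy leaf index and
+                # every terminal is replaced by its index into the rebuilt
+                # sentence list, leaving only integer leaves for
+                # nodecoords().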
+ tree = tree.copy(True) + sentence = [] + for a in tree.subtrees(): + if len(a) == 0: + a.append(len(sentence)) + sentence.append(None) + elif any(not isinstance(b, Tree) for b in a): + for n, b in enumerate(a): + if not isinstance(b, Tree): + a[n] = len(sentence) + if type(b) == tuple: + b = '/'.join(b) + sentence.append('%s' % b) + self.nodes, self.coords, self.edges, self.highlight = self.nodecoords( + tree, sentence, highlight + ) + + def __str__(self): + return self.text() + + def __repr__(self): + return '' % len(self.nodes) + + @staticmethod + def nodecoords(tree, sentence, highlight): + """ + Produce coordinates of nodes on a grid. + + Objective: + + - Produce coordinates for a non-overlapping placement of nodes and + horizontal lines. + - Order edges so that crossing edges cross a minimal number of previous + horizontal lines (never vertical lines). + + Approach: + + - bottom up level order traversal (start at terminals) + - at each level, identify nodes which cannot be on the same row + - identify nodes which cannot be in the same column + - place nodes into a grid at (row, column) + - order child-parent edges with crossing edges last + + Coordinates are (row, column); the origin (0, 0) is at the top left; + the root node is on row 0. Coordinates do not consider the size of a + node (which depends on font, &c), so the width of a column of the grid + should be automatically determined by the element with the greatest + width in that column. Alternatively, the integer coordinates could be + converted to coordinates in which the distances between adjacent nodes + are non-uniform. + + Produces tuple (nodes, coords, edges, highlighted) where: + + - nodes[id]: Tree object for the node with this integer id + - coords[id]: (n, m) coordinate where to draw node with id in the grid + - edges[id]: parent id of node with this id (ordered dictionary) + - highlighted: set of ids that should be highlighted + """ + + def findcell(m, matrix, startoflevel, children): + """ + Find vacant row, column index for node ``m``. + Iterate over current rows for this level (try lowest first) + and look for cell between first and last child of this node, + add new row to level if no free row available. 
+ """ + candidates = [a for _, a in children[m]] + minidx, maxidx = min(candidates), max(candidates) + leaves = tree[m].leaves() + center = scale * sum(leaves) // len(leaves) # center of gravity + if minidx < maxidx and not minidx < center < maxidx: + center = sum(candidates) // len(candidates) + if max(candidates) - min(candidates) > 2 * scale: + center -= center % scale # round to unscaled coordinate + if minidx < maxidx and not minidx < center < maxidx: + center += scale + if ids[m] == 0: + startoflevel = len(matrix) + for rowidx in range(startoflevel, len(matrix) + 1): + if rowidx == len(matrix): # need to add a new row + matrix.append( + [ + vertline if a not in (corner, None) else None + for a in matrix[-1] + ] + ) + row = matrix[rowidx] + i = j = center + if len(children[m]) == 1: # place unaries directly above child + return rowidx, next(iter(children[m]))[1] + elif all( + a is None or a == vertline + for a in row[min(candidates) : max(candidates) + 1] + ): + # find free column + for n in range(scale): + i = j = center + n + while j > minidx or i < maxidx: + if i < maxidx and ( + matrix[rowidx][i] is None or i in candidates + ): + return rowidx, i + elif j > minidx and ( + matrix[rowidx][j] is None or j in candidates + ): + return rowidx, j + i += scale + j -= scale + raise ValueError( + 'could not find a free cell for:\n%s\n%s' + 'min=%d; max=%d' % (tree[m], minidx, maxidx, dumpmatrix()) + ) + + def dumpmatrix(): + """Dump matrix contents for debugging purposes.""" + return '\n'.join( + '%2d: %s' % (n, ' '.join(('%2r' % i)[:2] for i in row)) + for n, row in enumerate(matrix) + ) + + leaves = tree.leaves() + if not all(isinstance(n, int) for n in leaves): + raise ValueError('All leaves must be integer indices.') + if len(leaves) != len(set(leaves)): + raise ValueError('Indices must occur at most once.') + if not all(0 <= n < len(sentence) for n in leaves): + raise ValueError( + 'All leaves must be in the interval 0..n ' + 'with n=len(sentence)\ntokens: %d indices: ' + '%r\nsentence: %s' % (len(sentence), tree.leaves(), sentence) + ) + vertline, corner = -1, -2 # constants + tree = tree.copy(True) + for a in tree.subtrees(): + a.sort(key=lambda n: min(n.leaves()) if isinstance(n, Tree) else n) + scale = 2 + crossed = set() + # internal nodes and lexical nodes (no frontiers) + positions = tree.treepositions() + maxdepth = max(map(len, positions)) + 1 + childcols = defaultdict(set) + matrix = [[None] * (len(sentence) * scale)] + nodes = {} + ids = dict((a, n) for n, a in enumerate(positions)) + highlighted_nodes = set( + n for a, n in ids.items() if not highlight or tree[a] in highlight + ) + levels = dict((n, []) for n in range(maxdepth - 1)) + terminals = [] + for a in positions: + node = tree[a] + if isinstance(node, Tree): + levels[maxdepth - node.height()].append(a) + else: + terminals.append(a) + + for n in levels: + levels[n].sort(key=lambda n: max(tree[n].leaves()) - min(tree[n].leaves())) + terminals.sort() + positions = set(positions) + + for m in terminals: + i = int(tree[m]) * scale + assert matrix[0][i] is None, (matrix[0][i], m, i) + matrix[0][i] = ids[m] + nodes[ids[m]] = sentence[tree[m]] + if nodes[ids[m]] is None: + nodes[ids[m]] = '...' + highlighted_nodes.discard(ids[m]) + positions.remove(m) + childcols[m[:-1]].add((0, i)) + + # add other nodes centered on their children, + # if the center is already taken, back off + # to the left and right alternately, until an empty cell is found. 
+ for n in sorted(levels, reverse=True): + nodesatdepth = levels[n] + startoflevel = len(matrix) + matrix.append( + [vertline if a not in (corner, None) else None for a in matrix[-1]] + ) + for m in nodesatdepth: # [::-1]: + if n < maxdepth - 1 and childcols[m]: + _, pivot = min(childcols[m], key=itemgetter(1)) + if set( + a[:-1] + for row in matrix[:-1] + for a in row[:pivot] + if isinstance(a, tuple) + ) & set( + a[:-1] + for row in matrix[:-1] + for a in row[pivot:] + if isinstance(a, tuple) + ): + crossed.add(m) + + rowidx, i = findcell(m, matrix, startoflevel, childcols) + positions.remove(m) + + # block positions where children of this node branch out + for _, x in childcols[m]: + matrix[rowidx][x] = corner + # assert m == () or matrix[rowidx][i] in (None, corner), ( + # matrix[rowidx][i], m, str(tree), ' '.join(sentence)) + # node itself + matrix[rowidx][i] = ids[m] + nodes[ids[m]] = tree[m] + # add column to the set of children for its parent + if m != (): + childcols[m[:-1]].add((rowidx, i)) + assert len(positions) == 0 + + # remove unused columns, right to left + for m in range(scale * len(sentence) - 1, -1, -1): + if not any(isinstance(row[m], (Tree, int)) for row in matrix): + for row in matrix: + del row[m] + + # remove unused rows, reverse + matrix = [ + row + for row in reversed(matrix) + if not all(a is None or a == vertline for a in row) + ] + + # collect coordinates of nodes + coords = {} + for n, _ in enumerate(matrix): + for m, i in enumerate(matrix[n]): + if isinstance(i, int) and i >= 0: + coords[i] = n, m + + # move crossed edges last + positions = sorted( + [a for level in levels.values() for a in level], + key=lambda a: a[:-1] in crossed, + ) + + # collect edges from node to node + edges = OrderedDict() + for i in reversed(positions): + for j, _ in enumerate(tree[i]): + edges[ids[i + (j,)]] = ids[i] + + return nodes, coords, edges, highlighted_nodes + + def text( + self, + nodedist=1, + unicodelines=False, + html=False, + ansi=False, + nodecolor='blue', + leafcolor='red', + funccolor='green', + abbreviate=None, + maxwidth=16, + ): + """ + :return: ASCII art for a discontinuous tree. + + :param unicodelines: whether to use Unicode line drawing characters + instead of plain (7-bit) ASCII. + :param html: whether to wrap output in html code (default plain text). + :param ansi: whether to produce colors with ANSI escape sequences + (only effective when html==False). + :param leafcolor, nodecolor: specify colors of leaves and phrasal + nodes; effective when either html or ansi is True. + :param abbreviate: if True, abbreviate labels longer than 5 characters. + If integer, abbreviate labels longer than `abbr` characters. + :param maxwidth: maximum number of characters before a label starts to + wrap; pass None to disable. + """ + if abbreviate == True: + abbreviate = 5 + if unicodelines: + horzline = '\u2500' + leftcorner = '\u250c' + rightcorner = '\u2510' + vertline = ' \u2502 ' + tee = horzline + '\u252C' + horzline + bottom = horzline + '\u2534' + horzline + cross = horzline + '\u253c' + horzline + ellipsis = '\u2026' + else: + horzline = '_' + leftcorner = rightcorner = ' ' + vertline = ' | ' + tee = 3 * horzline + cross = bottom = '_|_' + ellipsis = '.' 
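To illustrate the rendering options documented above, a small hedged sketch (reusing the assumed import path from the previous example):

from nltk.tree import Tree
from nltk.treeprettyprinter import TreePrettyPrinter  # assumed import path

tpp = TreePrettyPrinter(Tree.fromstring('(S (NP (DT the) (NN cat)) (VP (VBZ sleeps)))'))

# Unicode box-drawing characters with ANSI colours; wrap labels longer than 8 chars.
print(tpp.text(unicodelines=True, ansi=True, maxwidth=8))

# Plain 7-bit ASCII output with extra space between adjacent nodes.
print(tpp.text(nodedist=2))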
+ + def crosscell(cur, x=vertline): + """Overwrite center of this cell with a vertical branch.""" + splitl = len(cur) - len(cur) // 2 - len(x) // 2 - 1 + lst = list(cur) + lst[splitl : splitl + len(x)] = list(x) + return ''.join(lst) + + result = [] + matrix = defaultdict(dict) + maxnodewith = defaultdict(lambda: 3) + maxnodeheight = defaultdict(lambda: 1) + maxcol = 0 + minchildcol = {} + maxchildcol = {} + childcols = defaultdict(set) + labels = {} + wrapre = re.compile( + '(.{%d,%d}\\b\\W*|.{%d})' % (maxwidth - 4, maxwidth, maxwidth) + ) + # collect labels and coordinates + for a in self.nodes: + row, column = self.coords[a] + matrix[row][column] = a + maxcol = max(maxcol, column) + label = ( + self.nodes[a].label() + if isinstance(self.nodes[a], Tree) + else self.nodes[a] + ) + if abbreviate and len(label) > abbreviate: + label = label[:abbreviate] + ellipsis + if maxwidth and len(label) > maxwidth: + label = wrapre.sub(r'\1\n', label).strip() + label = label.split('\n') + maxnodeheight[row] = max(maxnodeheight[row], len(label)) + maxnodewith[column] = max(maxnodewith[column], max(map(len, label))) + labels[a] = label + if a not in self.edges: + continue # e.g., root + parent = self.edges[a] + childcols[parent].add((row, column)) + minchildcol[parent] = min(minchildcol.get(parent, column), column) + maxchildcol[parent] = max(maxchildcol.get(parent, column), column) + # bottom up level order traversal + for row in sorted(matrix, reverse=True): + noderows = [ + [''.center(maxnodewith[col]) for col in range(maxcol + 1)] + for _ in range(maxnodeheight[row]) + ] + branchrow = [''.center(maxnodewith[col]) for col in range(maxcol + 1)] + for col in matrix[row]: + n = matrix[row][col] + node = self.nodes[n] + text = labels[n] + if isinstance(node, Tree): + # draw horizontal branch towards children for this node + if n in minchildcol and minchildcol[n] < maxchildcol[n]: + i, j = minchildcol[n], maxchildcol[n] + a, b = (maxnodewith[i] + 1) // 2 - 1, maxnodewith[j] // 2 + branchrow[i] = ((' ' * a) + leftcorner).ljust( + maxnodewith[i], horzline + ) + branchrow[j] = (rightcorner + (' ' * b)).rjust( + maxnodewith[j], horzline + ) + for i in range(minchildcol[n] + 1, maxchildcol[n]): + if i == col and any(a == i for _, a in childcols[n]): + line = cross + elif i == col: + line = bottom + elif any(a == i for _, a in childcols[n]): + line = tee + else: + line = horzline + branchrow[i] = line.center(maxnodewith[i], horzline) + else: # if n and n in minchildcol: + branchrow[col] = crosscell(branchrow[col]) + text = [a.center(maxnodewith[col]) for a in text] + color = nodecolor if isinstance(node, Tree) else leafcolor + if isinstance(node, Tree) and node.label().startswith('-'): + color = funccolor + if html: + text = [escape(a) for a in text] + if n in self.highlight: + text = ['%s' % (color, a) for a in text] + elif ansi and n in self.highlight: + text = ['\x1b[%d;1m%s\x1b[0m' % (ANSICOLOR[color], a) for a in text] + for x in range(maxnodeheight[row]): + # draw vertical lines in partially filled multiline node + # labels, but only if it's not a frontier node. + noderows[x][col] = ( + text[x] + if x < len(text) + else (vertline if childcols[n] else ' ').center( + maxnodewith[col], ' ' + ) + ) + # for each column, if there is a node below us which has a parent + # above us, draw a vertical branch in that column. 
+ if row != max(matrix): + for n, (childrow, col) in self.coords.items(): + if n > 0 and self.coords[self.edges[n]][0] < row < childrow: + branchrow[col] = crosscell(branchrow[col]) + if col not in matrix[row]: + for noderow in noderows: + noderow[col] = crosscell(noderow[col]) + branchrow = [ + a + ((a[-1] if a[-1] != ' ' else b[0]) * nodedist) + for a, b in zip(branchrow, branchrow[1:] + [' ']) + ] + result.append(''.join(branchrow)) + result.extend( + (' ' * nodedist).join(noderow) for noderow in reversed(noderows) + ) + return '\n'.join(reversed(result)) + '\n' + + def svg(self, nodecolor='blue', leafcolor='red', funccolor='green'): + """ + :return: SVG representation of a tree. + """ + fontsize = 12 + hscale = 40 + vscale = 25 + hstart = vstart = 20 + width = max(col for _, col in self.coords.values()) + height = max(row for row, _ in self.coords.values()) + result = [ + '' + % ( + width * 3, + height * 2.5, + -hstart, + -vstart, + width * hscale + 3 * hstart, + height * vscale + 3 * vstart, + ) + ] + + children = defaultdict(set) + for n in self.nodes: + if n: + children[self.edges[n]].add(n) + + # horizontal branches from nodes to children + for node in self.nodes: + if not children[node]: + continue + y, x = self.coords[node] + x *= hscale + y *= vscale + x += hstart + y += vstart + fontsize // 2 + childx = [self.coords[c][1] for c in children[node]] + xmin = hstart + hscale * min(childx) + xmax = hstart + hscale * max(childx) + result.append( + '\t' % (xmin, y, xmax, y) + ) + result.append( + '\t' % (x, y, x, y - fontsize // 3) + ) + + # vertical branches from children to parents + for child, parent in self.edges.items(): + y, _ = self.coords[parent] + y *= vscale + y += vstart + fontsize // 2 + childy, childx = self.coords[child] + childx *= hscale + childy *= vscale + childx += hstart + childy += vstart - fontsize + result += [ + '\t' % (childx, childy, childx, y + 5), + '\t' % (childx, childy, childx, y), + ] + + # write nodes with coordinates + for n, (row, column) in self.coords.items(): + node = self.nodes[n] + x = column * hscale + hstart + y = row * vscale + vstart + if n in self.highlight: + color = nodecolor if isinstance(node, Tree) else leafcolor + if isinstance(node, Tree) and node.label().startswith('-'): + color = funccolor + else: + color = 'black' + result += [ + '\t%s' + % ( + color, + fontsize, + x, + y, + escape(node.label() if isinstance(node, Tree) else node), + ) + ] + + result += [''] + return '\n'.join(result) + + +def test(): + """Do some tree drawing tests.""" + + def print_tree(n, tree, sentence=None, ansi=True, **xargs): + print() + print('{0}: "{1}"'.format(n, ' '.join(sentence or tree.leaves()))) + print(tree) + print() + drawtree = TreePrettyPrinter(tree, sentence) + try: + print(drawtree.text(unicodelines=ansi, ansi=ansi, **xargs)) + except (UnicodeDecodeError, UnicodeEncodeError): + print(drawtree.text(unicodelines=False, ansi=False, **xargs)) + + from nltk.corpus import treebank + + for n in [0, 1440, 1591, 2771, 2170]: + tree = treebank.parsed_sents()[n] + print_tree(n, tree, nodedist=2, maxwidth=8) + print() + print('ASCII version:') + print(TreePrettyPrinter(tree).text(nodedist=2)) + + tree = Tree.fromstring( + '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) ' + '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) ' + '(vg 10) (inf (verb 11)))))) (punct 12))', + read_leaf=int, + ) + sentence = ( + 'Ze had met haar moeder kunnen gaan winkelen ,' + ' zwemmen of terrassen .'.split() + ) + 
print_tree('Discontinuous tree', tree, sentence, nodedist=2) + + +__all__ = ['TreePrettyPrinter'] + +if __name__ == '__main__': + test() diff --git a/venv.bak/lib/python3.7/site-packages/nltk/treetransforms.py b/venv.bak/lib/python3.7/site-packages/nltk/treetransforms.py new file mode 100644 index 0000000..c64ac70 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/treetransforms.py @@ -0,0 +1,338 @@ +# Natural Language Toolkit: Tree Transformations +# +# Copyright (C) 2005-2007 Oregon Graduate Institute +# Author: Nathan Bodenstab +# URL: +# For license information, see LICENSE.TXT + +""" +A collection of methods for tree (grammar) transformations used +in parsing natural language. + +Although many of these methods are technically grammar transformations +(ie. Chomsky Norm Form), when working with treebanks it is much more +natural to visualize these modifications in a tree structure. Hence, +we will do all transformation directly to the tree itself. +Transforming the tree directly also allows us to do parent annotation. +A grammar can then be simply induced from the modified tree. + +The following is a short tutorial on the available transformations. + + 1. Chomsky Normal Form (binarization) + + It is well known that any grammar has a Chomsky Normal Form (CNF) + equivalent grammar where CNF is defined by every production having + either two non-terminals or one terminal on its right hand side. + When we have hierarchically structured data (ie. a treebank), it is + natural to view this in terms of productions where the root of every + subtree is the head (left hand side) of the production and all of + its children are the right hand side constituents. In order to + convert a tree into CNF, we simply need to ensure that every subtree + has either two subtrees as children (binarization), or one leaf node + (non-terminal). In order to binarize a subtree with more than two + children, we must introduce artificial nodes. + + There are two popular methods to convert a tree into CNF: left + factoring and right factoring. The following example demonstrates + the difference between them. Example:: + + Original Right-Factored Left-Factored + + A A A + / | \ / \ / \ + B C D ==> B A| OR A| D + / \ / \ + C D B C + + 2. Parent Annotation + + In addition to binarizing the tree, there are two standard + modifications to node labels we can do in the same traversal: parent + annotation and Markov order-N smoothing (or sibling smoothing). + + The purpose of parent annotation is to refine the probabilities of + productions by adding a small amount of context. With this simple + addition, a CYK (inside-outside, dynamic programming chart parse) + can improve from 74% to 79% accuracy. A natural generalization from + parent annotation is to grandparent annotation and beyond. The + tradeoff becomes accuracy gain vs. computational complexity. We + must also keep in mind data sparcity issues. Example:: + + Original Parent Annotation + + A A^ + / | \ / \ + B C D ==> B^
    A|^ where ? is the + / \ parent of A + C^ D^ + + + 3. Markov order-N smoothing + + Markov smoothing combats data sparcity issues as well as decreasing + computational requirements by limiting the number of children + included in artificial nodes. In practice, most people use an order + 2 grammar. Example:: + + Original No Smoothing Markov order 1 Markov order 2 etc. + + __A__ A A A + / /|\ \ / \ / \ / \ + B C D E F ==> B A| ==> B A| ==> B A| + / \ / \ / \ + C ... C ... C ... + + + + Annotation decisions can be thought about in the vertical direction + (parent, grandparent, etc) and the horizontal direction (number of + siblings to keep). Parameters to the following functions specify + these values. For more information see: + + Dan Klein and Chris Manning (2003) "Accurate Unlexicalized + Parsing", ACL-03. http://www.aclweb.org/anthology/P03-1054 + + 4. Unary Collapsing + + Collapse unary productions (ie. subtrees with a single child) into a + new non-terminal (Tree node). This is useful when working with + algorithms that do not allow unary productions, yet you do not wish + to lose the parent information. Example:: + + A + | + B ==> A+B + / \ / \ + C D C D + +""" +from __future__ import print_function + +from nltk.tree import Tree + + +def chomsky_normal_form( + tree, factor="right", horzMarkov=None, vertMarkov=0, childChar="|", parentChar="^" +): + # assume all subtrees have homogeneous children + # assume all terminals have no siblings + + # A semi-hack to have elegant looking code below. As a result, + # any subtree with a branching factor greater than 999 will be incorrectly truncated. + if horzMarkov is None: + horzMarkov = 999 + + # Traverse the tree depth-first keeping a list of ancestor nodes to the root. + # I chose not to use the tree.treepositions() method since it requires + # two traversals of the tree (one to get the positions, one to iterate + # over them) and node access time is proportional to the height of the node. + # This method is 7x faster which helps when parsing 40,000 sentences. 
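Before the implementation that follows, a brief usage sketch of the transformations described above, modelled on the demo() at the end of this module (the bracketed tree is an arbitrary example):

from copy import deepcopy
from nltk.tree import Tree
from nltk import treetransforms

t = Tree.fromstring('(S (NP (DT the) (JJ old) (NN cat)) (VP (VBZ sleeps)))')

cnf = deepcopy(t)
treetransforms.collapse_unary(cnf)  # fold unary productions into joined labels
treetransforms.chomsky_normal_form(cnf, horzMarkov=2, vertMarkov=1)  # binarize with Markov order 2 and parent annotation
print(cnf)

restored = deepcopy(cnf)
treetransforms.un_chomsky_normal_form(restored)  # undo binarization and annotation in place
print(restored)  # should match the original tree t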
+ + nodeList = [(tree, [tree.label()])] + while nodeList != []: + node, parent = nodeList.pop() + if isinstance(node, Tree): + + # parent annotation + parentString = "" + originalNode = node.label() + if vertMarkov != 0 and node != tree and isinstance(node[0], Tree): + parentString = "%s<%s>" % (parentChar, "-".join(parent)) + node.set_label(node.label() + parentString) + parent = [originalNode] + parent[: vertMarkov - 1] + + # add children to the agenda before we mess with them + for child in node: + nodeList.append((child, parent)) + + # chomsky normal form factorization + if len(node) > 2: + childNodes = [child.label() for child in node] + nodeCopy = node.copy() + node[0:] = [] # delete the children + + curNode = node + numChildren = len(nodeCopy) + for i in range(1, numChildren - 1): + if factor == "right": + newHead = "%s%s<%s>%s" % ( + originalNode, + childChar, + "-".join( + childNodes[i : min([i + horzMarkov, numChildren])] + ), + parentString, + ) # create new head + newNode = Tree(newHead, []) + curNode[0:] = [nodeCopy.pop(0), newNode] + else: + newHead = "%s%s<%s>%s" % ( + originalNode, + childChar, + "-".join( + childNodes[max([numChildren - i - horzMarkov, 0]) : -i] + ), + parentString, + ) + newNode = Tree(newHead, []) + curNode[0:] = [newNode, nodeCopy.pop()] + + curNode = newNode + + curNode[0:] = [child for child in nodeCopy] + + +def un_chomsky_normal_form( + tree, expandUnary=True, childChar="|", parentChar="^", unaryChar="+" +): + # Traverse the tree-depth first keeping a pointer to the parent for modification purposes. + nodeList = [(tree, [])] + while nodeList != []: + node, parent = nodeList.pop() + if isinstance(node, Tree): + # if the node contains the 'childChar' character it means that + # it is an artificial node and can be removed, although we still need + # to move its children to its parent + childIndex = node.label().find(childChar) + if childIndex != -1: + nodeIndex = parent.index(node) + parent.remove(parent[nodeIndex]) + # Generated node was on the left if the nodeIndex is 0 which + # means the grammar was left factored. We must insert the children + # at the beginning of the parent's children + if nodeIndex == 0: + parent.insert(0, node[0]) + parent.insert(1, node[1]) + else: + parent.extend([node[0], node[1]]) + + # parent is now the current node so the children of parent will be added to the agenda + node = parent + else: + parentIndex = node.label().find(parentChar) + if parentIndex != -1: + # strip the node name of the parent annotation + node.set_label(node.label()[:parentIndex]) + + # expand collapsed unary productions + if expandUnary == True: + unaryIndex = node.label().find(unaryChar) + if unaryIndex != -1: + newNode = Tree( + node.label()[unaryIndex + 1 :], [i for i in node] + ) + node.set_label(node.label()[:unaryIndex]) + node[0:] = [newNode] + + for child in node: + nodeList.append((child, node)) + + +def collapse_unary(tree, collapsePOS=False, collapseRoot=False, joinChar="+"): + """ + Collapse subtrees with a single child (ie. unary productions) + into a new non-terminal (Tree node) joined by 'joinChar'. + This is useful when working with algorithms that do not allow + unary productions, and completely removing the unary productions + would require loss of useful information. The Tree is modified + directly (since it is passed by reference) and no value is returned. + + :param tree: The Tree to be collapsed + :type tree: Tree + :param collapsePOS: 'False' (default) will not collapse the parent of leaf nodes (ie. 
+ Part-of-Speech tags) since they are always unary productions + :type collapsePOS: bool + :param collapseRoot: 'False' (default) will not modify the root production + if it is unary. For the Penn WSJ treebank corpus, this corresponds + to the TOP -> productions. + :type collapseRoot: bool + :param joinChar: A string used to connect collapsed node values (default = "+") + :type joinChar: str + """ + + if collapseRoot == False and isinstance(tree, Tree) and len(tree) == 1: + nodeList = [tree[0]] + else: + nodeList = [tree] + + # depth-first traversal of tree + while nodeList != []: + node = nodeList.pop() + if isinstance(node, Tree): + if ( + len(node) == 1 + and isinstance(node[0], Tree) + and (collapsePOS == True or isinstance(node[0, 0], Tree)) + ): + node.set_label(node.label() + joinChar + node[0].label()) + node[0:] = [child for child in node[0]] + # since we assigned the child's children to the current node, + # evaluate the current node again + nodeList.append(node) + else: + for child in node: + nodeList.append(child) + + +################################################################# +# Demonstration +################################################################# + + +def demo(): + """ + A demonstration showing how each tree transform can be used. + """ + + from nltk.draw.tree import draw_trees + from nltk import tree, treetransforms + from copy import deepcopy + + # original tree from WSJ bracketed text + sentence = """(TOP + (S + (S + (VP + (VBN Turned) + (ADVP (RB loose)) + (PP + (IN in) + (NP + (NP (NNP Shane) (NNP Longman) (POS 's)) + (NN trading) + (NN room))))) + (, ,) + (NP (DT the) (NN yuppie) (NNS dealers)) + (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))) + (. .)))""" + t = tree.Tree.fromstring(sentence, remove_empty_top_bracketing=True) + + # collapse subtrees with only one child + collapsedTree = deepcopy(t) + treetransforms.collapse_unary(collapsedTree) + + # convert the tree to CNF + cnfTree = deepcopy(collapsedTree) + treetransforms.chomsky_normal_form(cnfTree) + + # convert the tree to CNF with parent annotation (one level) and horizontal smoothing of order two + parentTree = deepcopy(collapsedTree) + treetransforms.chomsky_normal_form(parentTree, horzMarkov=2, vertMarkov=1) + + # convert the tree back to its original form (used to make CYK results comparable) + original = deepcopy(parentTree) + treetransforms.un_chomsky_normal_form(original) + + # convert tree back to bracketed text + sentence2 = original.pprint() + print(sentence) + print(sentence2) + print("Sentences the same? ", sentence == sentence2) + + draw_trees(t, collapsedTree, cnfTree, parentTree, original) + + +if __name__ == '__main__': + demo() + +__all__ = ["chomsky_normal_form", "un_chomsky_normal_form", "collapse_unary"] diff --git a/venv.bak/lib/python3.7/site-packages/nltk/twitter/__init__.py b/venv.bak/lib/python3.7/site-packages/nltk/twitter/__init__.py new file mode 100644 index 0000000..2d848e0 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/twitter/__init__.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Twitter +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ewan Klein +# URL: +# For license information, see LICENSE.TXT + +""" +NLTK Twitter Package + +This package contains classes for retrieving Tweet documents using the +Twitter API. + +""" +try: + import twython +except ImportError: + import warnings + + warnings.warn( + "The twython library has not been installed. 
" + "Some functionality from the twitter package will not be available." + ) +else: + from nltk.twitter.util import Authenticate, credsfromfile + from nltk.twitter.twitterclient import ( + Streamer, + Query, + Twitter, + TweetViewer, + TweetWriter, + ) + + +from nltk.twitter.common import json2csv diff --git a/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..fcff3a6 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/api.cpython-37.pyc new file mode 100644 index 0000000..7a4b110 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/api.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/common.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/common.cpython-37.pyc new file mode 100644 index 0000000..a7af780 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/common.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/twitter_demo.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/twitter_demo.cpython-37.pyc new file mode 100644 index 0000000..6faf754 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/twitter_demo.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/twitterclient.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/twitterclient.cpython-37.pyc new file mode 100644 index 0000000..5be1bba Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/twitterclient.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000..62ac728 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/nltk/twitter/__pycache__/util.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/nltk/twitter/api.py b/venv.bak/lib/python3.7/site-packages/nltk/twitter/api.py new file mode 100644 index 0000000..2cce2b7 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/twitter/api.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Twitter API +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ewan Klein +# Lorenzo Rubio +# URL: +# For license information, see LICENSE.TXT + +""" +This module provides an interface for TweetHandlers, and support for timezone +handling. +""" + +import time as _time +from abc import ABCMeta, abstractmethod +from datetime import tzinfo, timedelta, datetime + +from six import add_metaclass + +from nltk.compat import UTC + + +class LocalTimezoneOffsetWithUTC(tzinfo): + """ + This is not intended to be a general purpose class for dealing with the + local timezone. 
In particular: + + * it assumes that the date passed has been created using + `datetime(..., tzinfo=Local)`, where `Local` is an instance of + the object `LocalTimezoneOffsetWithUTC`; + * for such an object, it returns the offset with UTC, used for date comparisons. + + Reference: https://docs.python.org/3/library/datetime.html + """ + + STDOFFSET = timedelta(seconds=-_time.timezone) + + if _time.daylight: + DSTOFFSET = timedelta(seconds=-_time.altzone) + else: + DSTOFFSET = STDOFFSET + + def utcoffset(self, dt): + """ + Access the relevant time offset. + """ + return self.DSTOFFSET + + +LOCAL = LocalTimezoneOffsetWithUTC() + + +@add_metaclass(ABCMeta) +class BasicTweetHandler(object): + """ + Minimal implementation of `TweetHandler`. + + Counts the number of Tweets and decides when the client should stop + fetching them. + """ + + def __init__(self, limit=20): + self.limit = limit + self.counter = 0 + + """ + A flag to indicate to the client whether to stop fetching data given + some condition (e.g., reaching a date limit). + """ + self.do_stop = False + + """ + Stores the id of the last fetched Tweet to handle pagination. + """ + self.max_id = None + + def do_continue(self): + """ + Returns `False` if the client should stop fetching Tweets. + """ + return self.counter < self.limit and not self.do_stop + + +class TweetHandlerI(BasicTweetHandler): + """ + Interface class whose subclasses should implement a handle method that + Twitter clients can delegate to. + """ + + def __init__(self, limit=20, upper_date_limit=None, lower_date_limit=None): + """ + :param int limit: The number of data items to process in the current\ + round of processing. + + :param tuple upper_date_limit: The date at which to stop collecting\ + new data. This should be entered as a tuple which can serve as the\ + argument to `datetime.datetime`.\ + E.g. `date_limit=(2015, 4, 1, 12, 40)` for 12:30 pm on April 1 2015. + + :param tuple lower_date_limit: The date at which to stop collecting\ + new data. See `upper_data_limit` for formatting. + """ + BasicTweetHandler.__init__(self, limit) + + self.upper_date_limit = None + self.lower_date_limit = None + if upper_date_limit: + self.upper_date_limit = datetime(*upper_date_limit, tzinfo=LOCAL) + if lower_date_limit: + self.lower_date_limit = datetime(*lower_date_limit, tzinfo=LOCAL) + + self.startingup = True + + @abstractmethod + def handle(self, data): + """ + Deal appropriately with data returned by the Twitter API + """ + + @abstractmethod + def on_finish(self): + """ + Actions when the tweet limit has been reached + """ + + def check_date_limit(self, data, verbose=False): + """ + Validate date limits. 
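To illustrate how the date-limit tuples described above are used with a handler, a hedged sketch mirroring the limit_by_time_demo in twitter_demo.py further down (credentials are read via credsfromfile):

from nltk.twitter import Query, TweetViewer, credsfromfile

# A date limit is a plain tuple acceptable to datetime.datetime; for a REST
# search (descending dates) it acts as a lower bound on the Tweets kept.
oauth = credsfromfile()
client = Query(**oauth)
client.register(TweetViewer(limit=100, lower_date_limit=(2015, 4, 1, 12, 40)))
for tweet in client.search_tweets(keywords='nltk'):
    client.handler.handle(tweet)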
+ """ + if self.upper_date_limit or self.lower_date_limit: + date_fmt = '%a %b %d %H:%M:%S +0000 %Y' + tweet_date = datetime.strptime(data['created_at'], date_fmt).replace( + tzinfo=UTC + ) + if (self.upper_date_limit and tweet_date > self.upper_date_limit) or ( + self.lower_date_limit and tweet_date < self.lower_date_limit + ): + if self.upper_date_limit: + message = "earlier" + date_limit = self.upper_date_limit + else: + message = "later" + date_limit = self.lower_date_limit + if verbose: + print( + "Date limit {0} is {1} than date of current tweet {2}".format( + date_limit, message, tweet_date + ) + ) + self.do_stop = True diff --git a/venv.bak/lib/python3.7/site-packages/nltk/twitter/common.py b/venv.bak/lib/python3.7/site-packages/nltk/twitter/common.py new file mode 100644 index 0000000..453b0c0 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/twitter/common.py @@ -0,0 +1,279 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Twitter client +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ewan Klein +# Lorenzo Rubio +# URL: +# For license information, see LICENSE.TXT + +""" +Utility functions for the :module:`twitterclient` module which do not require +the `twython` library to have been installed. +""" +from __future__ import print_function + +import csv +import gzip +import json + +from nltk import compat + +HIER_SEPARATOR = "." + + +def extract_fields(tweet, fields): + """ + Extract field values from a full tweet and return them as a list + + :param json tweet: The tweet in JSON format + :param list fields: The fields to be extracted from the tweet + :rtype: list(str) + """ + out = [] + for field in fields: + try: + _add_field_to_out(tweet, field, out) + except TypeError: + raise RuntimeError( + 'Fatal error when extracting fields. Cannot find field ', field + ) + return out + + +def _add_field_to_out(json, field, out): + if _is_composed_key(field): + key, value = _get_key_value_composed(field) + _add_field_to_out(json[key], value, out) + else: + out += [json[field]] + + +def _is_composed_key(field): + if HIER_SEPARATOR in field: + return True + return False + + +def _get_key_value_composed(field): + out = field.split(HIER_SEPARATOR) + # there could be up to 3 levels + key = out[0] + value = HIER_SEPARATOR.join(out[1:]) + return key, value + + +def _get_entity_recursive(json, entity): + if not json: + return None + elif isinstance(json, dict): + for key, value in json.items(): + if key == entity: + return value + # 'entities' and 'extended_entities' are wrappers in Twitter json + # structure that contain other Twitter objects. See: + # https://dev.twitter.com/overview/api/entities-in-twitter-objects + + if key == 'entities' or key == 'extended_entities': + candidate = _get_entity_recursive(value, entity) + if candidate is not None: + return candidate + return None + elif isinstance(json, list): + for item in json: + candidate = _get_entity_recursive(item, entity) + if candidate is not None: + return candidate + return None + else: + return None + + +def json2csv( + fp, outfile, fields, encoding='utf8', errors='replace', gzip_compress=False +): + """ + Extract selected fields from a file of line-separated JSON tweets and + write to a file in CSV format. + + This utility function allows a file of full tweets to be easily converted + to a CSV file for easier processing. For example, just TweetIDs or + just the text content of the Tweets can be extracted. 
+ + Additionally, the function allows combinations of fields of other Twitter + objects (mainly the users, see below). + + For Twitter entities (e.g. hashtags of a Tweet), and for geolocation, see + `json2csv_entities` + + :param str infile: The name of the file containing full tweets + + :param str outfile: The name of the text file where results should be\ + written + + :param list fields: The list of fields to be extracted. Useful examples\ + are 'id_str' for the tweetID and 'text' for the text of the tweet. See\ + for a full list of fields.\ + e. g.: ['id_str'], ['id', 'text', 'favorite_count', 'retweet_count']\ + Additionally, it allows IDs from other Twitter objects, e. g.,\ + ['id', 'text', 'user.id', 'user.followers_count', 'user.friends_count'] + + :param error: Behaviour for encoding errors, see\ + https://docs.python.org/3/library/codecs.html#codec-base-classes + + :param gzip_compress: if `True`, output files are compressed with gzip + """ + (writer, outf) = outf_writer_compat(outfile, encoding, errors, gzip_compress) + # write the list of fields as header + writer.writerow(fields) + # process the file + for line in fp: + tweet = json.loads(line) + row = extract_fields(tweet, fields) + writer.writerow(row) + outf.close() + + +def outf_writer_compat(outfile, encoding, errors, gzip_compress=False): + """ + Identify appropriate CSV writer given the Python version + """ + if compat.PY3: + if gzip_compress: + outf = gzip.open(outfile, 'wt', encoding=encoding, errors=errors) + else: + outf = open(outfile, 'w', encoding=encoding, errors=errors) + writer = csv.writer(outf) + else: + if gzip_compress: + outf = gzip.open(outfile, 'wb') + else: + outf = open(outfile, 'wb') + writer = compat.UnicodeWriter(outf, encoding=encoding, errors=errors) + return (writer, outf) + + +def json2csv_entities( + tweets_file, + outfile, + main_fields, + entity_type, + entity_fields, + encoding='utf8', + errors='replace', + gzip_compress=False, +): + """ + Extract selected fields from a file of line-separated JSON tweets and + write to a file in CSV format. + + This utility function allows a file of full Tweets to be easily converted + to a CSV file for easier processing of Twitter entities. For example, the + hashtags or media elements of a tweet can be extracted. + + It returns one line per entity of a Tweet, e.g. if a tweet has two hashtags + there will be two lines in the output file, one per hashtag + + :param tweets_file: the file-like object containing full Tweets + + :param str outfile: The path of the text file where results should be\ + written + + :param list main_fields: The list of fields to be extracted from the main\ + object, usually the tweet. Useful examples: 'id_str' for the tweetID. See\ + for a full list of fields. + e. g.: ['id_str'], ['id', 'text', 'favorite_count', 'retweet_count'] + If `entity_type` is expressed with hierarchy, then it is the list of\ + fields of the object that corresponds to the key of the entity_type,\ + (e.g., for entity_type='user.urls', the fields in the main_fields list\ + belong to the user object; for entity_type='place.bounding_box', the\ + files in the main_field list belong to the place object of the tweet). + + :param list entity_type: The name of the entity: 'hashtags', 'media',\ + 'urls' and 'user_mentions' for the tweet object. For a user object,\ + this needs to be expressed with a hierarchy: `'user.urls'`. For the\ + bounding box of the Tweet location, use `'place.bounding_box'`. 
+ + :param list entity_fields: The list of fields to be extracted from the\ + entity. E.g. `['text']` (of the Tweet) + + :param error: Behaviour for encoding errors, see\ + https://docs.python.org/3/library/codecs.html#codec-base-classes + + :param gzip_compress: if `True`, ouput files are compressed with gzip + """ + + (writer, outf) = outf_writer_compat(outfile, encoding, errors, gzip_compress) + header = get_header_field_list(main_fields, entity_type, entity_fields) + writer.writerow(header) + for line in tweets_file: + tweet = json.loads(line) + if _is_composed_key(entity_type): + key, value = _get_key_value_composed(entity_type) + object_json = _get_entity_recursive(tweet, key) + if not object_json: + # this can happen in the case of "place" + continue + object_fields = extract_fields(object_json, main_fields) + items = _get_entity_recursive(object_json, value) + _write_to_file(object_fields, items, entity_fields, writer) + else: + tweet_fields = extract_fields(tweet, main_fields) + items = _get_entity_recursive(tweet, entity_type) + _write_to_file(tweet_fields, items, entity_fields, writer) + outf.close() + + +def get_header_field_list(main_fields, entity_type, entity_fields): + if _is_composed_key(entity_type): + key, value = _get_key_value_composed(entity_type) + main_entity = key + sub_entity = value + else: + main_entity = None + sub_entity = entity_type + + if main_entity: + output1 = [HIER_SEPARATOR.join([main_entity, x]) for x in main_fields] + else: + output1 = main_fields + output2 = [HIER_SEPARATOR.join([sub_entity, x]) for x in entity_fields] + return output1 + output2 + + +def _write_to_file(object_fields, items, entity_fields, writer): + if not items: + # it could be that the entity is just not present for the tweet + # e.g. tweet hashtag is always present, even as [], however + # tweet media may not be present + return + if isinstance(items, dict): + # this happens e.g. for "place" of a tweet + row = object_fields + # there might be composed keys in de list of required fields + entity_field_values = [x for x in entity_fields if not _is_composed_key(x)] + entity_field_composed = [x for x in entity_fields if _is_composed_key(x)] + for field in entity_field_values: + value = items[field] + if isinstance(value, list): + row += value + else: + row += [value] + # now check required dictionaries + for d in entity_field_composed: + kd, vd = _get_key_value_composed(d) + json_dict = items[kd] + if not isinstance(json_dict, dict): + raise RuntimeError( + """Key {0} does not contain a dictionary + in the json file""".format( + kd + ) + ) + row += [json_dict[vd]] + writer.writerow(row) + return + # in general it is a list + for item in items: + row = object_fields + extract_fields(item, entity_fields) + writer.writerow(row) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/twitter/twitter_demo.py b/venv.bak/lib/python3.7/site-packages/nltk/twitter/twitter_demo.py new file mode 100644 index 0000000..967728b --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/twitter/twitter_demo.py @@ -0,0 +1,310 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Twitter client +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ewan Klein +# Lorenzo Rubio +# URL: +# For license information, see LICENSE.TXT + +""" +Examples to demo the :py:mod:`twitterclient` code. + +These demo functions should all run, with the following caveats: + +* You must have obtained API keys from Twitter, and installed them according to + the instructions in the `twitter HOWTO `_. 
+ +* If you are on a slow network, some of the calls to the Twitter API may + timeout. + +* If you are being rate limited while searching, you will receive a 420 + error response. + +* Your terminal window / console must be able to display UTF-8 encoded characters. + +For documentation about the Twitter APIs, see `The Streaming APIs Overview +`_ and `The REST APIs Overview +`_. + +For error codes see Twitter's +`Error Codes and Responses ` +""" +from __future__ import print_function + +import datetime +from functools import wraps +import json + +from nltk.compat import StringIO + +from nltk.twitter import ( + Query, + Streamer, + Twitter, + TweetViewer, + TweetWriter, + credsfromfile, +) + + +SPACER = '###################################' + + +def verbose(func): + """Decorator for demo functions""" + + @wraps(func) + def with_formatting(*args, **kwargs): + print() + print(SPACER) + print("Using %s" % (func.__name__)) + print(SPACER) + return func(*args, **kwargs) + + return with_formatting + + +def yesterday(): + """ + Get yesterday's datetime as a 5-tuple. + """ + date = datetime.datetime.now() + date -= datetime.timedelta(days=1) + date_tuple = date.timetuple()[:6] + return date_tuple + + +def setup(): + """ + Initialize global variables for the demos. + """ + global USERIDS, FIELDS + + USERIDS = ['759251', '612473', '15108702', '6017542', '2673523800'] + # UserIDs corresponding to\ + # @CNN, @BBCNews, @ReutersLive, @BreakingNews, @AJELive + FIELDS = ['id_str'] + + +@verbose +def twitterclass_demo(): + """ + Use the simplified :class:`Twitter` class to write some tweets to a file. + """ + tw = Twitter() + print("Track from the public stream\n") + tw.tweets(keywords='love, hate', limit=10) # public stream + print(SPACER) + print("Search past Tweets\n") + tw = Twitter() + tw.tweets(keywords='love, hate', stream=False, limit=10) # search past tweets + print(SPACER) + print( + "Follow two accounts in the public stream" + + " -- be prepared to wait a few minutes\n" + ) + tw = Twitter() + tw.tweets(follow=['759251', '6017542'], stream=True, limit=5) # public stream + + +@verbose +def sampletoscreen_demo(limit=20): + """ + Sample from the Streaming API and send output to terminal. + """ + oauth = credsfromfile() + client = Streamer(**oauth) + client.register(TweetViewer(limit=limit)) + client.sample() + + +@verbose +def tracktoscreen_demo(track="taylor swift", limit=10): + """ + Track keywords from the public Streaming API and send output to terminal. + """ + oauth = credsfromfile() + client = Streamer(**oauth) + client.register(TweetViewer(limit=limit)) + client.filter(track=track) + + +@verbose +def search_demo(keywords='nltk'): + """ + Use the REST API to search for past tweets containing a given keyword. + """ + oauth = credsfromfile() + client = Query(**oauth) + for tweet in client.search_tweets(keywords=keywords, limit=10): + print(tweet['text']) + + +@verbose +def tweets_by_user_demo(user='NLTK_org', count=200): + """ + Use the REST API to search for past tweets by a given user. + """ + oauth = credsfromfile() + client = Query(**oauth) + client.register(TweetWriter()) + client.user_tweets(user, count) + + +@verbose +def lookup_by_userid_demo(): + """ + Use the REST API to convert a userID to a screen name. 
+ """ + oauth = credsfromfile() + client = Query(**oauth) + user_info = client.user_info_from_id(USERIDS) + for info in user_info: + name = info['screen_name'] + followers = info['followers_count'] + following = info['friends_count'] + print("{0}, followers: {1}, following: {2}".format(name, followers, following)) + + +@verbose +def followtoscreen_demo(limit=10): + """ + Using the Streaming API, select just the tweets from a specified list of + userIDs. + + This is will only give results in a reasonable time if the users in + question produce a high volume of tweets, and may even so show some delay. + """ + oauth = credsfromfile() + client = Streamer(**oauth) + client.register(TweetViewer(limit=limit)) + client.statuses.filter(follow=USERIDS) + + +@verbose +def streamtofile_demo(limit=20): + """ + Write 20 tweets sampled from the public Streaming API to a file. + """ + oauth = credsfromfile() + client = Streamer(**oauth) + client.register(TweetWriter(limit=limit, repeat=False)) + client.statuses.sample() + + +@verbose +def limit_by_time_demo(keywords="nltk"): + """ + Query the REST API for Tweets about NLTK since yesterday and send + the output to terminal. + + This example makes the assumption that there are sufficient Tweets since + yesterday for the date to be an effective cut-off. + """ + date = yesterday() + dt_date = datetime.datetime(*date) + oauth = credsfromfile() + client = Query(**oauth) + client.register(TweetViewer(limit=100, lower_date_limit=date)) + + print("Cutoff date: {}\n".format(dt_date)) + + for tweet in client.search_tweets(keywords=keywords): + print("{} ".format(tweet['created_at']), end='') + client.handler.handle(tweet) + + +@verbose +def corpusreader_demo(): + """ + Use :module:`TwitterCorpusReader` tp read a file of tweets, and print out + + * some full tweets in JSON format; + * some raw strings from the tweets (i.e., the value of the `text` field); and + * the result of tokenising the raw strings. + + """ + from nltk.corpus import twitter_samples as tweets + + print() + print("Complete tweet documents") + print(SPACER) + for tweet in tweets.docs("tweets.20150430-223406.json")[:1]: + print(json.dumps(tweet, indent=1, sort_keys=True)) + + print() + print("Raw tweet strings:") + print(SPACER) + for text in tweets.strings("tweets.20150430-223406.json")[:15]: + print(text) + + print() + print("Tokenized tweet strings:") + print(SPACER) + for toks in tweets.tokenized("tweets.20150430-223406.json")[:15]: + print(toks) + + +@verbose +def expand_tweetids_demo(): + """ + Given a file object containing a list of Tweet IDs, fetch the + corresponding full Tweets, if available. + + """ + ids_f = StringIO( + """\ + 588665495492124672 + 588665495487909888 + 588665495508766721 + 588665495513006080 + 588665495517200384 + 588665495487811584 + 588665495525588992 + 588665495487844352 + 588665495492014081 + 588665495512948737""" + ) + oauth = credsfromfile() + client = Query(**oauth) + hydrated = client.expand_tweetids(ids_f) + + for tweet in hydrated: + id_str = tweet['id_str'] + print('id: {}'.format(id_str)) + text = tweet['text'] + if text.startswith('@null'): + text = "[Tweet not available]" + print(text + '\n') + + +ALL = [ + twitterclass_demo, + sampletoscreen_demo, + tracktoscreen_demo, + search_demo, + tweets_by_user_demo, + lookup_by_userid_demo, + followtoscreen_demo, + streamtofile_demo, + limit_by_time_demo, + corpusreader_demo, + expand_tweetids_demo, +] + +""" +Select demo functions to run. E.g. 
replace the following line with "DEMOS = +ALL[8:]" to execute only the final three demos. +""" +DEMOS = ALL[:] + +if __name__ == "__main__": + setup() + + for demo in DEMOS: + demo() + + print("\n" + SPACER) + print("All demos completed") + print(SPACER) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/twitter/twitterclient.py b/venv.bak/lib/python3.7/site-packages/nltk/twitter/twitterclient.py new file mode 100644 index 0000000..6083fa2 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/twitter/twitterclient.py @@ -0,0 +1,558 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Twitter client +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ewan Klein +# Lorenzo Rubio +# URL: +# For license information, see LICENSE.TXT + + +""" +NLTK Twitter client + +This module offers methods for collecting and processing Tweets. Most of the +functionality depends on access to the Twitter APIs, and this is handled via +the third party Twython library. + +If one of the methods below returns an integer, it is probably a `Twitter +error code `_. For +example, the response of '420' means that you have reached the limit of the +requests you can currently make to the Twitter API. Currently, `rate limits +for the search API `_ are +divided into 15 minute windows. +""" + +import datetime +import itertools +import json +import os +import time +import gzip + +import requests + +from twython import Twython, TwythonStreamer +from twython.exceptions import TwythonRateLimitError, TwythonError + +from nltk.twitter.util import credsfromfile, guess_path +from nltk.twitter.api import TweetHandlerI, BasicTweetHandler + + +class Streamer(TwythonStreamer): + """ + Retrieve data from the Twitter Streaming API. + + The streaming API requires + `OAuth 1.0 `_ authentication. + """ + + def __init__(self, app_key, app_secret, oauth_token, oauth_token_secret): + + self.handler = None + self.do_continue = True + TwythonStreamer.__init__( + self, app_key, app_secret, oauth_token, oauth_token_secret + ) + + def register(self, handler): + """ + Register a method for handling Tweets. + + :param TweetHandlerI handler: method for viewing + """ + self.handler = handler + + def on_success(self, data): + """ + :param data: response from Twitter API + """ + if self.do_continue: + if self.handler is not None: + if 'text' in data: + self.handler.counter += 1 + self.handler.handle(data) + self.do_continue = self.handler.do_continue() + else: + raise ValueError("No data handler has been registered.") + else: + self.disconnect() + self.handler.on_finish() + + def on_error(self, status_code, data): + """ + :param status_code: The status code returned by the Twitter API + :param data: The response from Twitter API + + """ + print(status_code) + + def sample(self): + """ + Wrapper for 'statuses / sample' API call + """ + while self.do_continue: + + # Stream in an endless loop until limit is reached. 
See twython + # issue 288: https://github.com/ryanmcgrath/twython/issues/288 + # colditzjb commented on 9 Dec 2014 + + try: + self.statuses.sample() + except requests.exceptions.ChunkedEncodingError as e: + if e is not None: + print("Error (stream will continue): {0}".format(e)) + continue + + def filter(self, track='', follow='', lang='en'): + """ + Wrapper for 'statuses / filter' API call + """ + while self.do_continue: + # Stream in an endless loop until limit is reached + + try: + if track == '' and follow == '': + msg = "Please supply a value for 'track', 'follow'" + raise ValueError(msg) + self.statuses.filter(track=track, follow=follow, lang=lang) + except requests.exceptions.ChunkedEncodingError as e: + if e is not None: + print("Error (stream will continue): {0}".format(e)) + continue + + +class Query(Twython): + """ + Retrieve data from the Twitter REST API. + """ + + def __init__(self, app_key, app_secret, oauth_token, oauth_token_secret): + self.handler = None + self.do_continue = True + Twython.__init__(self, app_key, app_secret, oauth_token, oauth_token_secret) + + def register(self, handler): + """ + Register a method for handling Tweets. + + :param TweetHandlerI handler: method for viewing or writing Tweets to a file. + """ + self.handler = handler + + def expand_tweetids(self, ids_f, verbose=True): + """ + Given a file object containing a list of Tweet IDs, fetch the + corresponding full Tweets from the Twitter API. + + The API call `statuses/lookup` will fail to retrieve a Tweet if the + user has deleted it. + + This call to the Twitter API is rate-limited. See + for details. + + :param ids_f: input file object consisting of Tweet IDs, one to a line + :return: iterable of Tweet objects in JSON format + """ + ids = [line.strip() for line in ids_f if line] + + if verbose: + print("Counted {0} Tweet IDs in {1}.".format(len(ids), ids_f)) + + # The Twitter endpoint takes lists of up to 100 ids, so we chunk the + # ids. + id_chunks = [ids[i : i + 100] for i in range(0, len(ids), 100)] + + chunked_tweets = (self.lookup_status(id=chunk) for chunk in id_chunks) + + return itertools.chain.from_iterable(chunked_tweets) + + def _search_tweets(self, keywords, limit=100, lang='en'): + """ + Assumes that the handler has been informed. Fetches Tweets from + search_tweets generator output and passses them to handler + + :param str keywords: A list of query terms to search for, written as\ + a comma-separated string. + :param int limit: Number of Tweets to process + :param str lang: language + """ + while True: + tweets = self.search_tweets( + keywords=keywords, limit=limit, lang=lang, max_id=self.handler.max_id + ) + for tweet in tweets: + self.handler.handle(tweet) + if not (self.handler.do_continue() and self.handler.repeat): + break + self.handler.on_finish() + + def search_tweets( + self, + keywords, + limit=100, + lang='en', + max_id=None, + retries_after_twython_exception=0, + ): + """ + Call the REST API ``'search/tweets'`` endpoint with some plausible + defaults. See `the Twitter search documentation + `_ for more information + about admissible search parameters. 
+ + :param str keywords: A list of query terms to search for, written as\ + a comma-separated string + :param int limit: Number of Tweets to process + :param str lang: language + :param int max_id: id of the last tweet fetched + :param int retries_after_twython_exception: number of retries when\ + searching Tweets before raising an exception + :rtype: python generator + """ + if not self.handler: + # if no handler is provided, `BasicTweetHandler` provides minimum + # functionality for limiting the number of Tweets retrieved + self.handler = BasicTweetHandler(limit=limit) + + count_from_query = 0 + if max_id: + self.handler.max_id = max_id + else: + results = self.search( + q=keywords, count=min(100, limit), lang=lang, result_type='recent' + ) + count = len(results['statuses']) + if count == 0: + print("No Tweets available through REST API for those keywords") + return + count_from_query = count + self.handler.max_id = results['statuses'][count - 1]['id'] - 1 + + for result in results['statuses']: + yield result + self.handler.counter += 1 + if self.handler.do_continue() == False: + return + + # Pagination loop: keep fetching Tweets until the desired count is + # reached while dealing with Twitter rate limits. + retries = 0 + while count_from_query < limit: + try: + mcount = min(100, limit - count_from_query) + results = self.search( + q=keywords, + count=mcount, + lang=lang, + max_id=self.handler.max_id, + result_type='recent', + ) + except TwythonRateLimitError as e: + print("Waiting for 15 minutes -{0}".format(e)) + time.sleep(15 * 60) # wait 15 minutes + continue + except TwythonError as e: + print("Fatal error in Twython request -{0}".format(e)) + if retries_after_twython_exception == retries: + raise e + retries += 1 + + count = len(results['statuses']) + if count == 0: + print("No more Tweets available through rest api") + return + count_from_query += count + # the max_id is also present in the Tweet metadata + # results['search_metadata']['next_results'], but as part of a + # query and difficult to fetch. This is doing the equivalent + # (last tweet id minus one) + self.handler.max_id = results['statuses'][count - 1]['id'] - 1 + + for result in results['statuses']: + yield result + self.handler.counter += 1 + if self.handler.do_continue() == False: + return + + def user_info_from_id(self, userids): + """ + Convert a list of userIDs into a variety of information about the users. + + See . + + :param list userids: A list of integer strings corresponding to Twitter userIDs + :rtype: list(json) + """ + return [self.show_user(user_id=userid) for userid in userids] + + def user_tweets(self, screen_name, limit, include_rts='false'): + """ + Return a collection of the most recent Tweets posted by the user + + :param str user: The user's screen name; the initial '@' symbol\ + should be omitted + :param int limit: The number of Tweets to recover; 200 is the maximum allowed + :param str include_rts: Whether to include statuses which have been\ + retweeted by the user; possible values are 'true' and 'false' + """ + data = self.get_user_timeline( + screen_name=screen_name, count=limit, include_rts=include_rts + ) + for item in data: + self.handler.handle(item) + + +class Twitter(object): + """ + Wrapper class with restricted functionality and fewer options. 
+ """ + + def __init__(self): + self._oauth = credsfromfile() + self.streamer = Streamer(**self._oauth) + self.query = Query(**self._oauth) + + def tweets( + self, + keywords='', + follow='', + to_screen=True, + stream=True, + limit=100, + date_limit=None, + lang='en', + repeat=False, + gzip_compress=False, + ): + """ + Process some Tweets in a simple manner. + + :param str keywords: Keywords to use for searching or filtering + :param list follow: UserIDs to use for filtering Tweets from the public stream + :param bool to_screen: If `True`, display the tweet texts on the screen,\ + otherwise print to a file + + :param bool stream: If `True`, use the live public stream,\ + otherwise search past public Tweets + + :param int limit: The number of data items to process in the current\ + round of processing. + + :param tuple date_limit: The date at which to stop collecting\ + new data. This should be entered as a tuple which can serve as the\ + argument to `datetime.datetime`.\ + E.g. `date_limit=(2015, 4, 1, 12, 40)` for 12:30 pm on April 1 2015. + Note that, in the case of streaming, this is the maximum date, i.e.\ + a date in the future; if not, it is the minimum date, i.e. a date\ + in the past + + :param str lang: language + + :param bool repeat: A flag to determine whether multiple files should\ + be written. If `True`, the length of each file will be set by the\ + value of `limit`. Use only if `to_screen` is `False`. See also + :py:func:`handle`. + + :param gzip_compress: if `True`, output files are compressed with gzip. + """ + if stream: + upper_date_limit = date_limit + lower_date_limit = None + else: + upper_date_limit = None + lower_date_limit = date_limit + + if to_screen: + handler = TweetViewer( + limit=limit, + upper_date_limit=upper_date_limit, + lower_date_limit=lower_date_limit, + ) + else: + handler = TweetWriter( + limit=limit, + upper_date_limit=upper_date_limit, + lower_date_limit=lower_date_limit, + repeat=repeat, + gzip_compress=gzip_compress, + ) + + if to_screen: + handler = TweetViewer(limit=limit) + else: + if stream: + upper_date_limit = date_limit + lower_date_limit = None + else: + upper_date_limit = None + lower_date_limit = date_limit + + handler = TweetWriter( + limit=limit, + upper_date_limit=upper_date_limit, + lower_date_limit=lower_date_limit, + repeat=repeat, + gzip_compress=gzip_compress, + ) + + if stream: + self.streamer.register(handler) + if keywords == '' and follow == '': + self.streamer.sample() + else: + self.streamer.filter(track=keywords, follow=follow, lang=lang) + else: + self.query.register(handler) + if keywords == '': + raise ValueError("Please supply at least one keyword to search for.") + else: + self.query._search_tweets(keywords, limit=limit, lang=lang) + + +class TweetViewer(TweetHandlerI): + """ + Handle data by sending it to the terminal. + """ + + def handle(self, data): + """ + Direct data to `sys.stdout` + + :return: return ``False`` if processing should cease, otherwise return ``True``. + :rtype: bool + :param data: Tweet object returned by Twitter API + """ + text = data['text'] + print(text) + + self.check_date_limit(data) + if self.do_stop: + return + + def on_finish(self): + print('Written {0} Tweets'.format(self.counter)) + + +class TweetWriter(TweetHandlerI): + """ + Handle data by writing it to a file. 
+ """ + + def __init__( + self, + limit=2000, + upper_date_limit=None, + lower_date_limit=None, + fprefix='tweets', + subdir='twitter-files', + repeat=False, + gzip_compress=False, + ): + """ + The difference between the upper and lower date limits depends on + whether Tweets are coming in an ascending date order (i.e. when + streaming) or descending date order (i.e. when searching past Tweets). + + :param int limit: number of data items to process in the current\ + round of processing. + + :param tuple upper_date_limit: The date at which to stop collecting new\ + data. This should be entered as a tuple which can serve as the\ + argument to `datetime.datetime`. E.g. `upper_date_limit=(2015, 4, 1, 12,\ + 40)` for 12:30 pm on April 1 2015. + + :param tuple lower_date_limit: The date at which to stop collecting new\ + data. See `upper_data_limit` for formatting. + + :param str fprefix: The prefix to use in creating file names for Tweet\ + collections. + + :param str subdir: The name of the directory where Tweet collection\ + files should be stored. + + :param bool repeat: flag to determine whether multiple files should be\ + written. If `True`, the length of each file will be set by the value\ + of `limit`. See also :py:func:`handle`. + + :param gzip_compress: if `True`, ouput files are compressed with gzip. + """ + self.fprefix = fprefix + self.subdir = guess_path(subdir) + self.gzip_compress = gzip_compress + self.fname = self.timestamped_file() + self.repeat = repeat + self.output = None + TweetHandlerI.__init__(self, limit, upper_date_limit, lower_date_limit) + + def timestamped_file(self): + """ + :return: timestamped file name + :rtype: str + """ + subdir = self.subdir + fprefix = self.fprefix + if subdir: + if not os.path.exists(subdir): + os.mkdir(subdir) + + fname = os.path.join(subdir, fprefix) + fmt = '%Y%m%d-%H%M%S' + timestamp = datetime.datetime.now().strftime(fmt) + if self.gzip_compress: + suffix = '.gz' + else: + suffix = '' + outfile = '{0}.{1}.json{2}'.format(fname, timestamp, suffix) + return outfile + + def handle(self, data): + """ + Write Twitter data as line-delimited JSON into one or more files. + + :return: return `False` if processing should cease, otherwise return `True`. + :param data: tweet object returned by Twitter API + """ + if self.startingup: + if self.gzip_compress: + self.output = gzip.open(self.fname, 'w') + else: + self.output = open(self.fname, 'w') + print('Writing to {0}'.format(self.fname)) + + json_data = json.dumps(data) + if self.gzip_compress: + self.output.write((json_data + "\n").encode('utf-8')) + else: + self.output.write(json_data + "\n") + + self.check_date_limit(data) + if self.do_stop: + return + + self.startingup = False + + def on_finish(self): + print('Written {0} Tweets'.format(self.counter)) + if self.output: + self.output.close() + + def do_continue(self): + if self.repeat == False: + return TweetHandlerI.do_continue(self) + + if self.do_stop: + # stop for a functional cause (e.g. 
date limit) + return False + + if self.counter == self.limit: + # repeat is True, thus close output file and + # create a new one + self._restart_file() + return True + + def _restart_file(self): + self.on_finish() + self.fname = self.timestamped_file() + self.startingup = True + self.counter = 0 diff --git a/venv.bak/lib/python3.7/site-packages/nltk/twitter/util.py b/venv.bak/lib/python3.7/site-packages/nltk/twitter/util.py new file mode 100644 index 0000000..888ed75 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/twitter/util.py @@ -0,0 +1,152 @@ +# -*- coding: utf-8 -*- +# Natural Language Toolkit: Twitter client +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Ewan Klein +# Lorenzo Rubio +# URL: +# For license information, see LICENSE.TXT + +""" +Authentication utilities to accompany :module:`twitterclient`. +""" + +from __future__ import print_function + +import os +import pprint +from twython import Twython + + +def credsfromfile(creds_file=None, subdir=None, verbose=False): + """ + Convenience function for authentication + """ + return Authenticate().load_creds( + creds_file=creds_file, subdir=subdir, verbose=verbose + ) + + +class Authenticate(object): + """ + Methods for authenticating with Twitter. + """ + + def __init__(self): + self.creds_file = 'credentials.txt' + self.creds_fullpath = None + + self.oauth = {} + try: + self.twitter_dir = os.environ['TWITTER'] + self.creds_subdir = self.twitter_dir + except KeyError: + self.twitter_dir = None + self.creds_subdir = None + + def load_creds(self, creds_file=None, subdir=None, verbose=False): + """ + Read OAuth credentials from a text file. + + :: + File format for OAuth 1 + ======================= + app_key=YOUR_APP_KEY + app_secret=YOUR_APP_SECRET + oauth_token=OAUTH_TOKEN + oauth_token_secret=OAUTH_TOKEN_SECRET + + + :: + File format for OAuth 2 + ======================= + + app_key=YOUR_APP_KEY + app_secret=YOUR_APP_SECRET + access_token=ACCESS_TOKEN + + :param str file_name: File containing credentials. ``None`` (default) reads\ + data from `TWITTER/'credentials.txt'` + """ + if creds_file is not None: + self.creds_file = creds_file + + if subdir is None: + if self.creds_subdir is None: + msg = ( + "Supply a value to the 'subdir' parameter or" + + " set the TWITTER environment variable." 
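# A self-contained sketch of the key=value parsing that load_creds() performs on
# the credentials file format documented above, run against an in-memory file.
# The values are placeholders, not real credentials.
from io import StringIO

example = StringIO(
    "app_key=YOUR_APP_KEY\n"
    "app_secret=YOUR_APP_SECRET\n"
    "access_token=ACCESS_TOKEN\n"
)
oauth = {}
for line in example:
    if '=' in line:
        name, value = line.split('=', 1)
        oauth[name.strip()] = value.strip()
print(oauth)   # {'app_key': 'YOUR_APP_KEY', 'app_secret': 'YOUR_APP_SECRET', 'access_token': 'ACCESS_TOKEN'}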
+ ) + raise ValueError(msg) + else: + self.creds_subdir = subdir + + self.creds_fullpath = os.path.normpath( + os.path.join(self.creds_subdir, self.creds_file) + ) + + if not os.path.isfile(self.creds_fullpath): + raise OSError('Cannot find file {}'.format(self.creds_fullpath)) + + with open(self.creds_fullpath) as infile: + if verbose: + print('Reading credentials file {}'.format(self.creds_fullpath)) + + for line in infile: + if '=' in line: + name, value = line.split('=', 1) + self.oauth[name.strip()] = value.strip() + + self._validate_creds_file(verbose=verbose) + + return self.oauth + + def _validate_creds_file(self, verbose=False): + """Check validity of a credentials file.""" + oauth1 = False + oauth1_keys = ['app_key', 'app_secret', 'oauth_token', 'oauth_token_secret'] + oauth2 = False + oauth2_keys = ['app_key', 'app_secret', 'access_token'] + if all(k in self.oauth for k in oauth1_keys): + oauth1 = True + elif all(k in self.oauth for k in oauth2_keys): + oauth2 = True + + if not (oauth1 or oauth2): + msg = 'Missing or incorrect entries in {}\n'.format(self.creds_file) + msg += pprint.pformat(self.oauth) + raise ValueError(msg) + elif verbose: + print('Credentials file "{}" looks good'.format(self.creds_file)) + + +def add_access_token(creds_file=None): + """ + For OAuth 2, retrieve an access token for an app and append it to a + credentials file. + """ + if creds_file is None: + path = os.path.dirname(__file__) + creds_file = os.path.join(path, 'credentials2.txt') + oauth2 = credsfromfile(creds_file=creds_file) + app_key = oauth2['app_key'] + app_secret = oauth2['app_secret'] + + twitter = Twython(app_key, app_secret, oauth_version=2) + access_token = twitter.obtain_access_token() + tok = 'access_token={}\n'.format(access_token) + with open(creds_file, 'a') as infile: + print(tok, file=infile) + + +def guess_path(pth): + """ + If the path is not absolute, guess that it is a subdirectory of the + user's home directory. + + :param str pth: The pathname of the directory where files of tweets should be written + """ + if os.path.isabs(pth): + return pth + else: + return os.path.expanduser(os.path.join("~", pth)) diff --git a/venv.bak/lib/python3.7/site-packages/nltk/util.py b/venv.bak/lib/python3.7/site-packages/nltk/util.py new file mode 100644 index 0000000..b4c5b00 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/util.py @@ -0,0 +1,827 @@ +# Natural Language Toolkit: Utility functions +# +# Copyright (C) 2001-2019 NLTK Project +# Author: Steven Bird +# URL: +# For license information, see LICENSE.TXT +from __future__ import print_function + +import sys +import inspect +import locale +import re +import types +import textwrap +import pydoc +import bisect +import os + +from itertools import islice, chain, combinations +from pprint import pprint +from collections import defaultdict, deque +from sys import version_info + +from six import class_types, string_types, text_type +from six.moves.urllib.request import ( + build_opener, + install_opener, + getproxies, + ProxyHandler, + ProxyBasicAuthHandler, + ProxyDigestAuthHandler, + HTTPPasswordMgrWithDefaultRealm, +) + +from nltk.internals import slice_bounds, raise_unorderable_types +from nltk.collections import * +from nltk.compat import python_2_unicode_compatible + + +###################################################################### +# Short usage message +###################################################################### + + +def usage(obj, selfname='self'): + str(obj) # In case it's lazy, this will load it. 
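# A quick illustration of the guess_path() logic above: relative paths are
# interpreted as subdirectories of the user's home directory, while absolute
# paths are returned unchanged. guess_path_sketch is a local copy for the demo,
# not part of the NLTK module.
import os

def guess_path_sketch(pth):
    if os.path.isabs(pth):
        return pth
    return os.path.expanduser(os.path.join("~", pth))

print(guess_path_sketch("twitter-files"))   # e.g. /Users/<you>/twitter-files
print(guess_path_sketch("/tmp/tweets"))     # /tmp/tweets (unchanged)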
+ + if not isinstance(obj, class_types): + obj = obj.__class__ + + print('%s supports the following operations:' % obj.__name__) + for (name, method) in sorted(pydoc.allmethods(obj).items()): + if name.startswith('_'): + continue + if getattr(method, '__deprecated__', False): + continue + + if sys.version_info[0] >= 3: + getargspec = inspect.getfullargspec + else: + getargspec = inspect.getargspec + args, varargs, varkw, defaults = getargspec(method)[:4] + if ( + args + and args[0] == 'self' + and (defaults is None or len(args) > len(defaults)) + ): + args = args[1:] + name = '%s.%s' % (selfname, name) + argspec = inspect.formatargspec(args, varargs, varkw, defaults) + print( + textwrap.fill( + '%s%s' % (name, argspec), + initial_indent=' - ', + subsequent_indent=' ' * (len(name) + 5), + ) + ) + + +########################################################################## +# IDLE +########################################################################## + + +def in_idle(): + """ + Return True if this function is run within idle. Tkinter + programs that are run in idle should never call ``Tk.mainloop``; so + this function should be used to gate all calls to ``Tk.mainloop``. + + :warning: This function works by checking ``sys.stdin``. If the + user has modified ``sys.stdin``, then it may return incorrect + results. + :rtype: bool + """ + import sys + + return sys.stdin.__class__.__name__ in ('PyShell', 'RPCProxy') + + +########################################################################## +# PRETTY PRINTING +########################################################################## + + +def pr(data, start=0, end=None): + """ + Pretty print a sequence of data items + + :param data: the data stream to print + :type data: sequence or iter + :param start: the start position + :type start: int + :param end: the end position + :type end: int + """ + pprint(list(islice(data, start, end))) + + +def print_string(s, width=70): + """ + Pretty print a string, breaking lines on whitespace + + :param s: the string to print, consisting of words and spaces + :type s: str + :param width: the display width + :type width: int + """ + print('\n'.join(textwrap.wrap(s, width=width))) + + +def tokenwrap(tokens, separator=" ", width=70): + """ + Pretty print a list of text tokens, breaking lines on whitespace + + :param tokens: the tokens to print + :type tokens: list + :param separator: the string to use to separate tokens + :type separator: str + :param width: the display width (default=70) + :type width: int + """ + return '\n'.join(textwrap.wrap(separator.join(tokens), width=width)) + + +########################################################################## +# Python version +########################################################################## + + +def py25(): + return version_info[0] == 2 and version_info[1] == 5 + + +def py26(): + return version_info[0] == 2 and version_info[1] == 6 + + +def py27(): + return version_info[0] == 2 and version_info[1] == 7 + + +########################################################################## +# Indexing +########################################################################## + + +class Index(defaultdict): + def __init__(self, pairs): + defaultdict.__init__(self, list) + for key, value in pairs: + self[key].append(value) + + +###################################################################### +## Regexp display (thanks to David Mertz) +###################################################################### + + +def re_show(regexp, string, left="{", 
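# A short usage sketch for the Index helper defined above: it behaves like a
# defaultdict(list) built from (key, value) pairs, grouping values by key.
# Assumes nltk is installed so the class can be imported.
from nltk.util import Index

idx = Index([('vowel', 'a'), ('vowel', 'e'), ('consonant', 'b')])
print(idx['vowel'])       # ['a', 'e']
print(idx['consonant'])   # ['b']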
right="}"): + """ + Return a string with markers surrounding the matched substrings. + Search str for substrings matching ``regexp`` and wrap the matches + with braces. This is convenient for learning about regular expressions. + + :param regexp: The regular expression. + :type regexp: str + :param string: The string being matched. + :type string: str + :param left: The left delimiter (printed before the matched substring) + :type left: str + :param right: The right delimiter (printed after the matched substring) + :type right: str + :rtype: str + """ + print(re.compile(regexp, re.M).sub(left + r"\g<0>" + right, string.rstrip())) + + +########################################################################## +# READ FROM FILE OR STRING +########################################################################## + +# recipe from David Mertz +def filestring(f): + if hasattr(f, 'read'): + return f.read() + elif isinstance(f, string_types): + with open(f, 'r') as infile: + return infile.read() + else: + raise ValueError("Must be called with a filename or file-like object") + + +########################################################################## +# Breadth-First Search +########################################################################## + + +def breadth_first(tree, children=iter, maxdepth=-1): + """Traverse the nodes of a tree in breadth-first order. + (No need to check for cycles.) + The first argument should be the tree root; + children should be a function taking as argument a tree node + and returning an iterator of the node's children. + """ + queue = deque([(tree, 0)]) + + while queue: + node, depth = queue.popleft() + yield node + + if depth != maxdepth: + try: + queue.extend((c, depth + 1) for c in children(node)) + except TypeError: + pass + + +########################################################################## +# Guess Character Encoding +########################################################################## + +# adapted from io.py in the docutils extension module (http://docutils.sourceforge.net) +# http://www.pyzine.com/Issue008/Section_Articles/article_Encodings.html + + +def guess_encoding(data): + """ + Given a byte string, attempt to decode it. + Tries the standard 'UTF8' and 'latin-1' encodings, + Plus several gathered from locale information. + + The calling program *must* first call:: + + locale.setlocale(locale.LC_ALL, '') + + If successful it returns ``(decoded_unicode, successful_encoding)``. + If unsuccessful it raises a ``UnicodeError``. + """ + successful_encoding = None + # we make 'utf-8' the first encoding + encodings = ['utf-8'] + # + # next we add anything we can learn from the locale + try: + encodings.append(locale.nl_langinfo(locale.CODESET)) + except AttributeError: + pass + try: + encodings.append(locale.getlocale()[1]) + except (AttributeError, IndexError): + pass + try: + encodings.append(locale.getdefaultlocale()[1]) + except (AttributeError, IndexError): + pass + # + # we try 'latin-1' last + encodings.append('latin-1') + for enc in encodings: + # some of the locale calls + # may have returned None + if not enc: + continue + try: + decoded = text_type(data, enc) + successful_encoding = enc + + except (UnicodeError, LookupError): + pass + else: + break + if not successful_encoding: + raise UnicodeError( + 'Unable to decode input data. ' + 'Tried the following encodings: %s.' 
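# A runnable sketch of breadth_first() above, using nested lists as the "tree".
# The default children=iter works for list nodes, and the TypeError raised when
# iterating an integer leaf is swallowed, which simply ends that branch.
# Assumes nltk is installed.
from nltk.util import breadth_first

tree = [1, [2, [3, 4]], [5]]
for node in breadth_first(tree):
    print(node)
# prints the whole list first, then 1, [2, [3, 4]], [5], 2, [3, 4], 5, 3, 4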
+ % ', '.join([repr(enc) for enc in encodings if enc]) + ) + else: + return (decoded, successful_encoding) + + +########################################################################## +# Remove repeated elements from a list deterministcally +########################################################################## + + +def unique_list(xs): + seen = set() + # not seen.add(x) here acts to make the code shorter without using if statements, seen.add(x) always returns None. + return [x for x in xs if x not in seen and not seen.add(x)] + + +########################################################################## +# Invert a dictionary +########################################################################## + + +def invert_dict(d): + inverted_dict = defaultdict(list) + for key in d: + if hasattr(d[key], '__iter__'): + for term in d[key]: + inverted_dict[term].append(key) + else: + inverted_dict[d[key]] = key + return inverted_dict + + +########################################################################## +# Utilities for directed graphs: transitive closure, and inversion +# The graph is represented as a dictionary of sets +########################################################################## + + +def transitive_closure(graph, reflexive=False): + """ + Calculate the transitive closure of a directed graph, + optionally the reflexive transitive closure. + + The algorithm is a slight modification of the "Marking Algorithm" of + Ioannidis & Ramakrishnan (1998) "Efficient Transitive Closure Algorithms". + + :param graph: the initial graph, represented as a dictionary of sets + :type graph: dict(set) + :param reflexive: if set, also make the closure reflexive + :type reflexive: bool + :rtype: dict(set) + """ + if reflexive: + base_set = lambda k: set([k]) + else: + base_set = lambda k: set() + # The graph U_i in the article: + agenda_graph = dict((k, graph[k].copy()) for k in graph) + # The graph M_i in the article: + closure_graph = dict((k, base_set(k)) for k in graph) + for i in graph: + agenda = agenda_graph[i] + closure = closure_graph[i] + while agenda: + j = agenda.pop() + closure.add(j) + closure |= closure_graph.setdefault(j, base_set(j)) + agenda |= agenda_graph.get(j, base_set(j)) + agenda -= closure + return closure_graph + + +def invert_graph(graph): + """ + Inverts a directed graph. + + :param graph: the graph, represented as a dictionary of sets + :type graph: dict(set) + :return: the inverted graph + :rtype: dict(set) + """ + inverted = {} + for key in graph: + for value in graph[key]: + inverted.setdefault(value, set()).add(key) + return inverted + + +########################################################################## +# HTML Cleaning +########################################################################## + + +def clean_html(html): + raise NotImplementedError( + "To remove HTML markup, use BeautifulSoup's get_text() function" + ) + + +def clean_url(url): + raise NotImplementedError( + "To remove HTML markup, use BeautifulSoup's get_text() function" + ) + + +########################################################################## +# FLATTEN LISTS +########################################################################## + + +def flatten(*args): + """ + Flatten a list. 
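# A short, runnable check of transitive_closure() above on a three-node chain,
# plus its inverse via invert_graph(); assumes nltk is installed.
from nltk.util import transitive_closure, invert_graph

graph = {1: {2}, 2: {3}, 3: set()}
print(transitive_closure(graph))                   # {1: {2, 3}, 2: {3}, 3: set()}
print(transitive_closure(graph, reflexive=True))   # additionally adds 1->1, 2->2, 3->3
print(invert_graph(graph))                         # {2: {1}, 3: {2}}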
+ + >>> from nltk.util import flatten + >>> flatten(1, 2, ['b', 'a' , ['c', 'd']], 3) + [1, 2, 'b', 'a', 'c', 'd', 3] + + :param args: items and lists to be combined into a single list + :rtype: list + """ + + x = [] + for l in args: + if not isinstance(l, (list, tuple)): + l = [l] + for item in l: + if isinstance(item, (list, tuple)): + x.extend(flatten(item)) + else: + x.append(item) + return x + + +########################################################################## +# Ngram iteration +########################################################################## + + +def pad_sequence( + sequence, + n, + pad_left=False, + pad_right=False, + left_pad_symbol=None, + right_pad_symbol=None, +): + """ + Returns a padded sequence of items before ngram extraction. + + >>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='', right_pad_symbol='')) + ['', 1, 2, 3, 4, 5, ''] + >>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='')) + ['', 1, 2, 3, 4, 5] + >>> list(pad_sequence([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='')) + [1, 2, 3, 4, 5, ''] + + :param sequence: the source data to be padded + :type sequence: sequence or iter + :param n: the degree of the ngrams + :type n: int + :param pad_left: whether the ngrams should be left-padded + :type pad_left: bool + :param pad_right: whether the ngrams should be right-padded + :type pad_right: bool + :param left_pad_symbol: the symbol to use for left padding (default is None) + :type left_pad_symbol: any + :param right_pad_symbol: the symbol to use for right padding (default is None) + :type right_pad_symbol: any + :rtype: sequence or iter + """ + sequence = iter(sequence) + if pad_left: + sequence = chain((left_pad_symbol,) * (n - 1), sequence) + if pad_right: + sequence = chain(sequence, (right_pad_symbol,) * (n - 1)) + return sequence + + +# add a flag to pad the sequence so we get peripheral ngrams? + + +def ngrams( + sequence, + n, + pad_left=False, + pad_right=False, + left_pad_symbol=None, + right_pad_symbol=None, +): + """ + Return the ngrams generated from a sequence of items, as an iterator. + For example: + + >>> from nltk.util import ngrams + >>> list(ngrams([1,2,3,4,5], 3)) + [(1, 2, 3), (2, 3, 4), (3, 4, 5)] + + Wrap with list for a list version of this function. Set pad_left + or pad_right to true in order to get additional ngrams: + + >>> list(ngrams([1,2,3,4,5], 2, pad_right=True)) + [(1, 2), (2, 3), (3, 4), (4, 5), (5, None)] + >>> list(ngrams([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='
    </s>')) + [(1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>
    ')] + >>> list(ngrams([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='')) + [('', 1), (1, 2), (2, 3), (3, 4), (4, 5)] + >>> list(ngrams([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='', right_pad_symbol='')) + [('', 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, '')] + + + :param sequence: the source data to be converted into ngrams + :type sequence: sequence or iter + :param n: the degree of the ngrams + :type n: int + :param pad_left: whether the ngrams should be left-padded + :type pad_left: bool + :param pad_right: whether the ngrams should be right-padded + :type pad_right: bool + :param left_pad_symbol: the symbol to use for left padding (default is None) + :type left_pad_symbol: any + :param right_pad_symbol: the symbol to use for right padding (default is None) + :type right_pad_symbol: any + :rtype: sequence or iter + """ + sequence = pad_sequence( + sequence, n, pad_left, pad_right, left_pad_symbol, right_pad_symbol + ) + + history = [] + while n > 1: + # PEP 479, prevent RuntimeError from being raised when StopIteration bubbles out of generator + try: + next_item = next(sequence) + except StopIteration: + # no more data, terminate the generator + return + history.append(next_item) + n -= 1 + for item in sequence: + history.append(item) + yield tuple(history) + del history[0] + + +def bigrams(sequence, **kwargs): + """ + Return the bigrams generated from a sequence of items, as an iterator. + For example: + + >>> from nltk.util import bigrams + >>> list(bigrams([1,2,3,4,5])) + [(1, 2), (2, 3), (3, 4), (4, 5)] + + Use bigrams for a list version of this function. + + :param sequence: the source data to be converted into bigrams + :type sequence: sequence or iter + :rtype: iter(tuple) + """ + + for item in ngrams(sequence, 2, **kwargs): + yield item + + +def trigrams(sequence, **kwargs): + """ + Return the trigrams generated from a sequence of items, as an iterator. + For example: + + >>> from nltk.util import trigrams + >>> list(trigrams([1,2,3,4,5])) + [(1, 2, 3), (2, 3, 4), (3, 4, 5)] + + Use trigrams for a list version of this function. + + :param sequence: the source data to be converted into trigrams + :type sequence: sequence or iter + :rtype: iter(tuple) + """ + + for item in ngrams(sequence, 3, **kwargs): + yield item + + +def everygrams(sequence, min_len=1, max_len=-1, **kwargs): + """ + Returns all possible ngrams generated from a sequence of items, as an iterator. + + >>> sent = 'a b c'.split() + >>> list(everygrams(sent)) + [('a',), ('b',), ('c',), ('a', 'b'), ('b', 'c'), ('a', 'b', 'c')] + >>> list(everygrams(sent, max_len=2)) + [('a',), ('b',), ('c',), ('a', 'b'), ('b', 'c')] + + :param sequence: the source data to be converted into trigrams + :type sequence: sequence or iter + :param min_len: minimum length of the ngrams, aka. n-gram order/degree of ngram + :type min_len: int + :param max_len: maximum length of the ngrams (set to length of sequence by default) + :type max_len: int + :rtype: iter(tuple) + """ + + if max_len == -1: + max_len = len(sequence) + for n in range(min_len, max_len + 1): + for ng in ngrams(sequence, n, **kwargs): + yield ng + + +def skipgrams(sequence, n, k, **kwargs): + """ + Returns all possible skipgrams generated from a sequence of items, as an iterator. + Skipgrams are ngrams that allows tokens to be skipped. 
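# A combined use of pad_sequence() and bigrams() from above, producing the
# sentence-boundary-padded bigrams a simple language model would count.
# Assumes nltk is installed; '<s>' and '</s>' are the conventional pad symbols.
from nltk.util import pad_sequence, bigrams

sent = ['the', 'cat', 'sat']
padded = list(pad_sequence(sent, 2, pad_left=True, pad_right=True,
                           left_pad_symbol='<s>', right_pad_symbol='</s>'))
print(list(bigrams(padded)))
# [('<s>', 'the'), ('the', 'cat'), ('cat', 'sat'), ('sat', '</s>')]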
+ Refer to http://homepages.inf.ed.ac.uk/ballison/pdf/lrec_skipgrams.pdf + + >>> sent = "Insurgents killed in ongoing fighting".split() + >>> list(skipgrams(sent, 2, 2)) + [('Insurgents', 'killed'), ('Insurgents', 'in'), ('Insurgents', 'ongoing'), ('killed', 'in'), ('killed', 'ongoing'), ('killed', 'fighting'), ('in', 'ongoing'), ('in', 'fighting'), ('ongoing', 'fighting')] + >>> list(skipgrams(sent, 3, 2)) + [('Insurgents', 'killed', 'in'), ('Insurgents', 'killed', 'ongoing'), ('Insurgents', 'killed', 'fighting'), ('Insurgents', 'in', 'ongoing'), ('Insurgents', 'in', 'fighting'), ('Insurgents', 'ongoing', 'fighting'), ('killed', 'in', 'ongoing'), ('killed', 'in', 'fighting'), ('killed', 'ongoing', 'fighting'), ('in', 'ongoing', 'fighting')] + + :param sequence: the source data to be converted into trigrams + :type sequence: sequence or iter + :param n: the degree of the ngrams + :type n: int + :param k: the skip distance + :type k: int + :rtype: iter(tuple) + """ + + # Pads the sequence as desired by **kwargs. + if 'pad_left' in kwargs or 'pad_right' in kwargs: + sequence = pad_sequence(sequence, n, **kwargs) + + # Note when iterating through the ngrams, the pad_right here is not + # the **kwargs padding, it's for the algorithm to detect the SENTINEL + # object on the right pad to stop inner loop. + SENTINEL = object() + for ngram in ngrams(sequence, n + k, pad_right=True, right_pad_symbol=SENTINEL): + head = ngram[:1] + tail = ngram[1:] + for skip_tail in combinations(tail, n - 1): + if skip_tail[-1] is SENTINEL: + continue + yield head + skip_tail + + +###################################################################### +# Binary Search in a File +###################################################################### + +# inherited from pywordnet, by Oliver Steele +def binary_search_file(file, key, cache={}, cacheDepth=-1): + """ + Return the line from the file with first word key. + Searches through a sorted file using the binary search algorithm. + + :type file: file + :param file: the file to be searched through. + :type key: str + :param key: the identifier we are searching for. 
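# The sentinel trick described above in one concrete call: with n=2 and k=1,
# skipgrams() pairs each token with the next token and the one after it, but
# never with the right-padding sentinel. Assumes nltk is installed.
from nltk.util import skipgrams

print(list(skipgrams('a b c d'.split(), 2, 1)))
# [('a', 'b'), ('a', 'c'), ('b', 'c'), ('b', 'd'), ('c', 'd')]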
+ """ + + key = key + ' ' + keylen = len(key) + start = 0 + currentDepth = 0 + + if hasattr(file, 'name'): + end = os.stat(file.name).st_size - 1 + else: + file.seek(0, 2) + end = file.tell() - 1 + file.seek(0) + + while start < end: + lastState = start, end + middle = (start + end) // 2 + + if cache.get(middle): + offset, line = cache[middle] + + else: + line = "" + while True: + file.seek(max(0, middle - 1)) + if middle > 0: + file.discard_line() + offset = file.tell() + line = file.readline() + if line != "": + break + # at EOF; try to find start of the last line + middle = (start + middle) // 2 + if middle == end - 1: + return None + if currentDepth < cacheDepth: + cache[middle] = (offset, line) + + if offset > end: + assert end != middle - 1, "infinite loop" + end = middle - 1 + elif line[:keylen] == key: + return line + elif line > key: + assert end != middle - 1, "infinite loop" + end = middle - 1 + elif line < key: + start = offset + len(line) - 1 + + currentDepth += 1 + thisState = start, end + + if lastState == thisState: + # Detects the condition where we're searching past the end + # of the file, which is otherwise difficult to detect + return None + + return None + + +###################################################################### +# Proxy configuration +###################################################################### + + +def set_proxy(proxy, user=None, password=''): + """ + Set the HTTP proxy for Python to download through. + + If ``proxy`` is None then tries to set proxy from environment or system + settings. + + :param proxy: The HTTP proxy server to use. For example: + 'http://proxy.example.com:3128/' + :param user: The username to authenticate with. Use None to disable + authentication. + :param password: The password to authenticate with. + """ + from nltk import compat + + if proxy is None: + # Try and find the system proxy settings + try: + proxy = getproxies()['http'] + except KeyError: + raise ValueError('Could not detect default proxy settings') + + # Set up the proxy handler + proxy_handler = ProxyHandler({'https': proxy, 'http': proxy}) + opener = build_opener(proxy_handler) + + if user is not None: + # Set up basic proxy authentication if provided + password_manager = HTTPPasswordMgrWithDefaultRealm() + password_manager.add_password(realm=None, uri=proxy, user=user, passwd=password) + opener.add_handler(ProxyBasicAuthHandler(password_manager)) + opener.add_handler(ProxyDigestAuthHandler(password_manager)) + + # Overide the existing url opener + install_opener(opener) + + +###################################################################### +# ElementTree pretty printing from http://www.effbot.org/zone/element-lib.htm +###################################################################### + + +def elementtree_indent(elem, level=0): + """ + Recursive function to indent an ElementTree._ElementInterface + used for pretty printing. Run indent on elem and then output + in the normal way. + + :param elem: element to be indented. will be modified. 
+ :type elem: ElementTree._ElementInterface + :param level: level of indentation for this element + :type level: nonnegative integer + :rtype: ElementTree._ElementInterface + :return: Contents of elem indented to reflect its structure + """ + + i = "\n" + level * " " + if len(elem): + if not elem.text or not elem.text.strip(): + elem.text = i + " " + for elem in elem: + elementtree_indent(elem, level + 1) + if not elem.tail or not elem.tail.strip(): + elem.tail = i + else: + if level and (not elem.tail or not elem.tail.strip()): + elem.tail = i + + +###################################################################### +# Mathematical approximations +###################################################################### + + +def choose(n, k): + """ + This function is a fast way to calculate binomial coefficients, commonly + known as nCk, i.e. the number of combinations of n things taken k at a time. + (https://en.wikipedia.org/wiki/Binomial_coefficient). + + This is the *scipy.special.comb()* with long integer computation but this + approximation is faster, see https://github.com/nltk/nltk/issues/1181 + + >>> choose(4, 2) + 6 + >>> choose(6, 2) + 15 + + :param n: The number of things. + :type n: int + :param r: The number of times a thing is taken. + :type r: int + """ + if 0 <= k <= n: + ntok, ktok = 1, 1 + for t in range(1, min(k, n - k) + 1): + ntok *= n + ktok *= t + n -= 1 + return ntok // ktok + else: + return 0 diff --git a/venv.bak/lib/python3.7/site-packages/nltk/wsd.py b/venv.bak/lib/python3.7/site-packages/nltk/wsd.py new file mode 100644 index 0000000..611f649 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/nltk/wsd.py @@ -0,0 +1,51 @@ +# Natural Language Toolkit: Word Sense Disambiguation Algorithms +# +# Authors: Liling Tan , +# Dmitrijs Milajevs +# +# Copyright (C) 2001-2019 NLTK Project +# URL: +# For license information, see LICENSE.TXT + +from nltk.corpus import wordnet + + +def lesk(context_sentence, ambiguous_word, pos=None, synsets=None): + """Return a synset for an ambiguous word in a context. + + :param iter context_sentence: The context sentence where the ambiguous word + occurs, passed as an iterable of words. + :param str ambiguous_word: The ambiguous word that requires WSD. + :param str pos: A specified Part-of-Speech (POS). + :param iter synsets: Possible synsets of the ambiguous word. + :return: ``lesk_sense`` The Synset() object with the highest signature overlaps. + + This function is an implementation of the original Lesk algorithm (1986) [1]. + + Usage example:: + + >>> lesk(['I', 'went', 'to', 'the', 'bank', 'to', 'deposit', 'money', '.'], 'bank', 'n') + Synset('savings_bank.n.02') + + [1] Lesk, Michael. "Automatic sense disambiguation using machine + readable dictionaries: how to tell a pine cone from an ice cream + cone." Proceedings of the 5th Annual International Conference on + Systems Documentation. ACM, 1986. 
+ http://dl.acm.org/citation.cfm?id=318728 + """ + + context = set(context_sentence) + if synsets is None: + synsets = wordnet.synsets(ambiguous_word) + + if pos: + synsets = [ss for ss in synsets if str(ss.pos()) == pos] + + if not synsets: + return None + + _, sense = max( + (len(context.intersection(ss.definition().split())), ss) for ss in synsets + ) + + return sense diff --git a/venv/lib/python3.7/site-packages/numpy-1.18.2.dist-info/INSTALLER b/venv.bak/lib/python3.7/site-packages/numpy-1.18.2.dist-info/INSTALLER similarity index 100% rename from venv/lib/python3.7/site-packages/numpy-1.18.2.dist-info/INSTALLER rename to venv.bak/lib/python3.7/site-packages/numpy-1.18.2.dist-info/INSTALLER diff --git a/venv/lib/python3.7/site-packages/numpy-1.18.2.dist-info/LICENSE.txt b/venv.bak/lib/python3.7/site-packages/numpy-1.18.2.dist-info/LICENSE.txt similarity index 100% rename from venv/lib/python3.7/site-packages/numpy-1.18.2.dist-info/LICENSE.txt rename to venv.bak/lib/python3.7/site-packages/numpy-1.18.2.dist-info/LICENSE.txt diff --git a/venv/lib/python3.7/site-packages/numpy-1.18.2.dist-info/LICENSES_bundled.txt b/venv.bak/lib/python3.7/site-packages/numpy-1.18.2.dist-info/LICENSES_bundled.txt similarity index 100% rename from venv/lib/python3.7/site-packages/numpy-1.18.2.dist-info/LICENSES_bundled.txt rename to venv.bak/lib/python3.7/site-packages/numpy-1.18.2.dist-info/LICENSES_bundled.txt diff --git a/venv/lib/python3.7/site-packages/numpy-1.18.2.dist-info/METADATA b/venv.bak/lib/python3.7/site-packages/numpy-1.18.2.dist-info/METADATA similarity index 100% rename from venv/lib/python3.7/site-packages/numpy-1.18.2.dist-info/METADATA rename to venv.bak/lib/python3.7/site-packages/numpy-1.18.2.dist-info/METADATA diff --git a/venv/lib/python3.7/site-packages/numpy-1.18.2.dist-info/RECORD b/venv.bak/lib/python3.7/site-packages/numpy-1.18.2.dist-info/RECORD similarity index 100% rename from venv/lib/python3.7/site-packages/numpy-1.18.2.dist-info/RECORD rename to venv.bak/lib/python3.7/site-packages/numpy-1.18.2.dist-info/RECORD diff --git a/venv/lib/python3.7/site-packages/numpy-1.18.2.dist-info/WHEEL b/venv.bak/lib/python3.7/site-packages/numpy-1.18.2.dist-info/WHEEL similarity index 100% rename from venv/lib/python3.7/site-packages/numpy-1.18.2.dist-info/WHEEL rename to venv.bak/lib/python3.7/site-packages/numpy-1.18.2.dist-info/WHEEL diff --git a/venv/lib/python3.7/site-packages/numpy-1.18.2.dist-info/entry_points.txt b/venv.bak/lib/python3.7/site-packages/numpy-1.18.2.dist-info/entry_points.txt similarity index 100% rename from venv/lib/python3.7/site-packages/numpy-1.18.2.dist-info/entry_points.txt rename to venv.bak/lib/python3.7/site-packages/numpy-1.18.2.dist-info/entry_points.txt diff --git a/venv/lib/python3.7/site-packages/numpy-1.18.2.dist-info/top_level.txt b/venv.bak/lib/python3.7/site-packages/numpy-1.18.2.dist-info/top_level.txt similarity index 100% rename from venv/lib/python3.7/site-packages/numpy-1.18.2.dist-info/top_level.txt rename to venv.bak/lib/python3.7/site-packages/numpy-1.18.2.dist-info/top_level.txt diff --git a/venv/lib/python3.7/site-packages/numpy/.dylibs/libgcc_s.1.dylib b/venv.bak/lib/python3.7/site-packages/numpy/.dylibs/libgcc_s.1.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/.dylibs/libgcc_s.1.dylib rename to venv.bak/lib/python3.7/site-packages/numpy/.dylibs/libgcc_s.1.dylib diff --git a/venv/lib/python3.7/site-packages/numpy/.dylibs/libgfortran.3.dylib 
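# A hedged usage sketch for lesk() above, passing an explicit candidate synset
# list via the `synsets` parameter; it assumes the WordNet corpus has been
# downloaded (nltk.download('wordnet')).
from nltk.corpus import wordnet
from nltk.wsd import lesk

sent = ['I', 'went', 'to', 'the', 'bank', 'to', 'deposit', 'money', '.']
candidates = wordnet.synsets('bank', pos='n')
print(lesk(sent, 'bank', synsets=candidates))   # e.g. Synset('savings_bank.n.02')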
b/venv.bak/lib/python3.7/site-packages/numpy/.dylibs/libgfortran.3.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/.dylibs/libgfortran.3.dylib rename to venv.bak/lib/python3.7/site-packages/numpy/.dylibs/libgfortran.3.dylib diff --git a/venv/lib/python3.7/site-packages/numpy/.dylibs/libopenblasp-r0.3.7.dylib b/venv.bak/lib/python3.7/site-packages/numpy/.dylibs/libopenblasp-r0.3.7.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/.dylibs/libopenblasp-r0.3.7.dylib rename to venv.bak/lib/python3.7/site-packages/numpy/.dylibs/libopenblasp-r0.3.7.dylib diff --git a/venv/lib/python3.7/site-packages/numpy/.dylibs/libquadmath.0.dylib b/venv.bak/lib/python3.7/site-packages/numpy/.dylibs/libquadmath.0.dylib similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/.dylibs/libquadmath.0.dylib rename to venv.bak/lib/python3.7/site-packages/numpy/.dylibs/libquadmath.0.dylib diff --git a/venv/lib/python3.7/site-packages/numpy/LICENSE.txt b/venv.bak/lib/python3.7/site-packages/numpy/LICENSE.txt similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/LICENSE.txt rename to venv.bak/lib/python3.7/site-packages/numpy/LICENSE.txt diff --git a/venv/lib/python3.7/site-packages/numpy/__config__.py b/venv.bak/lib/python3.7/site-packages/numpy/__config__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/__config__.py rename to venv.bak/lib/python3.7/site-packages/numpy/__config__.py diff --git a/venv/lib/python3.7/site-packages/numpy/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/__pycache__/__config__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/__pycache__/__config__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/__pycache__/__config__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/__pycache__/__config__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/__pycache__/_distributor_init.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/__pycache__/_distributor_init.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/__pycache__/_distributor_init.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/__pycache__/_distributor_init.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/__pycache__/_globals.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/__pycache__/_globals.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/__pycache__/_globals.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/__pycache__/_globals.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/__pycache__/_pytesttester.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/__pycache__/_pytesttester.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/numpy/__pycache__/_pytesttester.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/__pycache__/_pytesttester.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/__pycache__/conftest.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/__pycache__/conftest.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/__pycache__/conftest.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/__pycache__/conftest.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/__pycache__/ctypeslib.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/__pycache__/ctypeslib.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/__pycache__/ctypeslib.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/__pycache__/ctypeslib.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/__pycache__/dual.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/__pycache__/dual.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/__pycache__/dual.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/__pycache__/dual.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/__pycache__/matlib.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/__pycache__/matlib.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/__pycache__/matlib.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/__pycache__/matlib.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/__pycache__/setup.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/__pycache__/setup.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/__pycache__/setup.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/__pycache__/setup.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/__pycache__/version.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/__pycache__/version.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/__pycache__/version.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/__pycache__/version.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/_distributor_init.py b/venv.bak/lib/python3.7/site-packages/numpy/_distributor_init.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/_distributor_init.py rename to venv.bak/lib/python3.7/site-packages/numpy/_distributor_init.py diff --git a/venv/lib/python3.7/site-packages/numpy/_globals.py b/venv.bak/lib/python3.7/site-packages/numpy/_globals.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/_globals.py rename to venv.bak/lib/python3.7/site-packages/numpy/_globals.py diff --git a/venv/lib/python3.7/site-packages/numpy/_pytesttester.py b/venv.bak/lib/python3.7/site-packages/numpy/_pytesttester.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/_pytesttester.py rename to venv.bak/lib/python3.7/site-packages/numpy/_pytesttester.py diff --git a/venv/lib/python3.7/site-packages/numpy/compat/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/compat/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/compat/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/compat/__init__.py diff --git 
a/venv/lib/python3.7/site-packages/numpy/compat/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/compat/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/compat/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/compat/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/compat/__pycache__/_inspect.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/compat/__pycache__/_inspect.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/compat/__pycache__/_inspect.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/compat/__pycache__/_inspect.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/compat/__pycache__/py3k.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/compat/__pycache__/py3k.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/compat/__pycache__/py3k.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/compat/__pycache__/py3k.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/compat/__pycache__/setup.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/compat/__pycache__/setup.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/compat/__pycache__/setup.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/compat/__pycache__/setup.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/compat/_inspect.py b/venv.bak/lib/python3.7/site-packages/numpy/compat/_inspect.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/compat/_inspect.py rename to venv.bak/lib/python3.7/site-packages/numpy/compat/_inspect.py diff --git a/venv/lib/python3.7/site-packages/numpy/compat/py3k.py b/venv.bak/lib/python3.7/site-packages/numpy/compat/py3k.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/compat/py3k.py rename to venv.bak/lib/python3.7/site-packages/numpy/compat/py3k.py diff --git a/venv/lib/python3.7/site-packages/numpy/compat/setup.py b/venv.bak/lib/python3.7/site-packages/numpy/compat/setup.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/compat/setup.py rename to venv.bak/lib/python3.7/site-packages/numpy/compat/setup.py diff --git a/venv/lib/python3.7/site-packages/numpy/fft/tests/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/compat/tests/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/fft/tests/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/compat/tests/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/compat/tests/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/compat/tests/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/compat/tests/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/compat/tests/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/compat/tests/__pycache__/test_compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/compat/tests/__pycache__/test_compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/compat/tests/__pycache__/test_compat.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/numpy/compat/tests/__pycache__/test_compat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/compat/tests/test_compat.py b/venv.bak/lib/python3.7/site-packages/numpy/compat/tests/test_compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/compat/tests/test_compat.py rename to venv.bak/lib/python3.7/site-packages/numpy/compat/tests/test_compat.py diff --git a/venv/lib/python3.7/site-packages/numpy/conftest.py b/venv.bak/lib/python3.7/site-packages/numpy/conftest.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/conftest.py rename to venv.bak/lib/python3.7/site-packages/numpy/conftest.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/core/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/_add_newdocs.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_add_newdocs.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/_add_newdocs.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_add_newdocs.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/_asarray.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_asarray.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/_asarray.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_asarray.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/_dtype.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_dtype.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/_dtype.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_dtype.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/_dtype_ctypes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_dtype_ctypes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/_dtype_ctypes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_dtype_ctypes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/_exceptions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_exceptions.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/_exceptions.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_exceptions.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/_internal.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_internal.cpython-37.pyc similarity index 
100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/_internal.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_internal.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/_methods.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_methods.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/_methods.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_methods.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/_string_helpers.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_string_helpers.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/_string_helpers.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_string_helpers.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/_type_aliases.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_type_aliases.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/_type_aliases.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_type_aliases.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/_ufunc_config.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_ufunc_config.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/_ufunc_config.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/_ufunc_config.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/arrayprint.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/arrayprint.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/arrayprint.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/arrayprint.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/cversions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/cversions.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/cversions.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/cversions.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/defchararray.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/defchararray.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/defchararray.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/defchararray.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/einsumfunc.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/einsumfunc.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/einsumfunc.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/einsumfunc.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/fromnumeric.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/fromnumeric.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/fromnumeric.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/fromnumeric.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/function_base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/function_base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/function_base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/function_base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/generate_numpy_api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/generate_numpy_api.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/generate_numpy_api.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/generate_numpy_api.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/getlimits.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/getlimits.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/getlimits.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/getlimits.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/machar.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/machar.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/machar.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/machar.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/memmap.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/memmap.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/memmap.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/memmap.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/multiarray.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/multiarray.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/multiarray.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/multiarray.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/numeric.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/numeric.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/numeric.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/numeric.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/numerictypes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/numerictypes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/numerictypes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/numerictypes.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/overrides.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/overrides.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/overrides.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/overrides.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/records.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/records.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/records.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/records.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/setup.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/setup.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/setup.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/setup.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/setup_common.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/setup_common.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/setup_common.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/setup_common.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/shape_base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/shape_base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/shape_base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/shape_base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/umath.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/umath.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/umath.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/umath.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/__pycache__/umath_tests.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/umath_tests.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/__pycache__/umath_tests.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/__pycache__/umath_tests.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/_add_newdocs.py b/venv.bak/lib/python3.7/site-packages/numpy/core/_add_newdocs.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/_add_newdocs.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/_add_newdocs.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/_asarray.py b/venv.bak/lib/python3.7/site-packages/numpy/core/_asarray.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/_asarray.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/_asarray.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/_dtype.py b/venv.bak/lib/python3.7/site-packages/numpy/core/_dtype.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/_dtype.py rename to 
venv.bak/lib/python3.7/site-packages/numpy/core/_dtype.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/_dtype_ctypes.py b/venv.bak/lib/python3.7/site-packages/numpy/core/_dtype_ctypes.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/_dtype_ctypes.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/_dtype_ctypes.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/_exceptions.py b/venv.bak/lib/python3.7/site-packages/numpy/core/_exceptions.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/_exceptions.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/_exceptions.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/_internal.py b/venv.bak/lib/python3.7/site-packages/numpy/core/_internal.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/_internal.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/_internal.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/_methods.py b/venv.bak/lib/python3.7/site-packages/numpy/core/_methods.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/_methods.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/_methods.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/_multiarray_tests.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/core/_multiarray_tests.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/_multiarray_tests.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/numpy/core/_multiarray_tests.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/core/_multiarray_umath.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/core/_multiarray_umath.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/_multiarray_umath.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/numpy/core/_multiarray_umath.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/core/_operand_flag_tests.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/core/_operand_flag_tests.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/_operand_flag_tests.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/numpy/core/_operand_flag_tests.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/core/_rational_tests.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/core/_rational_tests.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/_rational_tests.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/numpy/core/_rational_tests.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/core/_string_helpers.py b/venv.bak/lib/python3.7/site-packages/numpy/core/_string_helpers.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/_string_helpers.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/_string_helpers.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/_struct_ufunc_tests.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/core/_struct_ufunc_tests.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/_struct_ufunc_tests.cpython-37m-darwin.so rename to 
venv.bak/lib/python3.7/site-packages/numpy/core/_struct_ufunc_tests.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/core/_type_aliases.py b/venv.bak/lib/python3.7/site-packages/numpy/core/_type_aliases.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/_type_aliases.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/_type_aliases.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/_ufunc_config.py b/venv.bak/lib/python3.7/site-packages/numpy/core/_ufunc_config.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/_ufunc_config.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/_ufunc_config.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/_umath_tests.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/core/_umath_tests.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/_umath_tests.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/numpy/core/_umath_tests.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/core/arrayprint.py b/venv.bak/lib/python3.7/site-packages/numpy/core/arrayprint.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/arrayprint.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/arrayprint.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/cversions.py b/venv.bak/lib/python3.7/site-packages/numpy/core/cversions.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/cversions.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/cversions.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/defchararray.py b/venv.bak/lib/python3.7/site-packages/numpy/core/defchararray.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/defchararray.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/defchararray.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/einsumfunc.py b/venv.bak/lib/python3.7/site-packages/numpy/core/einsumfunc.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/einsumfunc.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/einsumfunc.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/fromnumeric.py b/venv.bak/lib/python3.7/site-packages/numpy/core/fromnumeric.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/fromnumeric.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/fromnumeric.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/function_base.py b/venv.bak/lib/python3.7/site-packages/numpy/core/function_base.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/function_base.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/function_base.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/generate_numpy_api.py b/venv.bak/lib/python3.7/site-packages/numpy/core/generate_numpy_api.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/generate_numpy_api.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/generate_numpy_api.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/getlimits.py b/venv.bak/lib/python3.7/site-packages/numpy/core/getlimits.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/getlimits.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/getlimits.py diff --git 
a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/__multiarray_api.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/__multiarray_api.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/__multiarray_api.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/__multiarray_api.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/__ufunc_api.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/__ufunc_api.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/__ufunc_api.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/__ufunc_api.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/_neighborhood_iterator_imp.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/_neighborhood_iterator_imp.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/_neighborhood_iterator_imp.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/_neighborhood_iterator_imp.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/_numpyconfig.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/_numpyconfig.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/_numpyconfig.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/_numpyconfig.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/arrayobject.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/arrayobject.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/arrayobject.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/arrayobject.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/arrayscalars.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/arrayscalars.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/arrayscalars.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/arrayscalars.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/halffloat.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/halffloat.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/halffloat.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/halffloat.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/multiarray_api.txt b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/multiarray_api.txt similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/multiarray_api.txt rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/multiarray_api.txt diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/ndarrayobject.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/ndarrayobject.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/ndarrayobject.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/ndarrayobject.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/ndarraytypes.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/ndarraytypes.h similarity index 100% rename from 
venv/lib/python3.7/site-packages/numpy/core/include/numpy/ndarraytypes.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/ndarraytypes.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/noprefix.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/noprefix.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/noprefix.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/noprefix.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_3kcompat.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_3kcompat.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_3kcompat.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_3kcompat.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_common.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_common.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_common.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_common.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_cpu.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_cpu.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_cpu.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_cpu.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_endian.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_endian.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_endian.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_endian.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_interrupt.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_interrupt.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_interrupt.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_interrupt.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_math.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_math.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_math.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_math.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_no_deprecated_api.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_no_deprecated_api.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_no_deprecated_api.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_no_deprecated_api.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_os.h 
b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_os.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/npy_os.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/npy_os.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/numpyconfig.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/numpyconfig.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/numpyconfig.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/numpyconfig.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/old_defines.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/old_defines.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/old_defines.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/old_defines.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/oldnumeric.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/oldnumeric.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/oldnumeric.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/oldnumeric.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/random/bitgen.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/random/bitgen.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/random/bitgen.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/random/bitgen.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/random/distributions.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/random/distributions.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/random/distributions.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/random/distributions.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/ufunc_api.txt b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/ufunc_api.txt similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/ufunc_api.txt rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/ufunc_api.txt diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/ufuncobject.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/ufuncobject.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/ufuncobject.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/ufuncobject.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/include/numpy/utils.h b/venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/utils.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/include/numpy/utils.h rename to venv.bak/lib/python3.7/site-packages/numpy/core/include/numpy/utils.h diff --git a/venv/lib/python3.7/site-packages/numpy/core/lib/libnpymath.a b/venv.bak/lib/python3.7/site-packages/numpy/core/lib/libnpymath.a similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/lib/libnpymath.a rename to venv.bak/lib/python3.7/site-packages/numpy/core/lib/libnpymath.a diff --git a/venv/lib/python3.7/site-packages/numpy/core/lib/npy-pkg-config/mlib.ini 
b/venv.bak/lib/python3.7/site-packages/numpy/core/lib/npy-pkg-config/mlib.ini similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/lib/npy-pkg-config/mlib.ini rename to venv.bak/lib/python3.7/site-packages/numpy/core/lib/npy-pkg-config/mlib.ini diff --git a/venv/lib/python3.7/site-packages/numpy/core/lib/npy-pkg-config/npymath.ini b/venv.bak/lib/python3.7/site-packages/numpy/core/lib/npy-pkg-config/npymath.ini similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/lib/npy-pkg-config/npymath.ini rename to venv.bak/lib/python3.7/site-packages/numpy/core/lib/npy-pkg-config/npymath.ini diff --git a/venv/lib/python3.7/site-packages/numpy/core/machar.py b/venv.bak/lib/python3.7/site-packages/numpy/core/machar.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/machar.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/machar.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/memmap.py b/venv.bak/lib/python3.7/site-packages/numpy/core/memmap.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/memmap.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/memmap.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/multiarray.py b/venv.bak/lib/python3.7/site-packages/numpy/core/multiarray.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/multiarray.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/multiarray.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/numeric.py b/venv.bak/lib/python3.7/site-packages/numpy/core/numeric.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/numeric.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/numeric.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/numerictypes.py b/venv.bak/lib/python3.7/site-packages/numpy/core/numerictypes.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/numerictypes.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/numerictypes.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/overrides.py b/venv.bak/lib/python3.7/site-packages/numpy/core/overrides.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/overrides.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/overrides.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/records.py b/venv.bak/lib/python3.7/site-packages/numpy/core/records.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/records.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/records.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/setup.py b/venv.bak/lib/python3.7/site-packages/numpy/core/setup.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/setup.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/setup.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/setup_common.py b/venv.bak/lib/python3.7/site-packages/numpy/core/setup_common.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/setup_common.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/setup_common.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/shape_base.py b/venv.bak/lib/python3.7/site-packages/numpy/core/shape_base.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/shape_base.py rename to 
venv.bak/lib/python3.7/site-packages/numpy/core/shape_base.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/_locales.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/_locales.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/_locales.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/_locales.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test__exceptions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test__exceptions.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test__exceptions.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test__exceptions.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_abc.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_abc.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_abc.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_abc.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_api.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_api.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_api.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_arrayprint.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_arrayprint.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_arrayprint.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_arrayprint.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_datetime.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_datetime.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_datetime.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_datetime.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_defchararray.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_defchararray.cpython-37.pyc similarity 
index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_defchararray.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_defchararray.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_deprecations.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_deprecations.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_deprecations.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_deprecations.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_dtype.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_dtype.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_dtype.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_dtype.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_einsum.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_einsum.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_einsum.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_einsum.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_errstate.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_errstate.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_errstate.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_errstate.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_extint128.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_extint128.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_extint128.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_extint128.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_function_base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_function_base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_function_base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_function_base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_getlimits.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_getlimits.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_getlimits.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_getlimits.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_half.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_half.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_half.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_half.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_indexerrors.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_indexerrors.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_indexerrors.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_indexerrors.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_indexing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_indexing.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_indexing.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_indexing.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_issue14735.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_issue14735.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_issue14735.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_issue14735.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_item_selection.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_item_selection.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_item_selection.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_item_selection.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_longdouble.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_longdouble.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_longdouble.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_longdouble.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_machar.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_machar.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_machar.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_machar.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_mem_overlap.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_mem_overlap.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_mem_overlap.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_mem_overlap.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_memmap.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_memmap.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_memmap.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_memmap.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_multiarray.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_multiarray.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_multiarray.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_multiarray.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_nditer.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_nditer.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_nditer.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_nditer.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_numeric.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_numeric.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_numeric.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_numeric.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_numerictypes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_numerictypes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_numerictypes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_numerictypes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_overrides.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_overrides.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_overrides.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_overrides.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_print.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_print.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_print.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_print.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_records.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_records.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_records.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_records.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_regression.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_regression.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_regression.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_regression.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalar_ctors.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalar_ctors.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalar_ctors.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalar_ctors.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalar_methods.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalar_methods.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalar_methods.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalar_methods.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalarbuffer.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalarbuffer.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalarbuffer.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalarbuffer.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalarinherit.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalarinherit.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalarinherit.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalarinherit.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalarmath.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalarmath.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalarmath.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalarmath.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalarprint.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalarprint.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalarprint.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_scalarprint.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_shape_base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_shape_base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_shape_base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_shape_base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_ufunc.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_ufunc.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_ufunc.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_ufunc.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_umath.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_umath.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_umath.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_umath.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_umath_accuracy.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_umath_accuracy.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_umath_accuracy.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_umath_accuracy.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_umath_complex.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_umath_complex.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_umath_complex.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_umath_complex.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_unicode.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_unicode.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_unicode.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/__pycache__/test_unicode.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/_locales.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/_locales.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/_locales.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/_locales.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/data/astype_copy.pkl b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/data/astype_copy.pkl similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/data/astype_copy.pkl rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/data/astype_copy.pkl diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/data/recarray_from_file.fits b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/data/recarray_from_file.fits similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/data/recarray_from_file.fits rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/data/recarray_from_file.fits diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-README b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-README similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-README rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-README diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-cos b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-cos similarity index 100% rename from 
venv/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-cos rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-cos diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-exp b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-exp similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-exp rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-exp diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-log b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-log similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-log rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-log diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-sin b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-sin similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-sin rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/data/umath-validation-set-sin diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test__exceptions.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test__exceptions.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test__exceptions.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test__exceptions.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_abc.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_abc.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_abc.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_abc.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_api.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_api.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_api.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_api.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_arrayprint.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_arrayprint.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_arrayprint.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_arrayprint.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_datetime.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_datetime.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_datetime.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_datetime.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_defchararray.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_defchararray.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_defchararray.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_defchararray.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_deprecations.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_deprecations.py similarity index 100% rename from 
venv/lib/python3.7/site-packages/numpy/core/tests/test_deprecations.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_deprecations.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_dtype.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_dtype.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_dtype.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_dtype.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_einsum.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_einsum.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_einsum.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_einsum.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_errstate.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_errstate.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_errstate.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_errstate.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_extint128.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_extint128.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_extint128.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_extint128.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_function_base.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_function_base.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_function_base.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_function_base.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_getlimits.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_getlimits.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_getlimits.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_getlimits.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_half.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_half.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_half.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_half.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_indexerrors.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_indexerrors.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_indexerrors.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_indexerrors.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_indexing.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_indexing.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_indexing.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_indexing.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_issue14735.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_issue14735.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_issue14735.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_issue14735.py diff --git 
a/venv/lib/python3.7/site-packages/numpy/core/tests/test_item_selection.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_item_selection.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_item_selection.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_item_selection.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_longdouble.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_longdouble.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_longdouble.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_longdouble.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_machar.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_machar.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_machar.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_machar.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_mem_overlap.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_mem_overlap.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_mem_overlap.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_mem_overlap.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_memmap.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_memmap.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_memmap.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_memmap.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_multiarray.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_multiarray.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_multiarray.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_multiarray.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_nditer.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_nditer.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_nditer.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_nditer.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_numeric.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_numeric.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_numeric.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_numeric.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_numerictypes.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_numerictypes.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_numerictypes.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_numerictypes.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_overrides.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_overrides.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_overrides.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_overrides.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_print.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_print.py similarity index 100% 
rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_print.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_print.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_records.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_records.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_records.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_records.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_regression.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_regression.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_regression.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_regression.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_scalar_ctors.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_scalar_ctors.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_scalar_ctors.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_scalar_ctors.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_scalar_methods.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_scalar_methods.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_scalar_methods.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_scalar_methods.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_scalarbuffer.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_scalarbuffer.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_scalarbuffer.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_scalarbuffer.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_scalarinherit.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_scalarinherit.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_scalarinherit.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_scalarinherit.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_scalarmath.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_scalarmath.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_scalarmath.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_scalarmath.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_scalarprint.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_scalarprint.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_scalarprint.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_scalarprint.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_shape_base.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_shape_base.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_shape_base.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_shape_base.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_ufunc.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_ufunc.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_ufunc.py rename to 
venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_ufunc.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_umath.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_umath.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_umath.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_umath.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_umath_accuracy.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_umath_accuracy.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_umath_accuracy.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_umath_accuracy.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_umath_complex.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_umath_complex.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_umath_complex.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_umath_complex.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/tests/test_unicode.py b/venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_unicode.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/tests/test_unicode.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/tests/test_unicode.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/umath.py b/venv.bak/lib/python3.7/site-packages/numpy/core/umath.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/umath.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/umath.py diff --git a/venv/lib/python3.7/site-packages/numpy/core/umath_tests.py b/venv.bak/lib/python3.7/site-packages/numpy/core/umath_tests.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/core/umath_tests.py rename to venv.bak/lib/python3.7/site-packages/numpy/core/umath_tests.py diff --git a/venv/lib/python3.7/site-packages/numpy/ctypeslib.py b/venv.bak/lib/python3.7/site-packages/numpy/ctypeslib.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ctypeslib.py rename to venv.bak/lib/python3.7/site-packages/numpy/ctypeslib.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__config__.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__config__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__config__.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__config__.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/__config__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/__config__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/__config__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/__config__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/__init__.cpython-37.pyc similarity index 100% 
rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/_shell_utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/_shell_utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/_shell_utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/_shell_utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/ccompiler.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/ccompiler.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/ccompiler.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/ccompiler.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/compat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/conv_template.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/conv_template.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/conv_template.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/conv_template.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/core.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/core.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/core.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/core.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/cpuinfo.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/cpuinfo.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/cpuinfo.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/cpuinfo.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/exec_command.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/exec_command.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/exec_command.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/exec_command.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/extension.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/extension.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/extension.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/extension.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/from_template.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/from_template.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/from_template.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/from_template.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/intelccompiler.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/intelccompiler.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/intelccompiler.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/intelccompiler.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/lib2def.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/lib2def.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/lib2def.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/lib2def.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/line_endings.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/line_endings.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/line_endings.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/line_endings.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/log.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/log.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/log.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/log.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/mingw32ccompiler.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/mingw32ccompiler.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/mingw32ccompiler.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/mingw32ccompiler.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/misc_util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/misc_util.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/misc_util.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/misc_util.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/msvc9compiler.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/msvc9compiler.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/msvc9compiler.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/msvc9compiler.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/msvccompiler.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/msvccompiler.cpython-37.pyc similarity index 
100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/msvccompiler.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/msvccompiler.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/npy_pkg_config.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/npy_pkg_config.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/npy_pkg_config.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/npy_pkg_config.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/numpy_distribution.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/numpy_distribution.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/numpy_distribution.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/numpy_distribution.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/pathccompiler.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/pathccompiler.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/pathccompiler.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/pathccompiler.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/setup.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/setup.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/setup.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/setup.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/system_info.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/system_info.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/system_info.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/system_info.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/unixccompiler.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/unixccompiler.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/__pycache__/unixccompiler.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/__pycache__/unixccompiler.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/_shell_utils.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/_shell_utils.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/_shell_utils.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/_shell_utils.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/ccompiler.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/ccompiler.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/ccompiler.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/ccompiler.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__init__.py 
similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/autodist.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/autodist.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/autodist.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/autodist.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/bdist_rpm.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/bdist_rpm.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/bdist_rpm.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/bdist_rpm.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_clib.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_clib.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_clib.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_clib.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_ext.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_ext.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_ext.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_ext.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_py.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_py.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_py.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_py.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_scripts.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_scripts.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_scripts.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_scripts.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_src.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_src.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_src.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/build_src.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/config.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/config.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/config.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/config.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/config_compiler.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/config_compiler.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/config_compiler.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/config_compiler.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/develop.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/develop.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/develop.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/develop.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/egg_info.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/egg_info.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/egg_info.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/egg_info.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/install.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/install.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/install.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/install.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/install_clib.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/install_clib.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/install_clib.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/install_clib.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/install_data.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/install_data.cpython-37.pyc similarity index 100% 
rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/install_data.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/install_data.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/install_headers.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/install_headers.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/install_headers.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/install_headers.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/sdist.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/sdist.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/sdist.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/__pycache__/sdist.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/autodist.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/autodist.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/autodist.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/autodist.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/bdist_rpm.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/bdist_rpm.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/bdist_rpm.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/bdist_rpm.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/build.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/build.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/build.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/build.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/build_clib.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/build_clib.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/build_clib.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/build_clib.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/build_ext.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/build_ext.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/build_ext.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/build_ext.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/build_py.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/build_py.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/build_py.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/build_py.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/build_scripts.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/build_scripts.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/build_scripts.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/build_scripts.py diff --git 
a/venv/lib/python3.7/site-packages/numpy/distutils/command/build_src.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/build_src.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/build_src.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/build_src.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/config.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/config.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/config.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/config.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/config_compiler.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/config_compiler.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/config_compiler.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/config_compiler.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/develop.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/develop.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/develop.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/develop.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/egg_info.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/egg_info.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/egg_info.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/egg_info.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/install.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/install.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/install.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/install.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/install_clib.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/install_clib.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/install_clib.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/install_clib.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/install_data.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/install_data.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/install_data.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/install_data.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/install_headers.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/install_headers.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/install_headers.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/install_headers.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/command/sdist.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/command/sdist.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/command/sdist.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/command/sdist.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/compat.py 
b/venv.bak/lib/python3.7/site-packages/numpy/distutils/compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/compat.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/compat.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/conv_template.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/conv_template.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/conv_template.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/conv_template.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/core.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/core.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/core.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/core.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/cpuinfo.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/cpuinfo.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/cpuinfo.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/cpuinfo.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/exec_command.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/exec_command.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/exec_command.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/exec_command.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/extension.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/extension.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/extension.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/extension.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/absoft.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/absoft.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/absoft.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/absoft.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/compaq.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/compaq.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/compaq.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/compaq.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/environment.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/environment.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/environment.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/environment.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/g95.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/g95.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/g95.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/g95.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/gnu.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/gnu.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/gnu.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/gnu.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/hpux.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/hpux.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/hpux.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/hpux.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/ibm.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/ibm.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/ibm.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/ibm.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/intel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/intel.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/intel.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/intel.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/lahey.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/lahey.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/lahey.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/lahey.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/mips.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/mips.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/mips.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/mips.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/nag.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/nag.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/nag.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/nag.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/none.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/none.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/none.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/none.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/pathf95.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/pathf95.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/pathf95.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/pathf95.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/pg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/pg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/pg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/pg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/sun.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/sun.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/sun.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/sun.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/vast.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/vast.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/vast.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/__pycache__/vast.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/absoft.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/absoft.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/absoft.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/absoft.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/compaq.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/compaq.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/compaq.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/compaq.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/environment.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/environment.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/environment.py rename to 
venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/environment.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/g95.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/g95.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/g95.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/g95.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/gnu.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/gnu.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/gnu.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/gnu.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/hpux.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/hpux.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/hpux.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/hpux.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/ibm.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/ibm.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/ibm.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/ibm.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/intel.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/intel.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/intel.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/intel.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/lahey.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/lahey.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/lahey.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/lahey.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/mips.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/mips.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/mips.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/mips.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/nag.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/nag.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/nag.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/nag.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/none.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/none.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/none.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/none.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/pathf95.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/pathf95.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/pathf95.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/pathf95.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/pg.py 
b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/pg.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/pg.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/pg.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/sun.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/sun.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/sun.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/sun.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/vast.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/vast.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/fcompiler/vast.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/fcompiler/vast.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/from_template.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/from_template.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/from_template.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/from_template.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/intelccompiler.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/intelccompiler.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/intelccompiler.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/intelccompiler.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/lib2def.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/lib2def.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/lib2def.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/lib2def.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/line_endings.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/line_endings.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/line_endings.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/line_endings.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/log.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/log.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/log.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/log.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/mingw/gfortran_vs2003_hack.c b/venv.bak/lib/python3.7/site-packages/numpy/distutils/mingw/gfortran_vs2003_hack.c similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/mingw/gfortran_vs2003_hack.c rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/mingw/gfortran_vs2003_hack.c diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/mingw32ccompiler.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/mingw32ccompiler.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/mingw32ccompiler.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/mingw32ccompiler.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/misc_util.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/misc_util.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/misc_util.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/misc_util.py diff --git 
a/venv/lib/python3.7/site-packages/numpy/distutils/msvc9compiler.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/msvc9compiler.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/msvc9compiler.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/msvc9compiler.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/msvccompiler.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/msvccompiler.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/msvccompiler.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/msvccompiler.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/npy_pkg_config.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/npy_pkg_config.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/npy_pkg_config.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/npy_pkg_config.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/numpy_distribution.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/numpy_distribution.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/numpy_distribution.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/numpy_distribution.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/pathccompiler.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/pathccompiler.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/pathccompiler.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/pathccompiler.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/setup.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/setup.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/setup.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/setup.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/system_info.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/system_info.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/system_info.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/system_info.py diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/tests/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/tests/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_exec_command.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_exec_command.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_exec_command.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_exec_command.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_gnu.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_gnu.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_gnu.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_gnu.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_intel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_intel.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_intel.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_intel.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_nagfor.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_nagfor.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_nagfor.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_fcompiler_nagfor.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_from_template.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_from_template.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_from_template.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_from_template.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_mingw32ccompiler.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_mingw32ccompiler.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_mingw32ccompiler.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_mingw32ccompiler.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_misc_util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_misc_util.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_misc_util.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_misc_util.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_npy_pkg_config.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_npy_pkg_config.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_npy_pkg_config.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_npy_pkg_config.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_shell_utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_shell_utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_shell_utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_shell_utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_system_info.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_system_info.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_system_info.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/__pycache__/test_system_info.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/test_exec_command.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_exec_command.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/test_exec_command.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_exec_command.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/test_fcompiler.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_fcompiler.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/test_fcompiler.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_fcompiler.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/test_fcompiler_gnu.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_fcompiler_gnu.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/test_fcompiler_gnu.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_fcompiler_gnu.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/test_fcompiler_intel.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_fcompiler_intel.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/test_fcompiler_intel.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_fcompiler_intel.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/test_fcompiler_nagfor.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_fcompiler_nagfor.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/test_fcompiler_nagfor.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_fcompiler_nagfor.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/test_from_template.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_from_template.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/test_from_template.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_from_template.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/test_mingw32ccompiler.py 
b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_mingw32ccompiler.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/test_mingw32ccompiler.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_mingw32ccompiler.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/test_misc_util.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_misc_util.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/test_misc_util.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_misc_util.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/test_npy_pkg_config.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_npy_pkg_config.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/test_npy_pkg_config.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_npy_pkg_config.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/test_shell_utils.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_shell_utils.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/test_shell_utils.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_shell_utils.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/tests/test_system_info.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_system_info.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/tests/test_system_info.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/tests/test_system_info.py diff --git a/venv/lib/python3.7/site-packages/numpy/distutils/unixccompiler.py b/venv.bak/lib/python3.7/site-packages/numpy/distutils/unixccompiler.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/distutils/unixccompiler.py rename to venv.bak/lib/python3.7/site-packages/numpy/distutils/unixccompiler.py diff --git a/venv/lib/python3.7/site-packages/numpy/doc/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/doc/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/doc/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/doc/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/doc/__pycache__/basics.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/basics.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/__pycache__/basics.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/basics.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/doc/__pycache__/broadcasting.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/broadcasting.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/__pycache__/broadcasting.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/broadcasting.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/numpy/doc/__pycache__/byteswapping.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/byteswapping.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/__pycache__/byteswapping.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/byteswapping.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/doc/__pycache__/constants.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/constants.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/__pycache__/constants.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/constants.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/doc/__pycache__/creation.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/creation.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/__pycache__/creation.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/creation.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/doc/__pycache__/dispatch.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/dispatch.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/__pycache__/dispatch.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/dispatch.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/doc/__pycache__/glossary.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/glossary.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/__pycache__/glossary.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/glossary.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/doc/__pycache__/indexing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/indexing.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/__pycache__/indexing.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/indexing.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/doc/__pycache__/internals.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/internals.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/__pycache__/internals.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/internals.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/doc/__pycache__/misc.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/misc.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/__pycache__/misc.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/misc.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/doc/__pycache__/structured_arrays.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/structured_arrays.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/__pycache__/structured_arrays.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/structured_arrays.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/numpy/doc/__pycache__/subclassing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/subclassing.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/__pycache__/subclassing.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/subclassing.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/doc/__pycache__/ufuncs.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/ufuncs.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/__pycache__/ufuncs.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/doc/__pycache__/ufuncs.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/doc/basics.py b/venv.bak/lib/python3.7/site-packages/numpy/doc/basics.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/basics.py rename to venv.bak/lib/python3.7/site-packages/numpy/doc/basics.py diff --git a/venv/lib/python3.7/site-packages/numpy/doc/broadcasting.py b/venv.bak/lib/python3.7/site-packages/numpy/doc/broadcasting.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/broadcasting.py rename to venv.bak/lib/python3.7/site-packages/numpy/doc/broadcasting.py diff --git a/venv/lib/python3.7/site-packages/numpy/doc/byteswapping.py b/venv.bak/lib/python3.7/site-packages/numpy/doc/byteswapping.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/byteswapping.py rename to venv.bak/lib/python3.7/site-packages/numpy/doc/byteswapping.py diff --git a/venv/lib/python3.7/site-packages/numpy/doc/constants.py b/venv.bak/lib/python3.7/site-packages/numpy/doc/constants.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/constants.py rename to venv.bak/lib/python3.7/site-packages/numpy/doc/constants.py diff --git a/venv/lib/python3.7/site-packages/numpy/doc/creation.py b/venv.bak/lib/python3.7/site-packages/numpy/doc/creation.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/creation.py rename to venv.bak/lib/python3.7/site-packages/numpy/doc/creation.py diff --git a/venv/lib/python3.7/site-packages/numpy/doc/dispatch.py b/venv.bak/lib/python3.7/site-packages/numpy/doc/dispatch.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/dispatch.py rename to venv.bak/lib/python3.7/site-packages/numpy/doc/dispatch.py diff --git a/venv/lib/python3.7/site-packages/numpy/doc/glossary.py b/venv.bak/lib/python3.7/site-packages/numpy/doc/glossary.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/glossary.py rename to venv.bak/lib/python3.7/site-packages/numpy/doc/glossary.py diff --git a/venv/lib/python3.7/site-packages/numpy/doc/indexing.py b/venv.bak/lib/python3.7/site-packages/numpy/doc/indexing.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/indexing.py rename to venv.bak/lib/python3.7/site-packages/numpy/doc/indexing.py diff --git a/venv/lib/python3.7/site-packages/numpy/doc/internals.py b/venv.bak/lib/python3.7/site-packages/numpy/doc/internals.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/internals.py rename to venv.bak/lib/python3.7/site-packages/numpy/doc/internals.py diff --git a/venv/lib/python3.7/site-packages/numpy/doc/misc.py b/venv.bak/lib/python3.7/site-packages/numpy/doc/misc.py similarity index 100% rename from 
venv/lib/python3.7/site-packages/numpy/doc/misc.py rename to venv.bak/lib/python3.7/site-packages/numpy/doc/misc.py diff --git a/venv/lib/python3.7/site-packages/numpy/doc/structured_arrays.py b/venv.bak/lib/python3.7/site-packages/numpy/doc/structured_arrays.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/structured_arrays.py rename to venv.bak/lib/python3.7/site-packages/numpy/doc/structured_arrays.py diff --git a/venv/lib/python3.7/site-packages/numpy/doc/subclassing.py b/venv.bak/lib/python3.7/site-packages/numpy/doc/subclassing.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/subclassing.py rename to venv.bak/lib/python3.7/site-packages/numpy/doc/subclassing.py diff --git a/venv/lib/python3.7/site-packages/numpy/doc/ufuncs.py b/venv.bak/lib/python3.7/site-packages/numpy/doc/ufuncs.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/doc/ufuncs.py rename to venv.bak/lib/python3.7/site-packages/numpy/doc/ufuncs.py diff --git a/venv/lib/python3.7/site-packages/numpy/dual.py b/venv.bak/lib/python3.7/site-packages/numpy/dual.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/dual.py rename to venv.bak/lib/python3.7/site-packages/numpy/dual.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__main__.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__main__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__main__.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__main__.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/__main__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/__main__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/__main__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/__main__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/__version__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/__version__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/__version__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/__version__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/auxfuncs.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/auxfuncs.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/auxfuncs.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/auxfuncs.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/capi_maps.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/capi_maps.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/capi_maps.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/capi_maps.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/cb_rules.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/cb_rules.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/cb_rules.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/cb_rules.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/cfuncs.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/cfuncs.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/cfuncs.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/cfuncs.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/common_rules.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/common_rules.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/common_rules.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/common_rules.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/crackfortran.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/crackfortran.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/crackfortran.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/crackfortran.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/diagnose.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/diagnose.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/diagnose.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/diagnose.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/f2py2e.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/f2py2e.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/f2py2e.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/f2py2e.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/f2py_testing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/f2py_testing.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/f2py_testing.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/f2py_testing.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/f90mod_rules.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/f90mod_rules.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/f90mod_rules.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/f90mod_rules.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/func2subr.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/func2subr.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/func2subr.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/func2subr.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/rules.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/rules.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/rules.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/rules.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/setup.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/setup.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/setup.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/setup.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/use_rules.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/use_rules.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__pycache__/use_rules.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__pycache__/use_rules.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/__version__.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/__version__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/__version__.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/__version__.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/auxfuncs.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/auxfuncs.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/auxfuncs.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/auxfuncs.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/capi_maps.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/capi_maps.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/capi_maps.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/capi_maps.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/cb_rules.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/cb_rules.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/cb_rules.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/cb_rules.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/cfuncs.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/cfuncs.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/cfuncs.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/cfuncs.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/common_rules.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/common_rules.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/common_rules.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/common_rules.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/crackfortran.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/crackfortran.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/crackfortran.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/crackfortran.py diff --git 
a/venv/lib/python3.7/site-packages/numpy/f2py/diagnose.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/diagnose.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/diagnose.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/diagnose.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/f2py2e.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/f2py2e.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/f2py2e.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/f2py2e.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/f2py_testing.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/f2py_testing.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/f2py_testing.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/f2py_testing.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/f90mod_rules.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/f90mod_rules.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/f90mod_rules.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/f90mod_rules.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/func2subr.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/func2subr.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/func2subr.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/func2subr.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/rules.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/rules.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/rules.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/rules.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/setup.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/setup.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/setup.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/setup.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/src/fortranobject.c b/venv.bak/lib/python3.7/site-packages/numpy/f2py/src/fortranobject.c similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/src/fortranobject.c rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/src/fortranobject.c diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/src/fortranobject.h b/venv.bak/lib/python3.7/site-packages/numpy/f2py/src/fortranobject.h similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/src/fortranobject.h rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/src/fortranobject.h diff --git a/venv/lib/python3.7/site-packages/numpy/ma/tests/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/tests/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_array_from_pyobj.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_array_from_pyobj.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_array_from_pyobj.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_array_from_pyobj.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_assumed_shape.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_assumed_shape.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_assumed_shape.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_assumed_shape.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_block_docstring.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_block_docstring.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_block_docstring.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_block_docstring.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_callback.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_callback.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_callback.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_callback.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_common.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_common.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_common.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_common.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_compile_function.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_compile_function.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_compile_function.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_compile_function.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_kind.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_kind.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_kind.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_kind.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_mixed.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_mixed.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_mixed.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_mixed.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_parameter.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_parameter.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_parameter.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_parameter.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_quoted_character.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_quoted_character.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_quoted_character.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_quoted_character.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_regression.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_regression.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_regression.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_regression.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_character.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_character.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_character.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_character.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_complex.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_complex.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_complex.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_complex.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_integer.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_integer.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_integer.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_integer.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_logical.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_logical.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_logical.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_logical.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_real.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_real.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_real.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_return_real.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_semicolon_split.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_semicolon_split.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_semicolon_split.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_semicolon_split.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_size.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_size.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_size.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_size.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_string.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_string.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_string.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/test_string.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/util.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/util.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/__pycache__/util.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/.f2py_f2cmap b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/.f2py_f2cmap similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/.f2py_f2cmap rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/.f2py_f2cmap diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/foo_free.f90 b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/foo_free.f90 similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/foo_free.f90 rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/foo_free.f90 diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/foo_mod.f90 b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/foo_mod.f90 similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/foo_mod.f90 rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/foo_mod.f90 diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/foo_use.f90 b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/foo_use.f90 similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/foo_use.f90 rename 
to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/foo_use.f90 diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/precision.f90 b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/precision.f90 similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/precision.f90 rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/assumed_shape/precision.f90 diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/common/block.f b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/common/block.f similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/common/block.f rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/common/block.f diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/kind/foo.f90 b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/kind/foo.f90 similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/kind/foo.f90 rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/kind/foo.f90 diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/mixed/foo.f b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/mixed/foo.f similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/mixed/foo.f rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/mixed/foo.f diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/mixed/foo_fixed.f90 b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/mixed/foo_fixed.f90 similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/mixed/foo_fixed.f90 rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/mixed/foo_fixed.f90 diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/mixed/foo_free.f90 b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/mixed/foo_free.f90 similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/mixed/foo_free.f90 rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/mixed/foo_free.f90 diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_both.f90 b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_both.f90 similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_both.f90 rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_both.f90 diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_compound.f90 b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_compound.f90 similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_compound.f90 rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_compound.f90 diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_integer.f90 b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_integer.f90 similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_integer.f90 rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_integer.f90 diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_non_compound.f90 
b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_non_compound.f90 similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_non_compound.f90 rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_non_compound.f90 diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_real.f90 b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_real.f90 similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_real.f90 rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/parameter/constant_real.f90 diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/regression/inout.f90 b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/regression/inout.f90 similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/regression/inout.f90 rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/regression/inout.f90 diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/size/foo.f90 b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/size/foo.f90 similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/size/foo.f90 rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/size/foo.f90 diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/src/string/char.f90 b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/string/char.f90 similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/src/string/char.f90 rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/src/string/char.f90 diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_array_from_pyobj.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_array_from_pyobj.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_array_from_pyobj.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_array_from_pyobj.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_assumed_shape.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_assumed_shape.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_assumed_shape.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_assumed_shape.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_block_docstring.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_block_docstring.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_block_docstring.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_block_docstring.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_callback.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_callback.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_callback.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_callback.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_common.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_common.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_common.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_common.py diff --git 
a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_compile_function.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_compile_function.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_compile_function.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_compile_function.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_kind.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_kind.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_kind.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_kind.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_mixed.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_mixed.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_mixed.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_mixed.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_parameter.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_parameter.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_parameter.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_parameter.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_quoted_character.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_quoted_character.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_quoted_character.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_quoted_character.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_regression.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_regression.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_regression.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_regression.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_return_character.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_return_character.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_return_character.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_return_character.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_return_complex.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_return_complex.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_return_complex.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_return_complex.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_return_integer.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_return_integer.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_return_integer.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_return_integer.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_return_logical.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_return_logical.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_return_logical.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_return_logical.py diff --git 
a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_return_real.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_return_real.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_return_real.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_return_real.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_semicolon_split.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_semicolon_split.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_semicolon_split.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_semicolon_split.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_size.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_size.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_size.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_size.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/test_string.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_string.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/test_string.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/test_string.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/tests/util.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/util.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/tests/util.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/tests/util.py diff --git a/venv/lib/python3.7/site-packages/numpy/f2py/use_rules.py b/venv.bak/lib/python3.7/site-packages/numpy/f2py/use_rules.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/f2py/use_rules.py rename to venv.bak/lib/python3.7/site-packages/numpy/f2py/use_rules.py diff --git a/venv/lib/python3.7/site-packages/numpy/fft/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/fft/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/fft/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/fft/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/fft/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/fft/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/fft/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/fft/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/fft/__pycache__/_pocketfft.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/fft/__pycache__/_pocketfft.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/fft/__pycache__/_pocketfft.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/fft/__pycache__/_pocketfft.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/fft/__pycache__/helper.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/fft/__pycache__/helper.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/fft/__pycache__/helper.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/fft/__pycache__/helper.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/fft/__pycache__/setup.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/fft/__pycache__/setup.cpython-37.pyc 
similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/fft/__pycache__/setup.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/fft/__pycache__/setup.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/fft/_pocketfft.py b/venv.bak/lib/python3.7/site-packages/numpy/fft/_pocketfft.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/fft/_pocketfft.py rename to venv.bak/lib/python3.7/site-packages/numpy/fft/_pocketfft.py diff --git a/venv/lib/python3.7/site-packages/numpy/fft/_pocketfft_internal.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/fft/_pocketfft_internal.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/fft/_pocketfft_internal.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/numpy/fft/_pocketfft_internal.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/fft/helper.py b/venv.bak/lib/python3.7/site-packages/numpy/fft/helper.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/fft/helper.py rename to venv.bak/lib/python3.7/site-packages/numpy/fft/helper.py diff --git a/venv/lib/python3.7/site-packages/numpy/fft/setup.py b/venv.bak/lib/python3.7/site-packages/numpy/fft/setup.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/fft/setup.py rename to venv.bak/lib/python3.7/site-packages/numpy/fft/setup.py diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/fft/tests/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/fft/tests/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/fft/tests/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/fft/tests/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/fft/tests/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/fft/tests/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/fft/tests/__pycache__/test_helper.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/fft/tests/__pycache__/test_helper.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/fft/tests/__pycache__/test_helper.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/fft/tests/__pycache__/test_helper.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/fft/tests/__pycache__/test_pocketfft.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/fft/tests/__pycache__/test_pocketfft.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/fft/tests/__pycache__/test_pocketfft.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/fft/tests/__pycache__/test_pocketfft.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/fft/tests/test_helper.py b/venv.bak/lib/python3.7/site-packages/numpy/fft/tests/test_helper.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/fft/tests/test_helper.py rename to venv.bak/lib/python3.7/site-packages/numpy/fft/tests/test_helper.py diff --git a/venv/lib/python3.7/site-packages/numpy/fft/tests/test_pocketfft.py b/venv.bak/lib/python3.7/site-packages/numpy/fft/tests/test_pocketfft.py similarity index 100% rename from 
venv/lib/python3.7/site-packages/numpy/fft/tests/test_pocketfft.py rename to venv.bak/lib/python3.7/site-packages/numpy/fft/tests/test_pocketfft.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/_datasource.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/_datasource.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/_datasource.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/_datasource.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/_iotools.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/_iotools.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/_iotools.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/_iotools.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/_version.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/_version.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/_version.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/_version.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/arraypad.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/arraypad.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/arraypad.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/arraypad.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/arraysetops.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/arraysetops.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/arraysetops.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/arraysetops.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/arrayterator.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/arrayterator.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/arrayterator.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/arrayterator.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/financial.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/financial.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/financial.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/financial.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/format.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/format.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/format.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/format.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/function_base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/function_base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/function_base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/function_base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/histograms.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/histograms.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/histograms.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/histograms.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/index_tricks.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/index_tricks.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/index_tricks.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/index_tricks.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/mixins.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/mixins.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/mixins.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/mixins.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/nanfunctions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/nanfunctions.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/nanfunctions.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/nanfunctions.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/npyio.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/npyio.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/npyio.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/npyio.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/polynomial.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/polynomial.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/polynomial.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/polynomial.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/recfunctions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/recfunctions.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/recfunctions.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/recfunctions.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/scimath.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/scimath.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/scimath.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/scimath.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/setup.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/setup.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/setup.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/setup.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/shape_base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/shape_base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/shape_base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/shape_base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/stride_tricks.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/stride_tricks.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/stride_tricks.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/stride_tricks.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/twodim_base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/twodim_base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/twodim_base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/twodim_base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/type_check.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/type_check.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/type_check.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/type_check.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/ufunclike.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/ufunclike.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/ufunclike.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/ufunclike.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/user_array.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/user_array.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/user_array.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/user_array.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/__pycache__/utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/__pycache__/utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/__pycache__/utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/_datasource.py 
b/venv.bak/lib/python3.7/site-packages/numpy/lib/_datasource.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/_datasource.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/_datasource.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/_iotools.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/_iotools.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/_iotools.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/_iotools.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/_version.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/_version.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/_version.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/_version.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/arraypad.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/arraypad.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/arraypad.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/arraypad.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/arraysetops.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/arraysetops.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/arraysetops.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/arraysetops.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/arrayterator.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/arrayterator.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/arrayterator.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/arrayterator.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/financial.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/financial.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/financial.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/financial.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/format.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/format.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/format.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/format.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/function_base.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/function_base.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/function_base.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/function_base.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/histograms.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/histograms.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/histograms.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/histograms.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/index_tricks.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/index_tricks.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/index_tricks.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/index_tricks.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/mixins.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/mixins.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/mixins.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/mixins.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/nanfunctions.py 
b/venv.bak/lib/python3.7/site-packages/numpy/lib/nanfunctions.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/nanfunctions.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/nanfunctions.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/npyio.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/npyio.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/npyio.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/npyio.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/polynomial.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/polynomial.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/polynomial.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/polynomial.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/recfunctions.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/recfunctions.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/recfunctions.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/recfunctions.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/scimath.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/scimath.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/scimath.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/scimath.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/setup.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/setup.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/setup.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/setup.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/shape_base.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/shape_base.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/shape_base.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/shape_base.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/stride_tricks.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/stride_tricks.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/stride_tricks.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/stride_tricks.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test__datasource.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test__datasource.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test__datasource.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test__datasource.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test__iotools.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test__iotools.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test__iotools.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test__iotools.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test__version.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test__version.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test__version.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test__version.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_arraypad.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_arraypad.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_arraypad.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_arraypad.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_arraysetops.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_arraysetops.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_arraysetops.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_arraysetops.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_arrayterator.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_arrayterator.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_arrayterator.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_arrayterator.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_financial.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_financial.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_financial.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_financial.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_format.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_format.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_format.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_format.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_function_base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_function_base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_function_base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_function_base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_histograms.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_histograms.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_histograms.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_histograms.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_index_tricks.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_index_tricks.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_index_tricks.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_index_tricks.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_io.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_io.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_io.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_io.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_mixins.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_mixins.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_mixins.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_mixins.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_nanfunctions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_nanfunctions.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_nanfunctions.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_nanfunctions.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_packbits.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_packbits.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_packbits.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_packbits.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_polynomial.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_polynomial.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_polynomial.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_polynomial.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_recfunctions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_recfunctions.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_recfunctions.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_recfunctions.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_regression.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_regression.cpython-37.pyc 
similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_regression.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_regression.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_shape_base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_shape_base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_shape_base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_shape_base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_stride_tricks.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_stride_tricks.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_stride_tricks.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_stride_tricks.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_twodim_base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_twodim_base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_twodim_base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_twodim_base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_type_check.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_type_check.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_type_check.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_type_check.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_ufunclike.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_ufunclike.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_ufunclike.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_ufunclike.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/__pycache__/test_utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/data/py2-objarr.npy b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/data/py2-objarr.npy similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/data/py2-objarr.npy rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/data/py2-objarr.npy diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/data/py2-objarr.npz b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/data/py2-objarr.npz similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/data/py2-objarr.npz rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/data/py2-objarr.npz diff --git 
a/venv/lib/python3.7/site-packages/numpy/lib/tests/data/py3-objarr.npy b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/data/py3-objarr.npy similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/data/py3-objarr.npy rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/data/py3-objarr.npy diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/data/py3-objarr.npz b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/data/py3-objarr.npz similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/data/py3-objarr.npz rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/data/py3-objarr.npz diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/data/python3.npy b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/data/python3.npy similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/data/python3.npy rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/data/python3.npy diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/data/win64python2.npy b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/data/win64python2.npy similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/data/win64python2.npy rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/data/win64python2.npy diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test__datasource.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test__datasource.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test__datasource.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test__datasource.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test__iotools.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test__iotools.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test__iotools.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test__iotools.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test__version.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test__version.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test__version.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test__version.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_arraypad.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_arraypad.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_arraypad.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_arraypad.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_arraysetops.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_arraysetops.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_arraysetops.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_arraysetops.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_arrayterator.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_arrayterator.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_arrayterator.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_arrayterator.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_financial.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_financial.py similarity index 
100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_financial.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_financial.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_format.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_format.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_format.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_format.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_function_base.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_function_base.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_function_base.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_function_base.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_histograms.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_histograms.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_histograms.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_histograms.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_index_tricks.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_index_tricks.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_index_tricks.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_index_tricks.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_io.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_io.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_io.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_io.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_mixins.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_mixins.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_mixins.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_mixins.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_nanfunctions.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_nanfunctions.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_nanfunctions.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_nanfunctions.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_packbits.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_packbits.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_packbits.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_packbits.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_polynomial.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_polynomial.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_polynomial.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_polynomial.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_recfunctions.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_recfunctions.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_recfunctions.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_recfunctions.py diff --git 
a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_regression.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_regression.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_regression.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_regression.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_shape_base.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_shape_base.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_shape_base.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_shape_base.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_stride_tricks.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_stride_tricks.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_stride_tricks.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_stride_tricks.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_twodim_base.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_twodim_base.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_twodim_base.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_twodim_base.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_type_check.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_type_check.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_type_check.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_type_check.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_ufunclike.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_ufunclike.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_ufunclike.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_ufunclike.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/tests/test_utils.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_utils.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/tests/test_utils.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/tests/test_utils.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/twodim_base.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/twodim_base.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/twodim_base.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/twodim_base.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/type_check.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/type_check.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/type_check.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/type_check.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/ufunclike.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/ufunclike.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/ufunclike.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/ufunclike.py diff --git a/venv/lib/python3.7/site-packages/numpy/lib/user_array.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/user_array.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/user_array.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/user_array.py diff --git 
a/venv/lib/python3.7/site-packages/numpy/lib/utils.py b/venv.bak/lib/python3.7/site-packages/numpy/lib/utils.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/lib/utils.py rename to venv.bak/lib/python3.7/site-packages/numpy/lib/utils.py diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/linalg/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/linalg/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/__pycache__/linalg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/linalg/__pycache__/linalg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/__pycache__/linalg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/__pycache__/linalg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/__pycache__/setup.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/linalg/__pycache__/setup.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/__pycache__/setup.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/__pycache__/setup.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/_umath_linalg.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/linalg/_umath_linalg.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/_umath_linalg.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/_umath_linalg.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/lapack_lite.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/linalg/lapack_lite.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/lapack_lite.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/lapack_lite.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/linalg.py b/venv.bak/lib/python3.7/site-packages/numpy/linalg/linalg.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/linalg.py rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/linalg.py diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/setup.py b/venv.bak/lib/python3.7/site-packages/numpy/linalg/setup.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/setup.py rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/setup.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/__init__.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/test_build.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/test_build.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/test_build.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/test_build.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/test_deprecations.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/test_deprecations.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/test_deprecations.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/test_deprecations.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/test_linalg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/test_linalg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/test_linalg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/test_linalg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/test_regression.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/test_regression.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/test_regression.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/__pycache__/test_regression.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/tests/test_build.py b/venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/test_build.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/tests/test_build.py rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/test_build.py diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/tests/test_deprecations.py b/venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/test_deprecations.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/tests/test_deprecations.py rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/test_deprecations.py diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/tests/test_linalg.py b/venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/test_linalg.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/tests/test_linalg.py rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/test_linalg.py diff --git a/venv/lib/python3.7/site-packages/numpy/linalg/tests/test_regression.py b/venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/test_regression.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/linalg/tests/test_regression.py rename to venv.bak/lib/python3.7/site-packages/numpy/linalg/tests/test_regression.py diff --git a/venv/lib/python3.7/site-packages/numpy/ma/__init__.py 
b/venv.bak/lib/python3.7/site-packages/numpy/ma/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/ma/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/ma/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/ma/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/ma/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/ma/__pycache__/bench.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/ma/__pycache__/bench.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/__pycache__/bench.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/ma/__pycache__/bench.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/ma/__pycache__/core.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/ma/__pycache__/core.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/__pycache__/core.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/ma/__pycache__/core.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/ma/__pycache__/extras.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/ma/__pycache__/extras.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/__pycache__/extras.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/ma/__pycache__/extras.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/ma/__pycache__/mrecords.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/ma/__pycache__/mrecords.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/__pycache__/mrecords.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/ma/__pycache__/mrecords.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/ma/__pycache__/setup.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/ma/__pycache__/setup.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/__pycache__/setup.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/ma/__pycache__/setup.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/ma/__pycache__/testutils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/ma/__pycache__/testutils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/__pycache__/testutils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/ma/__pycache__/testutils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/ma/__pycache__/timer_comparison.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/ma/__pycache__/timer_comparison.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/__pycache__/timer_comparison.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/ma/__pycache__/timer_comparison.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/ma/bench.py b/venv.bak/lib/python3.7/site-packages/numpy/ma/bench.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/bench.py rename to venv.bak/lib/python3.7/site-packages/numpy/ma/bench.py diff --git 
a/venv/lib/python3.7/site-packages/numpy/ma/core.py b/venv.bak/lib/python3.7/site-packages/numpy/ma/core.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/core.py rename to venv.bak/lib/python3.7/site-packages/numpy/ma/core.py diff --git a/venv/lib/python3.7/site-packages/numpy/ma/extras.py b/venv.bak/lib/python3.7/site-packages/numpy/ma/extras.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/extras.py rename to venv.bak/lib/python3.7/site-packages/numpy/ma/extras.py diff --git a/venv/lib/python3.7/site-packages/numpy/ma/mrecords.py b/venv.bak/lib/python3.7/site-packages/numpy/ma/mrecords.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/mrecords.py rename to venv.bak/lib/python3.7/site-packages/numpy/ma/mrecords.py diff --git a/venv/lib/python3.7/site-packages/numpy/ma/setup.py b/venv.bak/lib/python3.7/site-packages/numpy/ma/setup.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/setup.py rename to venv.bak/lib/python3.7/site-packages/numpy/ma/setup.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/data/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/data/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_core.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_core.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_core.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_core.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_deprecations.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_deprecations.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_deprecations.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_deprecations.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_extras.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_extras.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_extras.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_extras.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_mrecords.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_mrecords.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_mrecords.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_mrecords.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_old_ma.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_old_ma.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_old_ma.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_old_ma.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_regression.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_regression.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_regression.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_regression.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_subclassing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_subclassing.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_subclassing.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/ma/tests/__pycache__/test_subclassing.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/ma/tests/test_core.py b/venv.bak/lib/python3.7/site-packages/numpy/ma/tests/test_core.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/tests/test_core.py rename to venv.bak/lib/python3.7/site-packages/numpy/ma/tests/test_core.py diff --git a/venv/lib/python3.7/site-packages/numpy/ma/tests/test_deprecations.py b/venv.bak/lib/python3.7/site-packages/numpy/ma/tests/test_deprecations.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/tests/test_deprecations.py rename to venv.bak/lib/python3.7/site-packages/numpy/ma/tests/test_deprecations.py diff --git a/venv/lib/python3.7/site-packages/numpy/ma/tests/test_extras.py b/venv.bak/lib/python3.7/site-packages/numpy/ma/tests/test_extras.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/tests/test_extras.py rename to venv.bak/lib/python3.7/site-packages/numpy/ma/tests/test_extras.py diff --git a/venv/lib/python3.7/site-packages/numpy/ma/tests/test_mrecords.py b/venv.bak/lib/python3.7/site-packages/numpy/ma/tests/test_mrecords.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/tests/test_mrecords.py rename to venv.bak/lib/python3.7/site-packages/numpy/ma/tests/test_mrecords.py diff --git a/venv/lib/python3.7/site-packages/numpy/ma/tests/test_old_ma.py b/venv.bak/lib/python3.7/site-packages/numpy/ma/tests/test_old_ma.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/tests/test_old_ma.py rename to venv.bak/lib/python3.7/site-packages/numpy/ma/tests/test_old_ma.py diff --git a/venv/lib/python3.7/site-packages/numpy/ma/tests/test_regression.py b/venv.bak/lib/python3.7/site-packages/numpy/ma/tests/test_regression.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/tests/test_regression.py rename to venv.bak/lib/python3.7/site-packages/numpy/ma/tests/test_regression.py diff --git a/venv/lib/python3.7/site-packages/numpy/ma/tests/test_subclassing.py b/venv.bak/lib/python3.7/site-packages/numpy/ma/tests/test_subclassing.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/tests/test_subclassing.py rename to 
venv.bak/lib/python3.7/site-packages/numpy/ma/tests/test_subclassing.py diff --git a/venv/lib/python3.7/site-packages/numpy/ma/testutils.py b/venv.bak/lib/python3.7/site-packages/numpy/ma/testutils.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/testutils.py rename to venv.bak/lib/python3.7/site-packages/numpy/ma/testutils.py diff --git a/venv/lib/python3.7/site-packages/numpy/ma/timer_comparison.py b/venv.bak/lib/python3.7/site-packages/numpy/ma/timer_comparison.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/ma/timer_comparison.py rename to venv.bak/lib/python3.7/site-packages/numpy/ma/timer_comparison.py diff --git a/venv/lib/python3.7/site-packages/numpy/matlib.py b/venv.bak/lib/python3.7/site-packages/numpy/matlib.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matlib.py rename to venv.bak/lib/python3.7/site-packages/numpy/matlib.py diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/__pycache__/defmatrix.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/__pycache__/defmatrix.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/__pycache__/defmatrix.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/__pycache__/defmatrix.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/__pycache__/setup.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/__pycache__/setup.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/__pycache__/setup.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/__pycache__/setup.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/defmatrix.py b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/defmatrix.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/defmatrix.py rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/defmatrix.py diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/setup.py b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/setup.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/setup.py rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/setup.py diff --git a/venv/lib/python3.7/site-packages/numpy/testing/_private/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/_private/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/__init__.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_defmatrix.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_defmatrix.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_defmatrix.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_defmatrix.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_interaction.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_interaction.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_interaction.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_interaction.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_masked_matrix.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_masked_matrix.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_masked_matrix.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_masked_matrix.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_matrix_linalg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_matrix_linalg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_matrix_linalg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_matrix_linalg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_multiarray.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_multiarray.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_multiarray.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_multiarray.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_numeric.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_numeric.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_numeric.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_numeric.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_regression.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_regression.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_regression.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/__pycache__/test_regression.cpython-37.pyc 
diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/tests/test_defmatrix.py b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/test_defmatrix.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/tests/test_defmatrix.py rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/test_defmatrix.py diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/tests/test_interaction.py b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/test_interaction.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/tests/test_interaction.py rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/test_interaction.py diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/tests/test_masked_matrix.py b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/test_masked_matrix.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/tests/test_masked_matrix.py rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/test_masked_matrix.py diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/tests/test_matrix_linalg.py b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/test_matrix_linalg.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/tests/test_matrix_linalg.py rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/test_matrix_linalg.py diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/tests/test_multiarray.py b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/test_multiarray.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/tests/test_multiarray.py rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/test_multiarray.py diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/tests/test_numeric.py b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/test_numeric.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/tests/test_numeric.py rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/test_numeric.py diff --git a/venv/lib/python3.7/site-packages/numpy/matrixlib/tests/test_regression.py b/venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/test_regression.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/matrixlib/tests/test_regression.py rename to venv.bak/lib/python3.7/site-packages/numpy/matrixlib/tests/test_regression.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/_polybase.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/_polybase.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/_polybase.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/_polybase.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/chebyshev.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/chebyshev.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/chebyshev.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/chebyshev.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/hermite.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/hermite.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/hermite.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/hermite.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/hermite_e.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/hermite_e.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/hermite_e.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/hermite_e.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/laguerre.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/laguerre.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/laguerre.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/laguerre.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/legendre.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/legendre.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/legendre.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/legendre.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/polynomial.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/polynomial.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/polynomial.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/polynomial.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/polyutils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/polyutils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/polyutils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/polyutils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/setup.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/setup.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/__pycache__/setup.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/__pycache__/setup.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/numpy/polynomial/_polybase.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/_polybase.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/_polybase.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/_polybase.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/chebyshev.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/chebyshev.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/chebyshev.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/chebyshev.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/hermite.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/hermite.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/hermite.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/hermite.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/hermite_e.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/hermite_e.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/hermite_e.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/hermite_e.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/laguerre.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/laguerre.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/laguerre.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/laguerre.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/legendre.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/legendre.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/legendre.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/legendre.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/polynomial.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/polynomial.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/polynomial.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/polynomial.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/polyutils.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/polyutils.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/polyutils.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/polyutils.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/setup.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/setup.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/setup.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/setup.py diff --git a/venv/lib/python3.7/site-packages/numpy/testing/tests/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/tests/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/__init__.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_chebyshev.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_chebyshev.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_chebyshev.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_chebyshev.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_classes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_classes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_classes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_classes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_hermite.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_hermite.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_hermite.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_hermite.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_hermite_e.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_hermite_e.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_hermite_e.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_hermite_e.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_laguerre.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_laguerre.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_laguerre.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_laguerre.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_legendre.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_legendre.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_legendre.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_legendre.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_polynomial.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_polynomial.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_polynomial.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_polynomial.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_polyutils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_polyutils.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_polyutils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_polyutils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_printing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_printing.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_printing.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/__pycache__/test_printing.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_chebyshev.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_chebyshev.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_chebyshev.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_chebyshev.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_classes.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_classes.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_classes.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_classes.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_hermite.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_hermite.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_hermite.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_hermite.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_hermite_e.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_hermite_e.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_hermite_e.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_hermite_e.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_laguerre.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_laguerre.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_laguerre.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_laguerre.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_legendre.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_legendre.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_legendre.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_legendre.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_polynomial.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_polynomial.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_polynomial.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_polynomial.py diff --git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_polyutils.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_polyutils.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_polyutils.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_polyutils.py diff 
--git a/venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_printing.py b/venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_printing.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/polynomial/tests/test_printing.py rename to venv.bak/lib/python3.7/site-packages/numpy/polynomial/tests/test_printing.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/__init__.pxd b/venv.bak/lib/python3.7/site-packages/numpy/random/__init__.pxd similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/__init__.pxd rename to venv.bak/lib/python3.7/site-packages/numpy/random/__init__.pxd diff --git a/venv/lib/python3.7/site-packages/numpy/random/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/random/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/__pycache__/_pickle.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/__pycache__/_pickle.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/__pycache__/_pickle.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/__pycache__/_pickle.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/__pycache__/setup.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/__pycache__/setup.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/__pycache__/setup.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/__pycache__/setup.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/_bit_generator.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/random/_bit_generator.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_bit_generator.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/numpy/random/_bit_generator.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/random/_bit_generator.pxd b/venv.bak/lib/python3.7/site-packages/numpy/random/_bit_generator.pxd similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_bit_generator.pxd rename to venv.bak/lib/python3.7/site-packages/numpy/random/_bit_generator.pxd diff --git a/venv/lib/python3.7/site-packages/numpy/random/_bounded_integers.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/random/_bounded_integers.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_bounded_integers.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/numpy/random/_bounded_integers.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/random/_bounded_integers.pxd b/venv.bak/lib/python3.7/site-packages/numpy/random/_bounded_integers.pxd similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_bounded_integers.pxd rename to 
venv.bak/lib/python3.7/site-packages/numpy/random/_bounded_integers.pxd diff --git a/venv/lib/python3.7/site-packages/numpy/random/_common.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/random/_common.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_common.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/numpy/random/_common.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/random/_common.pxd b/venv.bak/lib/python3.7/site-packages/numpy/random/_common.pxd similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_common.pxd rename to venv.bak/lib/python3.7/site-packages/numpy/random/_common.pxd diff --git a/venv/lib/python3.7/site-packages/numpy/random/_examples/cffi/__pycache__/extending.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/_examples/cffi/__pycache__/extending.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_examples/cffi/__pycache__/extending.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/_examples/cffi/__pycache__/extending.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/_examples/cffi/__pycache__/parse.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/_examples/cffi/__pycache__/parse.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_examples/cffi/__pycache__/parse.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/_examples/cffi/__pycache__/parse.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/_examples/cffi/extending.py b/venv.bak/lib/python3.7/site-packages/numpy/random/_examples/cffi/extending.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_examples/cffi/extending.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/_examples/cffi/extending.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/_examples/cffi/parse.py b/venv.bak/lib/python3.7/site-packages/numpy/random/_examples/cffi/parse.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_examples/cffi/parse.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/_examples/cffi/parse.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/_examples/cython/__pycache__/setup.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/_examples/cython/__pycache__/setup.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_examples/cython/__pycache__/setup.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/_examples/cython/__pycache__/setup.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/_examples/cython/extending.pyx b/venv.bak/lib/python3.7/site-packages/numpy/random/_examples/cython/extending.pyx similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_examples/cython/extending.pyx rename to venv.bak/lib/python3.7/site-packages/numpy/random/_examples/cython/extending.pyx diff --git a/venv/lib/python3.7/site-packages/numpy/random/_examples/cython/extending_distributions.pyx b/venv.bak/lib/python3.7/site-packages/numpy/random/_examples/cython/extending_distributions.pyx similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_examples/cython/extending_distributions.pyx rename to 
venv.bak/lib/python3.7/site-packages/numpy/random/_examples/cython/extending_distributions.pyx diff --git a/venv/lib/python3.7/site-packages/numpy/random/_examples/cython/setup.py b/venv.bak/lib/python3.7/site-packages/numpy/random/_examples/cython/setup.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_examples/cython/setup.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/_examples/cython/setup.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/_examples/numba/__pycache__/extending.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/_examples/numba/__pycache__/extending.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_examples/numba/__pycache__/extending.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/_examples/numba/__pycache__/extending.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/_examples/numba/__pycache__/extending_distributions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/_examples/numba/__pycache__/extending_distributions.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_examples/numba/__pycache__/extending_distributions.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/_examples/numba/__pycache__/extending_distributions.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/_examples/numba/extending.py b/venv.bak/lib/python3.7/site-packages/numpy/random/_examples/numba/extending.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_examples/numba/extending.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/_examples/numba/extending.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/_examples/numba/extending_distributions.py b/venv.bak/lib/python3.7/site-packages/numpy/random/_examples/numba/extending_distributions.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_examples/numba/extending_distributions.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/_examples/numba/extending_distributions.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/_generator.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/random/_generator.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_generator.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/numpy/random/_generator.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/random/_mt19937.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/random/_mt19937.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_mt19937.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/numpy/random/_mt19937.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/random/_pcg64.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/random/_pcg64.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_pcg64.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/numpy/random/_pcg64.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/random/_philox.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/random/_philox.cpython-37m-darwin.so similarity index 100% rename from 
venv/lib/python3.7/site-packages/numpy/random/_philox.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/numpy/random/_philox.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/random/_pickle.py b/venv.bak/lib/python3.7/site-packages/numpy/random/_pickle.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_pickle.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/_pickle.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/_sfc64.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/random/_sfc64.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/_sfc64.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/numpy/random/_sfc64.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/random/mtrand.cpython-37m-darwin.so b/venv.bak/lib/python3.7/site-packages/numpy/random/mtrand.cpython-37m-darwin.so similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/mtrand.cpython-37m-darwin.so rename to venv.bak/lib/python3.7/site-packages/numpy/random/mtrand.cpython-37m-darwin.so diff --git a/venv/lib/python3.7/site-packages/numpy/random/setup.py b/venv.bak/lib/python3.7/site-packages/numpy/random/setup.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/setup.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/setup.py diff --git a/venv/lib/python3.7/site-packages/numpy/tests/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/tests/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_direct.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_direct.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_direct.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_direct.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_extending.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_extending.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_extending.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_extending.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_generator_mt19937.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_generator_mt19937.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_generator_mt19937.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_generator_mt19937.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_generator_mt19937_regressions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_generator_mt19937_regressions.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_generator_mt19937_regressions.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_generator_mt19937_regressions.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_random.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_random.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_random.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_random.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_randomstate.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_randomstate.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_randomstate.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_randomstate.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_randomstate_regression.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_randomstate_regression.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_randomstate_regression.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_randomstate_regression.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_regression.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_regression.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_regression.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_regression.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_seed_sequence.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_seed_sequence.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_seed_sequence.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_seed_sequence.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_smoke.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_smoke.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_smoke.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/__pycache__/test_smoke.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/operations/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/__init__.py similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pip/_internal/operations/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/data/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/data/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/data/mt19937-testset-1.csv b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/mt19937-testset-1.csv similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/data/mt19937-testset-1.csv rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/mt19937-testset-1.csv diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/data/mt19937-testset-2.csv b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/mt19937-testset-2.csv similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/data/mt19937-testset-2.csv rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/mt19937-testset-2.csv diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/data/pcg64-testset-1.csv b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/pcg64-testset-1.csv similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/data/pcg64-testset-1.csv rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/pcg64-testset-1.csv diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/data/pcg64-testset-2.csv b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/pcg64-testset-2.csv similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/data/pcg64-testset-2.csv rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/pcg64-testset-2.csv diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/data/philox-testset-1.csv b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/philox-testset-1.csv similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/data/philox-testset-1.csv rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/philox-testset-1.csv diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/data/philox-testset-2.csv b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/philox-testset-2.csv similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/data/philox-testset-2.csv rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/philox-testset-2.csv diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/data/sfc64-testset-1.csv b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/sfc64-testset-1.csv similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/data/sfc64-testset-1.csv rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/sfc64-testset-1.csv diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/data/sfc64-testset-2.csv b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/sfc64-testset-2.csv similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/data/sfc64-testset-2.csv rename to 
venv.bak/lib/python3.7/site-packages/numpy/random/tests/data/sfc64-testset-2.csv diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/test_direct.py b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_direct.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/test_direct.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_direct.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/test_extending.py b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_extending.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/test_extending.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_extending.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/test_generator_mt19937.py b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_generator_mt19937.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/test_generator_mt19937.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_generator_mt19937.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/test_generator_mt19937_regressions.py b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_generator_mt19937_regressions.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/test_generator_mt19937_regressions.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_generator_mt19937_regressions.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/test_random.py b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_random.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/test_random.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_random.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/test_randomstate.py b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_randomstate.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/test_randomstate.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_randomstate.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/test_randomstate_regression.py b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_randomstate_regression.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/test_randomstate_regression.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_randomstate_regression.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/test_regression.py b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_regression.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/test_regression.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_regression.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/test_seed_sequence.py b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_seed_sequence.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/random/tests/test_seed_sequence.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_seed_sequence.py diff --git a/venv/lib/python3.7/site-packages/numpy/random/tests/test_smoke.py b/venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_smoke.py similarity index 100% rename 
from venv/lib/python3.7/site-packages/numpy/random/tests/test_smoke.py rename to venv.bak/lib/python3.7/site-packages/numpy/random/tests/test_smoke.py diff --git a/venv/lib/python3.7/site-packages/numpy/setup.py b/venv.bak/lib/python3.7/site-packages/numpy/setup.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/setup.py rename to venv.bak/lib/python3.7/site-packages/numpy/setup.py diff --git a/venv/lib/python3.7/site-packages/numpy/testing/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/testing/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/testing/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/testing/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/testing/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/testing/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/testing/__pycache__/print_coercion_tables.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/testing/__pycache__/print_coercion_tables.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/__pycache__/print_coercion_tables.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/testing/__pycache__/print_coercion_tables.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/testing/__pycache__/setup.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/testing/__pycache__/setup.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/__pycache__/setup.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/testing/__pycache__/setup.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/testing/__pycache__/utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/testing/__pycache__/utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/__pycache__/utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/testing/__pycache__/utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/testing/_private/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/testing/_private/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/decorators.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/decorators.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/decorators.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/decorators.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/noseclasses.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/noseclasses.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/noseclasses.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/noseclasses.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/nosetester.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/nosetester.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/nosetester.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/nosetester.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/parameterized.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/parameterized.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/parameterized.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/parameterized.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/testing/_private/__pycache__/utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/testing/_private/decorators.py b/venv.bak/lib/python3.7/site-packages/numpy/testing/_private/decorators.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/_private/decorators.py rename to venv.bak/lib/python3.7/site-packages/numpy/testing/_private/decorators.py diff --git a/venv/lib/python3.7/site-packages/numpy/testing/_private/noseclasses.py b/venv.bak/lib/python3.7/site-packages/numpy/testing/_private/noseclasses.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/_private/noseclasses.py rename to venv.bak/lib/python3.7/site-packages/numpy/testing/_private/noseclasses.py diff --git a/venv/lib/python3.7/site-packages/numpy/testing/_private/nosetester.py b/venv.bak/lib/python3.7/site-packages/numpy/testing/_private/nosetester.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/_private/nosetester.py rename to venv.bak/lib/python3.7/site-packages/numpy/testing/_private/nosetester.py diff --git a/venv/lib/python3.7/site-packages/numpy/testing/_private/parameterized.py b/venv.bak/lib/python3.7/site-packages/numpy/testing/_private/parameterized.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/_private/parameterized.py rename to venv.bak/lib/python3.7/site-packages/numpy/testing/_private/parameterized.py diff --git a/venv/lib/python3.7/site-packages/numpy/testing/_private/utils.py b/venv.bak/lib/python3.7/site-packages/numpy/testing/_private/utils.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/_private/utils.py rename to 
venv.bak/lib/python3.7/site-packages/numpy/testing/_private/utils.py diff --git a/venv/lib/python3.7/site-packages/numpy/testing/print_coercion_tables.py b/venv.bak/lib/python3.7/site-packages/numpy/testing/print_coercion_tables.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/print_coercion_tables.py rename to venv.bak/lib/python3.7/site-packages/numpy/testing/print_coercion_tables.py diff --git a/venv/lib/python3.7/site-packages/numpy/testing/setup.py b/venv.bak/lib/python3.7/site-packages/numpy/testing/setup.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/setup.py rename to venv.bak/lib/python3.7/site-packages/numpy/testing/setup.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/testing/tests/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/testing/tests/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/testing/tests/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/testing/tests/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/tests/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/testing/tests/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/testing/tests/__pycache__/test_decorators.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/testing/tests/__pycache__/test_decorators.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/tests/__pycache__/test_decorators.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/testing/tests/__pycache__/test_decorators.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/testing/tests/__pycache__/test_doctesting.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/testing/tests/__pycache__/test_doctesting.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/tests/__pycache__/test_doctesting.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/testing/tests/__pycache__/test_doctesting.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/testing/tests/__pycache__/test_utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/testing/tests/__pycache__/test_utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/tests/__pycache__/test_utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/testing/tests/__pycache__/test_utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/testing/tests/test_decorators.py b/venv.bak/lib/python3.7/site-packages/numpy/testing/tests/test_decorators.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/tests/test_decorators.py rename to venv.bak/lib/python3.7/site-packages/numpy/testing/tests/test_decorators.py diff --git a/venv/lib/python3.7/site-packages/numpy/testing/tests/test_doctesting.py b/venv.bak/lib/python3.7/site-packages/numpy/testing/tests/test_doctesting.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/tests/test_doctesting.py rename to venv.bak/lib/python3.7/site-packages/numpy/testing/tests/test_doctesting.py diff --git 
a/venv/lib/python3.7/site-packages/numpy/testing/tests/test_utils.py b/venv.bak/lib/python3.7/site-packages/numpy/testing/tests/test_utils.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/tests/test_utils.py rename to venv.bak/lib/python3.7/site-packages/numpy/testing/tests/test_utils.py diff --git a/venv/lib/python3.7/site-packages/numpy/testing/utils.py b/venv.bak/lib/python3.7/site-packages/numpy/testing/utils.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/testing/utils.py rename to venv.bak/lib/python3.7/site-packages/numpy/testing/utils.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__init__.py b/venv.bak/lib/python3.7/site-packages/numpy/tests/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__init__.py rename to venv.bak/lib/python3.7/site-packages/numpy/tests/__init__.py diff --git a/venv/lib/python3.7/site-packages/numpy/tests/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/tests/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/tests/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/tests/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/tests/__pycache__/test_ctypeslib.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/tests/__pycache__/test_ctypeslib.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/tests/__pycache__/test_ctypeslib.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/tests/__pycache__/test_ctypeslib.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/tests/__pycache__/test_matlib.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/tests/__pycache__/test_matlib.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/tests/__pycache__/test_matlib.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/tests/__pycache__/test_matlib.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/tests/__pycache__/test_numpy_version.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/tests/__pycache__/test_numpy_version.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/tests/__pycache__/test_numpy_version.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/tests/__pycache__/test_numpy_version.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/tests/__pycache__/test_public_api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/tests/__pycache__/test_public_api.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/tests/__pycache__/test_public_api.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/tests/__pycache__/test_public_api.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/tests/__pycache__/test_reloading.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/tests/__pycache__/test_reloading.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/tests/__pycache__/test_reloading.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/tests/__pycache__/test_reloading.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/tests/__pycache__/test_scripts.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/numpy/tests/__pycache__/test_scripts.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/tests/__pycache__/test_scripts.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/tests/__pycache__/test_scripts.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/tests/__pycache__/test_warnings.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/numpy/tests/__pycache__/test_warnings.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/tests/__pycache__/test_warnings.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/numpy/tests/__pycache__/test_warnings.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/numpy/tests/test_ctypeslib.py b/venv.bak/lib/python3.7/site-packages/numpy/tests/test_ctypeslib.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/tests/test_ctypeslib.py rename to venv.bak/lib/python3.7/site-packages/numpy/tests/test_ctypeslib.py diff --git a/venv/lib/python3.7/site-packages/numpy/tests/test_matlib.py b/venv.bak/lib/python3.7/site-packages/numpy/tests/test_matlib.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/tests/test_matlib.py rename to venv.bak/lib/python3.7/site-packages/numpy/tests/test_matlib.py diff --git a/venv/lib/python3.7/site-packages/numpy/tests/test_numpy_version.py b/venv.bak/lib/python3.7/site-packages/numpy/tests/test_numpy_version.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/tests/test_numpy_version.py rename to venv.bak/lib/python3.7/site-packages/numpy/tests/test_numpy_version.py diff --git a/venv/lib/python3.7/site-packages/numpy/tests/test_public_api.py b/venv.bak/lib/python3.7/site-packages/numpy/tests/test_public_api.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/tests/test_public_api.py rename to venv.bak/lib/python3.7/site-packages/numpy/tests/test_public_api.py diff --git a/venv/lib/python3.7/site-packages/numpy/tests/test_reloading.py b/venv.bak/lib/python3.7/site-packages/numpy/tests/test_reloading.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/tests/test_reloading.py rename to venv.bak/lib/python3.7/site-packages/numpy/tests/test_reloading.py diff --git a/venv/lib/python3.7/site-packages/numpy/tests/test_scripts.py b/venv.bak/lib/python3.7/site-packages/numpy/tests/test_scripts.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/tests/test_scripts.py rename to venv.bak/lib/python3.7/site-packages/numpy/tests/test_scripts.py diff --git a/venv/lib/python3.7/site-packages/numpy/tests/test_warnings.py b/venv.bak/lib/python3.7/site-packages/numpy/tests/test_warnings.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/tests/test_warnings.py rename to venv.bak/lib/python3.7/site-packages/numpy/tests/test_warnings.py diff --git a/venv/lib/python3.7/site-packages/numpy/version.py b/venv.bak/lib/python3.7/site-packages/numpy/version.py similarity index 100% rename from venv/lib/python3.7/site-packages/numpy/version.py rename to venv.bak/lib/python3.7/site-packages/numpy/version.py diff --git a/venv/lib/python3.7/site-packages/original/__pycache__/easy_install.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/__pycache__/easy_install.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/__pycache__/easy_install.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/original/__pycache__/easy_install.cpython-37.pyc
diff --git a/venv.bak/lib/python3.7/site-packages/original/easy_install.py b/venv.bak/lib/python3.7/site-packages/original/easy_install.py
new file mode 100644
index 0000000..d87e984
--- /dev/null
+++ b/venv.bak/lib/python3.7/site-packages/original/easy_install.py
@@ -0,0 +1,5 @@
+"""Run the EasyInstall command"""
+
+if __name__ == '__main__':
+    from setuptools.command.easy_install import main
+    main()
diff --git a/venv/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/INSTALLER b/venv.bak/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/INSTALLER
similarity index 100%
rename from venv/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/INSTALLER
rename to venv.bak/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/INSTALLER
diff --git a/venv/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/LICENSE.txt b/venv.bak/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/LICENSE.txt
similarity index 100%
rename from venv/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/LICENSE.txt
rename to venv.bak/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/LICENSE.txt
diff --git a/venv/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/METADATA b/venv.bak/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/METADATA
similarity index 100%
rename from venv/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/METADATA
rename to venv.bak/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/METADATA
diff --git a/venv/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/RECORD b/venv.bak/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/RECORD
similarity index 100%
rename from venv/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/RECORD
rename to venv.bak/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/RECORD
diff --git a/venv/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/WHEEL b/venv.bak/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/WHEEL
similarity index 100%
rename from venv/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/WHEEL
rename to venv.bak/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/WHEEL
diff --git a/venv/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/entry_points.txt b/venv.bak/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/entry_points.txt
similarity index 100%
rename from venv/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/entry_points.txt
rename to venv.bak/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/entry_points.txt
diff --git a/venv/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/top_level.txt b/venv.bak/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/top_level.txt
similarity index 100%
rename from venv/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/top_level.txt
rename to venv.bak/lib/python3.7/site-packages/original/pip-19.0.3.dist-info/top_level.txt
diff --git a/venv/lib/python3.7/site-packages/original/pip/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/__init__.py
similarity index 100%
rename from venv/lib/python3.7/site-packages/original/pip/__init__.py
rename to venv.bak/lib/python3.7/site-packages/original/pip/__init__.py
diff --git a/venv/lib/python3.7/site-packages/original/pip/__main__.py b/venv.bak/lib/python3.7/site-packages/original/pip/__main__.py
similarity index 100%
rename from venv/lib/python3.7/site-packages/original/pip/__main__.py
rename to venv.bak/lib/python3.7/site-packages/original/pip/__main__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/__pycache__/__main__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/__pycache__/__main__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/__pycache__/__main__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/__pycache__/__main__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/build_env.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/build_env.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/build_env.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/build_env.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/cache.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/cache.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/cache.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/cache.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/configuration.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/configuration.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/configuration.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/configuration.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/download.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/download.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/download.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/download.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/exceptions.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/exceptions.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/exceptions.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/exceptions.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/index.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/index.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/index.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/index.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/locations.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/locations.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/locations.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/locations.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/pep425tags.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/pep425tags.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/pep425tags.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/pep425tags.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/pyproject.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/pyproject.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/pyproject.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/pyproject.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/resolve.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/resolve.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/resolve.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/resolve.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/wheel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/wheel.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/__pycache__/wheel.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/__pycache__/wheel.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/build_env.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/build_env.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/build_env.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/build_env.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/cache.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/cache.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/cache.py rename to 
venv.bak/lib/python3.7/site-packages/original/pip/_internal/cache.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/cli/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/cli/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/autocompletion.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/autocompletion.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/autocompletion.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/autocompletion.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/base_command.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/base_command.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/base_command.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/base_command.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/cmdoptions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/cmdoptions.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/cmdoptions.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/cmdoptions.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/main_parser.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/main_parser.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/main_parser.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/main_parser.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/parser.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/parser.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/parser.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/parser.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/status_codes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/status_codes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/status_codes.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/__pycache__/status_codes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/cli/autocompletion.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/autocompletion.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/cli/autocompletion.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/autocompletion.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/cli/base_command.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/base_command.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/cli/base_command.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/base_command.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/cli/cmdoptions.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/cmdoptions.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/cli/cmdoptions.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/cmdoptions.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/cli/main_parser.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/main_parser.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/cli/main_parser.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/main_parser.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/cli/parser.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/parser.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/cli/parser.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/parser.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/cli/status_codes.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/status_codes.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/cli/status_codes.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/cli/status_codes.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/check.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/check.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/check.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/check.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/completion.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/completion.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/completion.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/completion.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/configuration.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/configuration.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/configuration.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/configuration.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/download.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/download.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/download.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/download.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/freeze.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/freeze.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/freeze.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/freeze.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/hash.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/hash.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/hash.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/hash.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/help.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/help.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/help.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/help.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/install.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/install.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/install.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/install.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/list.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/list.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/list.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/list.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/search.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/search.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/search.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/search.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/show.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/show.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/show.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/show.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/uninstall.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/uninstall.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/uninstall.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/uninstall.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/wheel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/wheel.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/wheel.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/__pycache__/wheel.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/check.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/check.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/check.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/check.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/completion.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/completion.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/completion.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/completion.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/configuration.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/configuration.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/configuration.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/configuration.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/download.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/download.py similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pip/_internal/commands/download.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/download.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/freeze.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/freeze.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/freeze.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/freeze.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/hash.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/hash.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/hash.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/hash.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/help.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/help.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/help.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/help.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/install.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/install.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/install.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/install.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/list.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/list.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/list.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/list.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/search.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/search.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/search.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/search.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/show.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/show.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/show.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/show.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/uninstall.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/uninstall.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/uninstall.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/uninstall.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/commands/wheel.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/wheel.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/commands/wheel.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/commands/wheel.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/configuration.py 
b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/configuration.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/configuration.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/configuration.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/download.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/download.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/download.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/download.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/exceptions.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/exceptions.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/exceptions.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/exceptions.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/index.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/index.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/index.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/index.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/locations.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/locations.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/locations.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/locations.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/models/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/models/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/candidate.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/candidate.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/candidate.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/candidate.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/format_control.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/format_control.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/format_control.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/format_control.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/index.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/index.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/index.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/index.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/link.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/link.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/link.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/__pycache__/link.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/models/candidate.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/candidate.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/models/candidate.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/candidate.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/models/format_control.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/format_control.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/models/format_control.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/format_control.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/models/index.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/index.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/models/index.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/index.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/models/link.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/link.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/models/link.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/models/link.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/operations/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/operations/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/operations/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/operations/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/operations/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/operations/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/operations/__pycache__/check.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/operations/__pycache__/check.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/operations/__pycache__/check.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/original/pip/_internal/operations/__pycache__/check.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/operations/__pycache__/freeze.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/operations/__pycache__/freeze.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/operations/__pycache__/freeze.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/operations/__pycache__/freeze.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/operations/__pycache__/prepare.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/operations/__pycache__/prepare.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/operations/__pycache__/prepare.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/operations/__pycache__/prepare.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/operations/check.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/operations/check.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/operations/check.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/operations/check.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/operations/freeze.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/operations/freeze.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/operations/freeze.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/operations/freeze.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/operations/prepare.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/operations/prepare.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/operations/prepare.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/operations/prepare.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/pep425tags.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/pep425tags.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/pep425tags.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/pep425tags.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/pyproject.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/pyproject.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/pyproject.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/pyproject.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/req/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/req/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/constructors.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/constructors.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/constructors.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/constructors.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_file.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_file.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_file.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_file.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_install.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_install.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_install.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_install.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_set.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_set.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_set.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_set.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_tracker.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_tracker.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_tracker.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_tracker.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_uninstall.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_uninstall.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_uninstall.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/__pycache__/req_uninstall.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/req/constructors.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/constructors.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/req/constructors.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/constructors.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/req/req_file.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/req_file.py similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pip/_internal/req/req_file.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/req_file.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/req/req_install.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/req_install.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/req/req_install.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/req_install.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/req/req_set.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/req_set.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/req/req_set.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/req_set.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/req/req_tracker.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/req_tracker.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/req/req_tracker.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/req_tracker.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/req/req_uninstall.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/req_uninstall.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/req/req_uninstall.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/req/req_uninstall.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/resolve.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/resolve.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/resolve.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/resolve.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/backports/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/backports/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/appdirs.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/appdirs.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/appdirs.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/appdirs.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/compat.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/compat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/deprecation.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/deprecation.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/deprecation.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/deprecation.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/encoding.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/encoding.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/encoding.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/encoding.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/filesystem.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/filesystem.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/filesystem.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/filesystem.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/glibc.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/glibc.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/glibc.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/glibc.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/hashes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/hashes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/hashes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/hashes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/logging.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/logging.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/logging.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/logging.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/misc.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/misc.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/misc.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/misc.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/models.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/models.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/models.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/models.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/outdated.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/outdated.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/outdated.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/outdated.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/packaging.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/packaging.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/packaging.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/packaging.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/setuptools_build.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/setuptools_build.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/setuptools_build.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/setuptools_build.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/temp_dir.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/temp_dir.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/temp_dir.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/temp_dir.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/typing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/typing.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/typing.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/typing.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/ui.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/ui.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/ui.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/__pycache__/ui.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/appdirs.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/appdirs.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/appdirs.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/appdirs.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/compat.py 
b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/compat.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/compat.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/deprecation.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/deprecation.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/deprecation.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/deprecation.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/encoding.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/encoding.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/encoding.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/encoding.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/filesystem.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/filesystem.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/filesystem.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/filesystem.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/glibc.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/glibc.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/glibc.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/glibc.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/hashes.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/hashes.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/hashes.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/hashes.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/logging.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/logging.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/logging.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/logging.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/misc.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/misc.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/misc.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/misc.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/models.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/models.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/models.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/models.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/outdated.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/outdated.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/outdated.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/outdated.py diff --git 
a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/packaging.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/packaging.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/packaging.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/packaging.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/setuptools_build.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/setuptools_build.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/setuptools_build.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/setuptools_build.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/temp_dir.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/temp_dir.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/temp_dir.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/temp_dir.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/typing.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/typing.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/typing.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/typing.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/utils/ui.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/ui.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/utils/ui.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/utils/ui.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/vcs/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/vcs/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/bazaar.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/bazaar.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/bazaar.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/bazaar.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/git.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/git.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/git.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/git.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/mercurial.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/mercurial.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/mercurial.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/mercurial.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/subversion.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/subversion.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/subversion.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/__pycache__/subversion.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/vcs/bazaar.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/bazaar.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/vcs/bazaar.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/bazaar.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/vcs/git.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/git.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/vcs/git.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/git.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/vcs/mercurial.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/mercurial.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/vcs/mercurial.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/mercurial.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/vcs/subversion.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/subversion.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/vcs/subversion.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/vcs/subversion.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_internal/wheel.py b/venv.bak/lib/python3.7/site-packages/original/pip/_internal/wheel.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_internal/wheel.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_internal/wheel.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/appdirs.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/appdirs.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/appdirs.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/appdirs.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/distro.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/distro.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/distro.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/distro.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/ipaddress.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/ipaddress.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/ipaddress.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/ipaddress.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/pyparsing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/pyparsing.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/pyparsing.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/pyparsing.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/retrying.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/retrying.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/retrying.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/retrying.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/six.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/six.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/six.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/__pycache__/six.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/appdirs.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/appdirs.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/appdirs.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/appdirs.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/__init__.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/_cmd.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/_cmd.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/_cmd.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/_cmd.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/adapter.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/adapter.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/adapter.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/adapter.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/cache.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/cache.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/cache.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/cache.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/compat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/controller.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/controller.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/controller.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/controller.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/filewrapper.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/filewrapper.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/filewrapper.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/filewrapper.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/heuristics.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/heuristics.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/heuristics.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/heuristics.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/serialize.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/serialize.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/serialize.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/serialize.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/wrapper.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/wrapper.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/wrapper.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/__pycache__/wrapper.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/_cmd.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/_cmd.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/_cmd.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/_cmd.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/adapter.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/adapter.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/adapter.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/adapter.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/cache.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/cache.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/cache.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/cache.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/__pycache__/file_cache.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/__pycache__/file_cache.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/__pycache__/file_cache.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/__pycache__/file_cache.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/__pycache__/redis_cache.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/__pycache__/redis_cache.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/__pycache__/redis_cache.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/__pycache__/redis_cache.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/file_cache.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/file_cache.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/file_cache.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/file_cache.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/redis_cache.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/redis_cache.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/redis_cache.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/caches/redis_cache.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/compat.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/compat.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/compat.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/controller.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/controller.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/controller.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/controller.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/filewrapper.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/filewrapper.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/filewrapper.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/filewrapper.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/heuristics.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/heuristics.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/heuristics.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/heuristics.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/serialize.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/serialize.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/serialize.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/serialize.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/wrapper.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/wrapper.py similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/wrapper.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/cachecontrol/wrapper.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/certifi/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/certifi/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/certifi/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/certifi/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/certifi/__main__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/certifi/__main__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/certifi/__main__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/certifi/__main__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/certifi/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/certifi/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/certifi/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/certifi/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/certifi/__pycache__/__main__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/certifi/__pycache__/__main__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/certifi/__pycache__/__main__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/certifi/__pycache__/__main__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/certifi/__pycache__/core.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/certifi/__pycache__/core.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/certifi/__pycache__/core.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/certifi/__pycache__/core.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/certifi/cacert.pem b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/certifi/cacert.pem similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/certifi/cacert.pem rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/certifi/cacert.pem diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/certifi/core.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/certifi/core.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/certifi/core.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/certifi/core.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/__init__.cpython-37.pyc similarity 
index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/big5freq.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/big5freq.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/big5freq.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/big5freq.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/big5prober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/big5prober.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/big5prober.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/big5prober.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/chardistribution.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/chardistribution.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/chardistribution.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/chardistribution.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/charsetgroupprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/charsetgroupprober.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/charsetgroupprober.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/charsetgroupprober.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/charsetprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/charsetprober.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/charsetprober.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/charsetprober.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/codingstatemachine.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/codingstatemachine.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/codingstatemachine.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/codingstatemachine.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/compat.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/compat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/cp949prober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/cp949prober.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/cp949prober.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/cp949prober.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/enums.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/enums.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/enums.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/enums.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/escprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/escprober.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/escprober.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/escprober.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/escsm.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/escsm.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/escsm.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/escsm.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/eucjpprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/eucjpprober.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/eucjpprober.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/eucjpprober.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/euckrfreq.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/euckrfreq.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/euckrfreq.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/euckrfreq.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/euckrprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/euckrprober.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/euckrprober.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/euckrprober.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/euctwfreq.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/euctwfreq.cpython-37.pyc 
similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/euctwfreq.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/euctwfreq.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/euctwprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/euctwprober.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/euctwprober.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/euctwprober.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/gb2312freq.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/gb2312freq.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/gb2312freq.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/gb2312freq.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/gb2312prober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/gb2312prober.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/gb2312prober.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/gb2312prober.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/hebrewprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/hebrewprober.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/hebrewprober.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/hebrewprober.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/jisfreq.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/jisfreq.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/jisfreq.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/jisfreq.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/jpcntx.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/jpcntx.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/jpcntx.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/jpcntx.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langbulgarianmodel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langbulgarianmodel.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langbulgarianmodel.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langbulgarianmodel.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langcyrillicmodel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langcyrillicmodel.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langcyrillicmodel.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langcyrillicmodel.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langgreekmodel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langgreekmodel.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langgreekmodel.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langgreekmodel.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langhebrewmodel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langhebrewmodel.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langhebrewmodel.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langhebrewmodel.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langhungarianmodel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langhungarianmodel.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langhungarianmodel.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langhungarianmodel.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langthaimodel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langthaimodel.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langthaimodel.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langthaimodel.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langturkishmodel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langturkishmodel.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langturkishmodel.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/langturkishmodel.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/latin1prober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/latin1prober.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/latin1prober.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/latin1prober.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/mbcharsetprober.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/mbcharsetprober.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/mbcharsetprober.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/mbcharsetprober.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/mbcsgroupprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/mbcsgroupprober.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/mbcsgroupprober.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/mbcsgroupprober.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/mbcssm.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/mbcssm.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/mbcssm.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/mbcssm.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/sbcharsetprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/sbcharsetprober.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/sbcharsetprober.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/sbcharsetprober.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/sbcsgroupprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/sbcsgroupprober.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/sbcsgroupprober.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/sbcsgroupprober.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/sjisprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/sjisprober.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/sjisprober.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/sjisprober.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/universaldetector.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/universaldetector.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/universaldetector.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/universaldetector.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/utf8prober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/utf8prober.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/utf8prober.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/utf8prober.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/version.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/version.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/version.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/__pycache__/version.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/big5freq.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/big5freq.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/big5freq.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/big5freq.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/big5prober.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/big5prober.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/big5prober.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/big5prober.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/chardistribution.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/chardistribution.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/chardistribution.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/chardistribution.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/charsetgroupprober.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/charsetgroupprober.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/charsetgroupprober.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/charsetgroupprober.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/charsetprober.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/charsetprober.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/charsetprober.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/charsetprober.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/cli/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/cli/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/cli/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/cli/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/cli/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/cli/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/cli/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/cli/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/cli/__pycache__/chardetect.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/cli/__pycache__/chardetect.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/cli/__pycache__/chardetect.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/cli/__pycache__/chardetect.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/cli/chardetect.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/cli/chardetect.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/cli/chardetect.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/cli/chardetect.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/codingstatemachine.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/codingstatemachine.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/codingstatemachine.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/codingstatemachine.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/compat.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/compat.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/compat.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/cp949prober.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/cp949prober.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/cp949prober.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/cp949prober.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/enums.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/enums.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/enums.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/enums.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/escprober.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/escprober.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/escprober.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/escprober.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/escsm.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/escsm.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/escsm.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/escsm.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/eucjpprober.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/eucjpprober.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/eucjpprober.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/eucjpprober.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/euckrfreq.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/euckrfreq.py similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/euckrfreq.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/euckrfreq.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/euckrprober.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/euckrprober.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/euckrprober.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/euckrprober.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/euctwfreq.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/euctwfreq.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/euctwfreq.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/euctwfreq.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/euctwprober.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/euctwprober.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/euctwprober.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/euctwprober.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/gb2312freq.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/gb2312freq.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/gb2312freq.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/gb2312freq.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/gb2312prober.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/gb2312prober.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/gb2312prober.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/gb2312prober.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/hebrewprober.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/hebrewprober.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/hebrewprober.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/hebrewprober.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/jisfreq.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/jisfreq.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/jisfreq.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/jisfreq.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/jpcntx.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/jpcntx.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/jpcntx.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/jpcntx.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/langbulgarianmodel.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/langbulgarianmodel.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/langbulgarianmodel.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/langbulgarianmodel.py diff --git 
a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/langcyrillicmodel.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/langcyrillicmodel.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/langcyrillicmodel.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/langcyrillicmodel.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/langgreekmodel.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/langgreekmodel.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/langgreekmodel.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/langgreekmodel.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/langhebrewmodel.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/langhebrewmodel.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/langhebrewmodel.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/langhebrewmodel.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/langhungarianmodel.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/langhungarianmodel.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/langhungarianmodel.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/langhungarianmodel.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/langthaimodel.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/langthaimodel.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/langthaimodel.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/langthaimodel.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/langturkishmodel.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/langturkishmodel.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/langturkishmodel.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/langturkishmodel.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/latin1prober.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/latin1prober.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/latin1prober.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/latin1prober.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/mbcharsetprober.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/mbcharsetprober.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/mbcharsetprober.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/mbcharsetprober.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/mbcsgroupprober.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/mbcsgroupprober.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/mbcsgroupprober.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/mbcsgroupprober.py diff --git 
a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/mbcssm.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/mbcssm.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/mbcssm.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/mbcssm.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/sbcharsetprober.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/sbcharsetprober.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/sbcharsetprober.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/sbcharsetprober.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/sbcsgroupprober.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/sbcsgroupprober.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/sbcsgroupprober.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/sbcsgroupprober.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/sjisprober.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/sjisprober.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/sjisprober.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/sjisprober.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/universaldetector.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/universaldetector.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/universaldetector.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/universaldetector.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/utf8prober.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/utf8prober.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/utf8prober.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/utf8prober.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/version.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/version.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/chardet/version.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/chardet/version.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/__init__.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/ansi.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/ansi.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/ansi.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/ansi.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/ansitowin32.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/ansitowin32.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/ansitowin32.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/ansitowin32.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/initialise.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/initialise.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/initialise.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/initialise.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/win32.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/win32.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/win32.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/win32.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/winterm.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/winterm.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/winterm.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/__pycache__/winterm.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/ansi.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/ansi.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/ansi.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/ansi.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/ansitowin32.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/ansitowin32.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/ansitowin32.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/ansitowin32.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/initialise.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/initialise.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/initialise.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/initialise.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/win32.py 
b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/win32.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/win32.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/win32.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/winterm.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/winterm.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/colorama/winterm.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/colorama/winterm.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/compat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/database.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/database.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/database.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/database.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/index.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/index.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/index.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/index.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/locators.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/locators.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/locators.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/locators.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/manifest.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/manifest.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/manifest.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/manifest.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/markers.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/markers.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/markers.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/markers.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/metadata.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/metadata.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/metadata.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/metadata.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/resources.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/resources.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/resources.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/resources.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/scripts.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/scripts.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/scripts.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/scripts.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/util.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/util.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/util.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/version.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/version.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/version.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/version.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/wheel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/wheel.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/wheel.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/__pycache__/wheel.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__init__.py 
b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/misc.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/misc.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/misc.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/misc.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/shutil.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/shutil.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/shutil.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/shutil.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/sysconfig.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/sysconfig.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/sysconfig.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/sysconfig.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/tarfile.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/tarfile.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/tarfile.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/__pycache__/tarfile.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/misc.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/misc.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/misc.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/misc.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/shutil.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/shutil.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/shutil.py rename to 
venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/shutil.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/sysconfig.cfg b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/sysconfig.cfg similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/sysconfig.cfg rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/sysconfig.cfg diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/sysconfig.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/sysconfig.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/sysconfig.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/sysconfig.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/tarfile.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/tarfile.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/tarfile.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/_backport/tarfile.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/compat.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/compat.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/compat.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/database.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/database.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/database.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/database.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/index.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/index.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/index.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/index.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/locators.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/locators.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/locators.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/locators.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/manifest.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/manifest.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/manifest.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/manifest.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/markers.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/markers.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/markers.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/markers.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/metadata.py 
b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/metadata.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/metadata.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/metadata.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/resources.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/resources.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/resources.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/resources.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/scripts.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/scripts.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/scripts.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/scripts.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/t32.exe b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/t32.exe similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/t32.exe rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/t32.exe diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/t64.exe b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/t64.exe similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/t64.exe rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/t64.exe diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/util.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/util.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/util.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/util.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/version.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/version.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/version.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/version.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/w32.exe b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/w32.exe similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/w32.exe rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/w32.exe diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/w64.exe b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/w64.exe similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/w64.exe rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/w64.exe diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/wheel.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/wheel.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distlib/wheel.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distlib/wheel.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/distro.py 
b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distro.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/distro.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/distro.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/_ihatexml.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/_ihatexml.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/_ihatexml.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/_ihatexml.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/_inputstream.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/_inputstream.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/_inputstream.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/_inputstream.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/_tokenizer.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/_tokenizer.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/_tokenizer.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/_tokenizer.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/_utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/_utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/_utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/_utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/constants.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/constants.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/constants.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/constants.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/html5parser.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/html5parser.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/html5parser.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/html5parser.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/serializer.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/serializer.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/serializer.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/__pycache__/serializer.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_ihatexml.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_ihatexml.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_ihatexml.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_ihatexml.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_inputstream.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_inputstream.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_inputstream.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_inputstream.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_tokenizer.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_tokenizer.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_tokenizer.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_tokenizer.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__pycache__/_base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__pycache__/_base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__pycache__/_base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__pycache__/_base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__pycache__/datrie.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__pycache__/datrie.cpython-37.pyc similarity index 
100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__pycache__/datrie.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__pycache__/datrie.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__pycache__/py.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__pycache__/py.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__pycache__/py.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/__pycache__/py.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/_base.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/_base.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/_base.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/_base.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/datrie.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/datrie.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/datrie.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/datrie.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/py.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/py.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/py.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_trie/py.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_utils.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_utils.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_utils.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/_utils.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/constants.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/constants.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/constants.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/constants.py diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/__init__.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/alphabeticalattributes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/alphabeticalattributes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/alphabeticalattributes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/alphabeticalattributes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/inject_meta_charset.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/inject_meta_charset.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/inject_meta_charset.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/inject_meta_charset.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/lint.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/lint.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/lint.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/lint.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/optionaltags.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/optionaltags.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/optionaltags.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/optionaltags.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/sanitizer.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/sanitizer.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/sanitizer.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/sanitizer.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/whitespace.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/whitespace.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/whitespace.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/__pycache__/whitespace.cpython-37.pyc diff 
--git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/alphabeticalattributes.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/alphabeticalattributes.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/alphabeticalattributes.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/alphabeticalattributes.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/base.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/base.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/base.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/base.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/inject_meta_charset.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/inject_meta_charset.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/inject_meta_charset.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/inject_meta_charset.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/lint.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/lint.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/lint.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/lint.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/optionaltags.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/optionaltags.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/optionaltags.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/optionaltags.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/sanitizer.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/sanitizer.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/sanitizer.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/sanitizer.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/whitespace.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/whitespace.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/whitespace.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/filters/whitespace.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/html5parser.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/html5parser.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/html5parser.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/html5parser.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/serializer.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/serializer.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/serializer.py rename 
to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/serializer.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/__pycache__/genshi.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/__pycache__/genshi.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/__pycache__/genshi.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/__pycache__/genshi.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/__pycache__/sax.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/__pycache__/sax.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/__pycache__/sax.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/__pycache__/sax.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/genshi.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/genshi.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/genshi.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/genshi.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/sax.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/sax.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/sax.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treeadapters/sax.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/dom.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/dom.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/dom.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/dom.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/etree.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/etree.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/etree.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/etree.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/etree_lxml.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/etree_lxml.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/etree_lxml.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/__pycache__/etree_lxml.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/base.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/base.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/base.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/base.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/dom.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/dom.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/dom.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/dom.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/etree.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/etree.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/etree.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/etree.py diff --git 
a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/etree_lxml.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/etree_lxml.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/etree_lxml.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treebuilders/etree_lxml.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/dom.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/dom.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/dom.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/dom.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/etree.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/etree.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/etree.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/etree.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/etree_lxml.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/etree_lxml.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/etree_lxml.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/etree_lxml.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/genshi.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/genshi.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/genshi.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/__pycache__/genshi.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/base.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/base.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/base.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/base.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/dom.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/dom.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/dom.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/dom.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/etree.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/etree.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/etree.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/etree.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/etree_lxml.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/etree_lxml.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/etree_lxml.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/etree_lxml.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/genshi.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/genshi.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/genshi.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/html5lib/treewalkers/genshi.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/codec.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/codec.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/codec.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/codec.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/compat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/core.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/core.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/core.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/core.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/idnadata.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/idnadata.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/idnadata.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/idnadata.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/intranges.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/intranges.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/intranges.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/intranges.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/package_data.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/package_data.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/package_data.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/package_data.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/uts46data.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/uts46data.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/uts46data.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/__pycache__/uts46data.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/idna/codec.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/codec.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/idna/codec.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/codec.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/idna/compat.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/idna/compat.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/compat.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/idna/core.py 
b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/core.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/idna/core.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/core.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/idna/idnadata.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/idnadata.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/idna/idnadata.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/idnadata.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/idna/intranges.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/intranges.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/idna/intranges.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/intranges.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/idna/package_data.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/package_data.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/idna/package_data.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/package_data.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/idna/uts46data.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/uts46data.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/idna/uts46data.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/idna/uts46data.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/ipaddress.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/ipaddress.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/ipaddress.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/ipaddress.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/linklockfile.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/linklockfile.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/linklockfile.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/linklockfile.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/mkdirlockfile.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/mkdirlockfile.cpython-37.pyc 
similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/mkdirlockfile.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/mkdirlockfile.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/pidlockfile.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/pidlockfile.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/pidlockfile.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/pidlockfile.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/sqlitelockfile.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/sqlitelockfile.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/sqlitelockfile.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/sqlitelockfile.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/symlinklockfile.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/symlinklockfile.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/symlinklockfile.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/__pycache__/symlinklockfile.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/linklockfile.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/linklockfile.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/linklockfile.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/linklockfile.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/mkdirlockfile.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/mkdirlockfile.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/mkdirlockfile.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/mkdirlockfile.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/pidlockfile.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/pidlockfile.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/pidlockfile.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/pidlockfile.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/sqlitelockfile.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/sqlitelockfile.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/sqlitelockfile.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/sqlitelockfile.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/symlinklockfile.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/symlinklockfile.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/lockfile/symlinklockfile.py rename 
to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/lockfile/symlinklockfile.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__pycache__/_version.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__pycache__/_version.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__pycache__/_version.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__pycache__/_version.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__pycache__/exceptions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__pycache__/exceptions.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__pycache__/exceptions.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__pycache__/exceptions.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__pycache__/fallback.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__pycache__/fallback.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__pycache__/fallback.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/msgpack/__pycache__/fallback.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/msgpack/_version.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/msgpack/_version.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/msgpack/_version.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/msgpack/_version.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/msgpack/exceptions.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/msgpack/exceptions.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/msgpack/exceptions.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/msgpack/exceptions.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/msgpack/fallback.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/msgpack/fallback.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/msgpack/fallback.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/msgpack/fallback.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__about__.py 
b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__about__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__about__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__about__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/__about__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/__about__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/__about__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/__about__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/_compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/_compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/_compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/_compat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/_structures.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/_structures.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/_structures.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/_structures.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/markers.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/markers.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/markers.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/markers.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/requirements.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/requirements.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/requirements.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/requirements.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/specifiers.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/specifiers.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/specifiers.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/specifiers.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/version.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/version.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/version.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/__pycache__/version.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/_compat.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/_compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/_compat.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/_compat.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/_structures.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/_structures.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/_structures.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/_structures.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/markers.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/markers.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/markers.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/markers.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/requirements.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/requirements.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/requirements.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/requirements.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/specifiers.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/specifiers.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/specifiers.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/specifiers.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/utils.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/utils.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/utils.py rename to 
venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/utils.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/version.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/version.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/packaging/version.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/packaging/version.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/_in_process.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/_in_process.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/_in_process.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/_in_process.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/build.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/build.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/build.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/build.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/check.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/check.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/check.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/check.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/colorlog.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/colorlog.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/colorlog.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/colorlog.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/compat.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/envbuild.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/envbuild.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/envbuild.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/envbuild.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/wrappers.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/wrappers.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/wrappers.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/__pycache__/wrappers.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/_in_process.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/_in_process.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/_in_process.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/_in_process.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/build.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/build.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/build.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/build.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/check.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/check.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/check.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/check.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/colorlog.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/colorlog.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/colorlog.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/colorlog.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/compat.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/compat.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/compat.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/envbuild.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/envbuild.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/envbuild.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/envbuild.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/wrappers.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/wrappers.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pep517/wrappers.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pep517/wrappers.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pkg_resources/__init__.py 
b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pkg_resources/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pkg_resources/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pkg_resources/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pkg_resources/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pkg_resources/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pkg_resources/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pkg_resources/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pkg_resources/__pycache__/py31compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pkg_resources/__pycache__/py31compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pkg_resources/__pycache__/py31compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pkg_resources/__pycache__/py31compat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pkg_resources/py31compat.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pkg_resources/py31compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pkg_resources/py31compat.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pkg_resources/py31compat.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/progress/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/progress/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/bar.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/bar.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/bar.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/bar.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/counter.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/counter.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/counter.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/counter.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/helpers.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/helpers.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/helpers.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/helpers.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/spinner.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/spinner.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/spinner.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/__pycache__/spinner.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/progress/bar.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/bar.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/progress/bar.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/bar.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/progress/counter.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/counter.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/progress/counter.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/counter.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/progress/helpers.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/helpers.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/progress/helpers.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/helpers.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/progress/spinner.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/spinner.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/progress/spinner.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/progress/spinner.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pyparsing.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pyparsing.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pyparsing.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pyparsing.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/core.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/core.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/core.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/core.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/parser.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/parser.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/parser.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/parser.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/test.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/test.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/test.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/test.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/writer.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/writer.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/writer.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/__pycache__/writer.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/core.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/core.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/core.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/core.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/parser.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/parser.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/parser.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/parser.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/test.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/test.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/test.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/test.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/utils.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/utils.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/utils.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/utils.py diff --git 
a/venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/writer.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/writer.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/pytoml/writer.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/pytoml/writer.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/__version__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/__version__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/__version__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/__version__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/_internal_utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/_internal_utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/_internal_utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/_internal_utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/adapters.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/adapters.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/adapters.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/adapters.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/api.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/api.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/api.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/api.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/auth.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/auth.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/auth.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/auth.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/certs.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/certs.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/certs.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/certs.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/compat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/cookies.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/cookies.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/cookies.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/cookies.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/exceptions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/exceptions.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/exceptions.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/exceptions.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/help.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/help.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/help.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/help.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/hooks.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/hooks.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/hooks.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/hooks.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/models.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/models.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/models.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/models.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/packages.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/packages.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/packages.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/packages.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/sessions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/sessions.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/sessions.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/sessions.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/status_codes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/status_codes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/status_codes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/status_codes.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/structures.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/structures.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/structures.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/structures.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__pycache__/utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__version__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__version__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/__version__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/__version__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/_internal_utils.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/_internal_utils.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/_internal_utils.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/_internal_utils.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/adapters.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/adapters.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/adapters.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/adapters.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/api.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/api.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/api.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/api.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/auth.py 
b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/auth.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/auth.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/auth.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/certs.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/certs.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/certs.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/certs.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/compat.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/compat.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/compat.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/cookies.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/cookies.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/cookies.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/cookies.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/exceptions.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/exceptions.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/exceptions.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/exceptions.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/help.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/help.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/help.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/help.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/hooks.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/hooks.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/hooks.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/hooks.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/models.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/models.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/models.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/models.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/packages.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/packages.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/packages.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/packages.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/sessions.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/sessions.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/sessions.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/sessions.py diff --git 
a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/status_codes.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/status_codes.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/status_codes.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/status_codes.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/structures.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/structures.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/structures.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/structures.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/requests/utils.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/utils.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/requests/utils.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/requests/utils.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/retrying.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/retrying.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/retrying.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/retrying.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/six.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/six.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/six.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/six.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/_collections.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/_collections.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/_collections.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/_collections.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/connection.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/connection.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/connection.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/connection.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/connectionpool.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/connectionpool.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/connectionpool.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/connectionpool.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/exceptions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/exceptions.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/exceptions.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/exceptions.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/fields.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/fields.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/fields.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/fields.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/filepost.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/filepost.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/filepost.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/filepost.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/poolmanager.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/poolmanager.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/poolmanager.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/poolmanager.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/request.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/request.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/request.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/request.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/response.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/response.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/response.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/__pycache__/response.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/_collections.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/_collections.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/_collections.py rename to 
venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/_collections.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/connection.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/connection.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/connection.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/connection.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/connectionpool.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/connectionpool.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/connectionpool.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/connectionpool.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/_appengine_environ.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/_appengine_environ.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/_appengine_environ.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/_appengine_environ.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/appengine.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/appengine.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/appengine.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/appengine.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/ntlmpool.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/ntlmpool.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/ntlmpool.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/ntlmpool.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/pyopenssl.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/pyopenssl.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/pyopenssl.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/pyopenssl.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/securetransport.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/securetransport.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/securetransport.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/securetransport.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/socks.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/socks.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/socks.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/__pycache__/socks.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_appengine_environ.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_appengine_environ.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_appengine_environ.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_appengine_environ.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/operations/build/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/build/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/bindings.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/bindings.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/bindings.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/bindings.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/low_level.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/low_level.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/low_level.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/__pycache__/low_level.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/bindings.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/bindings.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/bindings.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/bindings.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/low_level.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/low_level.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/low_level.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/_securetransport/low_level.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/appengine.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/appengine.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/appengine.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/appengine.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/ntlmpool.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/ntlmpool.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/ntlmpool.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/ntlmpool.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/pyopenssl.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/pyopenssl.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/pyopenssl.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/pyopenssl.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/securetransport.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/securetransport.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/securetransport.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/securetransport.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/socks.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/socks.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/socks.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/contrib/socks.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/exceptions.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/exceptions.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/exceptions.py rename to 
venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/exceptions.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/fields.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/fields.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/fields.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/fields.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/filepost.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/filepost.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/filepost.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/filepost.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/__pycache__/six.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/__pycache__/six.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/__pycache__/six.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/__pycache__/six.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/backports/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/backports/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/backports/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/backports/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/backports/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/backports/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/backports/__pycache__/makefile.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/backports/__pycache__/makefile.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/backports/__pycache__/makefile.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/backports/__pycache__/makefile.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/backports/makefile.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/backports/makefile.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/backports/makefile.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/backports/makefile.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/six.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/six.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/six.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/six.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/ssl_match_hostname/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/ssl_match_hostname/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/ssl_match_hostname/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/ssl_match_hostname/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/ssl_match_hostname/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/ssl_match_hostname/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/ssl_match_hostname/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/ssl_match_hostname/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/ssl_match_hostname/__pycache__/_implementation.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/ssl_match_hostname/__pycache__/_implementation.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/ssl_match_hostname/__pycache__/_implementation.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/ssl_match_hostname/__pycache__/_implementation.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/ssl_match_hostname/_implementation.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/ssl_match_hostname/_implementation.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/ssl_match_hostname/_implementation.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/packages/ssl_match_hostname/_implementation.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/poolmanager.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/poolmanager.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/poolmanager.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/poolmanager.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/request.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/request.py similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/request.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/request.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/response.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/response.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/response.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/response.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/connection.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/connection.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/connection.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/connection.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/queue.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/queue.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/queue.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/queue.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/request.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/request.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/request.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/request.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/response.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/response.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/response.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/response.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/retry.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/retry.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/retry.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/retry.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/ssl_.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/ssl_.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/ssl_.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/ssl_.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/timeout.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/timeout.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/timeout.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/timeout.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/url.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/url.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/url.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/url.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/wait.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/wait.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/wait.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/__pycache__/wait.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/connection.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/connection.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/connection.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/connection.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/queue.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/queue.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/queue.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/queue.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/request.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/request.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/request.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/request.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/response.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/response.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/response.py rename to 
venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/response.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/retry.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/retry.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/retry.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/retry.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/ssl_.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/ssl_.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/ssl_.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/ssl_.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/timeout.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/timeout.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/timeout.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/timeout.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/url.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/url.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/url.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/url.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/wait.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/wait.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/wait.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/urllib3/util/wait.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/labels.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/labels.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/labels.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/labels.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/mklabels.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/mklabels.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/mklabels.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/mklabels.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/tests.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/tests.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/tests.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/tests.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/x_user_defined.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/x_user_defined.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/x_user_defined.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/__pycache__/x_user_defined.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/labels.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/labels.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/labels.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/labels.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/mklabels.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/mklabels.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/mklabels.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/mklabels.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/tests.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/tests.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/tests.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/tests.py diff --git a/venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/x_user_defined.py b/venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/x_user_defined.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pip/_vendor/webencodings/x_user_defined.py rename to venv.bak/lib/python3.7/site-packages/original/pip/_vendor/webencodings/x_user_defined.py diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/__pycache__/__init__.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/original/pkg_resources/__pycache__/py31compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/__pycache__/py31compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/__pycache__/py31compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/__pycache__/py31compat.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/__pycache__/appdirs.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/__pycache__/appdirs.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/__pycache__/appdirs.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/__pycache__/appdirs.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/__pycache__/pyparsing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/__pycache__/pyparsing.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/__pycache__/pyparsing.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/__pycache__/pyparsing.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/__pycache__/six.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/__pycache__/six.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/__pycache__/six.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/__pycache__/six.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/appdirs.py b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/appdirs.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/appdirs.py rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/appdirs.py diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__about__.py b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__about__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__about__.py rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__about__.py diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__init__.py similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/__about__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/__about__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/__about__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/__about__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/_compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/_compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/_compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/_compat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/_structures.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/_structures.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/_structures.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/_structures.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/markers.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/markers.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/markers.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/markers.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/requirements.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/requirements.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/requirements.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/requirements.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/specifiers.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/specifiers.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/specifiers.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/specifiers.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/version.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/version.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/version.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/__pycache__/version.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/_compat.py b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/_compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/_compat.py rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/_compat.py diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/_structures.py b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/_structures.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/_structures.py rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/_structures.py diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/markers.py b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/markers.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/markers.py rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/markers.py diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/requirements.py b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/requirements.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/requirements.py rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/requirements.py diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/specifiers.py b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/specifiers.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/specifiers.py rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/specifiers.py diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/utils.py b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/utils.py similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/utils.py rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/utils.py diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/version.py b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/version.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/version.py rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/packaging/version.py diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/pyparsing.py b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/pyparsing.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/pyparsing.py rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/pyparsing.py diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/six.py b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/six.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/_vendor/six.py rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/_vendor/six.py diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/extern/__init__.py b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/extern/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/extern/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/extern/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/extern/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/extern/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/extern/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/extern/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/pkg_resources/py31compat.py b/venv.bak/lib/python3.7/site-packages/original/pkg_resources/py31compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/pkg_resources/py31compat.py rename to venv.bak/lib/python3.7/site-packages/original/pkg_resources/py31compat.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/INSTALLER b/venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/INSTALLER similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/INSTALLER rename to venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/INSTALLER diff --git a/venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/LICENSE b/venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/LICENSE similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/LICENSE rename to venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/LICENSE diff --git a/venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/METADATA b/venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/METADATA similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/METADATA rename 
to venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/METADATA diff --git a/venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/RECORD b/venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/RECORD similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/RECORD rename to venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/RECORD diff --git a/venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/WHEEL b/venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/WHEEL similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/WHEEL rename to venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/WHEEL diff --git a/venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/dependency_links.txt b/venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/dependency_links.txt similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/dependency_links.txt rename to venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/dependency_links.txt diff --git a/venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/entry_points.txt b/venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/entry_points.txt similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/entry_points.txt rename to venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/entry_points.txt diff --git a/venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/top_level.txt b/venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/top_level.txt similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/top_level.txt rename to venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/top_level.txt diff --git a/venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/zip-safe b/venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/zip-safe similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/zip-safe rename to venv.bak/lib/python3.7/site-packages/original/setuptools-40.8.0.dist-info/zip-safe diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__init__.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/_deprecation_warning.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/_deprecation_warning.cpython-37.pyc similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/setuptools/__pycache__/_deprecation_warning.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/_deprecation_warning.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/archive_util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/archive_util.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/archive_util.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/archive_util.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/build_meta.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/build_meta.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/build_meta.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/build_meta.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/config.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/config.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/config.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/config.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/dep_util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/dep_util.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/dep_util.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/dep_util.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/depends.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/depends.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/depends.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/depends.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/dist.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/dist.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/dist.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/dist.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/extension.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/extension.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/extension.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/extension.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/glibc.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/glibc.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/glibc.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/glibc.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/glob.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/glob.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/glob.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/glob.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/launch.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/launch.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/launch.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/launch.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/lib2to3_ex.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/lib2to3_ex.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/lib2to3_ex.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/lib2to3_ex.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/monkey.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/monkey.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/monkey.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/monkey.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/msvc.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/msvc.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/msvc.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/msvc.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/namespaces.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/namespaces.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/namespaces.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/namespaces.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/package_index.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/package_index.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/package_index.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/package_index.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/pep425tags.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/pep425tags.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/pep425tags.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/pep425tags.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/py27compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/py27compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/py27compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/py27compat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/py31compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/py31compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/py31compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/py31compat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/py33compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/py33compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/py33compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/py33compat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/sandbox.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/sandbox.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/sandbox.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/sandbox.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/site-patch.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/site-patch.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/site-patch.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/site-patch.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/ssl_support.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/ssl_support.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/ssl_support.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/ssl_support.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/unicode_utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/unicode_utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/unicode_utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/unicode_utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/version.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/version.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/version.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/version.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/wheel.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/wheel.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/wheel.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/wheel.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/__pycache__/windows_support.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/windows_support.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/__pycache__/windows_support.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/__pycache__/windows_support.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_deprecation_warning.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/_deprecation_warning.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_deprecation_warning.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_deprecation_warning.py diff --git a/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/__init__.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/__pycache__/pyparsing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/__pycache__/pyparsing.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/__pycache__/pyparsing.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/__pycache__/pyparsing.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/__pycache__/six.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/__pycache__/six.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/__pycache__/six.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/__pycache__/six.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__about__.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__about__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__about__.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__about__.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__init__.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__init__.py diff --git 
a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/__about__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/__about__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/__about__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/__about__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/_compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/_compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/_compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/_compat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/_structures.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/_structures.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/_structures.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/_structures.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/markers.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/markers.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/markers.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/markers.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/requirements.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/requirements.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/requirements.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/requirements.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/specifiers.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/specifiers.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/specifiers.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/specifiers.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/version.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/version.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/version.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/__pycache__/version.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/_compat.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/_compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/_compat.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/_compat.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/_structures.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/_structures.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/_structures.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/_structures.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/markers.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/markers.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/markers.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/markers.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/requirements.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/requirements.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/requirements.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/requirements.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/specifiers.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/specifiers.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/specifiers.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/specifiers.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/utils.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/utils.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/utils.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/utils.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/version.py 
b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/version.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/version.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/packaging/version.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/pyparsing.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/pyparsing.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/pyparsing.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/pyparsing.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/_vendor/six.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/six.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/_vendor/six.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/_vendor/six.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/archive_util.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/archive_util.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/archive_util.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/archive_util.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/build_meta.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/build_meta.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/build_meta.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/build_meta.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/cli-32.exe b/venv.bak/lib/python3.7/site-packages/original/setuptools/cli-32.exe similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/cli-32.exe rename to venv.bak/lib/python3.7/site-packages/original/setuptools/cli-32.exe diff --git a/venv/lib/python3.7/site-packages/original/setuptools/cli-64.exe b/venv.bak/lib/python3.7/site-packages/original/setuptools/cli-64.exe similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/cli-64.exe rename to venv.bak/lib/python3.7/site-packages/original/setuptools/cli-64.exe diff --git a/venv/lib/python3.7/site-packages/original/setuptools/cli.exe b/venv.bak/lib/python3.7/site-packages/original/setuptools/cli.exe similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/cli.exe rename to venv.bak/lib/python3.7/site-packages/original/setuptools/cli.exe diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__init__.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/__init__.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/alias.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/alias.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/alias.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/alias.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/bdist_egg.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/bdist_egg.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/bdist_egg.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/bdist_egg.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/bdist_rpm.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/bdist_rpm.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/bdist_rpm.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/bdist_rpm.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/bdist_wininst.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/bdist_wininst.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/bdist_wininst.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/bdist_wininst.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/build_clib.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/build_clib.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/build_clib.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/build_clib.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/build_ext.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/build_ext.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/build_ext.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/build_ext.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/build_py.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/build_py.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/build_py.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/build_py.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/develop.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/develop.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/develop.cpython-37.pyc rename to 
venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/develop.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/dist_info.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/dist_info.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/dist_info.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/dist_info.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/easy_install.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/easy_install.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/easy_install.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/easy_install.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/egg_info.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/egg_info.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/egg_info.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/egg_info.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/install.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/install.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/install.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/install.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/install_egg_info.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/install_egg_info.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/install_egg_info.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/install_egg_info.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/install_lib.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/install_lib.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/install_lib.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/install_lib.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/install_scripts.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/install_scripts.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/install_scripts.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/install_scripts.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/py36compat.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/py36compat.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/py36compat.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/py36compat.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/register.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/register.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/register.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/register.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/rotate.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/rotate.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/rotate.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/rotate.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/saveopts.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/saveopts.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/saveopts.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/saveopts.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/sdist.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/sdist.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/sdist.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/sdist.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/setopt.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/setopt.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/setopt.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/setopt.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/test.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/test.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/test.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/test.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/upload.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/upload.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/upload.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/upload.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/upload_docs.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/upload_docs.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/__pycache__/upload_docs.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/__pycache__/upload_docs.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/alias.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/alias.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/alias.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/alias.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/bdist_egg.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/bdist_egg.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/bdist_egg.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/bdist_egg.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/bdist_rpm.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/bdist_rpm.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/bdist_rpm.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/bdist_rpm.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/bdist_wininst.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/bdist_wininst.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/bdist_wininst.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/bdist_wininst.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/build_clib.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/build_clib.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/build_clib.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/build_clib.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/build_ext.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/build_ext.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/build_ext.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/build_ext.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/build_py.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/build_py.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/build_py.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/build_py.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/develop.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/develop.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/develop.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/develop.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/dist_info.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/dist_info.py similarity index 100% rename from 
venv/lib/python3.7/site-packages/original/setuptools/command/dist_info.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/dist_info.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/easy_install.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/easy_install.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/easy_install.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/easy_install.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/egg_info.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/egg_info.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/egg_info.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/egg_info.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/install.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/install.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/install.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/install.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/install_egg_info.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/install_egg_info.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/install_egg_info.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/install_egg_info.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/install_lib.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/install_lib.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/install_lib.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/install_lib.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/install_scripts.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/install_scripts.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/install_scripts.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/install_scripts.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/launcher manifest.xml b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/launcher manifest.xml similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/launcher manifest.xml rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/launcher manifest.xml diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/py36compat.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/py36compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/py36compat.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/py36compat.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/register.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/register.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/register.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/register.py diff --git 
a/venv/lib/python3.7/site-packages/original/setuptools/command/rotate.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/rotate.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/rotate.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/rotate.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/saveopts.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/saveopts.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/saveopts.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/saveopts.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/sdist.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/sdist.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/sdist.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/sdist.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/setopt.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/setopt.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/setopt.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/setopt.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/test.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/test.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/test.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/test.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/upload.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/upload.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/upload.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/upload.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/command/upload_docs.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/command/upload_docs.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/command/upload_docs.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/command/upload_docs.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/config.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/config.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/config.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/config.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/dep_util.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/dep_util.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/dep_util.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/dep_util.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/depends.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/depends.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/depends.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/depends.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/dist.py 
b/venv.bak/lib/python3.7/site-packages/original/setuptools/dist.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/dist.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/dist.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/extension.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/extension.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/extension.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/extension.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/extern/__init__.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/extern/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/extern/__init__.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/extern/__init__.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/extern/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/original/setuptools/extern/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/extern/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/original/setuptools/extern/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/original/setuptools/glibc.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/glibc.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/glibc.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/glibc.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/glob.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/glob.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/glob.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/glob.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/gui-32.exe b/venv.bak/lib/python3.7/site-packages/original/setuptools/gui-32.exe similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/gui-32.exe rename to venv.bak/lib/python3.7/site-packages/original/setuptools/gui-32.exe diff --git a/venv/lib/python3.7/site-packages/original/setuptools/gui-64.exe b/venv.bak/lib/python3.7/site-packages/original/setuptools/gui-64.exe similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/gui-64.exe rename to venv.bak/lib/python3.7/site-packages/original/setuptools/gui-64.exe diff --git a/venv/lib/python3.7/site-packages/original/setuptools/gui.exe b/venv.bak/lib/python3.7/site-packages/original/setuptools/gui.exe similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/gui.exe rename to venv.bak/lib/python3.7/site-packages/original/setuptools/gui.exe diff --git a/venv/lib/python3.7/site-packages/original/setuptools/launch.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/launch.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/launch.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/launch.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/lib2to3_ex.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/lib2to3_ex.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/lib2to3_ex.py rename to 
venv.bak/lib/python3.7/site-packages/original/setuptools/lib2to3_ex.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/monkey.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/monkey.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/monkey.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/monkey.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/msvc.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/msvc.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/msvc.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/msvc.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/namespaces.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/namespaces.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/namespaces.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/namespaces.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/package_index.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/package_index.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/package_index.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/package_index.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/pep425tags.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/pep425tags.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/pep425tags.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/pep425tags.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/py27compat.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/py27compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/py27compat.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/py27compat.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/py31compat.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/py31compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/py31compat.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/py31compat.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/py33compat.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/py33compat.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/py33compat.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/py33compat.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/sandbox.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/sandbox.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/sandbox.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/sandbox.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/script (dev).tmpl b/venv.bak/lib/python3.7/site-packages/original/setuptools/script (dev).tmpl similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/script (dev).tmpl rename to venv.bak/lib/python3.7/site-packages/original/setuptools/script (dev).tmpl diff --git a/venv/lib/python3.7/site-packages/original/setuptools/script.tmpl b/venv.bak/lib/python3.7/site-packages/original/setuptools/script.tmpl 
similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/script.tmpl rename to venv.bak/lib/python3.7/site-packages/original/setuptools/script.tmpl diff --git a/venv/lib/python3.7/site-packages/original/setuptools/site-patch.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/site-patch.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/site-patch.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/site-patch.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/ssl_support.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/ssl_support.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/ssl_support.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/ssl_support.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/unicode_utils.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/unicode_utils.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/unicode_utils.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/unicode_utils.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/version.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/version.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/version.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/version.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/wheel.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/wheel.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/wheel.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/wheel.py diff --git a/venv/lib/python3.7/site-packages/original/setuptools/windows_support.py b/venv.bak/lib/python3.7/site-packages/original/setuptools/windows_support.py similarity index 100% rename from venv/lib/python3.7/site-packages/original/setuptools/windows_support.py rename to venv.bak/lib/python3.7/site-packages/original/setuptools/windows_support.py diff --git a/venv/lib/python3.7/site-packages/pip-20.0.2.dist-info/INSTALLER b/venv.bak/lib/python3.7/site-packages/pip-20.0.2.dist-info/INSTALLER similarity index 100% rename from venv/lib/python3.7/site-packages/pip-20.0.2.dist-info/INSTALLER rename to venv.bak/lib/python3.7/site-packages/pip-20.0.2.dist-info/INSTALLER diff --git a/venv/lib/python3.7/site-packages/pip-20.0.2.dist-info/LICENSE.txt b/venv.bak/lib/python3.7/site-packages/pip-20.0.2.dist-info/LICENSE.txt similarity index 100% rename from venv/lib/python3.7/site-packages/pip-20.0.2.dist-info/LICENSE.txt rename to venv.bak/lib/python3.7/site-packages/pip-20.0.2.dist-info/LICENSE.txt diff --git a/venv/lib/python3.7/site-packages/pip-20.0.2.dist-info/METADATA b/venv.bak/lib/python3.7/site-packages/pip-20.0.2.dist-info/METADATA similarity index 100% rename from venv/lib/python3.7/site-packages/pip-20.0.2.dist-info/METADATA rename to venv.bak/lib/python3.7/site-packages/pip-20.0.2.dist-info/METADATA diff --git a/venv/lib/python3.7/site-packages/pip-20.0.2.dist-info/RECORD b/venv.bak/lib/python3.7/site-packages/pip-20.0.2.dist-info/RECORD similarity index 100% rename from venv/lib/python3.7/site-packages/pip-20.0.2.dist-info/RECORD rename to venv.bak/lib/python3.7/site-packages/pip-20.0.2.dist-info/RECORD diff --git a/venv/lib/python3.7/site-packages/pip-20.0.2.dist-info/WHEEL 
b/venv.bak/lib/python3.7/site-packages/pip-20.0.2.dist-info/WHEEL similarity index 100% rename from venv/lib/python3.7/site-packages/pip-20.0.2.dist-info/WHEEL rename to venv.bak/lib/python3.7/site-packages/pip-20.0.2.dist-info/WHEEL diff --git a/venv/lib/python3.7/site-packages/pip-20.0.2.dist-info/entry_points.txt b/venv.bak/lib/python3.7/site-packages/pip-20.0.2.dist-info/entry_points.txt similarity index 100% rename from venv/lib/python3.7/site-packages/pip-20.0.2.dist-info/entry_points.txt rename to venv.bak/lib/python3.7/site-packages/pip-20.0.2.dist-info/entry_points.txt diff --git a/venv/lib/python3.7/site-packages/pip-20.0.2.dist-info/top_level.txt b/venv.bak/lib/python3.7/site-packages/pip-20.0.2.dist-info/top_level.txt similarity index 100% rename from venv/lib/python3.7/site-packages/pip-20.0.2.dist-info/top_level.txt rename to venv.bak/lib/python3.7/site-packages/pip-20.0.2.dist-info/top_level.txt diff --git a/venv.bak/lib/python3.7/site-packages/pip/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/__init__.py new file mode 100644 index 0000000..827a4e2 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/__init__.py @@ -0,0 +1,18 @@ +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import List, Optional + + +__version__ = "20.0.2" + + +def main(args=None): + # type: (Optional[List[str]]) -> int + """This is an internal API only meant for use by pip's own console scripts. + + For additional details, see https://github.com/pypa/pip/issues/7498. + """ + from pip._internal.utils.entrypoints import _wrapper + + return _wrapper(args) diff --git a/venv.bak/lib/python3.7/site-packages/pip/__main__.py b/venv.bak/lib/python3.7/site-packages/pip/__main__.py new file mode 100644 index 0000000..e83b9e0 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/__main__.py @@ -0,0 +1,19 @@ +from __future__ import absolute_import + +import os +import sys + +# If we are running from a wheel, add the wheel to sys.path +# This allows the usage python pip-*.whl/pip install pip-*.whl +if __package__ == '': + # __file__ is pip-*.whl/pip/__main__.py + # first dirname call strips of '/__main__.py', second strips off '/pip' + # Resulting path is the name of the wheel itself + # Add that to sys.path so we can import pip + path = os.path.dirname(os.path.dirname(__file__)) + sys.path.insert(0, path) + +from pip._internal.cli.main import main as _main # isort:skip # noqa + +if __name__ == '__main__': + sys.exit(_main()) diff --git a/venv.bak/lib/python3.7/site-packages/pip/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..67b2184 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/__pycache__/__main__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/__pycache__/__main__.cpython-37.pyc new file mode 100644 index 0000000..912de52 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/__pycache__/__main__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/__init__.py new file mode 100644 index 0000000..3aa8a46 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/__init__.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python +import pip._internal.utils.inject_securetransport # noqa +from 
pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import Optional, List + + +def main(args=None): + # type: (Optional[List[str]]) -> int + """This is preserved for old console scripts that may still be referencing + it. + + For additional details, see https://github.com/pypa/pip/issues/7498. + """ + from pip._internal.utils.entrypoints import _wrapper + + return _wrapper(args) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..f2d7283 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/build_env.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/build_env.cpython-37.pyc new file mode 100644 index 0000000..24160c9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/build_env.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/cache.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/cache.cpython-37.pyc new file mode 100644 index 0000000..4061860 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/cache.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/configuration.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/configuration.cpython-37.pyc new file mode 100644 index 0000000..7e45816 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/configuration.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/exceptions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/exceptions.cpython-37.pyc new file mode 100644 index 0000000..4a74bdb Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/exceptions.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/pip/_internal/__pycache__/legacy_resolve.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/legacy_resolve.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/__pycache__/legacy_resolve.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/legacy_resolve.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/locations.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/locations.cpython-37.pyc new file mode 100644 index 0000000..2486099 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/locations.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/pip/_internal/__pycache__/main.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/main.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/__pycache__/main.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/main.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/pep425tags.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/pep425tags.cpython-37.pyc new file mode 100644 index 0000000..2852922 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/pep425tags.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/pyproject.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/pyproject.cpython-37.pyc new file mode 100644 index 0000000..becae73 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/pyproject.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/pip/_internal/__pycache__/self_outdated_check.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/self_outdated_check.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/__pycache__/self_outdated_check.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/self_outdated_check.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/__pycache__/wheel_builder.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/wheel_builder.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/__pycache__/wheel_builder.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/__pycache__/wheel_builder.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/build_env.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/build_env.py new file mode 100644 index 0000000..f55f0e6 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/build_env.py @@ -0,0 +1,221 @@ +"""Build Environment used for isolation during sdist building +""" + +# The following comment should be removed at some point in the future. +# mypy: strict-optional=False +# mypy: disallow-untyped-defs=False + +import logging +import os +import sys +import textwrap +from collections import OrderedDict +from distutils.sysconfig import get_python_lib +from sysconfig import get_paths + +from pip._vendor.pkg_resources import Requirement, VersionConflict, WorkingSet + +from pip import __file__ as pip_location +from pip._internal.utils.subprocess import call_subprocess +from pip._internal.utils.temp_dir import TempDirectory +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from pip._internal.utils.ui import open_spinner + +if MYPY_CHECK_RUNNING: + from typing import Tuple, Set, Iterable, Optional, List + from pip._internal.index.package_finder import PackageFinder + +logger = logging.getLogger(__name__) + + +class _Prefix: + + def __init__(self, path): + # type: (str) -> None + self.path = path + self.setup = False + self.bin_dir = get_paths( + 'nt' if os.name == 'nt' else 'posix_prefix', + vars={'base': path, 'platbase': path} + )['scripts'] + # Note: prefer distutils' sysconfig to get the + # library paths so PyPy is correctly supported. 
+ purelib = get_python_lib(plat_specific=False, prefix=path) + platlib = get_python_lib(plat_specific=True, prefix=path) + if purelib == platlib: + self.lib_dirs = [purelib] + else: + self.lib_dirs = [purelib, platlib] + + +class BuildEnvironment(object): + """Creates and manages an isolated environment to install build deps + """ + + def __init__(self): + # type: () -> None + self._temp_dir = TempDirectory(kind="build-env") + + self._prefixes = OrderedDict(( + (name, _Prefix(os.path.join(self._temp_dir.path, name))) + for name in ('normal', 'overlay') + )) + + self._bin_dirs = [] # type: List[str] + self._lib_dirs = [] # type: List[str] + for prefix in reversed(list(self._prefixes.values())): + self._bin_dirs.append(prefix.bin_dir) + self._lib_dirs.extend(prefix.lib_dirs) + + # Customize site to: + # - ensure .pth files are honored + # - prevent access to system site packages + system_sites = { + os.path.normcase(site) for site in ( + get_python_lib(plat_specific=False), + get_python_lib(plat_specific=True), + ) + } + self._site_dir = os.path.join(self._temp_dir.path, 'site') + if not os.path.exists(self._site_dir): + os.mkdir(self._site_dir) + with open(os.path.join(self._site_dir, 'sitecustomize.py'), 'w') as fp: + fp.write(textwrap.dedent( + ''' + import os, site, sys + + # First, drop system-sites related paths. + original_sys_path = sys.path[:] + known_paths = set() + for path in {system_sites!r}: + site.addsitedir(path, known_paths=known_paths) + system_paths = set( + os.path.normcase(path) + for path in sys.path[len(original_sys_path):] + ) + original_sys_path = [ + path for path in original_sys_path + if os.path.normcase(path) not in system_paths + ] + sys.path = original_sys_path + + # Second, add lib directories. + # ensuring .pth file are processed. + for path in {lib_dirs!r}: + assert not path in sys.path + site.addsitedir(path) + ''' + ).format(system_sites=system_sites, lib_dirs=self._lib_dirs)) + + def __enter__(self): + self._save_env = { + name: os.environ.get(name, None) + for name in ('PATH', 'PYTHONNOUSERSITE', 'PYTHONPATH') + } + + path = self._bin_dirs[:] + old_path = self._save_env['PATH'] + if old_path: + path.extend(old_path.split(os.pathsep)) + + pythonpath = [self._site_dir] + + os.environ.update({ + 'PATH': os.pathsep.join(path), + 'PYTHONNOUSERSITE': '1', + 'PYTHONPATH': os.pathsep.join(pythonpath), + }) + + def __exit__(self, exc_type, exc_val, exc_tb): + for varname, old_value in self._save_env.items(): + if old_value is None: + os.environ.pop(varname, None) + else: + os.environ[varname] = old_value + + def cleanup(self): + # type: () -> None + self._temp_dir.cleanup() + + def check_requirements(self, reqs): + # type: (Iterable[str]) -> Tuple[Set[Tuple[str, str]], Set[str]] + """Return 2 sets: + - conflicting requirements: set of (installed, wanted) reqs tuples + - missing requirements: set of reqs + """ + missing = set() + conflicting = set() + if reqs: + ws = WorkingSet(self._lib_dirs) + for req in reqs: + try: + if ws.find(Requirement.parse(req)) is None: + missing.add(req) + except VersionConflict as e: + conflicting.add((str(e.args[0].as_requirement()), + str(e.args[1]))) + return conflicting, missing + + def install_requirements( + self, + finder, # type: PackageFinder + requirements, # type: Iterable[str] + prefix_as_string, # type: str + message # type: Optional[str] + ): + # type: (...) 
-> None + prefix = self._prefixes[prefix_as_string] + assert not prefix.setup + prefix.setup = True + if not requirements: + return + args = [ + sys.executable, os.path.dirname(pip_location), 'install', + '--ignore-installed', '--no-user', '--prefix', prefix.path, + '--no-warn-script-location', + ] # type: List[str] + if logger.getEffectiveLevel() <= logging.DEBUG: + args.append('-v') + for format_control in ('no_binary', 'only_binary'): + formats = getattr(finder.format_control, format_control) + args.extend(('--' + format_control.replace('_', '-'), + ','.join(sorted(formats or {':none:'})))) + + index_urls = finder.index_urls + if index_urls: + args.extend(['-i', index_urls[0]]) + for extra_index in index_urls[1:]: + args.extend(['--extra-index-url', extra_index]) + else: + args.append('--no-index') + for link in finder.find_links: + args.extend(['--find-links', link]) + + for host in finder.trusted_hosts: + args.extend(['--trusted-host', host]) + if finder.allow_all_prereleases: + args.append('--pre') + args.append('--') + args.extend(requirements) + with open_spinner(message) as spinner: + call_subprocess(args, spinner=spinner) + + +class NoOpBuildEnvironment(BuildEnvironment): + """A no-op drop-in replacement for BuildEnvironment + """ + + def __init__(self): + pass + + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + def cleanup(self): + pass + + def install_requirements(self, finder, requirements, prefix, message): + raise NotImplementedError() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/cache.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/cache.py new file mode 100644 index 0000000..abecd78 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/cache.py @@ -0,0 +1,329 @@ +"""Cache Management +""" + +# The following comment should be removed at some point in the future. +# mypy: strict-optional=False + +import hashlib +import json +import logging +import os + +from pip._vendor.packaging.tags import interpreter_name, interpreter_version +from pip._vendor.packaging.utils import canonicalize_name + +from pip._internal.exceptions import InvalidWheelFilename +from pip._internal.models.link import Link +from pip._internal.models.wheel import Wheel +from pip._internal.utils.temp_dir import TempDirectory +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from pip._internal.utils.urls import path_to_url + +if MYPY_CHECK_RUNNING: + from typing import Optional, Set, List, Any, Dict + + from pip._vendor.packaging.tags import Tag + + from pip._internal.models.format_control import FormatControl + +logger = logging.getLogger(__name__) + + +def _hash_dict(d): + # type: (Dict[str, str]) -> str + """Return a stable sha224 of a dictionary.""" + s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True) + return hashlib.sha224(s.encode("ascii")).hexdigest() + + +class Cache(object): + """An abstract class - provides cache directories for data from links + + + :param cache_dir: The root of the cache. + :param format_control: An object of FormatControl class to limit + binaries being read from the cache. + :param allowed_formats: which formats of files the cache should store. 
+ ('binary' and 'source' are the only allowed values) + """ + + def __init__(self, cache_dir, format_control, allowed_formats): + # type: (str, FormatControl, Set[str]) -> None + super(Cache, self).__init__() + assert not cache_dir or os.path.isabs(cache_dir) + self.cache_dir = cache_dir or None + self.format_control = format_control + self.allowed_formats = allowed_formats + + _valid_formats = {"source", "binary"} + assert self.allowed_formats.union(_valid_formats) == _valid_formats + + def _get_cache_path_parts_legacy(self, link): + # type: (Link) -> List[str] + """Get parts of part that must be os.path.joined with cache_dir + + Legacy cache key (pip < 20) for compatibility with older caches. + """ + + # We want to generate an url to use as our cache key, we don't want to + # just re-use the URL because it might have other items in the fragment + # and we don't care about those. + key_parts = [link.url_without_fragment] + if link.hash_name is not None and link.hash is not None: + key_parts.append("=".join([link.hash_name, link.hash])) + key_url = "#".join(key_parts) + + # Encode our key url with sha224, we'll use this because it has similar + # security properties to sha256, but with a shorter total output (and + # thus less secure). However the differences don't make a lot of + # difference for our use case here. + hashed = hashlib.sha224(key_url.encode()).hexdigest() + + # We want to nest the directories some to prevent having a ton of top + # level directories where we might run out of sub directories on some + # FS. + parts = [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]] + + return parts + + def _get_cache_path_parts(self, link): + # type: (Link) -> List[str] + """Get parts of part that must be os.path.joined with cache_dir + """ + + # We want to generate an url to use as our cache key, we don't want to + # just re-use the URL because it might have other items in the fragment + # and we don't care about those. + key_parts = {"url": link.url_without_fragment} + if link.hash_name is not None and link.hash is not None: + key_parts[link.hash_name] = link.hash + if link.subdirectory_fragment: + key_parts["subdirectory"] = link.subdirectory_fragment + + # Include interpreter name, major and minor version in cache key + # to cope with ill-behaved sdists that build a different wheel + # depending on the python version their setup.py is being run on, + # and don't encode the difference in compatibility tags. + # https://github.com/pypa/pip/issues/7296 + key_parts["interpreter_name"] = interpreter_name() + key_parts["interpreter_version"] = interpreter_version() + + # Encode our key url with sha224, we'll use this because it has similar + # security properties to sha256, but with a shorter total output (and + # thus less secure). However the differences don't make a lot of + # difference for our use case here. + hashed = _hash_dict(key_parts) + + # We want to nest the directories some to prevent having a ton of top + # level directories where we might run out of sub directories on some + # FS. 
+ parts = [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]] + + return parts + + def _get_candidates(self, link, canonical_package_name): + # type: (Link, Optional[str]) -> List[Any] + can_not_cache = ( + not self.cache_dir or + not canonical_package_name or + not link + ) + if can_not_cache: + return [] + + formats = self.format_control.get_allowed_formats( + canonical_package_name + ) + if not self.allowed_formats.intersection(formats): + return [] + + candidates = [] + path = self.get_path_for_link(link) + if os.path.isdir(path): + for candidate in os.listdir(path): + candidates.append((candidate, path)) + # TODO remove legacy path lookup in pip>=21 + legacy_path = self.get_path_for_link_legacy(link) + if os.path.isdir(legacy_path): + for candidate in os.listdir(legacy_path): + candidates.append((candidate, legacy_path)) + return candidates + + def get_path_for_link_legacy(self, link): + # type: (Link) -> str + raise NotImplementedError() + + def get_path_for_link(self, link): + # type: (Link) -> str + """Return a directory to store cached items in for link. + """ + raise NotImplementedError() + + def get( + self, + link, # type: Link + package_name, # type: Optional[str] + supported_tags, # type: List[Tag] + ): + # type: (...) -> Link + """Returns a link to a cached item if it exists, otherwise returns the + passed link. + """ + raise NotImplementedError() + + def cleanup(self): + # type: () -> None + pass + + +class SimpleWheelCache(Cache): + """A cache of wheels for future installs. + """ + + def __init__(self, cache_dir, format_control): + # type: (str, FormatControl) -> None + super(SimpleWheelCache, self).__init__( + cache_dir, format_control, {"binary"} + ) + + def get_path_for_link_legacy(self, link): + # type: (Link) -> str + parts = self._get_cache_path_parts_legacy(link) + return os.path.join(self.cache_dir, "wheels", *parts) + + def get_path_for_link(self, link): + # type: (Link) -> str + """Return a directory to store cached wheels for link + + Because there are M wheels for any one sdist, we provide a directory + to cache them in, and then consult that directory when looking up + cache hits. + + We only insert things into the cache if they have plausible version + numbers, so that we don't contaminate the cache with things that were + not unique. E.g. ./package might have dozens of installs done for it + and build a version of 0.0...and if we built and cached a wheel, we'd + end up using the same wheel even if the source has been edited. + + :param link: The link of the sdist for which this will cache wheels. + """ + parts = self._get_cache_path_parts(link) + + # Store wheels within the root cache_dir + return os.path.join(self.cache_dir, "wheels", *parts) + + def get( + self, + link, # type: Link + package_name, # type: Optional[str] + supported_tags, # type: List[Tag] + ): + # type: (...) 
-> Link + candidates = [] + + if not package_name: + return link + + canonical_package_name = canonicalize_name(package_name) + for wheel_name, wheel_dir in self._get_candidates( + link, canonical_package_name + ): + try: + wheel = Wheel(wheel_name) + except InvalidWheelFilename: + continue + if canonicalize_name(wheel.name) != canonical_package_name: + logger.debug( + "Ignoring cached wheel {} for {} as it " + "does not match the expected distribution name {}.".format( + wheel_name, link, package_name + ) + ) + continue + if not wheel.supported(supported_tags): + # Built for a different python/arch/etc + continue + candidates.append( + ( + wheel.support_index_min(supported_tags), + wheel_name, + wheel_dir, + ) + ) + + if not candidates: + return link + + _, wheel_name, wheel_dir = min(candidates) + return Link(path_to_url(os.path.join(wheel_dir, wheel_name))) + + +class EphemWheelCache(SimpleWheelCache): + """A SimpleWheelCache that creates it's own temporary cache directory + """ + + def __init__(self, format_control): + # type: (FormatControl) -> None + self._temp_dir = TempDirectory(kind="ephem-wheel-cache") + + super(EphemWheelCache, self).__init__( + self._temp_dir.path, format_control + ) + + def cleanup(self): + # type: () -> None + self._temp_dir.cleanup() + + +class WheelCache(Cache): + """Wraps EphemWheelCache and SimpleWheelCache into a single Cache + + This Cache allows for gracefully degradation, using the ephem wheel cache + when a certain link is not found in the simple wheel cache first. + """ + + def __init__(self, cache_dir, format_control): + # type: (str, FormatControl) -> None + super(WheelCache, self).__init__( + cache_dir, format_control, {'binary'} + ) + self._wheel_cache = SimpleWheelCache(cache_dir, format_control) + self._ephem_cache = EphemWheelCache(format_control) + + def get_path_for_link_legacy(self, link): + # type: (Link) -> str + return self._wheel_cache.get_path_for_link_legacy(link) + + def get_path_for_link(self, link): + # type: (Link) -> str + return self._wheel_cache.get_path_for_link(link) + + def get_ephem_path_for_link(self, link): + # type: (Link) -> str + return self._ephem_cache.get_path_for_link(link) + + def get( + self, + link, # type: Link + package_name, # type: Optional[str] + supported_tags, # type: List[Tag] + ): + # type: (...) 
-> Link + retval = self._wheel_cache.get( + link=link, + package_name=package_name, + supported_tags=supported_tags, + ) + if retval is not link: + return retval + + return self._ephem_cache.get( + link=link, + package_name=package_name, + supported_tags=supported_tags, + ) + + def cleanup(self): + # type: () -> None + self._wheel_cache.cleanup() + self._ephem_cache.cleanup() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__init__.py new file mode 100644 index 0000000..e589bb9 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__init__.py @@ -0,0 +1,4 @@ +"""Subpackage containing all of pip's command line interface related code +""" + +# This file intentionally does not import submodules diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..769bd9e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/autocompletion.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/autocompletion.cpython-37.pyc new file mode 100644 index 0000000..539b646 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/autocompletion.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/base_command.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/base_command.cpython-37.pyc new file mode 100644 index 0000000..77b48b3 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/base_command.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/cmdoptions.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/cmdoptions.cpython-37.pyc new file mode 100644 index 0000000..5a82e82 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/cmdoptions.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/command_context.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/command_context.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/command_context.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/command_context.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/main.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/main.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/main.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/main.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/main_parser.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/main_parser.cpython-37.pyc new file mode 100644 index 0000000..3f08459 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/main_parser.cpython-37.pyc differ diff --git 
a/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/parser.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/parser.cpython-37.pyc new file mode 100644 index 0000000..bbcbad9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/parser.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/req_command.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/req_command.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/req_command.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/req_command.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/status_codes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/status_codes.cpython-37.pyc new file mode 100644 index 0000000..63c19f1 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/__pycache__/status_codes.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/autocompletion.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/autocompletion.py new file mode 100644 index 0000000..329de60 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/autocompletion.py @@ -0,0 +1,164 @@ +"""Logic that powers autocompletion installed by ``pip completion``. +""" + +import optparse +import os +import sys +from itertools import chain + +from pip._internal.cli.main_parser import create_main_parser +from pip._internal.commands import commands_dict, create_command +from pip._internal.utils.misc import get_installed_distributions +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import Any, Iterable, List, Optional + + +def autocomplete(): + # type: () -> None + """Entry Point for completion of main and subcommand options. + """ + # Don't complete if user hasn't sourced bash_completion file. 
+ if 'PIP_AUTO_COMPLETE' not in os.environ: + return + cwords = os.environ['COMP_WORDS'].split()[1:] + cword = int(os.environ['COMP_CWORD']) + try: + current = cwords[cword - 1] + except IndexError: + current = '' + + parser = create_main_parser() + subcommands = list(commands_dict) + options = [] + + # subcommand + subcommand_name = None # type: Optional[str] + for word in cwords: + if word in subcommands: + subcommand_name = word + break + # subcommand options + if subcommand_name is not None: + # special case: 'help' subcommand has no options + if subcommand_name == 'help': + sys.exit(1) + # special case: list locally installed dists for show and uninstall + should_list_installed = ( + subcommand_name in ['show', 'uninstall'] and + not current.startswith('-') + ) + if should_list_installed: + installed = [] + lc = current.lower() + for dist in get_installed_distributions(local_only=True): + if dist.key.startswith(lc) and dist.key not in cwords[1:]: + installed.append(dist.key) + # if there are no dists installed, fall back to option completion + if installed: + for dist in installed: + print(dist) + sys.exit(1) + + subcommand = create_command(subcommand_name) + + for opt in subcommand.parser.option_list_all: + if opt.help != optparse.SUPPRESS_HELP: + for opt_str in opt._long_opts + opt._short_opts: + options.append((opt_str, opt.nargs)) + + # filter out previously specified options from available options + prev_opts = [x.split('=')[0] for x in cwords[1:cword - 1]] + options = [(x, v) for (x, v) in options if x not in prev_opts] + # filter options by current input + options = [(k, v) for k, v in options if k.startswith(current)] + # get completion type given cwords and available subcommand options + completion_type = get_path_completion_type( + cwords, cword, subcommand.parser.option_list_all, + ) + # get completion files and directories if ``completion_type`` is + # ````, ```` or ```` + if completion_type: + paths = auto_complete_paths(current, completion_type) + options = [(path, 0) for path in paths] + for option in options: + opt_label = option[0] + # append '=' to options which require args + if option[1] and option[0][:2] == "--": + opt_label += '=' + print(opt_label) + else: + # show main parser options only when necessary + + opts = [i.option_list for i in parser.option_groups] + opts.append(parser.option_list) + flattened_opts = chain.from_iterable(opts) + if current.startswith('-'): + for opt in flattened_opts: + if opt.help != optparse.SUPPRESS_HELP: + subcommands += opt._long_opts + opt._short_opts + else: + # get completion type given cwords and all available options + completion_type = get_path_completion_type(cwords, cword, + flattened_opts) + if completion_type: + subcommands = list(auto_complete_paths(current, + completion_type)) + + print(' '.join([x for x in subcommands if x.startswith(current)])) + sys.exit(1) + + +def get_path_completion_type(cwords, cword, opts): + # type: (List[str], int, Iterable[Any]) -> Optional[str] + """Get the type of path completion (``file``, ``dir``, ``path`` or None) + + :param cwords: same as the environmental variable ``COMP_WORDS`` + :param cword: same as the environmental variable ``COMP_CWORD`` + :param opts: The available options to check + :return: path completion type (``file``, ``dir``, ``path`` or None) + """ + if cword < 2 or not cwords[cword - 2].startswith('-'): + return None + for opt in opts: + if opt.help == optparse.SUPPRESS_HELP: + continue + for o in str(opt).split('/'): + if cwords[cword - 2].split('=')[0] == o: + if 
not opt.metavar or any( + x in ('path', 'file', 'dir') + for x in opt.metavar.split('/')): + return opt.metavar + return None + + +def auto_complete_paths(current, completion_type): + # type: (str, str) -> Iterable[str] + """If ``completion_type`` is ``file`` or ``path``, list all regular files + and directories starting with ``current``; otherwise only list directories + starting with ``current``. + + :param current: The word to be completed + :param completion_type: path completion type(`file`, `path` or `dir`)i + :return: A generator of regular files and/or directories + """ + directory, filename = os.path.split(current) + current_path = os.path.abspath(directory) + # Don't complete paths if they can't be accessed + if not os.access(current_path, os.R_OK): + return + filename = os.path.normcase(filename) + # list all files that start with ``filename`` + file_list = (x for x in os.listdir(current_path) + if os.path.normcase(x).startswith(filename)) + for f in file_list: + opt = os.path.join(current_path, f) + comp_file = os.path.normcase(os.path.join(directory, f)) + # complete regular files when there is not ```` after option + # complete directories when there is ````, ```` or + # ````after option + if completion_type != 'dir' and os.path.isfile(opt): + yield comp_file + elif os.path.isdir(opt): + yield os.path.join(comp_file, '') diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/base_command.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/base_command.py new file mode 100644 index 0000000..628faa3 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/base_command.py @@ -0,0 +1,226 @@ +"""Base Command class, and related routines""" + +from __future__ import absolute_import, print_function + +import logging +import logging.config +import optparse +import os +import platform +import sys +import traceback + +from pip._internal.cli import cmdoptions +from pip._internal.cli.command_context import CommandContextMixIn +from pip._internal.cli.parser import ( + ConfigOptionParser, + UpdatingDefaultsHelpFormatter, +) +from pip._internal.cli.status_codes import ( + ERROR, + PREVIOUS_BUILD_DIR_ERROR, + SUCCESS, + UNKNOWN_ERROR, + VIRTUALENV_NOT_FOUND, +) +from pip._internal.exceptions import ( + BadCommand, + CommandError, + InstallationError, + PreviousBuildDirError, + UninstallationError, +) +from pip._internal.utils.deprecation import deprecated +from pip._internal.utils.filesystem import check_path_owner +from pip._internal.utils.logging import BrokenStdoutLoggingError, setup_logging +from pip._internal.utils.misc import get_prog, normalize_path +from pip._internal.utils.temp_dir import global_tempdir_manager +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from pip._internal.utils.virtualenv import running_under_virtualenv + +if MYPY_CHECK_RUNNING: + from typing import List, Tuple, Any + from optparse import Values + +__all__ = ['Command'] + +logger = logging.getLogger(__name__) + + +class Command(CommandContextMixIn): + usage = None # type: str + ignore_require_venv = False # type: bool + + def __init__(self, name, summary, isolated=False): + # type: (str, str, bool) -> None + super(Command, self).__init__() + parser_kw = { + 'usage': self.usage, + 'prog': '%s %s' % (get_prog(), name), + 'formatter': UpdatingDefaultsHelpFormatter(), + 'add_help_option': False, + 'name': name, + 'description': self.__doc__, + 'isolated': isolated, + } + + self.name = name + self.summary = summary + self.parser = ConfigOptionParser(**parser_kw) + + 
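For reference on the path completion above: auto_complete_paths() lists directory entries under the typed prefix, keeps regular files only when the metavar allows them, and appends a trailing separator to directories so the shell keeps descending into them. A small illustrative call, assuming the vendored module is importable and that the listed paths exist in the working directory:

from pip._internal.cli.autocompletion import auto_complete_paths

# Candidate files and directories for the (hypothetical) prefix "./ven".
print(list(auto_complete_paths('./ven', completion_type='path')))
# Directories only, e.g. ['./venv/', './venv.bak/'] in this repository.
print(list(auto_complete_paths('./ven', completion_type='dir')))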
# Commands should add options to this option group + optgroup_name = '%s Options' % self.name.capitalize() + self.cmd_opts = optparse.OptionGroup(self.parser, optgroup_name) + + # Add the general options + gen_opts = cmdoptions.make_option_group( + cmdoptions.general_group, + self.parser, + ) + self.parser.add_option_group(gen_opts) + + def handle_pip_version_check(self, options): + # type: (Values) -> None + """ + This is a no-op so that commands by default do not do the pip version + check. + """ + # Make sure we do the pip version check if the index_group options + # are present. + assert not hasattr(options, 'no_index') + + def run(self, options, args): + # type: (Values, List[Any]) -> Any + raise NotImplementedError + + def parse_args(self, args): + # type: (List[str]) -> Tuple[Any, Any] + # factored out for testability + return self.parser.parse_args(args) + + def main(self, args): + # type: (List[str]) -> int + try: + with self.main_context(): + return self._main(args) + finally: + logging.shutdown() + + def _main(self, args): + # type: (List[str]) -> int + # Intentionally set as early as possible so globally-managed temporary + # directories are available to the rest of the code. + self.enter_context(global_tempdir_manager()) + + options, args = self.parse_args(args) + + # Set verbosity so that it can be used elsewhere. + self.verbosity = options.verbose - options.quiet + + level_number = setup_logging( + verbosity=self.verbosity, + no_color=options.no_color, + user_log_file=options.log, + ) + + if ( + sys.version_info[:2] == (2, 7) and + not options.no_python_version_warning + ): + message = ( + "A future version of pip will drop support for Python 2.7. " + "More details about Python 2 support in pip, can be found at " + "https://pip.pypa.io/en/latest/development/release-process/#python-2-support" # noqa + ) + if platform.python_implementation() == "CPython": + message = ( + "Python 2.7 reached the end of its life on January " + "1st, 2020. Please upgrade your Python as Python 2.7 " + "is no longer maintained. " + ) + message + deprecated(message, replacement=None, gone_in=None) + + if options.skip_requirements_regex: + deprecated( + "--skip-requirements-regex is unsupported and will be removed", + replacement=( + "manage requirements/constraints files explicitly, " + "possibly generating them from metadata" + ), + gone_in="20.1", + issue=7297, + ) + + # TODO: Try to get these passing down from the command? + # without resorting to os.environ to hold these. + # This also affects isolated builds and it should. + + if options.no_input: + os.environ['PIP_NO_INPUT'] = '1' + + if options.exists_action: + os.environ['PIP_EXISTS_ACTION'] = ' '.join(options.exists_action) + + if options.require_venv and not self.ignore_require_venv: + # If a venv is required check if it can really be found + if not running_under_virtualenv(): + logger.critical( + 'Could not find an activated virtualenv (required).' + ) + sys.exit(VIRTUALENV_NOT_FOUND) + + if options.cache_dir: + options.cache_dir = normalize_path(options.cache_dir) + if not check_path_owner(options.cache_dir): + logger.warning( + "The directory '%s' or its parent directory is not owned " + "or is not writable by the current user. The cache " + "has been disabled. Check the permissions and owner of " + "that directory. 
If executing pip with sudo, you may want " + "sudo's -H flag.", + options.cache_dir, + ) + options.cache_dir = None + + try: + status = self.run(options, args) + # FIXME: all commands should return an exit status + # and when it is done, isinstance is not needed anymore + if isinstance(status, int): + return status + except PreviousBuildDirError as exc: + logger.critical(str(exc)) + logger.debug('Exception information:', exc_info=True) + + return PREVIOUS_BUILD_DIR_ERROR + except (InstallationError, UninstallationError, BadCommand) as exc: + logger.critical(str(exc)) + logger.debug('Exception information:', exc_info=True) + + return ERROR + except CommandError as exc: + logger.critical('%s', exc) + logger.debug('Exception information:', exc_info=True) + + return ERROR + except BrokenStdoutLoggingError: + # Bypass our logger and write any remaining messages to stderr + # because stdout no longer works. + print('ERROR: Pipe to stdout was broken', file=sys.stderr) + if level_number <= logging.DEBUG: + traceback.print_exc(file=sys.stderr) + + return ERROR + except KeyboardInterrupt: + logger.critical('Operation cancelled by user') + logger.debug('Exception information:', exc_info=True) + + return ERROR + except BaseException: + logger.critical('Exception:', exc_info=True) + + return UNKNOWN_ERROR + finally: + self.handle_pip_version_check(options) + + return SUCCESS diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/cmdoptions.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/cmdoptions.py new file mode 100644 index 0000000..42e2695 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/cmdoptions.py @@ -0,0 +1,957 @@ +""" +shared options and groups + +The principle here is to define options once, but *not* instantiate them +globally. One reason being that options with action='append' can carry state +between parses. pip parses general options twice internally, and shouldn't +pass on state. To be consistent, all options will follow this design. +""" + +# The following comment should be removed at some point in the future. +# mypy: strict-optional=False + +from __future__ import absolute_import + +import logging +import os +import textwrap +import warnings +from distutils.util import strtobool +from functools import partial +from optparse import SUPPRESS_HELP, Option, OptionGroup +from textwrap import dedent + +from pip._internal.exceptions import CommandError +from pip._internal.locations import USER_CACHE_DIR, get_src_prefix +from pip._internal.models.format_control import FormatControl +from pip._internal.models.index import PyPI +from pip._internal.models.target_python import TargetPython +from pip._internal.utils.hashes import STRONG_HASHES +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from pip._internal.utils.ui import BAR_TYPES + +if MYPY_CHECK_RUNNING: + from typing import Any, Callable, Dict, Optional, Tuple + from optparse import OptionParser, Values + from pip._internal.cli.parser import ConfigOptionParser + +logger = logging.getLogger(__name__) + + +def raise_option_error(parser, option, msg): + # type: (OptionParser, Option, str) -> None + """ + Raise an option parsing error using parser.error(). + + Args: + parser: an OptionParser instance. + option: an Option instance. + msg: the error text. 
+ """ + msg = '{} error: {}'.format(option, msg) + msg = textwrap.fill(' '.join(msg.split())) + parser.error(msg) + + +def make_option_group(group, parser): + # type: (Dict[str, Any], ConfigOptionParser) -> OptionGroup + """ + Return an OptionGroup object + group -- assumed to be dict with 'name' and 'options' keys + parser -- an optparse Parser + """ + option_group = OptionGroup(parser, group['name']) + for option in group['options']: + option_group.add_option(option()) + return option_group + + +def check_install_build_global(options, check_options=None): + # type: (Values, Optional[Values]) -> None + """Disable wheels if per-setup.py call options are set. + + :param options: The OptionParser options to update. + :param check_options: The options to check, if not supplied defaults to + options. + """ + if check_options is None: + check_options = options + + def getname(n): + # type: (str) -> Optional[Any] + return getattr(check_options, n, None) + names = ["build_options", "global_options", "install_options"] + if any(map(getname, names)): + control = options.format_control + control.disallow_binaries() + warnings.warn( + 'Disabling all use of wheels due to the use of --build-option ' + '/ --global-option / --install-option.', stacklevel=2, + ) + + +def check_dist_restriction(options, check_target=False): + # type: (Values, bool) -> None + """Function for determining if custom platform options are allowed. + + :param options: The OptionParser options. + :param check_target: Whether or not to check if --target is being used. + """ + dist_restriction_set = any([ + options.python_version, + options.platform, + options.abi, + options.implementation, + ]) + + binary_only = FormatControl(set(), {':all:'}) + sdist_dependencies_allowed = ( + options.format_control != binary_only and + not options.ignore_dependencies + ) + + # Installations or downloads using dist restrictions must not combine + # source distributions and dist-specific wheels, as they are not + # guaranteed to be locally compatible. + if dist_restriction_set and sdist_dependencies_allowed: + raise CommandError( + "When restricting platform and interpreter constraints using " + "--python-version, --platform, --abi, or --implementation, " + "either --no-deps must be set, or --only-binary=:all: must be " + "set and --no-binary must not be set (or must be set to " + ":none:)." + ) + + if check_target: + if dist_restriction_set and not options.target_dir: + raise CommandError( + "Can not use any platform or abi specific options unless " + "installing via '--target'" + ) + + +def _path_option_check(option, opt, value): + # type: (Option, str, str) -> str + return os.path.expanduser(value) + + +class PipOption(Option): + TYPES = Option.TYPES + ("path",) + TYPE_CHECKER = Option.TYPE_CHECKER.copy() + TYPE_CHECKER["path"] = _path_option_check + + +########### +# options # +########### + +help_ = partial( + Option, + '-h', '--help', + dest='help', + action='help', + help='Show help.', +) # type: Callable[..., Option] + +isolated_mode = partial( + Option, + "--isolated", + dest="isolated_mode", + action="store_true", + default=False, + help=( + "Run pip in an isolated mode, ignoring environment variables and user " + "configuration." + ), +) # type: Callable[..., Option] + +require_virtualenv = partial( + Option, + # Run only if inside a virtualenv, bail if not. 
+ '--require-virtualenv', '--require-venv', + dest='require_venv', + action='store_true', + default=False, + help=SUPPRESS_HELP +) # type: Callable[..., Option] + +verbose = partial( + Option, + '-v', '--verbose', + dest='verbose', + action='count', + default=0, + help='Give more output. Option is additive, and can be used up to 3 times.' +) # type: Callable[..., Option] + +no_color = partial( + Option, + '--no-color', + dest='no_color', + action='store_true', + default=False, + help="Suppress colored output", +) # type: Callable[..., Option] + +version = partial( + Option, + '-V', '--version', + dest='version', + action='store_true', + help='Show version and exit.', +) # type: Callable[..., Option] + +quiet = partial( + Option, + '-q', '--quiet', + dest='quiet', + action='count', + default=0, + help=( + 'Give less output. Option is additive, and can be used up to 3' + ' times (corresponding to WARNING, ERROR, and CRITICAL logging' + ' levels).' + ), +) # type: Callable[..., Option] + +progress_bar = partial( + Option, + '--progress-bar', + dest='progress_bar', + type='choice', + choices=list(BAR_TYPES.keys()), + default='on', + help=( + 'Specify type of progress to be displayed [' + + '|'.join(BAR_TYPES.keys()) + '] (default: %default)' + ), +) # type: Callable[..., Option] + +log = partial( + PipOption, + "--log", "--log-file", "--local-log", + dest="log", + metavar="path", + type="path", + help="Path to a verbose appending log." +) # type: Callable[..., Option] + +no_input = partial( + Option, + # Don't ask for input + '--no-input', + dest='no_input', + action='store_true', + default=False, + help=SUPPRESS_HELP +) # type: Callable[..., Option] + +proxy = partial( + Option, + '--proxy', + dest='proxy', + type='str', + default='', + help="Specify a proxy in the form [user:passwd@]proxy.server:port." 
+) # type: Callable[..., Option] + +retries = partial( + Option, + '--retries', + dest='retries', + type='int', + default=5, + help="Maximum number of retries each connection should attempt " + "(default %default times).", +) # type: Callable[..., Option] + +timeout = partial( + Option, + '--timeout', '--default-timeout', + metavar='sec', + dest='timeout', + type='float', + default=15, + help='Set the socket timeout (default %default seconds).', +) # type: Callable[..., Option] + +skip_requirements_regex = partial( + Option, + # A regex to be used to skip requirements + '--skip-requirements-regex', + dest='skip_requirements_regex', + type='str', + default='', + help=SUPPRESS_HELP, +) # type: Callable[..., Option] + + +def exists_action(): + # type: () -> Option + return Option( + # Option when path already exist + '--exists-action', + dest='exists_action', + type='choice', + choices=['s', 'i', 'w', 'b', 'a'], + default=[], + action='append', + metavar='action', + help="Default action when a path already exists: " + "(s)witch, (i)gnore, (w)ipe, (b)ackup, (a)bort.", + ) + + +cert = partial( + PipOption, + '--cert', + dest='cert', + type='path', + metavar='path', + help="Path to alternate CA bundle.", +) # type: Callable[..., Option] + +client_cert = partial( + PipOption, + '--client-cert', + dest='client_cert', + type='path', + default=None, + metavar='path', + help="Path to SSL client certificate, a single file containing the " + "private key and the certificate in PEM format.", +) # type: Callable[..., Option] + +index_url = partial( + Option, + '-i', '--index-url', '--pypi-url', + dest='index_url', + metavar='URL', + default=PyPI.simple_url, + help="Base URL of the Python Package Index (default %default). " + "This should point to a repository compliant with PEP 503 " + "(the simple repository API) or a local directory laid out " + "in the same format.", +) # type: Callable[..., Option] + + +def extra_index_url(): + # type: () -> Option + return Option( + '--extra-index-url', + dest='extra_index_urls', + metavar='URL', + action='append', + default=[], + help="Extra URLs of package indexes to use in addition to " + "--index-url. Should follow the same rules as " + "--index-url.", + ) + + +no_index = partial( + Option, + '--no-index', + dest='no_index', + action='store_true', + default=False, + help='Ignore package index (only looking at --find-links URLs instead).', +) # type: Callable[..., Option] + + +def find_links(): + # type: () -> Option + return Option( + '-f', '--find-links', + dest='find_links', + action='append', + default=[], + metavar='url', + help="If a url or path to an html file, then parse for links to " + "archives. If a local path or file:// url that's a directory, " + "then look for archives in the directory listing.", + ) + + +def trusted_host(): + # type: () -> Option + return Option( + "--trusted-host", + dest="trusted_hosts", + action="append", + metavar="HOSTNAME", + default=[], + help="Mark this host or host:port pair as trusted, even though it " + "does not have valid or any HTTPS.", + ) + + +def constraints(): + # type: () -> Option + return Option( + '-c', '--constraint', + dest='constraints', + action='append', + default=[], + metavar='file', + help='Constrain versions using the given constraints file. ' + 'This option can be used multiple times.' + ) + + +def requirements(): + # type: () -> Option + return Option( + '-r', '--requirement', + dest='requirements', + action='append', + default=[], + metavar='file', + help='Install from the given requirements file. 
' + 'This option can be used multiple times.' + ) + + +def editable(): + # type: () -> Option + return Option( + '-e', '--editable', + dest='editables', + action='append', + default=[], + metavar='path/url', + help=('Install a project in editable mode (i.e. setuptools ' + '"develop mode") from a local project path or a VCS url.'), + ) + + +def _handle_src(option, opt_str, value, parser): + # type: (Option, str, str, OptionParser) -> None + value = os.path.abspath(value) + setattr(parser.values, option.dest, value) + + +src = partial( + PipOption, + '--src', '--source', '--source-dir', '--source-directory', + dest='src_dir', + type='path', + metavar='dir', + default=get_src_prefix(), + action='callback', + callback=_handle_src, + help='Directory to check out editable projects into. ' + 'The default in a virtualenv is "/src". ' + 'The default for global installs is "/src".' +) # type: Callable[..., Option] + + +def _get_format_control(values, option): + # type: (Values, Option) -> Any + """Get a format_control object.""" + return getattr(values, option.dest) + + +def _handle_no_binary(option, opt_str, value, parser): + # type: (Option, str, str, OptionParser) -> None + existing = _get_format_control(parser.values, option) + FormatControl.handle_mutual_excludes( + value, existing.no_binary, existing.only_binary, + ) + + +def _handle_only_binary(option, opt_str, value, parser): + # type: (Option, str, str, OptionParser) -> None + existing = _get_format_control(parser.values, option) + FormatControl.handle_mutual_excludes( + value, existing.only_binary, existing.no_binary, + ) + + +def no_binary(): + # type: () -> Option + format_control = FormatControl(set(), set()) + return Option( + "--no-binary", dest="format_control", action="callback", + callback=_handle_no_binary, type="str", + default=format_control, + help="Do not use binary packages. Can be supplied multiple times, and " + "each time adds to the existing value. Accepts either :all: to " + "disable all binary packages, :none: to empty the set, or one or " + "more package names with commas between them (no colons). Note " + "that some packages are tricky to compile and may fail to " + "install when this option is used on them.", + ) + + +def only_binary(): + # type: () -> Option + format_control = FormatControl(set(), set()) + return Option( + "--only-binary", dest="format_control", action="callback", + callback=_handle_only_binary, type="str", + default=format_control, + help="Do not use source packages. Can be supplied multiple times, and " + "each time adds to the existing value. Accepts either :all: to " + "disable all source packages, :none: to empty the set, or one or " + "more package names with commas between them. Packages without " + "binary distributions will fail to install when this option is " + "used on them.", + ) + + +platform = partial( + Option, + '--platform', + dest='platform', + metavar='platform', + default=None, + help=("Only use wheels compatible with . " + "Defaults to the platform of the running system."), +) # type: Callable[..., Option] + + +# This was made a separate function for unit-testing purposes. +def _convert_python_version(value): + # type: (str) -> Tuple[Tuple[int, ...], Optional[str]] + """ + Convert a version string like "3", "37", or "3.7.3" into a tuple of ints. + + :return: A 2-tuple (version_info, error_msg), where `error_msg` is + non-None if and only if there was a parsing error. + """ + if not value: + # The empty string is the same as not providing a value. 
+ return (None, None) + + parts = value.split('.') + if len(parts) > 3: + return ((), 'at most three version parts are allowed') + + if len(parts) == 1: + # Then we are in the case of "3" or "37". + value = parts[0] + if len(value) > 1: + parts = [value[0], value[1:]] + + try: + version_info = tuple(int(part) for part in parts) + except ValueError: + return ((), 'each version part must be an integer') + + return (version_info, None) + + +def _handle_python_version(option, opt_str, value, parser): + # type: (Option, str, str, OptionParser) -> None + """ + Handle a provided --python-version value. + """ + version_info, error_msg = _convert_python_version(value) + if error_msg is not None: + msg = ( + 'invalid --python-version value: {!r}: {}'.format( + value, error_msg, + ) + ) + raise_option_error(parser, option=option, msg=msg) + + parser.values.python_version = version_info + + +python_version = partial( + Option, + '--python-version', + dest='python_version', + metavar='python_version', + action='callback', + callback=_handle_python_version, type='str', + default=None, + help=dedent("""\ + The Python interpreter version to use for wheel and "Requires-Python" + compatibility checks. Defaults to a version derived from the running + interpreter. The version can be specified using up to three dot-separated + integers (e.g. "3" for 3.0.0, "3.7" for 3.7.0, or "3.7.3"). A major-minor + version can also be given as a string without dots (e.g. "37" for 3.7.0). + """), +) # type: Callable[..., Option] + + +implementation = partial( + Option, + '--implementation', + dest='implementation', + metavar='implementation', + default=None, + help=("Only use wheels compatible with Python " + "implementation , e.g. 'pp', 'jy', 'cp', " + " or 'ip'. If not specified, then the current " + "interpreter implementation is used. Use 'py' to force " + "implementation-agnostic wheels."), +) # type: Callable[..., Option] + + +abi = partial( + Option, + '--abi', + dest='abi', + metavar='abi', + default=None, + help=("Only use wheels compatible with Python " + "abi , e.g. 'pypy_41'. If not specified, then the " + "current interpreter abi tag is used. Generally " + "you will need to specify --implementation, " + "--platform, and --python-version when using " + "this option."), +) # type: Callable[..., Option] + + +def add_target_python_options(cmd_opts): + # type: (OptionGroup) -> None + cmd_opts.add_option(platform()) + cmd_opts.add_option(python_version()) + cmd_opts.add_option(implementation()) + cmd_opts.add_option(abi()) + + +def make_target_python(options): + # type: (Values) -> TargetPython + target_python = TargetPython( + platform=options.platform, + py_version_info=options.python_version, + abi=options.abi, + implementation=options.implementation, + ) + + return target_python + + +def prefer_binary(): + # type: () -> Option + return Option( + "--prefer-binary", + dest="prefer_binary", + action="store_true", + default=False, + help="Prefer older binary packages over newer source packages." + ) + + +cache_dir = partial( + PipOption, + "--cache-dir", + dest="cache_dir", + default=USER_CACHE_DIR, + metavar="dir", + type='path', + help="Store the cache data in ." +) # type: Callable[..., Option] + + +def _handle_no_cache_dir(option, opt, value, parser): + # type: (Option, str, str, OptionParser) -> None + """ + Process a value provided for the --no-cache-dir option. + + This is an optparse.Option callback for the --no-cache-dir option. 
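To make the --python-version spellings above concrete, _convert_python_version() (kept as a separate function for unit testing, per the comment above) maps each accepted form to a tuple of ints or to an error message. A short illustrative check, assuming the vendored module is importable:

from pip._internal.cli.cmdoptions import _convert_python_version

print(_convert_python_version('3'))      # ((3,), None)
print(_convert_python_version('37'))     # ((3, 7), None)
print(_convert_python_version('3.7.3'))  # ((3, 7, 3), None)
print(_convert_python_version('3.x'))    # ((), 'each version part must be an integer')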
+ """ + # The value argument will be None if --no-cache-dir is passed via the + # command-line, since the option doesn't accept arguments. However, + # the value can be non-None if the option is triggered e.g. by an + # environment variable, like PIP_NO_CACHE_DIR=true. + if value is not None: + # Then parse the string value to get argument error-checking. + try: + strtobool(value) + except ValueError as exc: + raise_option_error(parser, option=option, msg=str(exc)) + + # Originally, setting PIP_NO_CACHE_DIR to a value that strtobool() + # converted to 0 (like "false" or "no") caused cache_dir to be disabled + # rather than enabled (logic would say the latter). Thus, we disable + # the cache directory not just on values that parse to True, but (for + # backwards compatibility reasons) also on values that parse to False. + # In other words, always set it to False if the option is provided in + # some (valid) form. + parser.values.cache_dir = False + + +no_cache = partial( + Option, + "--no-cache-dir", + dest="cache_dir", + action="callback", + callback=_handle_no_cache_dir, + help="Disable the cache.", +) # type: Callable[..., Option] + +no_deps = partial( + Option, + '--no-deps', '--no-dependencies', + dest='ignore_dependencies', + action='store_true', + default=False, + help="Don't install package dependencies.", +) # type: Callable[..., Option] + + +def _handle_build_dir(option, opt, value, parser): + # type: (Option, str, str, OptionParser) -> None + if value: + value = os.path.abspath(value) + setattr(parser.values, option.dest, value) + + +build_dir = partial( + PipOption, + '-b', '--build', '--build-dir', '--build-directory', + dest='build_dir', + type='path', + metavar='dir', + action='callback', + callback=_handle_build_dir, + help='Directory to unpack packages into and build in. Note that ' + 'an initial build still takes place in a temporary directory. ' + 'The location of temporary directories can be controlled by setting ' + 'the TMPDIR environment variable (TEMP on Windows) appropriately. ' + 'When passed, build directories are not cleaned in case of failures.' +) # type: Callable[..., Option] + +ignore_requires_python = partial( + Option, + '--ignore-requires-python', + dest='ignore_requires_python', + action='store_true', + help='Ignore the Requires-Python information.' +) # type: Callable[..., Option] + +no_build_isolation = partial( + Option, + '--no-build-isolation', + dest='build_isolation', + action='store_false', + default=True, + help='Disable isolation when building a modern source distribution. ' + 'Build dependencies specified by PEP 518 must be already installed ' + 'if this option is used.' +) # type: Callable[..., Option] + + +def _handle_no_use_pep517(option, opt, value, parser): + # type: (Option, str, str, OptionParser) -> None + """ + Process a value provided for the --no-use-pep517 option. + + This is an optparse.Option callback for the no_use_pep517 option. + """ + # Since --no-use-pep517 doesn't accept arguments, the value argument + # will be None if --no-use-pep517 is passed via the command-line. + # However, the value can be non-None if the option is triggered e.g. + # by an environment variable, for example "PIP_NO_USE_PEP517=true". + if value is not None: + msg = """A value was passed for --no-use-pep517, + probably using either the PIP_NO_USE_PEP517 environment variable + or the "no-use-pep517" config file option. Use an appropriate value + of the PIP_USE_PEP517 environment variable or the "use-pep517" + config file option instead. 
+ """ + raise_option_error(parser, option=option, msg=msg) + + # Otherwise, --no-use-pep517 was passed via the command-line. + parser.values.use_pep517 = False + + +use_pep517 = partial( + Option, + '--use-pep517', + dest='use_pep517', + action='store_true', + default=None, + help='Use PEP 517 for building source distributions ' + '(use --no-use-pep517 to force legacy behaviour).' +) # type: Any + +no_use_pep517 = partial( + Option, + '--no-use-pep517', + dest='use_pep517', + action='callback', + callback=_handle_no_use_pep517, + default=None, + help=SUPPRESS_HELP +) # type: Any + +install_options = partial( + Option, + '--install-option', + dest='install_options', + action='append', + metavar='options', + help="Extra arguments to be supplied to the setup.py install " + "command (use like --install-option=\"--install-scripts=/usr/local/" + "bin\"). Use multiple --install-option options to pass multiple " + "options to setup.py install. If you are using an option with a " + "directory path, be sure to use absolute path.", +) # type: Callable[..., Option] + +global_options = partial( + Option, + '--global-option', + dest='global_options', + action='append', + metavar='options', + help="Extra global options to be supplied to the setup.py " + "call before the install command.", +) # type: Callable[..., Option] + +no_clean = partial( + Option, + '--no-clean', + action='store_true', + default=False, + help="Don't clean up build directories." +) # type: Callable[..., Option] + +pre = partial( + Option, + '--pre', + action='store_true', + default=False, + help="Include pre-release and development versions. By default, " + "pip only finds stable versions.", +) # type: Callable[..., Option] + +disable_pip_version_check = partial( + Option, + "--disable-pip-version-check", + dest="disable_pip_version_check", + action="store_true", + default=False, + help="Don't periodically check PyPI to determine whether a new version " + "of pip is available for download. Implied with --no-index.", +) # type: Callable[..., Option] + + +# Deprecated, Remove later +always_unzip = partial( + Option, + '-Z', '--always-unzip', + dest='always_unzip', + action='store_true', + help=SUPPRESS_HELP, +) # type: Callable[..., Option] + + +def _handle_merge_hash(option, opt_str, value, parser): + # type: (Option, str, str, OptionParser) -> None + """Given a value spelled "algo:digest", append the digest to a list + pointed to in a dict by the algo name.""" + if not parser.values.hashes: + parser.values.hashes = {} + try: + algo, digest = value.split(':', 1) + except ValueError: + parser.error('Arguments to %s must be a hash name ' + 'followed by a value, like --hash=sha256:abcde...' % + opt_str) + if algo not in STRONG_HASHES: + parser.error('Allowed hash algorithms for %s are %s.' % + (opt_str, ', '.join(STRONG_HASHES))) + parser.values.hashes.setdefault(algo, []).append(digest) + + +hash = partial( + Option, + '--hash', + # Hash values eventually end up in InstallRequirement.hashes due to + # __dict__ copying in process_line(). + dest='hashes', + action='callback', + callback=_handle_merge_hash, + type='string', + help="Verify that the package's archive matches this " + 'hash before installing. Example: --hash=sha256:abcdef...', +) # type: Callable[..., Option] + + +require_hashes = partial( + Option, + '--require-hashes', + dest='require_hashes', + action='store_true', + default=False, + help='Require a hash to check each requirement against, for ' + 'repeatable installs. 
This option is implied when any package in a ' + 'requirements file has a --hash option.', +) # type: Callable[..., Option] + + +list_path = partial( + PipOption, + '--path', + dest='path', + type='path', + action='append', + help='Restrict to the specified installation path for listing ' + 'packages (can be used multiple times).' +) # type: Callable[..., Option] + + +def check_list_path_option(options): + # type: (Values) -> None + if options.path and (options.user or options.local): + raise CommandError( + "Cannot combine '--path' with '--user' or '--local'" + ) + + +no_python_version_warning = partial( + Option, + '--no-python-version-warning', + dest='no_python_version_warning', + action='store_true', + default=False, + help='Silence deprecation warnings for upcoming unsupported Pythons.', +) # type: Callable[..., Option] + + +########## +# groups # +########## + +general_group = { + 'name': 'General Options', + 'options': [ + help_, + isolated_mode, + require_virtualenv, + verbose, + version, + quiet, + log, + no_input, + proxy, + retries, + timeout, + skip_requirements_regex, + exists_action, + trusted_host, + cert, + client_cert, + cache_dir, + no_cache, + disable_pip_version_check, + no_color, + no_python_version_warning, + ] +} # type: Dict[str, Any] + +index_group = { + 'name': 'Package Index Options', + 'options': [ + index_url, + extra_index_url, + no_index, + find_links, + ] +} # type: Dict[str, Any] diff --git a/venv/lib/python3.7/site-packages/pip/_internal/cli/command_context.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/command_context.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/cli/command_context.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/cli/command_context.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/cli/main.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/main.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/cli/main.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/cli/main.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/main_parser.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/main_parser.py new file mode 100644 index 0000000..a89821d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/main_parser.py @@ -0,0 +1,99 @@ +"""A single place for constructing and exposing the main parser +""" + +import os +import sys + +from pip._internal.cli import cmdoptions +from pip._internal.cli.parser import ( + ConfigOptionParser, + UpdatingDefaultsHelpFormatter, +) +from pip._internal.commands import commands_dict, get_similar_commands +from pip._internal.exceptions import CommandError +from pip._internal.utils.misc import get_pip_version, get_prog +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import Tuple, List + + +__all__ = ["create_main_parser", "parse_command"] + + +def create_main_parser(): + # type: () -> ConfigOptionParser + """Creates and returns the main parser for pip's CLI + """ + + parser_kw = { + 'usage': '\n%prog [options]', + 'add_help_option': False, + 'formatter': UpdatingDefaultsHelpFormatter(), + 'name': 'global', + 'prog': get_prog(), + } + + parser = ConfigOptionParser(**parser_kw) + parser.disable_interspersed_args() + + parser.version = get_pip_version() + + # add the general options + gen_opts = cmdoptions.make_option_group(cmdoptions.general_group, parser) + parser.add_option_group(gen_opts) + 
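general_group and index_group above follow the convention that make_option_group() documents: a dict with a 'name' and a list of zero-argument option factories. A sketch of defining and attaching one more group the same way; the --dry-run option here is purely hypothetical:

from functools import partial
from optparse import Option

from pip._internal.cli import cmdoptions
from pip._internal.cli.main_parser import create_main_parser

# Hypothetical option factory, built like the partial(...) factories above.
dry_run = partial(
    Option,
    '--dry-run',
    dest='dry_run',
    action='store_true',
    default=False,
    help='Illustrative option only.',
)
extra_group = {'name': 'Extra Options', 'options': [dry_run]}

parser = create_main_parser()
parser.add_option_group(cmdoptions.make_option_group(extra_group, parser))
options, args = parser.parse_args(['--dry-run'])
print(options.dry_run)  # True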
+ # so the help formatter knows + parser.main = True # type: ignore + + # create command listing for description + description = [''] + [ + '%-27s %s' % (name, command_info.summary) + for name, command_info in commands_dict.items() + ] + parser.description = '\n'.join(description) + + return parser + + +def parse_command(args): + # type: (List[str]) -> Tuple[str, List[str]] + parser = create_main_parser() + + # Note: parser calls disable_interspersed_args(), so the result of this + # call is to split the initial args into the general options before the + # subcommand and everything else. + # For example: + # args: ['--timeout=5', 'install', '--user', 'INITools'] + # general_options: ['--timeout==5'] + # args_else: ['install', '--user', 'INITools'] + general_options, args_else = parser.parse_args(args) + + # --version + if general_options.version: + sys.stdout.write(parser.version) # type: ignore + sys.stdout.write(os.linesep) + sys.exit() + + # pip || pip help -> print_help() + if not args_else or (args_else[0] == 'help' and len(args_else) == 1): + parser.print_help() + sys.exit() + + # the subcommand name + cmd_name = args_else[0] + + if cmd_name not in commands_dict: + guess = get_similar_commands(cmd_name) + + msg = ['unknown command "%s"' % cmd_name] + if guess: + msg.append('maybe you meant "%s"' % guess) + + raise CommandError(' - '.join(msg)) + + # all the args without the subcommand + cmd_args = args[:] + cmd_args.remove(cmd_name) + + return cmd_name, cmd_args diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/parser.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/parser.py new file mode 100644 index 0000000..c99456b --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/parser.py @@ -0,0 +1,265 @@ +"""Base option parser setup""" + +# The following comment should be removed at some point in the future. +# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import logging +import optparse +import sys +import textwrap +from distutils.util import strtobool + +from pip._vendor.six import string_types + +from pip._internal.cli.status_codes import UNKNOWN_ERROR +from pip._internal.configuration import Configuration, ConfigurationError +from pip._internal.utils.compat import get_terminal_size + +logger = logging.getLogger(__name__) + + +class PrettyHelpFormatter(optparse.IndentedHelpFormatter): + """A prettier/less verbose help formatter for optparse.""" + + def __init__(self, *args, **kwargs): + # help position must be aligned with __init__.parseopts.description + kwargs['max_help_position'] = 30 + kwargs['indent_increment'] = 1 + kwargs['width'] = get_terminal_size()[0] - 2 + optparse.IndentedHelpFormatter.__init__(self, *args, **kwargs) + + def format_option_strings(self, option): + return self._format_option_strings(option, ' <%s>', ', ') + + def _format_option_strings(self, option, mvarfmt=' <%s>', optsep=', '): + """ + Return a comma-separated list of option strings and metavars. 
+ + :param option: tuple of (short opt, long opt), e.g: ('-f', '--format') + :param mvarfmt: metavar format string - evaluated as mvarfmt % metavar + :param optsep: separator + """ + opts = [] + + if option._short_opts: + opts.append(option._short_opts[0]) + if option._long_opts: + opts.append(option._long_opts[0]) + if len(opts) > 1: + opts.insert(1, optsep) + + if option.takes_value(): + metavar = option.metavar or option.dest.lower() + opts.append(mvarfmt % metavar.lower()) + + return ''.join(opts) + + def format_heading(self, heading): + if heading == 'Options': + return '' + return heading + ':\n' + + def format_usage(self, usage): + """ + Ensure there is only one newline between usage and the first heading + if there is no description. + """ + msg = '\nUsage: %s\n' % self.indent_lines(textwrap.dedent(usage), " ") + return msg + + def format_description(self, description): + # leave full control over description to us + if description: + if hasattr(self.parser, 'main'): + label = 'Commands' + else: + label = 'Description' + # some doc strings have initial newlines, some don't + description = description.lstrip('\n') + # some doc strings have final newlines and spaces, some don't + description = description.rstrip() + # dedent, then reindent + description = self.indent_lines(textwrap.dedent(description), " ") + description = '%s:\n%s\n' % (label, description) + return description + else: + return '' + + def format_epilog(self, epilog): + # leave full control over epilog to us + if epilog: + return epilog + else: + return '' + + def indent_lines(self, text, indent): + new_lines = [indent + line for line in text.split('\n')] + return "\n".join(new_lines) + + +class UpdatingDefaultsHelpFormatter(PrettyHelpFormatter): + """Custom help formatter for use in ConfigOptionParser. + + This is updates the defaults before expanding them, allowing + them to show up correctly in the help listing. + """ + + def expand_default(self, option): + if self.parser is not None: + self.parser._update_defaults(self.parser.defaults) + return optparse.IndentedHelpFormatter.expand_default(self, option) + + +class CustomOptionParser(optparse.OptionParser): + + def insert_option_group(self, idx, *args, **kwargs): + """Insert an OptionGroup at a given position.""" + group = self.add_option_group(*args, **kwargs) + + self.option_groups.pop() + self.option_groups.insert(idx, group) + + return group + + @property + def option_list_all(self): + """Get a list of all options, including those in option groups.""" + res = self.option_list[:] + for i in self.option_groups: + res.extend(i.option_list) + + return res + + +class ConfigOptionParser(CustomOptionParser): + """Custom option parser which updates its defaults by checking the + configuration files and environmental variables""" + + def __init__(self, *args, **kwargs): + self.name = kwargs.pop('name') + + isolated = kwargs.pop("isolated", False) + self.config = Configuration(isolated) + + assert self.name + optparse.OptionParser.__init__(self, *args, **kwargs) + + def check_default(self, option, key, val): + try: + return option.check_value(key, val) + except optparse.OptionValueError as exc: + print("An error occurred during configuration: %s" % exc) + sys.exit(3) + + def _get_ordered_configuration_items(self): + # Configuration gives keys in an unordered manner. Order them. 
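As the class docstring above says, ConfigOptionParser folds pip's configuration files and PIP_* environment variables into the option defaults before parsing. A minimal way to observe that, assuming pip's usual PIP_<OPTION> environment-variable convention:

import os
from pip._internal.cli.main_parser import create_main_parser

os.environ['PIP_TIMEOUT'] = '60'   # lands in the ":env:" section handled below
parser = create_main_parser()
options, _ = parser.parse_args([])
print(options.timeout)             # 60.0 instead of the hard-coded default 15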
+ override_order = ["global", self.name, ":env:"] + + # Pool the options into different groups + section_items = {name: [] for name in override_order} + for section_key, val in self.config.items(): + # ignore empty values + if not val: + logger.debug( + "Ignoring configuration key '%s' as it's value is empty.", + section_key + ) + continue + + section, key = section_key.split(".", 1) + if section in override_order: + section_items[section].append((key, val)) + + # Yield each group in their override order + for section in override_order: + for key, val in section_items[section]: + yield key, val + + def _update_defaults(self, defaults): + """Updates the given defaults with values from the config files and + the environ. Does a little special handling for certain types of + options (lists).""" + + # Accumulate complex default state. + self.values = optparse.Values(self.defaults) + late_eval = set() + # Then set the options with those values + for key, val in self._get_ordered_configuration_items(): + # '--' because configuration supports only long names + option = self.get_option('--' + key) + + # Ignore options not present in this parser. E.g. non-globals put + # in [global] by users that want them to apply to all applicable + # commands. + if option is None: + continue + + if option.action in ('store_true', 'store_false', 'count'): + try: + val = strtobool(val) + except ValueError: + error_msg = invalid_config_error_message( + option.action, key, val + ) + self.error(error_msg) + + elif option.action == 'append': + val = val.split() + val = [self.check_default(option, key, v) for v in val] + elif option.action == 'callback': + late_eval.add(option.dest) + opt_str = option.get_opt_string() + val = option.convert_value(opt_str, val) + # From take_action + args = option.callback_args or () + kwargs = option.callback_kwargs or {} + option.callback(option, opt_str, val, self, *args, **kwargs) + else: + val = self.check_default(option, key, val) + + defaults[option.dest] = val + + for key in late_eval: + defaults[key] = getattr(self.values, key) + self.values = None + return defaults + + def get_default_values(self): + """Overriding to make updating the defaults after instantiation of + the option parser possible, _update_defaults() does the dirty work.""" + if not self.process_default_values: + # Old, pre-Optik 1.5 behaviour. 
+ return optparse.Values(self.defaults) + + # Load the configuration, or error out in case of an error + try: + self.config.load() + except ConfigurationError as err: + self.exit(UNKNOWN_ERROR, str(err)) + + defaults = self._update_defaults(self.defaults.copy()) # ours + for option in self._get_all_options(): + default = defaults.get(option.dest) + if isinstance(default, string_types): + opt_str = option.get_opt_string() + defaults[option.dest] = option.check_value(opt_str, default) + return optparse.Values(defaults) + + def error(self, msg): + self.print_usage(sys.stderr) + self.exit(UNKNOWN_ERROR, "%s\n" % msg) + + +def invalid_config_error_message(action, key, val): + """Returns a better error message when invalid configuration option + is provided.""" + if action in ('store_true', 'store_false'): + return ("{0} is not a valid value for {1} option, " + "please specify a boolean value like yes/no, " + "true/false or 1/0 instead.").format(val, key) + + return ("{0} is not a valid value for {1} option, " + "please specify a numerical value like 1/0 " + "instead.").format(val, key) diff --git a/venv/lib/python3.7/site-packages/pip/_internal/cli/req_command.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/req_command.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/cli/req_command.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/cli/req_command.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/status_codes.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/status_codes.py new file mode 100644 index 0000000..275360a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/cli/status_codes.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import + +SUCCESS = 0 +ERROR = 1 +UNKNOWN_ERROR = 2 +VIRTUALENV_NOT_FOUND = 3 +PREVIOUS_BUILD_DIR_ERROR = 4 +NO_MATCHES_FOUND = 23 diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__init__.py new file mode 100644 index 0000000..2a311f8 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__init__.py @@ -0,0 +1,114 @@ +""" +Package containing all pip commands +""" + +# The following comment should be removed at some point in the future. +# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import importlib +from collections import OrderedDict, namedtuple + +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import Any + from pip._internal.cli.base_command import Command + + +CommandInfo = namedtuple('CommandInfo', 'module_path, class_name, summary') + +# The ordering matters for help display. +# Also, even though the module path starts with the same +# "pip._internal.commands" prefix in each case, we include the full path +# because it makes testing easier (specifically when modifying commands_dict +# in test setup / teardown by adding info for a FakeCommand class defined +# in a test-related module). +# Finally, we need to pass an iterable of pairs here rather than a dict +# so that the ordering won't be lost when using Python 2.7. 
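The registry defined just below maps each CLI name to a CommandInfo(module_path, class_name, summary); create_command() then imports the module lazily and instantiates the class, which is also the hook the comment above mentions for injecting a FakeCommand in tests. A short illustrative lookup, assuming this vendored pip is importable:

from pip._internal.commands import commands_dict, create_command

info = commands_dict['install']
print(info.module_path, info.class_name)    # pip._internal.commands.install InstallCommand
cmd = create_command('install')             # lazy import + instantiation
print('%s - %s' % (cmd.name, cmd.summary))  # install - Install packages.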
+commands_dict = OrderedDict([ + ('install', CommandInfo( + 'pip._internal.commands.install', 'InstallCommand', + 'Install packages.', + )), + ('download', CommandInfo( + 'pip._internal.commands.download', 'DownloadCommand', + 'Download packages.', + )), + ('uninstall', CommandInfo( + 'pip._internal.commands.uninstall', 'UninstallCommand', + 'Uninstall packages.', + )), + ('freeze', CommandInfo( + 'pip._internal.commands.freeze', 'FreezeCommand', + 'Output installed packages in requirements format.', + )), + ('list', CommandInfo( + 'pip._internal.commands.list', 'ListCommand', + 'List installed packages.', + )), + ('show', CommandInfo( + 'pip._internal.commands.show', 'ShowCommand', + 'Show information about installed packages.', + )), + ('check', CommandInfo( + 'pip._internal.commands.check', 'CheckCommand', + 'Verify installed packages have compatible dependencies.', + )), + ('config', CommandInfo( + 'pip._internal.commands.configuration', 'ConfigurationCommand', + 'Manage local and global configuration.', + )), + ('search', CommandInfo( + 'pip._internal.commands.search', 'SearchCommand', + 'Search PyPI for packages.', + )), + ('wheel', CommandInfo( + 'pip._internal.commands.wheel', 'WheelCommand', + 'Build wheels from your requirements.', + )), + ('hash', CommandInfo( + 'pip._internal.commands.hash', 'HashCommand', + 'Compute hashes of package archives.', + )), + ('completion', CommandInfo( + 'pip._internal.commands.completion', 'CompletionCommand', + 'A helper command used for command completion.', + )), + ('debug', CommandInfo( + 'pip._internal.commands.debug', 'DebugCommand', + 'Show information useful for debugging.', + )), + ('help', CommandInfo( + 'pip._internal.commands.help', 'HelpCommand', + 'Show help for commands.', + )), +]) # type: OrderedDict[str, CommandInfo] + + +def create_command(name, **kwargs): + # type: (str, **Any) -> Command + """ + Create an instance of the Command class with the given name. 
+ """ + module_path, class_name, summary = commands_dict[name] + module = importlib.import_module(module_path) + command_class = getattr(module, class_name) + command = command_class(name=name, summary=summary, **kwargs) + + return command + + +def get_similar_commands(name): + """Command name auto-correct.""" + from difflib import get_close_matches + + name = name.lower() + + close_commands = get_close_matches(name, commands_dict.keys()) + + if close_commands: + return close_commands[0] + else: + return False diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..a05b59b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/check.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/check.cpython-37.pyc new file mode 100644 index 0000000..b579e50 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/check.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/completion.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/completion.cpython-37.pyc new file mode 100644 index 0000000..872c217 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/completion.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/configuration.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/configuration.cpython-37.pyc new file mode 100644 index 0000000..26586e8 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/configuration.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/debug.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/debug.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/debug.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/debug.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/download.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/download.cpython-37.pyc new file mode 100644 index 0000000..a9da7e6 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/download.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/freeze.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/freeze.cpython-37.pyc new file mode 100644 index 0000000..6826818 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/freeze.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/hash.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/hash.cpython-37.pyc new file mode 100644 index 0000000..eda6489 Binary files /dev/null and 
b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/hash.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/help.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/help.cpython-37.pyc new file mode 100644 index 0000000..aad49ca Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/help.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/install.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/install.cpython-37.pyc new file mode 100644 index 0000000..58b0e5f Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/install.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/list.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/list.cpython-37.pyc new file mode 100644 index 0000000..1b12057 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/list.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/search.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/search.cpython-37.pyc new file mode 100644 index 0000000..4d17c1c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/search.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/show.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/show.cpython-37.pyc new file mode 100644 index 0000000..550fd63 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/show.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/uninstall.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/uninstall.cpython-37.pyc new file mode 100644 index 0000000..d70beda Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/uninstall.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/wheel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/wheel.cpython-37.pyc new file mode 100644 index 0000000..7abd096 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/__pycache__/wheel.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/check.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/check.py new file mode 100644 index 0000000..9689446 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/check.py @@ -0,0 +1,45 @@ +# The following comment should be removed at some point in the future. 
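Editorial aside, not part of the committed diff: check.py below walks the installed package set and reports dependencies that are either missing or in conflict with what is installed. The sketch that follows is only a rough standalone approximation using setuptools' pkg_resources; the helper name find_broken_requirements is invented here, and pip's own implementation goes through create_package_set_from_installed() and check_package_set() instead.

import pkg_resources

def find_broken_requirements():
    """Yield (package, requirement, problem) tuples for the active environment."""
    for dist in pkg_resources.working_set:
        for req in dist.requires():
            try:
                pkg_resources.require(str(req))
            except pkg_resources.DistributionNotFound:
                yield (dist.project_name, str(req), 'not installed')
            except pkg_resources.VersionConflict as exc:
                yield (dist.project_name, str(req), 'conflicts with %s' % exc.dist)

if __name__ == '__main__':
    for name, req, problem in find_broken_requirements():
        print('%s requires %s (%s)' % (name, req, problem))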
+# mypy: disallow-untyped-defs=False + +import logging + +from pip._internal.cli.base_command import Command +from pip._internal.operations.check import ( + check_package_set, + create_package_set_from_installed, +) +from pip._internal.utils.misc import write_output + +logger = logging.getLogger(__name__) + + +class CheckCommand(Command): + """Verify installed packages have compatible dependencies.""" + + usage = """ + %prog [options]""" + + def run(self, options, args): + package_set, parsing_probs = create_package_set_from_installed() + missing, conflicting = check_package_set(package_set) + + for project_name in missing: + version = package_set[project_name].version + for dependency in missing[project_name]: + write_output( + "%s %s requires %s, which is not installed.", + project_name, version, dependency[0], + ) + + for project_name in conflicting: + version = package_set[project_name].version + for dep_name, dep_version, req in conflicting[project_name]: + write_output( + "%s %s has requirement %s, but you have %s %s.", + project_name, version, req, dep_name, dep_version, + ) + + if missing or conflicting or parsing_probs: + return 1 + else: + write_output("No broken requirements found.") diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/completion.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/completion.py new file mode 100644 index 0000000..c532806 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/completion.py @@ -0,0 +1,96 @@ +# The following comment should be removed at some point in the future. +# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import sys +import textwrap + +from pip._internal.cli.base_command import Command +from pip._internal.utils.misc import get_prog + +BASE_COMPLETION = """ +# pip %(shell)s completion start%(script)s# pip %(shell)s completion end +""" + +COMPLETION_SCRIPTS = { + 'bash': """ + _pip_completion() + { + COMPREPLY=( $( COMP_WORDS="${COMP_WORDS[*]}" \\ + COMP_CWORD=$COMP_CWORD \\ + PIP_AUTO_COMPLETE=1 $1 2>/dev/null ) ) + } + complete -o default -F _pip_completion %(prog)s + """, + 'zsh': """ + function _pip_completion { + local words cword + read -Ac words + read -cn cword + reply=( $( COMP_WORDS="$words[*]" \\ + COMP_CWORD=$(( cword-1 )) \\ + PIP_AUTO_COMPLETE=1 $words[1] 2>/dev/null )) + } + compctl -K _pip_completion %(prog)s + """, + 'fish': """ + function __fish_complete_pip + set -lx COMP_WORDS (commandline -o) "" + set -lx COMP_CWORD ( \\ + math (contains -i -- (commandline -t) $COMP_WORDS)-1 \\ + ) + set -lx PIP_AUTO_COMPLETE 1 + string split \\ -- (eval $COMP_WORDS[1]) + end + complete -fa "(__fish_complete_pip)" -c %(prog)s + """, +} + + +class CompletionCommand(Command): + """A helper command to be used for command completion.""" + + ignore_require_venv = True + + def __init__(self, *args, **kw): + super(CompletionCommand, self).__init__(*args, **kw) + + cmd_opts = self.cmd_opts + + cmd_opts.add_option( + '--bash', '-b', + action='store_const', + const='bash', + dest='shell', + help='Emit completion code for bash') + cmd_opts.add_option( + '--zsh', '-z', + action='store_const', + const='zsh', + dest='shell', + help='Emit completion code for zsh') + cmd_opts.add_option( + '--fish', '-f', + action='store_const', + const='fish', + dest='shell', + help='Emit completion code for fish') + + self.parser.insert_option_group(0, cmd_opts) + + def run(self, options, args): + """Prints the completion code of the given shell""" + shells = 
COMPLETION_SCRIPTS.keys() + shell_options = ['--' + shell for shell in sorted(shells)] + if options.shell in shells: + script = textwrap.dedent( + COMPLETION_SCRIPTS.get(options.shell, '') % { + 'prog': get_prog(), + } + ) + print(BASE_COMPLETION % {'script': script, 'shell': options.shell}) + else: + sys.stderr.write( + 'ERROR: You must pass %s\n' % ' or '.join(shell_options) + ) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/configuration.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/configuration.py new file mode 100644 index 0000000..efcf5bb --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/configuration.py @@ -0,0 +1,233 @@ +# The following comment should be removed at some point in the future. +# mypy: disallow-untyped-defs=False + +import logging +import os +import subprocess + +from pip._internal.cli.base_command import Command +from pip._internal.cli.status_codes import ERROR, SUCCESS +from pip._internal.configuration import ( + Configuration, + get_configuration_files, + kinds, +) +from pip._internal.exceptions import PipError +from pip._internal.utils.misc import get_prog, write_output + +logger = logging.getLogger(__name__) + + +class ConfigurationCommand(Command): + """Manage local and global configuration. + + Subcommands: + + list: List the active configuration (or from the file specified) + edit: Edit the configuration file in an editor + get: Get the value associated with name + set: Set the name=value + unset: Unset the value associated with name + + If none of --user, --global and --site are passed, a virtual + environment configuration file is used if one is active and the file + exists. Otherwise, all modifications happen on the to the user file by + default. + """ + + ignore_require_venv = True + usage = """ + %prog [] list + %prog [] [--editor ] edit + + %prog [] get name + %prog [] set name value + %prog [] unset name + """ + + def __init__(self, *args, **kwargs): + super(ConfigurationCommand, self).__init__(*args, **kwargs) + + self.configuration = None + + self.cmd_opts.add_option( + '--editor', + dest='editor', + action='store', + default=None, + help=( + 'Editor to use to edit the file. Uses VISUAL or EDITOR ' + 'environment variables if not provided.' + ) + ) + + self.cmd_opts.add_option( + '--global', + dest='global_file', + action='store_true', + default=False, + help='Use the system-wide configuration file only' + ) + + self.cmd_opts.add_option( + '--user', + dest='user_file', + action='store_true', + default=False, + help='Use the user configuration file only' + ) + + self.cmd_opts.add_option( + '--site', + dest='site_file', + action='store_true', + default=False, + help='Use the current environment configuration file only' + ) + + self.parser.insert_option_group(0, self.cmd_opts) + + def run(self, options, args): + handlers = { + "list": self.list_values, + "edit": self.open_in_editor, + "get": self.get_name, + "set": self.set_name_value, + "unset": self.unset_name + } + + # Determine action + if not args or args[0] not in handlers: + logger.error("Need an action ({}) to perform.".format( + ", ".join(sorted(handlers))) + ) + return ERROR + + action = args[0] + + # Determine which configuration files are to be loaded + # Depends on whether the command is modifying. 
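        # Editor's note (not part of the committed diff): the block below picks the
        # single configuration file ("kind") to operate on. _determine_file(), defined
        # further down, maps the mutually exclusive --user / --global / --site flags
        # to kinds.USER / kinds.GLOBAL / kinds.SITE; with no flag it returns None for
        # read-only actions, while modifying actions prefer an existing site file and
        # otherwise fall back to the user file. Passing more than one flag raises
        # PipError.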
+ try: + load_only = self._determine_file( + options, need_value=(action in ["get", "set", "unset", "edit"]) + ) + except PipError as e: + logger.error(e.args[0]) + return ERROR + + # Load a new configuration + self.configuration = Configuration( + isolated=options.isolated_mode, load_only=load_only + ) + self.configuration.load() + + # Error handling happens here, not in the action-handlers. + try: + handlers[action](options, args[1:]) + except PipError as e: + logger.error(e.args[0]) + return ERROR + + return SUCCESS + + def _determine_file(self, options, need_value): + file_options = [key for key, value in ( + (kinds.USER, options.user_file), + (kinds.GLOBAL, options.global_file), + (kinds.SITE, options.site_file), + ) if value] + + if not file_options: + if not need_value: + return None + # Default to user, unless there's a site file. + elif any( + os.path.exists(site_config_file) + for site_config_file in get_configuration_files()[kinds.SITE] + ): + return kinds.SITE + else: + return kinds.USER + elif len(file_options) == 1: + return file_options[0] + + raise PipError( + "Need exactly one file to operate upon " + "(--user, --site, --global) to perform." + ) + + def list_values(self, options, args): + self._get_n_args(args, "list", n=0) + + for key, value in sorted(self.configuration.items()): + write_output("%s=%r", key, value) + + def get_name(self, options, args): + key = self._get_n_args(args, "get [name]", n=1) + value = self.configuration.get_value(key) + + write_output("%s", value) + + def set_name_value(self, options, args): + key, value = self._get_n_args(args, "set [name] [value]", n=2) + self.configuration.set_value(key, value) + + self._save_configuration() + + def unset_name(self, options, args): + key = self._get_n_args(args, "unset [name]", n=1) + self.configuration.unset_value(key) + + self._save_configuration() + + def open_in_editor(self, options, args): + editor = self._determine_editor(options) + + fname = self.configuration.get_file_to_edit() + if fname is None: + raise PipError("Could not determine appropriate file.") + + try: + subprocess.check_call([editor, fname]) + except subprocess.CalledProcessError as e: + raise PipError( + "Editor Subprocess exited with exit code {}" + .format(e.returncode) + ) + + def _get_n_args(self, args, example, n): + """Helper to make sure the command got the right number of arguments + """ + if len(args) != n: + msg = ( + 'Got unexpected number of arguments, expected {}. ' + '(example: "{} config {}")' + ).format(n, get_prog(), example) + raise PipError(msg) + + if n == 1: + return args[0] + else: + return args + + def _save_configuration(self): + # We successfully ran a modifying command. Need to save the + # configuration. + try: + self.configuration.save() + except Exception: + logger.error( + "Unable to save configuration. 
Please report this as a bug.", + exc_info=1 + ) + raise PipError("Internal Error.") + + def _determine_editor(self, options): + if options.editor is not None: + return options.editor + elif "VISUAL" in os.environ: + return os.environ["VISUAL"] + elif "EDITOR" in os.environ: + return os.environ["EDITOR"] + else: + raise PipError("Could not determine editor to use.") diff --git a/venv/lib/python3.7/site-packages/pip/_internal/commands/debug.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/debug.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/commands/debug.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/commands/debug.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/download.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/download.py new file mode 100644 index 0000000..24da3eb --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/download.py @@ -0,0 +1,147 @@ +# The following comment should be removed at some point in the future. +# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import logging +import os + +from pip._internal.cli import cmdoptions +from pip._internal.cli.cmdoptions import make_target_python +from pip._internal.cli.req_command import RequirementCommand +from pip._internal.req import RequirementSet +from pip._internal.req.req_tracker import get_requirement_tracker +from pip._internal.utils.misc import ensure_dir, normalize_path, write_output +from pip._internal.utils.temp_dir import TempDirectory + +logger = logging.getLogger(__name__) + + +class DownloadCommand(RequirementCommand): + """ + Download packages from: + + - PyPI (and other indexes) using requirement specifiers. + - VCS project urls. + - Local project directories. + - Local or remote source archives. + + pip also supports downloading from "requirements files", which provide + an easy way to specify a whole environment to be downloaded. + """ + + usage = """ + %prog [options] [package-index-options] ... + %prog [options] -r [package-index-options] ... + %prog [options] ... + %prog [options] ... 
+ %prog [options] ...""" + + def __init__(self, *args, **kw): + super(DownloadCommand, self).__init__(*args, **kw) + + cmd_opts = self.cmd_opts + + cmd_opts.add_option(cmdoptions.constraints()) + cmd_opts.add_option(cmdoptions.requirements()) + cmd_opts.add_option(cmdoptions.build_dir()) + cmd_opts.add_option(cmdoptions.no_deps()) + cmd_opts.add_option(cmdoptions.global_options()) + cmd_opts.add_option(cmdoptions.no_binary()) + cmd_opts.add_option(cmdoptions.only_binary()) + cmd_opts.add_option(cmdoptions.prefer_binary()) + cmd_opts.add_option(cmdoptions.src()) + cmd_opts.add_option(cmdoptions.pre()) + cmd_opts.add_option(cmdoptions.no_clean()) + cmd_opts.add_option(cmdoptions.require_hashes()) + cmd_opts.add_option(cmdoptions.progress_bar()) + cmd_opts.add_option(cmdoptions.no_build_isolation()) + cmd_opts.add_option(cmdoptions.use_pep517()) + cmd_opts.add_option(cmdoptions.no_use_pep517()) + + cmd_opts.add_option( + '-d', '--dest', '--destination-dir', '--destination-directory', + dest='download_dir', + metavar='dir', + default=os.curdir, + help=("Download packages into ."), + ) + + cmdoptions.add_target_python_options(cmd_opts) + + index_opts = cmdoptions.make_option_group( + cmdoptions.index_group, + self.parser, + ) + + self.parser.insert_option_group(0, index_opts) + self.parser.insert_option_group(0, cmd_opts) + + def run(self, options, args): + options.ignore_installed = True + # editable doesn't really make sense for `pip download`, but the bowels + # of the RequirementSet code require that property. + options.editables = [] + + cmdoptions.check_dist_restriction(options) + + options.download_dir = normalize_path(options.download_dir) + + ensure_dir(options.download_dir) + + session = self.get_default_session(options) + + target_python = make_target_python(options) + finder = self._build_package_finder( + options=options, + session=session, + target_python=target_python, + ) + build_delete = (not (options.no_clean or options.build_dir)) + + with get_requirement_tracker() as req_tracker, TempDirectory( + options.build_dir, delete=build_delete, kind="download" + ) as directory: + + requirement_set = RequirementSet() + self.populate_requirement_set( + requirement_set, + args, + options, + finder, + session, + None + ) + + preparer = self.make_requirement_preparer( + temp_build_dir=directory, + options=options, + req_tracker=req_tracker, + session=session, + finder=finder, + download_dir=options.download_dir, + use_user_site=False, + ) + + resolver = self.make_resolver( + preparer=preparer, + finder=finder, + options=options, + py_version_info=options.python_version, + ) + + self.trace_basic_info(finder) + + resolver.resolve(requirement_set) + + downloaded = ' '.join([ + req.name for req in requirement_set.successfully_downloaded + ]) + if downloaded: + write_output('Successfully downloaded %s', downloaded) + + # Clean up + if not options.no_clean: + requirement_set.cleanup_files() + + return requirement_set diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/freeze.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/freeze.py new file mode 100644 index 0000000..c59eb39 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/freeze.py @@ -0,0 +1,103 @@ +# The following comment should be removed at some point in the future. 
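Editorial aside, not part of the committed diff: FreezeCommand below prints the environment in requirements format. A minimal approximation, assuming plain pkg_resources and ignoring the editable-install, --requirement and skip-list handling the real command performs (the function name naive_freeze is invented for this sketch):

import pkg_resources

def naive_freeze():
    """Yield name==version lines for every installed distribution, case-insensitively sorted."""
    for dist in sorted(pkg_resources.working_set, key=lambda d: d.project_name.lower()):
        yield '%s==%s' % (dist.project_name, dist.version)

if __name__ == '__main__':
    for line in naive_freeze():
        print(line)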
+# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import sys + +from pip._internal.cache import WheelCache +from pip._internal.cli import cmdoptions +from pip._internal.cli.base_command import Command +from pip._internal.models.format_control import FormatControl +from pip._internal.operations.freeze import freeze +from pip._internal.utils.compat import stdlib_pkgs + +DEV_PKGS = {'pip', 'setuptools', 'distribute', 'wheel'} + + +class FreezeCommand(Command): + """ + Output installed packages in requirements format. + + packages are listed in a case-insensitive sorted order. + """ + + usage = """ + %prog [options]""" + log_streams = ("ext://sys.stderr", "ext://sys.stderr") + + def __init__(self, *args, **kw): + super(FreezeCommand, self).__init__(*args, **kw) + + self.cmd_opts.add_option( + '-r', '--requirement', + dest='requirements', + action='append', + default=[], + metavar='file', + help="Use the order in the given requirements file and its " + "comments when generating output. This option can be " + "used multiple times.") + self.cmd_opts.add_option( + '-f', '--find-links', + dest='find_links', + action='append', + default=[], + metavar='URL', + help='URL for finding packages, which will be added to the ' + 'output.') + self.cmd_opts.add_option( + '-l', '--local', + dest='local', + action='store_true', + default=False, + help='If in a virtualenv that has global access, do not output ' + 'globally-installed packages.') + self.cmd_opts.add_option( + '--user', + dest='user', + action='store_true', + default=False, + help='Only output packages installed in user-site.') + self.cmd_opts.add_option(cmdoptions.list_path()) + self.cmd_opts.add_option( + '--all', + dest='freeze_all', + action='store_true', + help='Do not skip these packages in the output:' + ' %s' % ', '.join(DEV_PKGS)) + self.cmd_opts.add_option( + '--exclude-editable', + dest='exclude_editable', + action='store_true', + help='Exclude editable package from output.') + + self.parser.insert_option_group(0, self.cmd_opts) + + def run(self, options, args): + format_control = FormatControl(set(), set()) + wheel_cache = WheelCache(options.cache_dir, format_control) + skip = set(stdlib_pkgs) + if not options.freeze_all: + skip.update(DEV_PKGS) + + cmdoptions.check_list_path_option(options) + + freeze_kwargs = dict( + requirement=options.requirements, + find_links=options.find_links, + local_only=options.local, + user_only=options.user, + paths=options.path, + skip_regex=options.skip_requirements_regex, + isolated=options.isolated_mode, + wheel_cache=wheel_cache, + skip=skip, + exclude_editable=options.exclude_editable, + ) + + try: + for line in freeze(**freeze_kwargs): + sys.stdout.write(line + '\n') + finally: + wheel_cache.cleanup() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/hash.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/hash.py new file mode 100644 index 0000000..1dc7fb0 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/hash.py @@ -0,0 +1,58 @@ +# The following comment should be removed at some point in the future. 
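Editorial aside, not part of the committed diff: HashCommand below emits digests suitable for --hash= entries in a requirements file, reading archives in chunks so large files are never loaded whole. A minimal sketch with the standard hashlib module; the function name hash_file and the 8 KiB chunk size are choices made here, not pip's constants, and sha256 is used because it is the default algorithm in pip releases of this era.

import hashlib

def hash_file(path, algorithm='sha256', chunk_size=8192):
    """Return the hex digest of a file, read in fixed-size chunks."""
    digest = hashlib.new(algorithm)
    with open(path, 'rb') as archive:
        for chunk in iter(lambda: archive.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()

# A requirements.txt entry could then carry (hypothetical package name):
#   somepackage==1.0 --hash=sha256:<hex digest printed above>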
+# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import hashlib +import logging +import sys + +from pip._internal.cli.base_command import Command +from pip._internal.cli.status_codes import ERROR +from pip._internal.utils.hashes import FAVORITE_HASH, STRONG_HASHES +from pip._internal.utils.misc import read_chunks, write_output + +logger = logging.getLogger(__name__) + + +class HashCommand(Command): + """ + Compute a hash of a local package archive. + + These can be used with --hash in a requirements file to do repeatable + installs. + """ + + usage = '%prog [options] ...' + ignore_require_venv = True + + def __init__(self, *args, **kw): + super(HashCommand, self).__init__(*args, **kw) + self.cmd_opts.add_option( + '-a', '--algorithm', + dest='algorithm', + choices=STRONG_HASHES, + action='store', + default=FAVORITE_HASH, + help='The hash algorithm to use: one of %s' % + ', '.join(STRONG_HASHES)) + self.parser.insert_option_group(0, self.cmd_opts) + + def run(self, options, args): + if not args: + self.parser.print_usage(sys.stderr) + return ERROR + + algorithm = options.algorithm + for path in args: + write_output('%s:\n--hash=%s:%s', + path, algorithm, _hash_of_file(path, algorithm)) + + +def _hash_of_file(path, algorithm): + """Return the hash digest of a file.""" + with open(path, 'rb') as archive: + hash = hashlib.new(algorithm) + for chunk in read_chunks(archive): + hash.update(chunk) + return hash.hexdigest() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/help.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/help.py new file mode 100644 index 0000000..75af999 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/help.py @@ -0,0 +1,41 @@ +# The following comment should be removed at some point in the future. +# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +from pip._internal.cli.base_command import Command +from pip._internal.cli.status_codes import SUCCESS +from pip._internal.exceptions import CommandError + + +class HelpCommand(Command): + """Show help for commands""" + + usage = """ + %prog """ + ignore_require_venv = True + + def run(self, options, args): + from pip._internal.commands import ( + commands_dict, create_command, get_similar_commands, + ) + + try: + # 'pip help' with no args is handled by pip.__init__.parseopt() + cmd_name = args[0] # the command we need help for + except IndexError: + return SUCCESS + + if cmd_name not in commands_dict: + guess = get_similar_commands(cmd_name) + + msg = ['unknown command "%s"' % cmd_name] + if guess: + msg.append('maybe you meant "%s"' % guess) + + raise CommandError(' - '.join(msg)) + + command = create_command(cmd_name) + command.parser.print_help() + + return SUCCESS diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/install.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/install.py new file mode 100644 index 0000000..02a187c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/install.py @@ -0,0 +1,701 @@ +# The following comment should be removed at some point in the future. +# It's included for now because without it InstallCommand.run() has a +# couple errors where we have to know req.name is str rather than +# Optional[str] for the InstallRequirement req. 
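Editorial aside, not part of the committed diff: install.py below defines the InstallCommand class registered under 'install' in the commands_dict shown earlier in this diff. A small sketch of how that registry is meant to be used, relying only on names this vendored pip tree (circa 20.0) actually exposes and on calls that appear elsewhere in the diff; it must be run with the interpreter that owns this pip:

from pip._internal.commands import commands_dict, create_command

print(sorted(commands_dict))          # the registered CLI names: 'check', 'config', ..., 'install', ...

command = create_command('install')   # lazily imports pip._internal.commands.install
print(type(command).__name__)         # InstallCommand
command.parser.print_help()           # the same call HelpCommand.run() makes further below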
+# mypy: strict-optional=False +# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import errno +import logging +import operator +import os +import shutil +import site +from optparse import SUPPRESS_HELP + +from pip._vendor import pkg_resources +from pip._vendor.packaging.utils import canonicalize_name + +from pip._internal.cache import WheelCache +from pip._internal.cli import cmdoptions +from pip._internal.cli.cmdoptions import make_target_python +from pip._internal.cli.req_command import RequirementCommand +from pip._internal.cli.status_codes import ERROR, SUCCESS +from pip._internal.exceptions import ( + CommandError, + InstallationError, + PreviousBuildDirError, +) +from pip._internal.locations import distutils_scheme +from pip._internal.operations.check import check_install_conflicts +from pip._internal.req import RequirementSet, install_given_reqs +from pip._internal.req.req_tracker import get_requirement_tracker +from pip._internal.utils.deprecation import deprecated +from pip._internal.utils.distutils_args import parse_distutils_args +from pip._internal.utils.filesystem import test_writable_dir +from pip._internal.utils.misc import ( + ensure_dir, + get_installed_version, + protect_pip_from_modification_on_windows, + write_output, +) +from pip._internal.utils.temp_dir import TempDirectory +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from pip._internal.utils.virtualenv import virtualenv_no_global +from pip._internal.wheel_builder import build, should_build_for_install_command + +if MYPY_CHECK_RUNNING: + from optparse import Values + from typing import Any, Iterable, List, Optional + + from pip._internal.models.format_control import FormatControl + from pip._internal.req.req_install import InstallRequirement + from pip._internal.wheel_builder import BinaryAllowedPredicate + + +logger = logging.getLogger(__name__) + + +def get_check_binary_allowed(format_control): + # type: (FormatControl) -> BinaryAllowedPredicate + def check_binary_allowed(req): + # type: (InstallRequirement) -> bool + if req.use_pep517: + return True + canonical_name = canonicalize_name(req.name) + allowed_formats = format_control.get_allowed_formats(canonical_name) + return "binary" in allowed_formats + + return check_binary_allowed + + +class InstallCommand(RequirementCommand): + """ + Install packages from: + + - PyPI (and other indexes) using requirement specifiers. + - VCS project urls. + - Local project directories. + - Local or remote source archives. + + pip also supports installing from "requirements files", which provide + an easy way to specify a whole environment to be installed. + """ + + usage = """ + %prog [options] [package-index-options] ... + %prog [options] -r [package-index-options] ... + %prog [options] [-e] ... + %prog [options] [-e] ... + %prog [options] ...""" + + def __init__(self, *args, **kw): + super(InstallCommand, self).__init__(*args, **kw) + + cmd_opts = self.cmd_opts + + cmd_opts.add_option(cmdoptions.requirements()) + cmd_opts.add_option(cmdoptions.constraints()) + cmd_opts.add_option(cmdoptions.no_deps()) + cmd_opts.add_option(cmdoptions.pre()) + + cmd_opts.add_option(cmdoptions.editable()) + cmd_opts.add_option( + '-t', '--target', + dest='target_dir', + metavar='dir', + default=None, + help='Install packages into . ' + 'By default this will not replace existing files/folders in ' + '. Use --upgrade to replace existing packages in ' + 'with new versions.' 
+ ) + cmdoptions.add_target_python_options(cmd_opts) + + cmd_opts.add_option( + '--user', + dest='use_user_site', + action='store_true', + help="Install to the Python user install directory for your " + "platform. Typically ~/.local/, or %APPDATA%\\Python on " + "Windows. (See the Python documentation for site.USER_BASE " + "for full details.)") + cmd_opts.add_option( + '--no-user', + dest='use_user_site', + action='store_false', + help=SUPPRESS_HELP) + cmd_opts.add_option( + '--root', + dest='root_path', + metavar='dir', + default=None, + help="Install everything relative to this alternate root " + "directory.") + cmd_opts.add_option( + '--prefix', + dest='prefix_path', + metavar='dir', + default=None, + help="Installation prefix where lib, bin and other top-level " + "folders are placed") + + cmd_opts.add_option(cmdoptions.build_dir()) + + cmd_opts.add_option(cmdoptions.src()) + + cmd_opts.add_option( + '-U', '--upgrade', + dest='upgrade', + action='store_true', + help='Upgrade all specified packages to the newest available ' + 'version. The handling of dependencies depends on the ' + 'upgrade-strategy used.' + ) + + cmd_opts.add_option( + '--upgrade-strategy', + dest='upgrade_strategy', + default='only-if-needed', + choices=['only-if-needed', 'eager'], + help='Determines how dependency upgrading should be handled ' + '[default: %default]. ' + '"eager" - dependencies are upgraded regardless of ' + 'whether the currently installed version satisfies the ' + 'requirements of the upgraded package(s). ' + '"only-if-needed" - are upgraded only when they do not ' + 'satisfy the requirements of the upgraded package(s).' + ) + + cmd_opts.add_option( + '--force-reinstall', + dest='force_reinstall', + action='store_true', + help='Reinstall all packages even if they are already ' + 'up-to-date.') + + cmd_opts.add_option( + '-I', '--ignore-installed', + dest='ignore_installed', + action='store_true', + help='Ignore the installed packages, overwriting them. ' + 'This can break your system if the existing package ' + 'is of a different version or was installed ' + 'with a different package manager!' 
+ ) + + cmd_opts.add_option(cmdoptions.ignore_requires_python()) + cmd_opts.add_option(cmdoptions.no_build_isolation()) + cmd_opts.add_option(cmdoptions.use_pep517()) + cmd_opts.add_option(cmdoptions.no_use_pep517()) + + cmd_opts.add_option(cmdoptions.install_options()) + cmd_opts.add_option(cmdoptions.global_options()) + + cmd_opts.add_option( + "--compile", + action="store_true", + dest="compile", + default=True, + help="Compile Python source files to bytecode", + ) + + cmd_opts.add_option( + "--no-compile", + action="store_false", + dest="compile", + help="Do not compile Python source files to bytecode", + ) + + cmd_opts.add_option( + "--no-warn-script-location", + action="store_false", + dest="warn_script_location", + default=True, + help="Do not warn when installing scripts outside PATH", + ) + cmd_opts.add_option( + "--no-warn-conflicts", + action="store_false", + dest="warn_about_conflicts", + default=True, + help="Do not warn about broken dependencies", + ) + + cmd_opts.add_option(cmdoptions.no_binary()) + cmd_opts.add_option(cmdoptions.only_binary()) + cmd_opts.add_option(cmdoptions.prefer_binary()) + cmd_opts.add_option(cmdoptions.no_clean()) + cmd_opts.add_option(cmdoptions.require_hashes()) + cmd_opts.add_option(cmdoptions.progress_bar()) + + index_opts = cmdoptions.make_option_group( + cmdoptions.index_group, + self.parser, + ) + + self.parser.insert_option_group(0, index_opts) + self.parser.insert_option_group(0, cmd_opts) + + def run(self, options, args): + # type: (Values, List[Any]) -> int + cmdoptions.check_install_build_global(options) + upgrade_strategy = "to-satisfy-only" + if options.upgrade: + upgrade_strategy = options.upgrade_strategy + + cmdoptions.check_dist_restriction(options, check_target=True) + + install_options = options.install_options or [] + + options.use_user_site = decide_user_install( + options.use_user_site, + prefix_path=options.prefix_path, + target_dir=options.target_dir, + root_path=options.root_path, + isolated_mode=options.isolated_mode, + ) + + target_temp_dir = None # type: Optional[TempDirectory] + target_temp_dir_path = None # type: Optional[str] + if options.target_dir: + options.ignore_installed = True + options.target_dir = os.path.abspath(options.target_dir) + if (os.path.exists(options.target_dir) and not + os.path.isdir(options.target_dir)): + raise CommandError( + "Target path exists but is not a directory, will not " + "continue." 
+ ) + + # Create a target directory for using with the target option + target_temp_dir = TempDirectory(kind="target") + target_temp_dir_path = target_temp_dir.path + + global_options = options.global_options or [] + + session = self.get_default_session(options) + + target_python = make_target_python(options) + finder = self._build_package_finder( + options=options, + session=session, + target_python=target_python, + ignore_requires_python=options.ignore_requires_python, + ) + build_delete = (not (options.no_clean or options.build_dir)) + wheel_cache = WheelCache(options.cache_dir, options.format_control) + + with get_requirement_tracker() as req_tracker, TempDirectory( + options.build_dir, delete=build_delete, kind="install" + ) as directory: + requirement_set = RequirementSet( + check_supported_wheels=not options.target_dir, + ) + + try: + self.populate_requirement_set( + requirement_set, args, options, finder, session, + wheel_cache + ) + + warn_deprecated_install_options( + requirement_set, options.install_options + ) + + preparer = self.make_requirement_preparer( + temp_build_dir=directory, + options=options, + req_tracker=req_tracker, + session=session, + finder=finder, + use_user_site=options.use_user_site, + ) + resolver = self.make_resolver( + preparer=preparer, + finder=finder, + options=options, + wheel_cache=wheel_cache, + use_user_site=options.use_user_site, + ignore_installed=options.ignore_installed, + ignore_requires_python=options.ignore_requires_python, + force_reinstall=options.force_reinstall, + upgrade_strategy=upgrade_strategy, + use_pep517=options.use_pep517, + ) + + self.trace_basic_info(finder) + + resolver.resolve(requirement_set) + + try: + pip_req = requirement_set.get_requirement("pip") + except KeyError: + modifying_pip = None + else: + # If we're not replacing an already installed pip, + # we're not modifying it. + modifying_pip = pip_req.satisfied_by is None + protect_pip_from_modification_on_windows( + modifying_pip=modifying_pip + ) + + check_binary_allowed = get_check_binary_allowed( + finder.format_control + ) + + reqs_to_build = [ + r for r in requirement_set.requirements.values() + if should_build_for_install_command( + r, check_binary_allowed + ) + ] + + _, build_failures = build( + reqs_to_build, + wheel_cache=wheel_cache, + build_options=[], + global_options=[], + ) + + # If we're using PEP 517, we cannot do a direct install + # so we fail here. + # We don't care about failures building legacy + # requirements, as we'll fall through to a direct + # install for those. + pep517_build_failures = [ + r for r in build_failures if r.use_pep517 + ] + if pep517_build_failures: + raise InstallationError( + "Could not build wheels for {} which use" + " PEP 517 and cannot be installed directly".format( + ", ".join(r.name for r in pep517_build_failures))) + + to_install = resolver.get_installation_order( + requirement_set + ) + + # Consistency Checking of the package set we're installing. 
+ should_warn_about_conflicts = ( + not options.ignore_dependencies and + options.warn_about_conflicts + ) + if should_warn_about_conflicts: + self._warn_about_conflicts(to_install) + + # Don't warn about script install locations if + # --target has been specified + warn_script_location = options.warn_script_location + if options.target_dir: + warn_script_location = False + + installed = install_given_reqs( + to_install, + install_options, + global_options, + root=options.root_path, + home=target_temp_dir_path, + prefix=options.prefix_path, + pycompile=options.compile, + warn_script_location=warn_script_location, + use_user_site=options.use_user_site, + ) + + lib_locations = get_lib_location_guesses( + user=options.use_user_site, + home=target_temp_dir_path, + root=options.root_path, + prefix=options.prefix_path, + isolated=options.isolated_mode, + ) + working_set = pkg_resources.WorkingSet(lib_locations) + + installed.sort(key=operator.attrgetter('name')) + items = [] + for result in installed: + item = result.name + try: + installed_version = get_installed_version( + result.name, working_set=working_set + ) + if installed_version: + item += '-' + installed_version + except Exception: + pass + items.append(item) + installed_desc = ' '.join(items) + if installed_desc: + write_output( + 'Successfully installed %s', installed_desc, + ) + except EnvironmentError as error: + show_traceback = (self.verbosity >= 1) + + message = create_env_error_message( + error, show_traceback, options.use_user_site, + ) + logger.error(message, exc_info=show_traceback) + + return ERROR + except PreviousBuildDirError: + options.no_clean = True + raise + finally: + # Clean up + if not options.no_clean: + requirement_set.cleanup_files() + wheel_cache.cleanup() + + if options.target_dir: + self._handle_target_dir( + options.target_dir, target_temp_dir, options.upgrade + ) + + return SUCCESS + + def _handle_target_dir(self, target_dir, target_temp_dir, upgrade): + ensure_dir(target_dir) + + # Checking both purelib and platlib directories for installed + # packages to be moved to target directory + lib_dir_list = [] + + with target_temp_dir: + # Checking both purelib and platlib directories for installed + # packages to be moved to target directory + scheme = distutils_scheme('', home=target_temp_dir.path) + purelib_dir = scheme['purelib'] + platlib_dir = scheme['platlib'] + data_dir = scheme['data'] + + if os.path.exists(purelib_dir): + lib_dir_list.append(purelib_dir) + if os.path.exists(platlib_dir) and platlib_dir != purelib_dir: + lib_dir_list.append(platlib_dir) + if os.path.exists(data_dir): + lib_dir_list.append(data_dir) + + for lib_dir in lib_dir_list: + for item in os.listdir(lib_dir): + if lib_dir == data_dir: + ddir = os.path.join(data_dir, item) + if any(s.startswith(ddir) for s in lib_dir_list[:-1]): + continue + target_item_dir = os.path.join(target_dir, item) + if os.path.exists(target_item_dir): + if not upgrade: + logger.warning( + 'Target directory %s already exists. Specify ' + '--upgrade to force replacement.', + target_item_dir + ) + continue + if os.path.islink(target_item_dir): + logger.warning( + 'Target directory %s already exists and is ' + 'a link. 
Pip will not automatically replace ' + 'links, please remove if replacement is ' + 'desired.', + target_item_dir + ) + continue + if os.path.isdir(target_item_dir): + shutil.rmtree(target_item_dir) + else: + os.remove(target_item_dir) + + shutil.move( + os.path.join(lib_dir, item), + target_item_dir + ) + + def _warn_about_conflicts(self, to_install): + try: + package_set, _dep_info = check_install_conflicts(to_install) + except Exception: + logger.error("Error checking for conflicts.", exc_info=True) + return + missing, conflicting = _dep_info + + # NOTE: There is some duplication here from pip check + for project_name in missing: + version = package_set[project_name][0] + for dependency in missing[project_name]: + logger.critical( + "%s %s requires %s, which is not installed.", + project_name, version, dependency[1], + ) + + for project_name in conflicting: + version = package_set[project_name][0] + for dep_name, dep_version, req in conflicting[project_name]: + logger.critical( + "%s %s has requirement %s, but you'll have %s %s which is " + "incompatible.", + project_name, version, req, dep_name, dep_version, + ) + + +def get_lib_location_guesses(*args, **kwargs): + scheme = distutils_scheme('', *args, **kwargs) + return [scheme['purelib'], scheme['platlib']] + + +def site_packages_writable(**kwargs): + return all( + test_writable_dir(d) for d in set(get_lib_location_guesses(**kwargs)) + ) + + +def decide_user_install( + use_user_site, # type: Optional[bool] + prefix_path=None, # type: Optional[str] + target_dir=None, # type: Optional[str] + root_path=None, # type: Optional[str] + isolated_mode=False, # type: bool +): + # type: (...) -> bool + """Determine whether to do a user install based on the input options. + + If use_user_site is False, no additional checks are done. + If use_user_site is True, it is checked for compatibility with other + options. + If use_user_site is None, the default behaviour depends on the environment, + which is provided by the other arguments. + """ + # In some cases (config from tox), use_user_site can be set to an integer + # rather than a bool, which 'use_user_site is False' wouldn't catch. + if (use_user_site is not None) and (not use_user_site): + logger.debug("Non-user install by explicit request") + return False + + if use_user_site: + if prefix_path: + raise CommandError( + "Can not combine '--user' and '--prefix' as they imply " + "different installation locations" + ) + if virtualenv_no_global(): + raise InstallationError( + "Can not perform a '--user' install. User site-packages " + "are not visible in this virtualenv." + ) + logger.debug("User install by explicit request") + return True + + # If we are here, user installs have not been explicitly requested/avoided + assert use_user_site is None + + # user install incompatible with --prefix/--target + if prefix_path or target_dir: + logger.debug("Non-user install due to --prefix or --target option") + return False + + # If user installs are not enabled, choose a non-user install + if not site.ENABLE_USER_SITE: + logger.debug("Non-user install because user site-packages disabled") + return False + + # If we have permission for a non-user install, do that, + # otherwise do a user install. 
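    # Editor's note (not part of the committed diff): by this point an explicit
    # --user/--no-user request, a --prefix or --target install, and environments
    # where site.ENABLE_USER_SITE is disabled have all been handled above. The
    # remaining tie-breaker is writability: a writable site-packages gives a
    # normal install, otherwise pip falls back to a user install.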
+ if site_packages_writable(root=root_path, isolated=isolated_mode): + logger.debug("Non-user install because site-packages writeable") + return False + + logger.info("Defaulting to user installation because normal site-packages " + "is not writeable") + return True + + +def warn_deprecated_install_options(requirement_set, options): + # type: (RequirementSet, Optional[List[str]]) -> None + """If any location-changing --install-option arguments were passed for + requirements or on the command-line, then show a deprecation warning. + """ + def format_options(option_names): + # type: (Iterable[str]) -> List[str] + return ["--{}".format(name.replace("_", "-")) for name in option_names] + + requirements = ( + requirement_set.unnamed_requirements + + list(requirement_set.requirements.values()) + ) + + offenders = [] + + for requirement in requirements: + install_options = requirement.options.get("install_options", []) + location_options = parse_distutils_args(install_options) + if location_options: + offenders.append( + "{!r} from {}".format( + format_options(location_options.keys()), requirement + ) + ) + + if options: + location_options = parse_distutils_args(options) + if location_options: + offenders.append( + "{!r} from command line".format( + format_options(location_options.keys()) + ) + ) + + if not offenders: + return + + deprecated( + reason=( + "Location-changing options found in --install-option: {}. " + "This configuration may cause unexpected behavior and is " + "unsupported.".format( + "; ".join(offenders) + ) + ), + replacement=( + "using pip-level options like --user, --prefix, --root, and " + "--target" + ), + gone_in="20.2", + issue=7309, + ) + + +def create_env_error_message(error, show_traceback, using_user_site): + """Format an error message for an EnvironmentError + + It may occur anytime during the execution of the install command. + """ + parts = [] + + # Mention the error if we are not going to show a traceback + parts.append("Could not install packages due to an EnvironmentError") + if not show_traceback: + parts.append(": ") + parts.append(str(error)) + else: + parts.append(".") + + # Spilt the error indication from a helper message (if any) + parts[-1] += "\n" + + # Suggest useful actions to the user: + # (1) using user site-packages or (2) verifying the permissions + if error.errno == errno.EACCES: + user_option_part = "Consider using the `--user` option" + permissions_part = "Check the permissions" + + if not using_user_site: + parts.extend([ + user_option_part, " or ", + permissions_part.lower(), + ]) + else: + parts.append(permissions_part) + parts.append(".\n") + + return "".join(parts).strip() + "\n" diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/list.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/list.py new file mode 100644 index 0000000..cce470a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/list.py @@ -0,0 +1,313 @@ +# The following comment should be removed at some point in the future. 
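Editorial aside, not part of the committed diff: ListCommand below renders installed packages, by default as two columns (Package, Version) sorted case-insensitively. A rough stand-in for that default output, with none of the --outdated/--format/--not-required machinery; the function name simple_list is invented for this sketch.

import pkg_resources

def simple_list():
    """Print a two-column Package/Version table for the active environment."""
    dists = sorted(pkg_resources.working_set, key=lambda d: d.project_name.lower())
    width = max([len(d.project_name) for d in dists] + [len('Package')])
    print('%-*s %s' % (width, 'Package', 'Version'))
    print('%s %s' % ('-' * width, '-' * len('Version')))
    for dist in dists:
        print('%-*s %s' % (width, dist.project_name, dist.version))

if __name__ == '__main__':
    simple_list()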
+# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import json +import logging + +from pip._vendor import six +from pip._vendor.six.moves import zip_longest + +from pip._internal.cli import cmdoptions +from pip._internal.cli.req_command import IndexGroupCommand +from pip._internal.exceptions import CommandError +from pip._internal.index.package_finder import PackageFinder +from pip._internal.models.selection_prefs import SelectionPreferences +from pip._internal.self_outdated_check import make_link_collector +from pip._internal.utils.misc import ( + dist_is_editable, + get_installed_distributions, + write_output, +) +from pip._internal.utils.packaging import get_installer + +logger = logging.getLogger(__name__) + + +class ListCommand(IndexGroupCommand): + """ + List installed packages, including editables. + + Packages are listed in a case-insensitive sorted order. + """ + + usage = """ + %prog [options]""" + + def __init__(self, *args, **kw): + super(ListCommand, self).__init__(*args, **kw) + + cmd_opts = self.cmd_opts + + cmd_opts.add_option( + '-o', '--outdated', + action='store_true', + default=False, + help='List outdated packages') + cmd_opts.add_option( + '-u', '--uptodate', + action='store_true', + default=False, + help='List uptodate packages') + cmd_opts.add_option( + '-e', '--editable', + action='store_true', + default=False, + help='List editable projects.') + cmd_opts.add_option( + '-l', '--local', + action='store_true', + default=False, + help=('If in a virtualenv that has global access, do not list ' + 'globally-installed packages.'), + ) + self.cmd_opts.add_option( + '--user', + dest='user', + action='store_true', + default=False, + help='Only output packages installed in user-site.') + cmd_opts.add_option(cmdoptions.list_path()) + cmd_opts.add_option( + '--pre', + action='store_true', + default=False, + help=("Include pre-release and development versions. By default, " + "pip only finds stable versions."), + ) + + cmd_opts.add_option( + '--format', + action='store', + dest='list_format', + default="columns", + choices=('columns', 'freeze', 'json'), + help="Select the output format among: columns (default), freeze, " + "or json", + ) + + cmd_opts.add_option( + '--not-required', + action='store_true', + dest='not_required', + help="List packages that are not dependencies of " + "installed packages.", + ) + + cmd_opts.add_option( + '--exclude-editable', + action='store_false', + dest='include_editable', + help='Exclude editable package from output.', + ) + cmd_opts.add_option( + '--include-editable', + action='store_true', + dest='include_editable', + help='Include editable package from output.', + default=True, + ) + index_opts = cmdoptions.make_option_group( + cmdoptions.index_group, self.parser + ) + + self.parser.insert_option_group(0, index_opts) + self.parser.insert_option_group(0, cmd_opts) + + def _build_package_finder(self, options, session): + """ + Create a package finder appropriate to this list command. + """ + link_collector = make_link_collector(session, options=options) + + # Pass allow_yanked=False to ignore yanked versions. 
+ selection_prefs = SelectionPreferences( + allow_yanked=False, + allow_all_prereleases=options.pre, + ) + + return PackageFinder.create( + link_collector=link_collector, + selection_prefs=selection_prefs, + ) + + def run(self, options, args): + if options.outdated and options.uptodate: + raise CommandError( + "Options --outdated and --uptodate cannot be combined.") + + cmdoptions.check_list_path_option(options) + + packages = get_installed_distributions( + local_only=options.local, + user_only=options.user, + editables_only=options.editable, + include_editables=options.include_editable, + paths=options.path, + ) + + # get_not_required must be called firstly in order to find and + # filter out all dependencies correctly. Otherwise a package + # can't be identified as requirement because some parent packages + # could be filtered out before. + if options.not_required: + packages = self.get_not_required(packages, options) + + if options.outdated: + packages = self.get_outdated(packages, options) + elif options.uptodate: + packages = self.get_uptodate(packages, options) + + self.output_package_listing(packages, options) + + def get_outdated(self, packages, options): + return [ + dist for dist in self.iter_packages_latest_infos(packages, options) + if dist.latest_version > dist.parsed_version + ] + + def get_uptodate(self, packages, options): + return [ + dist for dist in self.iter_packages_latest_infos(packages, options) + if dist.latest_version == dist.parsed_version + ] + + def get_not_required(self, packages, options): + dep_keys = set() + for dist in packages: + dep_keys.update(requirement.key for requirement in dist.requires()) + return {pkg for pkg in packages if pkg.key not in dep_keys} + + def iter_packages_latest_infos(self, packages, options): + with self._build_session(options) as session: + finder = self._build_package_finder(options, session) + + for dist in packages: + typ = 'unknown' + all_candidates = finder.find_all_candidates(dist.key) + if not options.pre: + # Remove prereleases + all_candidates = [candidate for candidate in all_candidates + if not candidate.version.is_prerelease] + + evaluator = finder.make_candidate_evaluator( + project_name=dist.project_name, + ) + best_candidate = evaluator.sort_best_candidate(all_candidates) + if best_candidate is None: + continue + + remote_version = best_candidate.version + if best_candidate.link.is_wheel: + typ = 'wheel' + else: + typ = 'sdist' + # This is dirty but makes the rest of the code much cleaner + dist.latest_version = remote_version + dist.latest_filetype = typ + yield dist + + def output_package_listing(self, packages, options): + packages = sorted( + packages, + key=lambda dist: dist.project_name.lower(), + ) + if options.list_format == 'columns' and packages: + data, header = format_for_columns(packages, options) + self.output_package_listing_columns(data, header) + elif options.list_format == 'freeze': + for dist in packages: + if options.verbose >= 1: + write_output("%s==%s (%s)", dist.project_name, + dist.version, dist.location) + else: + write_output("%s==%s", dist.project_name, dist.version) + elif options.list_format == 'json': + write_output(format_for_json(packages, options)) + + def output_package_listing_columns(self, data, header): + # insert the header first: we need to know the size of column names + if len(data) > 0: + data.insert(0, header) + + pkg_strings, sizes = tabulate(data) + + # Create and add a separator. 
+ if len(data) > 0: + pkg_strings.insert(1, " ".join(map(lambda x: '-' * x, sizes))) + + for val in pkg_strings: + write_output(val) + + +def tabulate(vals): + # From pfmoore on GitHub: + # https://github.com/pypa/pip/issues/3651#issuecomment-216932564 + assert len(vals) > 0 + + sizes = [0] * max(len(x) for x in vals) + for row in vals: + sizes = [max(s, len(str(c))) for s, c in zip_longest(sizes, row)] + + result = [] + for row in vals: + display = " ".join([str(c).ljust(s) if c is not None else '' + for s, c in zip_longest(sizes, row)]) + result.append(display) + + return result, sizes + + +def format_for_columns(pkgs, options): + """ + Convert the package data into something usable + by output_package_listing_columns. + """ + running_outdated = options.outdated + # Adjust the header for the `pip list --outdated` case. + if running_outdated: + header = ["Package", "Version", "Latest", "Type"] + else: + header = ["Package", "Version"] + + data = [] + if options.verbose >= 1 or any(dist_is_editable(x) for x in pkgs): + header.append("Location") + if options.verbose >= 1: + header.append("Installer") + + for proj in pkgs: + # if we're working on the 'outdated' list, separate out the + # latest_version and type + row = [proj.project_name, proj.version] + + if running_outdated: + row.append(proj.latest_version) + row.append(proj.latest_filetype) + + if options.verbose >= 1 or dist_is_editable(proj): + row.append(proj.location) + if options.verbose >= 1: + row.append(get_installer(proj)) + + data.append(row) + + return data, header + + +def format_for_json(packages, options): + data = [] + for dist in packages: + info = { + 'name': dist.project_name, + 'version': six.text_type(dist.version), + } + if options.verbose >= 1: + info['location'] = dist.location + info['installer'] = get_installer(dist) + if options.outdated: + info['latest_version'] = six.text_type(dist.latest_version) + info['latest_filetype'] = dist.latest_filetype + data.append(info) + return json.dumps(data) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/search.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/search.py new file mode 100644 index 0000000..2e880ee --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/search.py @@ -0,0 +1,145 @@ +# The following comment should be removed at some point in the future. 
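Editorial aside, not part of the committed diff: SearchCommand below ranks the versions PyPI returns with highest_version(), which at the bottom of the file is simply max(versions, key=parse_version). A tiny demonstration of why that parse matters, using the same vendored import as the module; the version strings are made up:

from pip._vendor.packaging.version import parse as parse_version

versions = ['1.9', '1.10', '2.0rc1', '2.0']
latest = max(versions, key=parse_version)           # '2.0', not the lexicographic winner '2.0rc1'
print(latest, parse_version(latest).is_prerelease)  # 2.0 False
print(parse_version('2.0rc1').pre)                  # ('rc', 1) -> truthy, i.e. a pre-release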
+# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import logging +import sys +import textwrap +from collections import OrderedDict + +from pip._vendor import pkg_resources +from pip._vendor.packaging.version import parse as parse_version +# NOTE: XMLRPC Client is not annotated in typeshed as on 2017-07-17, which is +# why we ignore the type on this import +from pip._vendor.six.moves import xmlrpc_client # type: ignore + +from pip._internal.cli.base_command import Command +from pip._internal.cli.req_command import SessionCommandMixin +from pip._internal.cli.status_codes import NO_MATCHES_FOUND, SUCCESS +from pip._internal.exceptions import CommandError +from pip._internal.models.index import PyPI +from pip._internal.network.xmlrpc import PipXmlrpcTransport +from pip._internal.utils.compat import get_terminal_size +from pip._internal.utils.logging import indent_log +from pip._internal.utils.misc import write_output + +logger = logging.getLogger(__name__) + + +class SearchCommand(Command, SessionCommandMixin): + """Search for PyPI packages whose name or summary contains .""" + + usage = """ + %prog [options] """ + ignore_require_venv = True + + def __init__(self, *args, **kw): + super(SearchCommand, self).__init__(*args, **kw) + self.cmd_opts.add_option( + '-i', '--index', + dest='index', + metavar='URL', + default=PyPI.pypi_url, + help='Base URL of Python Package Index (default %default)') + + self.parser.insert_option_group(0, self.cmd_opts) + + def run(self, options, args): + if not args: + raise CommandError('Missing required argument (search query).') + query = args + pypi_hits = self.search(query, options) + hits = transform_hits(pypi_hits) + + terminal_width = None + if sys.stdout.isatty(): + terminal_width = get_terminal_size()[0] + + print_results(hits, terminal_width=terminal_width) + if pypi_hits: + return SUCCESS + return NO_MATCHES_FOUND + + def search(self, query, options): + index_url = options.index + + session = self.get_default_session(options) + + transport = PipXmlrpcTransport(index_url, session) + pypi = xmlrpc_client.ServerProxy(index_url, transport) + hits = pypi.search({'name': query, 'summary': query}, 'or') + return hits + + +def transform_hits(hits): + """ + The list from pypi is really a list of versions. We want a list of + packages with the list of versions stored inline. This converts the + list from pypi into one we can use. 
+ """ + packages = OrderedDict() + for hit in hits: + name = hit['name'] + summary = hit['summary'] + version = hit['version'] + + if name not in packages.keys(): + packages[name] = { + 'name': name, + 'summary': summary, + 'versions': [version], + } + else: + packages[name]['versions'].append(version) + + # if this is the highest version, replace summary and score + if version == highest_version(packages[name]['versions']): + packages[name]['summary'] = summary + + return list(packages.values()) + + +def print_results(hits, name_column_width=None, terminal_width=None): + if not hits: + return + if name_column_width is None: + name_column_width = max([ + len(hit['name']) + len(highest_version(hit.get('versions', ['-']))) + for hit in hits + ]) + 4 + + installed_packages = [p.project_name for p in pkg_resources.working_set] + for hit in hits: + name = hit['name'] + summary = hit['summary'] or '' + latest = highest_version(hit.get('versions', ['-'])) + if terminal_width is not None: + target_width = terminal_width - name_column_width - 5 + if target_width > 10: + # wrap and indent summary to fit terminal + summary = textwrap.wrap(summary, target_width) + summary = ('\n' + ' ' * (name_column_width + 3)).join(summary) + + line = '%-*s - %s' % (name_column_width, + '%s (%s)' % (name, latest), summary) + try: + write_output(line) + if name in installed_packages: + dist = pkg_resources.get_distribution(name) + with indent_log(): + if dist.version == latest: + write_output('INSTALLED: %s (latest)', dist.version) + else: + write_output('INSTALLED: %s', dist.version) + if parse_version(latest).pre: + write_output('LATEST: %s (pre-release; install' + ' with "pip install --pre")', latest) + else: + write_output('LATEST: %s', latest) + except UnicodeEncodeError: + pass + + +def highest_version(versions): + return max(versions, key=parse_version) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/show.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/show.py new file mode 100644 index 0000000..a46b08e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/show.py @@ -0,0 +1,180 @@ +# The following comment should be removed at some point in the future. +# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import logging +import os +from email.parser import FeedParser + +from pip._vendor import pkg_resources +from pip._vendor.packaging.utils import canonicalize_name + +from pip._internal.cli.base_command import Command +from pip._internal.cli.status_codes import ERROR, SUCCESS +from pip._internal.utils.misc import write_output + +logger = logging.getLogger(__name__) + + +class ShowCommand(Command): + """ + Show information about one or more installed packages. + + The output is in RFC-compliant mail header format. 
+ """ + + usage = """ + %prog [options] ...""" + ignore_require_venv = True + + def __init__(self, *args, **kw): + super(ShowCommand, self).__init__(*args, **kw) + self.cmd_opts.add_option( + '-f', '--files', + dest='files', + action='store_true', + default=False, + help='Show the full list of installed files for each package.') + + self.parser.insert_option_group(0, self.cmd_opts) + + def run(self, options, args): + if not args: + logger.warning('ERROR: Please provide a package name or names.') + return ERROR + query = args + + results = search_packages_info(query) + if not print_results( + results, list_files=options.files, verbose=options.verbose): + return ERROR + return SUCCESS + + +def search_packages_info(query): + """ + Gather details from installed distributions. Print distribution name, + version, location, and installed files. Installed files requires a + pip generated 'installed-files.txt' in the distributions '.egg-info' + directory. + """ + installed = {} + for p in pkg_resources.working_set: + installed[canonicalize_name(p.project_name)] = p + + query_names = [canonicalize_name(name) for name in query] + missing = sorted( + [name for name, pkg in zip(query, query_names) if pkg not in installed] + ) + if missing: + logger.warning('Package(s) not found: %s', ', '.join(missing)) + + def get_requiring_packages(package_name): + canonical_name = canonicalize_name(package_name) + return [ + pkg.project_name for pkg in pkg_resources.working_set + if canonical_name in + [canonicalize_name(required.name) for required in + pkg.requires()] + ] + + for dist in [installed[pkg] for pkg in query_names if pkg in installed]: + package = { + 'name': dist.project_name, + 'version': dist.version, + 'location': dist.location, + 'requires': [dep.project_name for dep in dist.requires()], + 'required_by': get_requiring_packages(dist.project_name) + } + file_list = None + metadata = None + if isinstance(dist, pkg_resources.DistInfoDistribution): + # RECORDs should be part of .dist-info metadatas + if dist.has_metadata('RECORD'): + lines = dist.get_metadata_lines('RECORD') + paths = [l.split(',')[0] for l in lines] + paths = [os.path.join(dist.location, p) for p in paths] + file_list = [os.path.relpath(p, dist.location) for p in paths] + + if dist.has_metadata('METADATA'): + metadata = dist.get_metadata('METADATA') + else: + # Otherwise use pip's log for .egg-info's + if dist.has_metadata('installed-files.txt'): + paths = dist.get_metadata_lines('installed-files.txt') + paths = [os.path.join(dist.egg_info, p) for p in paths] + file_list = [os.path.relpath(p, dist.location) for p in paths] + + if dist.has_metadata('PKG-INFO'): + metadata = dist.get_metadata('PKG-INFO') + + if dist.has_metadata('entry_points.txt'): + entry_points = dist.get_metadata_lines('entry_points.txt') + package['entry_points'] = entry_points + + if dist.has_metadata('INSTALLER'): + for line in dist.get_metadata_lines('INSTALLER'): + if line.strip(): + package['installer'] = line.strip() + break + + # @todo: Should pkg_resources.Distribution have a + # `get_pkg_info` method? 
+ feed_parser = FeedParser() + feed_parser.feed(metadata) + pkg_info_dict = feed_parser.close() + for key in ('metadata-version', 'summary', + 'home-page', 'author', 'author-email', 'license'): + package[key] = pkg_info_dict.get(key) + + # It looks like FeedParser cannot deal with repeated headers + classifiers = [] + for line in metadata.splitlines(): + if line.startswith('Classifier: '): + classifiers.append(line[len('Classifier: '):]) + package['classifiers'] = classifiers + + if file_list: + package['files'] = sorted(file_list) + yield package + + +def print_results(distributions, list_files=False, verbose=False): + """ + Print the informations from installed distributions found. + """ + results_printed = False + for i, dist in enumerate(distributions): + results_printed = True + if i > 0: + write_output("---") + + write_output("Name: %s", dist.get('name', '')) + write_output("Version: %s", dist.get('version', '')) + write_output("Summary: %s", dist.get('summary', '')) + write_output("Home-page: %s", dist.get('home-page', '')) + write_output("Author: %s", dist.get('author', '')) + write_output("Author-email: %s", dist.get('author-email', '')) + write_output("License: %s", dist.get('license', '')) + write_output("Location: %s", dist.get('location', '')) + write_output("Requires: %s", ', '.join(dist.get('requires', []))) + write_output("Required-by: %s", ', '.join(dist.get('required_by', []))) + + if verbose: + write_output("Metadata-Version: %s", + dist.get('metadata-version', '')) + write_output("Installer: %s", dist.get('installer', '')) + write_output("Classifiers:") + for classifier in dist.get('classifiers', []): + write_output(" %s", classifier) + write_output("Entry-points:") + for entry in dist.get('entry_points', []): + write_output(" %s", entry.strip()) + if list_files: + write_output("Files:") + for line in dist.get('files', []): + write_output(" %s", line.strip()) + if "files" not in dist: + write_output("Cannot locate installed-files.txt") + return results_printed diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/uninstall.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/uninstall.py new file mode 100644 index 0000000..1bde414 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/uninstall.py @@ -0,0 +1,82 @@ +# The following comment should be removed at some point in the future. +# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +from pip._vendor.packaging.utils import canonicalize_name + +from pip._internal.cli.base_command import Command +from pip._internal.cli.req_command import SessionCommandMixin +from pip._internal.exceptions import InstallationError +from pip._internal.req import parse_requirements +from pip._internal.req.constructors import install_req_from_line +from pip._internal.utils.misc import protect_pip_from_modification_on_windows + + +class UninstallCommand(Command, SessionCommandMixin): + """ + Uninstall packages. + + pip is able to uninstall most installed packages. Known exceptions are: + + - Pure distutils packages installed with ``python setup.py install``, which + leave behind no metadata to determine what files were installed. + - Script wrappers installed by ``python setup.py develop``. + """ + + usage = """ + %prog [options] ... 
+ %prog [options] -r ...""" + + def __init__(self, *args, **kw): + super(UninstallCommand, self).__init__(*args, **kw) + self.cmd_opts.add_option( + '-r', '--requirement', + dest='requirements', + action='append', + default=[], + metavar='file', + help='Uninstall all the packages listed in the given requirements ' + 'file. This option can be used multiple times.', + ) + self.cmd_opts.add_option( + '-y', '--yes', + dest='yes', + action='store_true', + help="Don't ask for confirmation of uninstall deletions.") + + self.parser.insert_option_group(0, self.cmd_opts) + + def run(self, options, args): + session = self.get_default_session(options) + + reqs_to_uninstall = {} + for name in args: + req = install_req_from_line( + name, isolated=options.isolated_mode, + ) + if req.name: + reqs_to_uninstall[canonicalize_name(req.name)] = req + for filename in options.requirements: + for req in parse_requirements( + filename, + options=options, + session=session): + if req.name: + reqs_to_uninstall[canonicalize_name(req.name)] = req + if not reqs_to_uninstall: + raise InstallationError( + 'You must give at least one requirement to %(name)s (see ' + '"pip help %(name)s")' % dict(name=self.name) + ) + + protect_pip_from_modification_on_windows( + modifying_pip="pip" in reqs_to_uninstall + ) + + for req in reqs_to_uninstall.values(): + uninstall_pathset = req.uninstall( + auto_confirm=options.yes, verbose=self.verbosity > 0, + ) + if uninstall_pathset: + uninstall_pathset.commit() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/wheel.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/wheel.py new file mode 100644 index 0000000..eb44bce --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/commands/wheel.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- + +# The following comment should be removed at some point in the future. +# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import logging +import os +import shutil + +from pip._internal.cache import WheelCache +from pip._internal.cli import cmdoptions +from pip._internal.cli.req_command import RequirementCommand +from pip._internal.exceptions import CommandError, PreviousBuildDirError +from pip._internal.req import RequirementSet +from pip._internal.req.req_tracker import get_requirement_tracker +from pip._internal.utils.misc import ensure_dir, normalize_path +from pip._internal.utils.temp_dir import TempDirectory +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from pip._internal.wheel_builder import build, should_build_for_wheel_command + +if MYPY_CHECK_RUNNING: + from optparse import Values + from typing import Any, List + + +logger = logging.getLogger(__name__) + + +class WheelCommand(RequirementCommand): + """ + Build Wheel archives for your requirements and dependencies. + + Wheel is a built-package format, and offers the advantage of not + recompiling your software during every install. For more details, see the + wheel docs: https://wheel.readthedocs.io/en/latest/ + + Requirements: setuptools>=0.8, and wheel. + + 'pip wheel' uses the bdist_wheel setuptools extension from the wheel + package to build individual wheels. + + """ + + usage = """ + %prog [options] ... + %prog [options] -r ... + %prog [options] [-e] ... + %prog [options] [-e] ... 
+ %prog [options] ...""" + + def __init__(self, *args, **kw): + super(WheelCommand, self).__init__(*args, **kw) + + cmd_opts = self.cmd_opts + + cmd_opts.add_option( + '-w', '--wheel-dir', + dest='wheel_dir', + metavar='dir', + default=os.curdir, + help=("Build wheels into , where the default is the " + "current working directory."), + ) + cmd_opts.add_option(cmdoptions.no_binary()) + cmd_opts.add_option(cmdoptions.only_binary()) + cmd_opts.add_option(cmdoptions.prefer_binary()) + cmd_opts.add_option( + '--build-option', + dest='build_options', + metavar='options', + action='append', + help="Extra arguments to be supplied to 'setup.py bdist_wheel'.", + ) + cmd_opts.add_option(cmdoptions.no_build_isolation()) + cmd_opts.add_option(cmdoptions.use_pep517()) + cmd_opts.add_option(cmdoptions.no_use_pep517()) + cmd_opts.add_option(cmdoptions.constraints()) + cmd_opts.add_option(cmdoptions.editable()) + cmd_opts.add_option(cmdoptions.requirements()) + cmd_opts.add_option(cmdoptions.src()) + cmd_opts.add_option(cmdoptions.ignore_requires_python()) + cmd_opts.add_option(cmdoptions.no_deps()) + cmd_opts.add_option(cmdoptions.build_dir()) + cmd_opts.add_option(cmdoptions.progress_bar()) + + cmd_opts.add_option( + '--global-option', + dest='global_options', + action='append', + metavar='options', + help="Extra global options to be supplied to the setup.py " + "call before the 'bdist_wheel' command.") + + cmd_opts.add_option( + '--pre', + action='store_true', + default=False, + help=("Include pre-release and development versions. By default, " + "pip only finds stable versions."), + ) + + cmd_opts.add_option(cmdoptions.no_clean()) + cmd_opts.add_option(cmdoptions.require_hashes()) + + index_opts = cmdoptions.make_option_group( + cmdoptions.index_group, + self.parser, + ) + + self.parser.insert_option_group(0, index_opts) + self.parser.insert_option_group(0, cmd_opts) + + def run(self, options, args): + # type: (Values, List[Any]) -> None + cmdoptions.check_install_build_global(options) + + session = self.get_default_session(options) + + finder = self._build_package_finder(options, session) + build_delete = (not (options.no_clean or options.build_dir)) + wheel_cache = WheelCache(options.cache_dir, options.format_control) + + options.wheel_dir = normalize_path(options.wheel_dir) + ensure_dir(options.wheel_dir) + + with get_requirement_tracker() as req_tracker, TempDirectory( + options.build_dir, delete=build_delete, kind="wheel" + ) as directory: + + requirement_set = RequirementSet() + + try: + self.populate_requirement_set( + requirement_set, args, options, finder, session, + wheel_cache + ) + + preparer = self.make_requirement_preparer( + temp_build_dir=directory, + options=options, + req_tracker=req_tracker, + session=session, + finder=finder, + wheel_download_dir=options.wheel_dir, + use_user_site=False, + ) + + resolver = self.make_resolver( + preparer=preparer, + finder=finder, + options=options, + wheel_cache=wheel_cache, + ignore_requires_python=options.ignore_requires_python, + use_pep517=options.use_pep517, + ) + + self.trace_basic_info(finder) + + resolver.resolve(requirement_set) + + reqs_to_build = [ + r for r in requirement_set.requirements.values() + if should_build_for_wheel_command(r) + ] + + # build wheels + build_successes, build_failures = build( + reqs_to_build, + wheel_cache=wheel_cache, + build_options=options.build_options or [], + global_options=options.global_options or [], + ) + for req in build_successes: + assert req.link and req.link.is_wheel + assert req.local_file_path + 
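For context on how the options wired up above are exercised, a hedged sketch of driving pip wheel from a script; the requirements file name is hypothetical and the flags are the ones this command defines:

import subprocess
import sys

subprocess.check_call([
    sys.executable, "-m", "pip", "wheel",
    "--wheel-dir", "wheelhouse",   # -w/--wheel-dir, as declared above
    "--no-deps",                   # cmdoptions.no_deps(), also wired up above
    "-r", "requirements.txt",      # hypothetical requirements file
])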
# copy from cache to target directory + try: + shutil.copy(req.local_file_path, options.wheel_dir) + except OSError as e: + logger.warning( + "Building wheel for %s failed: %s", + req.name, e, + ) + build_failures.append(req) + if len(build_failures) != 0: + raise CommandError( + "Failed to build one or more wheels" + ) + except PreviousBuildDirError: + options.no_clean = True + raise + finally: + if not options.no_clean: + requirement_set.cleanup_files() + wheel_cache.cleanup() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/configuration.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/configuration.py new file mode 100644 index 0000000..f09a1ae --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/configuration.py @@ -0,0 +1,422 @@ +"""Configuration management setup + +Some terminology: +- name + As written in config files. +- value + Value associated with a name +- key + Name combined with it's section (section.name) +- variant + A single word describing where the configuration key-value pair came from +""" + +# The following comment should be removed at some point in the future. +# mypy: strict-optional=False + +import locale +import logging +import os +import sys + +from pip._vendor.six.moves import configparser + +from pip._internal.exceptions import ( + ConfigurationError, + ConfigurationFileCouldNotBeLoaded, +) +from pip._internal.utils import appdirs +from pip._internal.utils.compat import WINDOWS, expanduser +from pip._internal.utils.misc import ensure_dir, enum +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import ( + Any, Dict, Iterable, List, NewType, Optional, Tuple + ) + + RawConfigParser = configparser.RawConfigParser # Shorthand + Kind = NewType("Kind", str) + +logger = logging.getLogger(__name__) + + +# NOTE: Maybe use the optionx attribute to normalize keynames. +def _normalize_name(name): + # type: (str) -> str + """Make a name consistent regardless of source (environment or file) + """ + name = name.lower().replace('_', '-') + if name.startswith('--'): + name = name[2:] # only prefer long opts + return name + + +def _disassemble_key(name): + # type: (str) -> List[str] + if "." not in name: + error_message = ( + "Key does not contain dot separated section and key. " + "Perhaps you wanted to use 'global.{}' instead?" + ).format(name) + raise ConfigurationError(error_message) + return name.split(".", 1) + + +# The kinds of configurations there are. +kinds = enum( + USER="user", # User Specific + GLOBAL="global", # System Wide + SITE="site", # [Virtual] Environment Specific + ENV="env", # from PIP_CONFIG_FILE + ENV_VAR="env-var", # from Environment Variables +) + + +CONFIG_BASENAME = 'pip.ini' if WINDOWS else 'pip.conf' + + +def get_configuration_files(): + # type: () -> Dict[Kind, List[str]] + global_config_files = [ + os.path.join(path, CONFIG_BASENAME) + for path in appdirs.site_config_dirs('pip') + ] + + site_config_file = os.path.join(sys.prefix, CONFIG_BASENAME) + legacy_config_file = os.path.join( + expanduser('~'), + 'pip' if WINDOWS else '.pip', + CONFIG_BASENAME, + ) + new_config_file = os.path.join( + appdirs.user_config_dir("pip"), CONFIG_BASENAME + ) + return { + kinds.GLOBAL: global_config_files, + kinds.SITE: [site_config_file], + kinds.USER: [legacy_config_file, new_config_file], + } + + +class Configuration(object): + """Handles management of configuration. + + Provides an interface to accessing and managing configuration files. 
+ + This class converts provides an API that takes "section.key-name" style + keys and stores the value associated with it as "key-name" under the + section "section". + + This allows for a clean interface wherein the both the section and the + key-name are preserved in an easy to manage form in the configuration files + and the data stored is also nice. + """ + + def __init__(self, isolated, load_only=None): + # type: (bool, Kind) -> None + super(Configuration, self).__init__() + + _valid_load_only = [kinds.USER, kinds.GLOBAL, kinds.SITE, None] + if load_only not in _valid_load_only: + raise ConfigurationError( + "Got invalid value for load_only - should be one of {}".format( + ", ".join(map(repr, _valid_load_only[:-1])) + ) + ) + self.isolated = isolated # type: bool + self.load_only = load_only # type: Optional[Kind] + + # The order here determines the override order. + self._override_order = [ + kinds.GLOBAL, kinds.USER, kinds.SITE, kinds.ENV, kinds.ENV_VAR + ] + + self._ignore_env_names = ["version", "help"] + + # Because we keep track of where we got the data from + self._parsers = { + variant: [] for variant in self._override_order + } # type: Dict[Kind, List[Tuple[str, RawConfigParser]]] + self._config = { + variant: {} for variant in self._override_order + } # type: Dict[Kind, Dict[str, Any]] + self._modified_parsers = [] # type: List[Tuple[str, RawConfigParser]] + + def load(self): + # type: () -> None + """Loads configuration from configuration files and environment + """ + self._load_config_files() + if not self.isolated: + self._load_environment_vars() + + def get_file_to_edit(self): + # type: () -> Optional[str] + """Returns the file with highest priority in configuration + """ + assert self.load_only is not None, \ + "Need to be specified a file to be editing" + + try: + return self._get_parser_to_modify()[0] + except IndexError: + return None + + def items(self): + # type: () -> Iterable[Tuple[str, Any]] + """Returns key-value pairs like dict.items() representing the loaded + configuration + """ + return self._dictionary.items() + + def get_value(self, key): + # type: (str) -> Any + """Get a value from the configuration. + """ + try: + return self._dictionary[key] + except KeyError: + raise ConfigurationError("No such key - {}".format(key)) + + def set_value(self, key, value): + # type: (str, Any) -> None + """Modify a value in the configuration. + """ + self._ensure_have_load_only() + + fname, parser = self._get_parser_to_modify() + + if parser is not None: + section, name = _disassemble_key(key) + + # Modify the parser and the configuration + if not parser.has_section(section): + parser.add_section(section) + parser.set(section, name, value) + + self._config[self.load_only][key] = value + self._mark_as_modified(fname, parser) + + def unset_value(self, key): + # type: (str) -> None + """Unset a value in the configuration. 
+ """ + self._ensure_have_load_only() + + if key not in self._config[self.load_only]: + raise ConfigurationError("No such key - {}".format(key)) + + fname, parser = self._get_parser_to_modify() + + if parser is not None: + section, name = _disassemble_key(key) + + # Remove the key in the parser + modified_something = False + if parser.has_section(section): + # Returns whether the option was removed or not + modified_something = parser.remove_option(section, name) + + if modified_something: + # name removed from parser, section may now be empty + section_iter = iter(parser.items(section)) + try: + val = next(section_iter) + except StopIteration: + val = None + + if val is None: + parser.remove_section(section) + + self._mark_as_modified(fname, parser) + else: + raise ConfigurationError( + "Fatal Internal error [id=1]. Please report as a bug." + ) + + del self._config[self.load_only][key] + + def save(self): + # type: () -> None + """Save the current in-memory state. + """ + self._ensure_have_load_only() + + for fname, parser in self._modified_parsers: + logger.info("Writing to %s", fname) + + # Ensure directory exists. + ensure_dir(os.path.dirname(fname)) + + with open(fname, "w") as f: + parser.write(f) + + # + # Private routines + # + + def _ensure_have_load_only(self): + # type: () -> None + if self.load_only is None: + raise ConfigurationError("Needed a specific file to be modifying.") + logger.debug("Will be working with %s variant only", self.load_only) + + @property + def _dictionary(self): + # type: () -> Dict[str, Any] + """A dictionary representing the loaded configuration. + """ + # NOTE: Dictionaries are not populated if not loaded. So, conditionals + # are not needed here. + retval = {} + + for variant in self._override_order: + retval.update(self._config[variant]) + + return retval + + def _load_config_files(self): + # type: () -> None + """Loads configuration from configuration files + """ + config_files = dict(self._iter_config_files()) + if config_files[kinds.ENV][0:1] == [os.devnull]: + logger.debug( + "Skipping loading configuration files due to " + "environment's PIP_CONFIG_FILE being os.devnull" + ) + return + + for variant, files in config_files.items(): + for fname in files: + # If there's specific variant set in `load_only`, load only + # that variant, not the others. + if self.load_only is not None and variant != self.load_only: + logger.debug( + "Skipping file '%s' (variant: %s)", fname, variant + ) + continue + + parser = self._load_file(variant, fname) + + # Keeping track of the parsers used + self._parsers[variant].append((fname, parser)) + + def _load_file(self, variant, fname): + # type: (Kind, str) -> RawConfigParser + logger.debug("For variant '%s', will try loading '%s'", variant, fname) + parser = self._construct_parser(fname) + + for section in parser.sections(): + items = parser.items(section) + self._config[variant].update(self._normalized_keys(section, items)) + + return parser + + def _construct_parser(self, fname): + # type: (str) -> RawConfigParser + parser = configparser.RawConfigParser() + # If there is no such file, don't bother reading it but create the + # parser anyway, to hold the data. + # Doing this is useful when modifying and saving files, where we don't + # need to construct a parser. 
+ if os.path.exists(fname): + try: + parser.read(fname) + except UnicodeDecodeError: + # See https://github.com/pypa/pip/issues/4963 + raise ConfigurationFileCouldNotBeLoaded( + reason="contains invalid {} characters".format( + locale.getpreferredencoding(False) + ), + fname=fname, + ) + except configparser.Error as error: + # See https://github.com/pypa/pip/issues/4893 + raise ConfigurationFileCouldNotBeLoaded(error=error) + return parser + + def _load_environment_vars(self): + # type: () -> None + """Loads configuration from environment variables + """ + self._config[kinds.ENV_VAR].update( + self._normalized_keys(":env:", self._get_environ_vars()) + ) + + def _normalized_keys(self, section, items): + # type: (str, Iterable[Tuple[str, Any]]) -> Dict[str, Any] + """Normalizes items to construct a dictionary with normalized keys. + + This routine is where the names become keys and are made the same + regardless of source - configuration files or environment. + """ + normalized = {} + for name, val in items: + key = section + "." + _normalize_name(name) + normalized[key] = val + return normalized + + def _get_environ_vars(self): + # type: () -> Iterable[Tuple[str, str]] + """Returns a generator with all environmental vars with prefix PIP_""" + for key, val in os.environ.items(): + should_be_yielded = ( + key.startswith("PIP_") and + key[4:].lower() not in self._ignore_env_names + ) + if should_be_yielded: + yield key[4:].lower(), val + + # XXX: This is patched in the tests. + def _iter_config_files(self): + # type: () -> Iterable[Tuple[Kind, List[str]]] + """Yields variant and configuration files associated with it. + + This should be treated like items of a dictionary. + """ + # SMELL: Move the conditions out of this function + + # environment variables have the lowest priority + config_file = os.environ.get('PIP_CONFIG_FILE', None) + if config_file is not None: + yield kinds.ENV, [config_file] + else: + yield kinds.ENV, [] + + config_files = get_configuration_files() + + # at the base we have any global configuration + yield kinds.GLOBAL, config_files[kinds.GLOBAL] + + # per-user configuration next + should_load_user_config = not self.isolated and not ( + config_file and os.path.exists(config_file) + ) + if should_load_user_config: + # The legacy config file is overridden by the new config file + yield kinds.USER, config_files[kinds.USER] + + # finally virtualenv configuration first trumping others + yield kinds.SITE, config_files[kinds.SITE] + + def _get_parser_to_modify(self): + # type: () -> Tuple[str, RawConfigParser] + # Determine which parser to modify + parsers = self._parsers[self.load_only] + if not parsers: + # This should not happen if everything works correctly. + raise ConfigurationError( + "Fatal Internal error [id=2]. Please report as a bug." + ) + + # Use the highest priority parser. + return parsers[-1] + + # XXX: This is patched in the tests. 
+ def _mark_as_modified(self, fname, parser): + # type: (str, RawConfigParser) -> None + file_parser_tuple = (fname, parser) + if file_parser_tuple not in self._modified_parsers: + self._modified_parsers.append(file_parser_tuple) diff --git a/venv/lib/python3.7/site-packages/pip/_internal/distributions/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/distributions/__init__.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/__init__.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/base.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/base.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/base.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/installed.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/installed.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/installed.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/installed.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/sdist.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/sdist.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/sdist.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/sdist.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/wheel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/wheel.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/wheel.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/__pycache__/wheel.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/distributions/base.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/base.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/distributions/base.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/base.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/distributions/installed.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/installed.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/distributions/installed.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/installed.py diff --git 
a/venv/lib/python3.7/site-packages/pip/_internal/distributions/sdist.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/sdist.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/distributions/sdist.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/sdist.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/distributions/wheel.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/wheel.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/distributions/wheel.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/distributions/wheel.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/exceptions.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/exceptions.py new file mode 100644 index 0000000..dddec78 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/exceptions.py @@ -0,0 +1,308 @@ +"""Exceptions used throughout package""" + +# The following comment should be removed at some point in the future. +# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +from itertools import chain, groupby, repeat + +from pip._vendor.six import iteritems + +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import Optional + from pip._vendor.pkg_resources import Distribution + from pip._internal.req.req_install import InstallRequirement + + +class PipError(Exception): + """Base pip exception""" + + +class ConfigurationError(PipError): + """General exception in configuration""" + + +class InstallationError(PipError): + """General exception during installation""" + + +class UninstallationError(PipError): + """General exception during uninstallation""" + + +class NoneMetadataError(PipError): + """ + Raised when accessing "METADATA" or "PKG-INFO" metadata for a + pip._vendor.pkg_resources.Distribution object and + `dist.has_metadata('METADATA')` returns True but + `dist.get_metadata('METADATA')` returns None (and similarly for + "PKG-INFO"). + """ + + def __init__(self, dist, metadata_name): + # type: (Distribution, str) -> None + """ + :param dist: A Distribution object. + :param metadata_name: The name of the metadata being accessed + (can be "METADATA" or "PKG-INFO"). + """ + self.dist = dist + self.metadata_name = metadata_name + + def __str__(self): + # type: () -> str + # Use `dist` in the error message because its stringification + # includes more information, like the version and location. 
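A small illustrative sketch, not part of the diff: every exception in this module derives from PipError, so code embedding these internals can catch the whole family at once. Importing pip internals like this is unsupported and is shown only to make the hierarchy concrete:

from pip._internal.exceptions import InstallationError, PipError

try:
    raise InstallationError("simulated install failure")
except PipError as exc:
    # InstallationError (and the other classes above) subclass PipError
    print("pip reported: {}".format(exc))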
+ return ( + 'None {} metadata found for distribution: {}'.format( + self.metadata_name, self.dist, + ) + ) + + +class DistributionNotFound(InstallationError): + """Raised when a distribution cannot be found to satisfy a requirement""" + + +class RequirementsFileParseError(InstallationError): + """Raised when a general error occurs parsing a requirements file line.""" + + +class BestVersionAlreadyInstalled(PipError): + """Raised when the most up-to-date version of a package is already + installed.""" + + +class BadCommand(PipError): + """Raised when virtualenv or a command is not found""" + + +class CommandError(PipError): + """Raised when there is an error in command-line arguments""" + + +class PreviousBuildDirError(PipError): + """Raised when there's a previous conflicting build directory""" + + +class InvalidWheelFilename(InstallationError): + """Invalid wheel filename.""" + + +class UnsupportedWheel(InstallationError): + """Unsupported wheel.""" + + +class HashErrors(InstallationError): + """Multiple HashError instances rolled into one for reporting""" + + def __init__(self): + self.errors = [] + + def append(self, error): + self.errors.append(error) + + def __str__(self): + lines = [] + self.errors.sort(key=lambda e: e.order) + for cls, errors_of_cls in groupby(self.errors, lambda e: e.__class__): + lines.append(cls.head) + lines.extend(e.body() for e in errors_of_cls) + if lines: + return '\n'.join(lines) + + def __nonzero__(self): + return bool(self.errors) + + def __bool__(self): + return self.__nonzero__() + + +class HashError(InstallationError): + """ + A failure to verify a package against known-good hashes + + :cvar order: An int sorting hash exception classes by difficulty of + recovery (lower being harder), so the user doesn't bother fretting + about unpinned packages when he has deeper issues, like VCS + dependencies, to deal with. Also keeps error reports in a + deterministic order. + :cvar head: A section heading for display above potentially many + exceptions of this kind + :ivar req: The InstallRequirement that triggered this error. This is + pasted on after the exception is instantiated, because it's not + typically available earlier. + + """ + req = None # type: Optional[InstallRequirement] + head = '' + + def body(self): + """Return a summary of me for display under the heading. + + This default implementation simply prints a description of the + triggering requirement. + + :param req: The InstallRequirement that provoked this error, with + populate_link() having already been called + + """ + return ' %s' % self._requirement_name() + + def __str__(self): + return '%s\n%s' % (self.head, self.body()) + + def _requirement_name(self): + """Return a description of the requirement that triggered me. 
+ + This default implementation returns long description of the req, with + line numbers + + """ + return str(self.req) if self.req else 'unknown package' + + +class VcsHashUnsupported(HashError): + """A hash was provided for a version-control-system-based requirement, but + we don't have a method for hashing those.""" + + order = 0 + head = ("Can't verify hashes for these requirements because we don't " + "have a way to hash version control repositories:") + + +class DirectoryUrlHashUnsupported(HashError): + """A hash was provided for a version-control-system-based requirement, but + we don't have a method for hashing those.""" + + order = 1 + head = ("Can't verify hashes for these file:// requirements because they " + "point to directories:") + + +class HashMissing(HashError): + """A hash was needed for a requirement but is absent.""" + + order = 2 + head = ('Hashes are required in --require-hashes mode, but they are ' + 'missing from some requirements. Here is a list of those ' + 'requirements along with the hashes their downloaded archives ' + 'actually had. Add lines like these to your requirements files to ' + 'prevent tampering. (If you did not enable --require-hashes ' + 'manually, note that it turns on automatically when any package ' + 'has a hash.)') + + def __init__(self, gotten_hash): + """ + :param gotten_hash: The hash of the (possibly malicious) archive we + just downloaded + """ + self.gotten_hash = gotten_hash + + def body(self): + # Dodge circular import. + from pip._internal.utils.hashes import FAVORITE_HASH + + package = None + if self.req: + # In the case of URL-based requirements, display the original URL + # seen in the requirements file rather than the package name, + # so the output can be directly copied into the requirements file. + package = (self.req.original_link if self.req.original_link + # In case someone feeds something downright stupid + # to InstallRequirement's constructor. + else getattr(self.req, 'req', None)) + return ' %s --hash=%s:%s' % (package or 'unknown package', + FAVORITE_HASH, + self.gotten_hash) + + +class HashUnpinned(HashError): + """A requirement had a hash specified but was not pinned to a specific + version.""" + + order = 3 + head = ('In --require-hashes mode, all requirements must have their ' + 'versions pinned with ==. These do not:') + + +class HashMismatch(HashError): + """ + Distribution file hash values don't match. + + :ivar package_name: The name of the package that triggered the hash + mismatch. Feel free to write to this after the exception is raise to + improve its error message. + + """ + order = 4 + head = ('THESE PACKAGES DO NOT MATCH THE HASHES FROM THE REQUIREMENTS ' + 'FILE. If you have updated the package versions, please update ' + 'the hashes. Otherwise, examine the package contents carefully; ' + 'someone may have tampered with them.') + + def __init__(self, allowed, gots): + """ + :param allowed: A dict of algorithm names pointing to lists of allowed + hex digests + :param gots: A dict of algorithm names pointing to hashes we + actually got from the files under suspicion + """ + self.allowed = allowed + self.gots = gots + + def body(self): + return ' %s:\n%s' % (self._requirement_name(), + self._hash_comparison()) + + def _hash_comparison(self): + """ + Return a comparison of actual and expected hash values. 
+ + Example:: + + Expected sha256 abcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcde + or 123451234512345123451234512345123451234512345 + Got bcdefbcdefbcdefbcdefbcdefbcdefbcdefbcdefbcdef + + """ + def hash_then_or(hash_name): + # For now, all the decent hashes have 6-char names, so we can get + # away with hard-coding space literals. + return chain([hash_name], repeat(' or')) + + lines = [] + for hash_name, expecteds in iteritems(self.allowed): + prefix = hash_then_or(hash_name) + lines.extend((' Expected %s %s' % (next(prefix), e)) + for e in expecteds) + lines.append(' Got %s\n' % + self.gots[hash_name].hexdigest()) + return '\n'.join(lines) + + +class UnsupportedPythonVersion(InstallationError): + """Unsupported python version according to Requires-Python package + metadata.""" + + +class ConfigurationFileCouldNotBeLoaded(ConfigurationError): + """When there are errors while loading a configuration file + """ + + def __init__(self, reason="could not be loaded", fname=None, error=None): + super(ConfigurationFileCouldNotBeLoaded, self).__init__(error) + self.reason = reason + self.fname = fname + self.error = error + + def __str__(self): + if self.fname is not None: + message_part = " in {}.".format(self.fname) + else: + assert self.error is not None + message_part = ".\n{}\n".format(self.error.message) + return "Configuration file {}{}".format(self.reason, message_part) diff --git a/venv/lib/python3.7/site-packages/pip/_internal/index/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/index/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/index/__init__.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/index/__init__.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/index/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/index/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/index/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/index/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/index/__pycache__/collector.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/index/__pycache__/collector.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/index/__pycache__/collector.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/index/__pycache__/collector.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/index/__pycache__/package_finder.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/index/__pycache__/package_finder.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/index/__pycache__/package_finder.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/index/__pycache__/package_finder.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/index/collector.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/index/collector.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/index/collector.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/index/collector.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/index/package_finder.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/index/package_finder.py similarity index 100% rename from 
venv/lib/python3.7/site-packages/pip/_internal/index/package_finder.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/index/package_finder.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/legacy_resolve.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/legacy_resolve.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/legacy_resolve.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/legacy_resolve.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/locations.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/locations.py new file mode 100644 index 0000000..0c11553 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/locations.py @@ -0,0 +1,194 @@ +"""Locations where we look for configs, install stuff, etc""" + +# The following comment should be removed at some point in the future. +# mypy: strict-optional=False + +from __future__ import absolute_import + +import os +import os.path +import platform +import site +import sys +import sysconfig +from distutils import sysconfig as distutils_sysconfig +from distutils.command.install import SCHEME_KEYS # type: ignore +from distutils.command.install import install as distutils_install_command + +from pip._internal.models.scheme import Scheme +from pip._internal.utils import appdirs +from pip._internal.utils.compat import WINDOWS +from pip._internal.utils.typing import MYPY_CHECK_RUNNING, cast +from pip._internal.utils.virtualenv import running_under_virtualenv + +if MYPY_CHECK_RUNNING: + from typing import Dict, List, Optional, Union + + from distutils.cmd import Command as DistutilsCommand + + +# Application Directories +USER_CACHE_DIR = appdirs.user_cache_dir("pip") + + +def get_major_minor_version(): + # type: () -> str + """ + Return the major-minor version of the current Python as a string, e.g. + "3.7" or "3.10". + """ + return '{}.{}'.format(*sys.version_info) + + +def get_src_prefix(): + # type: () -> str + if running_under_virtualenv(): + src_prefix = os.path.join(sys.prefix, 'src') + else: + # FIXME: keep src in cwd for now (it is not a temporary folder) + try: + src_prefix = os.path.join(os.getcwd(), 'src') + except OSError: + # In case the current working directory has been renamed or deleted + sys.exit( + "The folder you are executing pip from can no longer be found." + ) + + # under macOS + virtualenv sys.prefix is not properly resolved + # it is something like /path/to/python/bin/.. + return os.path.abspath(src_prefix) + + +# FIXME doesn't account for venv linked to global site-packages + +site_packages = sysconfig.get_path("purelib") # type: Optional[str] + +# This is because of a bug in PyPy's sysconfig module, see +# https://bitbucket.org/pypy/pypy/issues/2506/sysconfig-returns-incorrect-paths +# for more information. +if platform.python_implementation().lower() == "pypy": + site_packages = distutils_sysconfig.get_python_lib() +try: + # Use getusersitepackages if this is present, as it ensures that the + # value is initialised properly. + user_site = site.getusersitepackages() +except AttributeError: + user_site = site.USER_SITE + +if WINDOWS: + bin_py = os.path.join(sys.prefix, 'Scripts') + bin_user = os.path.join(user_site, 'Scripts') + # buildout uses 'bin' on Windows too? 
+ if not os.path.exists(bin_py): + bin_py = os.path.join(sys.prefix, 'bin') + bin_user = os.path.join(user_site, 'bin') +else: + bin_py = os.path.join(sys.prefix, 'bin') + bin_user = os.path.join(user_site, 'bin') + + # Forcing to use /usr/local/bin for standard macOS framework installs + # Also log to ~/Library/Logs/ for use with the Console.app log viewer + if sys.platform[:6] == 'darwin' and sys.prefix[:16] == '/System/Library/': + bin_py = '/usr/local/bin' + + +def distutils_scheme( + dist_name, user=False, home=None, root=None, isolated=False, prefix=None +): + # type:(str, bool, str, str, bool, str) -> Dict[str, str] + """ + Return a distutils install scheme + """ + from distutils.dist import Distribution + + dist_args = {'name': dist_name} # type: Dict[str, Union[str, List[str]]] + if isolated: + dist_args["script_args"] = ["--no-user-cfg"] + + d = Distribution(dist_args) + d.parse_config_files() + obj = None # type: Optional[DistutilsCommand] + obj = d.get_command_obj('install', create=True) + assert obj is not None + i = cast(distutils_install_command, obj) + # NOTE: setting user or home has the side-effect of creating the home dir + # or user base for installations during finalize_options() + # ideally, we'd prefer a scheme class that has no side-effects. + assert not (user and prefix), "user={} prefix={}".format(user, prefix) + assert not (home and prefix), "home={} prefix={}".format(home, prefix) + i.user = user or i.user + if user or home: + i.prefix = "" + i.prefix = prefix or i.prefix + i.home = home or i.home + i.root = root or i.root + i.finalize_options() + + scheme = {} + for key in SCHEME_KEYS: + scheme[key] = getattr(i, 'install_' + key) + + # install_lib specified in setup.cfg should install *everything* + # into there (i.e. it takes precedence over both purelib and + # platlib). Note, i.install_lib is *always* set after + # finalize_options(); we only want to override here if the user + # has explicitly requested it hence going back to the config + if 'install_lib' in d.get_option_dict('install'): + scheme.update(dict(purelib=i.install_lib, platlib=i.install_lib)) + + if running_under_virtualenv(): + scheme['headers'] = os.path.join( + sys.prefix, + 'include', + 'site', + 'python{}'.format(get_major_minor_version()), + dist_name, + ) + + if root is not None: + path_no_drive = os.path.splitdrive( + os.path.abspath(scheme["headers"]))[1] + scheme["headers"] = os.path.join( + root, + path_no_drive[1:], + ) + + return scheme + + +def get_scheme( + dist_name, # type: str + user=False, # type: bool + home=None, # type: Optional[str] + root=None, # type: Optional[str] + isolated=False, # type: bool + prefix=None, # type: Optional[str] +): + # type: (...) -> Scheme + """ + Get the "scheme" corresponding to the input parameters. The distutils + documentation provides the context for the available schemes: + https://docs.python.org/3/install/index.html#alternate-installation + + :param dist_name: the name of the package to retrieve the scheme for, used + in the headers scheme path + :param user: indicates to use the "user" scheme + :param home: indicates to use the "home" scheme and provides the base + directory for the same + :param root: root under which other directories are re-based + :param isolated: equivalent to --no-user-cfg, i.e. 
do not consider + ~/.pydistutils.cfg (posix) or ~/pydistutils.cfg (non-posix) for + scheme paths + :param prefix: indicates to use the "prefix" scheme and provides the + base directory for the same + """ + scheme = distutils_scheme( + dist_name, user, home, root, isolated, prefix + ) + return Scheme( + platlib=scheme["platlib"], + purelib=scheme["purelib"], + headers=scheme["headers"], + scripts=scheme["scripts"], + data=scheme["data"], + ) diff --git a/venv/lib/python3.7/site-packages/pip/_internal/main.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/main.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/main.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/main.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__init__.py new file mode 100644 index 0000000..7855226 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__init__.py @@ -0,0 +1,2 @@ +"""A package that contains models that represent entities. +""" diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..a752d61 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/candidate.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/candidate.cpython-37.pyc new file mode 100644 index 0000000..82084e8 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/candidate.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/format_control.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/format_control.cpython-37.pyc new file mode 100644 index 0000000..907faf1 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/format_control.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/index.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/index.cpython-37.pyc new file mode 100644 index 0000000..df5e5fd Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/index.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/link.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/link.cpython-37.pyc new file mode 100644 index 0000000..2f86997 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/link.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/pip/_internal/models/__pycache__/scheme.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/scheme.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/models/__pycache__/scheme.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/scheme.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/models/__pycache__/search_scope.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/search_scope.cpython-37.pyc
similarity index 100%
rename from venv/lib/python3.7/site-packages/pip/_internal/models/__pycache__/search_scope.cpython-37.pyc
rename to venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/search_scope.cpython-37.pyc
diff --git a/venv/lib/python3.7/site-packages/pip/_internal/models/__pycache__/selection_prefs.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/selection_prefs.cpython-37.pyc
similarity index 100%
rename from venv/lib/python3.7/site-packages/pip/_internal/models/__pycache__/selection_prefs.cpython-37.pyc
rename to venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/selection_prefs.cpython-37.pyc
diff --git a/venv/lib/python3.7/site-packages/pip/_internal/models/__pycache__/target_python.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/target_python.cpython-37.pyc
similarity index 100%
rename from venv/lib/python3.7/site-packages/pip/_internal/models/__pycache__/target_python.cpython-37.pyc
rename to venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/target_python.cpython-37.pyc
diff --git a/venv/lib/python3.7/site-packages/pip/_internal/models/__pycache__/wheel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/wheel.cpython-37.pyc
similarity index 100%
rename from venv/lib/python3.7/site-packages/pip/_internal/models/__pycache__/wheel.cpython-37.pyc
rename to venv.bak/lib/python3.7/site-packages/pip/_internal/models/__pycache__/wheel.cpython-37.pyc
diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/models/candidate.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/candidate.py
new file mode 100644
index 0000000..1dc1a57
--- /dev/null
+++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/candidate.py
@@ -0,0 +1,36 @@
+from pip._vendor.packaging.version import parse as parse_version
+
+from pip._internal.utils.models import KeyBasedCompareMixin
+from pip._internal.utils.typing import MYPY_CHECK_RUNNING
+
+if MYPY_CHECK_RUNNING:
+    from pip._vendor.packaging.version import _BaseVersion
+    from pip._internal.models.link import Link
+
+
+class InstallationCandidate(KeyBasedCompareMixin):
+    """Represents a potential "candidate" for installation.
+    """
+
+    def __init__(self, name, version, link):
+        # type: (str, str, Link) -> None
+        self.name = name
+        self.version = parse_version(version)  # type: _BaseVersion
+        self.link = link
+
+        super(InstallationCandidate, self).__init__(
+            key=(self.name, self.version, self.link),
+            defining_class=InstallationCandidate
+        )
+
+    def __repr__(self):
+        # type: () -> str
+        return "<InstallationCandidate({!r}, {!r}, {!r})>".format(
+            self.name, self.version, self.link,
+        )
+
+    def __str__(self):
+        # type: () -> str
+        return '{!r} candidate (version {} at {})'.format(
+            self.name, self.version, self.link,
+        )
diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/models/format_control.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/format_control.py
new file mode 100644
index 0000000..2e13727
--- /dev/null
+++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/format_control.py
@@ -0,0 +1,84 @@
+# The following comment should be removed at some point in the future.
+# mypy: strict-optional=False + +from pip._vendor.packaging.utils import canonicalize_name + +from pip._internal.exceptions import CommandError +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import Optional, Set, FrozenSet + + +class FormatControl(object): + """Helper for managing formats from which a package can be installed. + """ + + def __init__(self, no_binary=None, only_binary=None): + # type: (Optional[Set[str]], Optional[Set[str]]) -> None + if no_binary is None: + no_binary = set() + if only_binary is None: + only_binary = set() + + self.no_binary = no_binary + self.only_binary = only_binary + + def __eq__(self, other): + # type: (object) -> bool + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + # type: (object) -> bool + return not self.__eq__(other) + + def __repr__(self): + # type: () -> str + return "{}({}, {})".format( + self.__class__.__name__, + self.no_binary, + self.only_binary + ) + + @staticmethod + def handle_mutual_excludes(value, target, other): + # type: (str, Optional[Set[str]], Optional[Set[str]]) -> None + if value.startswith('-'): + raise CommandError( + "--no-binary / --only-binary option requires 1 argument." + ) + new = value.split(',') + while ':all:' in new: + other.clear() + target.clear() + target.add(':all:') + del new[:new.index(':all:') + 1] + # Without a none, we want to discard everything as :all: covers it + if ':none:' not in new: + return + for name in new: + if name == ':none:': + target.clear() + continue + name = canonicalize_name(name) + other.discard(name) + target.add(name) + + def get_allowed_formats(self, canonical_name): + # type: (str) -> FrozenSet[str] + result = {"binary", "source"} + if canonical_name in self.only_binary: + result.discard('source') + elif canonical_name in self.no_binary: + result.discard('binary') + elif ':all:' in self.only_binary: + result.discard('source') + elif ':all:' in self.no_binary: + result.discard('binary') + return frozenset(result) + + def disallow_binaries(self): + # type: () -> None + self.handle_mutual_excludes( + ':all:', self.no_binary, self.only_binary, + ) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/models/index.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/index.py new file mode 100644 index 0000000..ead1efb --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/index.py @@ -0,0 +1,31 @@ +from pip._vendor.six.moves.urllib import parse as urllib_parse + + +class PackageIndex(object): + """Represents a Package Index and provides easier access to endpoints + """ + + def __init__(self, url, file_storage_domain): + # type: (str, str) -> None + super(PackageIndex, self).__init__() + self.url = url + self.netloc = urllib_parse.urlsplit(url).netloc + self.simple_url = self._url_for_path('simple') + self.pypi_url = self._url_for_path('pypi') + + # This is part of a temporary hack used to block installs of PyPI + # packages which depend on external urls only necessary until PyPI can + # block such packages themselves + self.file_storage_domain = file_storage_domain + + def _url_for_path(self, path): + # type: (str) -> str + return urllib_parse.urljoin(self.url, path) + + +PyPI = PackageIndex( + 'https://pypi.org/', file_storage_domain='files.pythonhosted.org' +) +TestPyPI = PackageIndex( + 'https://test.pypi.org/', file_storage_domain='test-files.pythonhosted.org' +) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/models/link.py 
b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/link.py new file mode 100644 index 0000000..34fbcbf --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/link.py @@ -0,0 +1,227 @@ +import os +import posixpath +import re + +from pip._vendor.six.moves.urllib import parse as urllib_parse + +from pip._internal.utils.filetypes import WHEEL_EXTENSION +from pip._internal.utils.misc import ( + redact_auth_from_url, + split_auth_from_netloc, + splitext, +) +from pip._internal.utils.models import KeyBasedCompareMixin +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from pip._internal.utils.urls import path_to_url, url_to_path + +if MYPY_CHECK_RUNNING: + from typing import Optional, Text, Tuple, Union + from pip._internal.index.collector import HTMLPage + from pip._internal.utils.hashes import Hashes + + +class Link(KeyBasedCompareMixin): + """Represents a parsed link from a Package Index's simple URL + """ + + def __init__( + self, + url, # type: str + comes_from=None, # type: Optional[Union[str, HTMLPage]] + requires_python=None, # type: Optional[str] + yanked_reason=None, # type: Optional[Text] + ): + # type: (...) -> None + """ + :param url: url of the resource pointed to (href of the link) + :param comes_from: instance of HTMLPage where the link was found, + or string. + :param requires_python: String containing the `Requires-Python` + metadata field, specified in PEP 345. This may be specified by + a data-requires-python attribute in the HTML link tag, as + described in PEP 503. + :param yanked_reason: the reason the file has been yanked, if the + file has been yanked, or None if the file hasn't been yanked. + This is the value of the "data-yanked" attribute, if present, in + a simple repository HTML link. If the file has been yanked but + no reason was provided, this should be the empty string. See + PEP 592 for more information and the specification. + """ + + # url can be a UNC windows share + if url.startswith('\\\\'): + url = path_to_url(url) + + self._parsed_url = urllib_parse.urlsplit(url) + # Store the url as a private attribute to prevent accidentally + # trying to set a new value. + self._url = url + + self.comes_from = comes_from + self.requires_python = requires_python if requires_python else None + self.yanked_reason = yanked_reason + + super(Link, self).__init__(key=url, defining_class=Link) + + def __str__(self): + # type: () -> str + if self.requires_python: + rp = ' (requires-python:%s)' % self.requires_python + else: + rp = '' + if self.comes_from: + return '%s (from %s)%s' % (redact_auth_from_url(self._url), + self.comes_from, rp) + else: + return redact_auth_from_url(str(self._url)) + + def __repr__(self): + # type: () -> str + return '<Link %s>' % self + + @property + def url(self): + # type: () -> str + return self._url + + @property + def filename(self): + # type: () -> str + path = self.path.rstrip('/') + name = posixpath.basename(path) + if not name: + # Make sure we don't leak auth information if the netloc + # includes a username and password. + netloc, user_pass = split_auth_from_netloc(self.netloc) + return netloc + + name = urllib_parse.unquote(name) + assert name, ('URL %r produced no filename' % self._url) + return name + + @property + def file_path(self): + # type: () -> str + return url_to_path(self.url) + + @property + def scheme(self): + # type: () -> str + return self._parsed_url.scheme + + @property + def netloc(self): + # type: () -> str + """ + This can contain auth information.
+ """ + return self._parsed_url.netloc + + @property + def path(self): + # type: () -> str + return urllib_parse.unquote(self._parsed_url.path) + + def splitext(self): + # type: () -> Tuple[str, str] + return splitext(posixpath.basename(self.path.rstrip('/'))) + + @property + def ext(self): + # type: () -> str + return self.splitext()[1] + + @property + def url_without_fragment(self): + # type: () -> str + scheme, netloc, path, query, fragment = self._parsed_url + return urllib_parse.urlunsplit((scheme, netloc, path, query, None)) + + _egg_fragment_re = re.compile(r'[#&]egg=([^&]*)') + + @property + def egg_fragment(self): + # type: () -> Optional[str] + match = self._egg_fragment_re.search(self._url) + if not match: + return None + return match.group(1) + + _subdirectory_fragment_re = re.compile(r'[#&]subdirectory=([^&]*)') + + @property + def subdirectory_fragment(self): + # type: () -> Optional[str] + match = self._subdirectory_fragment_re.search(self._url) + if not match: + return None + return match.group(1) + + _hash_re = re.compile( + r'(sha1|sha224|sha384|sha256|sha512|md5)=([a-f0-9]+)' + ) + + @property + def hash(self): + # type: () -> Optional[str] + match = self._hash_re.search(self._url) + if match: + return match.group(2) + return None + + @property + def hash_name(self): + # type: () -> Optional[str] + match = self._hash_re.search(self._url) + if match: + return match.group(1) + return None + + @property + def show_url(self): + # type: () -> str + return posixpath.basename(self._url.split('#', 1)[0].split('?', 1)[0]) + + @property + def is_file(self): + # type: () -> bool + return self.scheme == 'file' + + def is_existing_dir(self): + # type: () -> bool + return self.is_file and os.path.isdir(self.file_path) + + @property + def is_wheel(self): + # type: () -> bool + return self.ext == WHEEL_EXTENSION + + @property + def is_vcs(self): + # type: () -> bool + from pip._internal.vcs import vcs + + return self.scheme in vcs.all_schemes + + @property + def is_yanked(self): + # type: () -> bool + return self.yanked_reason is not None + + @property + def has_hash(self): + # type: () -> bool + return self.hash_name is not None + + def is_hash_allowed(self, hashes): + # type: (Optional[Hashes]) -> bool + """ + Return True if the link has a hash and it is allowed. + """ + if hashes is None or not self.has_hash: + return False + # Assert non-None so mypy knows self.hash_name and self.hash are str. 
+ assert self.hash_name is not None + assert self.hash is not None + + return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash) diff --git a/venv/lib/python3.7/site-packages/pip/_internal/models/scheme.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/scheme.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/models/scheme.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/models/scheme.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/models/search_scope.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/search_scope.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/models/search_scope.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/models/search_scope.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/models/selection_prefs.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/selection_prefs.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/models/selection_prefs.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/models/selection_prefs.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/models/target_python.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/target_python.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/models/target_python.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/models/target_python.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/models/wheel.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/models/wheel.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/models/wheel.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/models/wheel.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/network/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/network/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/network/__init__.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/network/__init__.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/network/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/network/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/network/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/network/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/network/__pycache__/auth.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/network/__pycache__/auth.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/network/__pycache__/auth.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/network/__pycache__/auth.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/network/__pycache__/cache.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/network/__pycache__/cache.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/network/__pycache__/cache.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/network/__pycache__/cache.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/network/__pycache__/download.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/pip/_internal/network/__pycache__/download.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/network/__pycache__/download.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/network/__pycache__/download.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/network/__pycache__/session.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/network/__pycache__/session.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/network/__pycache__/session.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/network/__pycache__/session.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/network/__pycache__/utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/network/__pycache__/utils.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/network/__pycache__/utils.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/network/__pycache__/utils.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/network/__pycache__/xmlrpc.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/network/__pycache__/xmlrpc.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/network/__pycache__/xmlrpc.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/network/__pycache__/xmlrpc.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/network/auth.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/network/auth.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/network/auth.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/network/auth.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/network/cache.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/network/cache.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/network/cache.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/network/cache.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/network/download.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/network/download.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/network/download.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/network/download.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/network/session.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/network/session.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/network/session.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/network/session.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/network/utils.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/network/utils.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/network/utils.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/network/utils.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/network/xmlrpc.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/network/xmlrpc.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/network/xmlrpc.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/network/xmlrpc.py 
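A minimal usage sketch for the Link model vendored above (illustrative only; it assumes the pip 20.x internal API exactly as defined in models/link.py above, and the URL path segments and digest are made-up placeholders):

from pip._internal.models.link import Link

# A typical index file URL with an embedded sha256 fragment.
link = Link(
    'https://files.pythonhosted.org/packages/ab/cd/'
    'example_pkg-1.0-py2.py3-none-any.whl#sha256=' + 'a' * 64
)

print(link.filename)              # 'example_pkg-1.0-py2.py3-none-any.whl'
print(link.is_wheel)              # True: .ext matches WHEEL_EXTENSION ('.whl')
print(link.hash_name, link.hash)  # 'sha256' and the 64-char hex digest from the fragment
print(link.url_without_fragment)  # the same URL with the '#sha256=...' part removed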
diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..e130187 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/__pycache__/check.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/__pycache__/check.cpython-37.pyc new file mode 100644 index 0000000..e835628 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/__pycache__/check.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/__pycache__/freeze.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/__pycache__/freeze.cpython-37.pyc new file mode 100644 index 0000000..ceb1907 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/__pycache__/freeze.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/__pycache__/prepare.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/__pycache__/prepare.cpython-37.pyc new file mode 100644 index 0000000..10cc3e4 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/__pycache__/prepare.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/venv/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/metadata.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/metadata.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/metadata.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/metadata.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/metadata_legacy.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/metadata_legacy.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/metadata_legacy.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/metadata_legacy.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/wheel.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/wheel.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/wheel.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/wheel.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/wheel_legacy.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/wheel_legacy.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/wheel_legacy.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/__pycache__/wheel_legacy.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/operations/build/metadata.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/metadata.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/build/metadata.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/metadata.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/operations/build/metadata_legacy.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/metadata_legacy.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/build/metadata_legacy.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/metadata_legacy.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/operations/build/wheel.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/wheel.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/build/wheel.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/wheel.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/operations/build/wheel_legacy.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/wheel_legacy.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/build/wheel_legacy.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/operations/build/wheel_legacy.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/check.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/check.py new file mode 100644 index 0000000..b85a123 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/check.py @@ -0,0 +1,163 @@ +"""Validation of dependencies of packages +""" + +# The following comment should be removed at some point in the future. 
+# mypy: strict-optional=False +# mypy: disallow-untyped-defs=False + +import logging +from collections import namedtuple + +from pip._vendor.packaging.utils import canonicalize_name +from pip._vendor.pkg_resources import RequirementParseError + +from pip._internal.distributions import ( + make_distribution_for_install_requirement, +) +from pip._internal.utils.misc import get_installed_distributions +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +logger = logging.getLogger(__name__) + +if MYPY_CHECK_RUNNING: + from pip._internal.req.req_install import InstallRequirement + from typing import ( + Any, Callable, Dict, Optional, Set, Tuple, List + ) + + # Shorthands + PackageSet = Dict[str, 'PackageDetails'] + Missing = Tuple[str, Any] + Conflicting = Tuple[str, str, Any] + + MissingDict = Dict[str, List[Missing]] + ConflictingDict = Dict[str, List[Conflicting]] + CheckResult = Tuple[MissingDict, ConflictingDict] + +PackageDetails = namedtuple('PackageDetails', ['version', 'requires']) + + +def create_package_set_from_installed(**kwargs): + # type: (**Any) -> Tuple[PackageSet, bool] + """Converts a list of distributions into a PackageSet. + """ + # Default to using all packages installed on the system + if kwargs == {}: + kwargs = {"local_only": False, "skip": ()} + + package_set = {} + problems = False + for dist in get_installed_distributions(**kwargs): + name = canonicalize_name(dist.project_name) + try: + package_set[name] = PackageDetails(dist.version, dist.requires()) + except RequirementParseError as e: + # Don't crash on broken metadata + logger.warning("Error parsing requirements for %s: %s", name, e) + problems = True + return package_set, problems + + +def check_package_set(package_set, should_ignore=None): + # type: (PackageSet, Optional[Callable[[str], bool]]) -> CheckResult + """Check if a package set is consistent + + If should_ignore is passed, it should be a callable that takes a + package name and returns a boolean. 
+ """ + if should_ignore is None: + def should_ignore(name): + return False + + missing = {} + conflicting = {} + + for package_name in package_set: + # Info about dependencies of package_name + missing_deps = set() # type: Set[Missing] + conflicting_deps = set() # type: Set[Conflicting] + + if should_ignore(package_name): + continue + + for req in package_set[package_name].requires: + name = canonicalize_name(req.project_name) # type: str + + # Check if it's missing + if name not in package_set: + missed = True + if req.marker is not None: + missed = req.marker.evaluate() + if missed: + missing_deps.add((name, req)) + continue + + # Check if there's a conflict + version = package_set[name].version # type: str + if not req.specifier.contains(version, prereleases=True): + conflicting_deps.add((name, version, req)) + + if missing_deps: + missing[package_name] = sorted(missing_deps, key=str) + if conflicting_deps: + conflicting[package_name] = sorted(conflicting_deps, key=str) + + return missing, conflicting + + +def check_install_conflicts(to_install): + # type: (List[InstallRequirement]) -> Tuple[PackageSet, CheckResult] + """For checking if the dependency graph would be consistent after \ + installing given requirements + """ + # Start from the current state + package_set, _ = create_package_set_from_installed() + # Install packages + would_be_installed = _simulate_installation_of(to_install, package_set) + + # Only warn about directly-dependent packages; create a whitelist of them + whitelist = _create_whitelist(would_be_installed, package_set) + + return ( + package_set, + check_package_set( + package_set, should_ignore=lambda name: name not in whitelist + ) + ) + + +def _simulate_installation_of(to_install, package_set): + # type: (List[InstallRequirement], PackageSet) -> Set[str] + """Computes the version of packages after installing to_install. + """ + + # Keep track of packages that were installed + installed = set() + + # Modify it as installing requirement_set would (assuming no errors) + for inst_req in to_install: + abstract_dist = make_distribution_for_install_requirement(inst_req) + dist = abstract_dist.get_pkg_resources_distribution() + + name = canonicalize_name(dist.key) + package_set[name] = PackageDetails(dist.version, dist.requires()) + + installed.add(name) + + return installed + + +def _create_whitelist(would_be_installed, package_set): + # type: (Set[str], PackageSet) -> Set[str] + packages_affected = set(would_be_installed) + + for package_name in package_set: + if package_name in packages_affected: + continue + + for req in package_set[package_name].requires: + if canonicalize_name(req.name) in packages_affected: + packages_affected.add(package_name) + break + + return packages_affected diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/freeze.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/freeze.py new file mode 100644 index 0000000..36a5c33 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/freeze.py @@ -0,0 +1,265 @@ +# The following comment should be removed at some point in the future. 
+# mypy: strict-optional=False +# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import collections +import logging +import os +import re + +from pip._vendor import six +from pip._vendor.packaging.utils import canonicalize_name +from pip._vendor.pkg_resources import RequirementParseError + +from pip._internal.exceptions import BadCommand, InstallationError +from pip._internal.req.constructors import ( + install_req_from_editable, + install_req_from_line, +) +from pip._internal.req.req_file import COMMENT_RE +from pip._internal.utils.misc import ( + dist_is_editable, + get_installed_distributions, +) +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import ( + Iterator, Optional, List, Container, Set, Dict, Tuple, Iterable, Union + ) + from pip._internal.cache import WheelCache + from pip._vendor.pkg_resources import ( + Distribution, Requirement + ) + + RequirementInfo = Tuple[Optional[Union[str, Requirement]], bool, List[str]] + + +logger = logging.getLogger(__name__) + + +def freeze( + requirement=None, # type: Optional[List[str]] + find_links=None, # type: Optional[List[str]] + local_only=None, # type: Optional[bool] + user_only=None, # type: Optional[bool] + paths=None, # type: Optional[List[str]] + skip_regex=None, # type: Optional[str] + isolated=False, # type: bool + wheel_cache=None, # type: Optional[WheelCache] + exclude_editable=False, # type: bool + skip=() # type: Container[str] +): + # type: (...) -> Iterator[str] + find_links = find_links or [] + skip_match = None + + if skip_regex: + skip_match = re.compile(skip_regex).search + + for link in find_links: + yield '-f %s' % link + installations = {} # type: Dict[str, FrozenRequirement] + for dist in get_installed_distributions(local_only=local_only, + skip=(), + user_only=user_only, + paths=paths): + try: + req = FrozenRequirement.from_dist(dist) + except RequirementParseError as exc: + # We include dist rather than dist.project_name because the + # dist string includes more information, like the version and + # location. We also include the exception message to aid + # troubleshooting. + logger.warning( + 'Could not generate requirement for distribution %r: %s', + dist, exc + ) + continue + if exclude_editable and req.editable: + continue + installations[req.canonical_name] = req + + if requirement: + # the options that don't get turned into an InstallRequirement + # should only be emitted once, even if the same option is in multiple + # requirements files, so we need to keep track of what has been emitted + # so that we don't emit it again if it's seen again + emitted_options = set() # type: Set[str] + # keep track of which files a requirement is in so that we can + # give an accurate warning if a requirement appears multiple times. 
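# A minimal usage sketch for the freeze() generator defined above (assuming
# the vendored pip 20.x internals; the skip set below is illustrative):
#
#     from pip._internal.operations.freeze import freeze
#
#     for line in freeze(local_only=True, skip={'pip', 'setuptools', 'wheel'}):
#         print(line)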
+ req_files = collections.defaultdict(list) # type: Dict[str, List[str]] + for req_file_path in requirement: + with open(req_file_path) as req_file: + for line in req_file: + if (not line.strip() or + line.strip().startswith('#') or + (skip_match and skip_match(line)) or + line.startswith(( + '-r', '--requirement', + '-Z', '--always-unzip', + '-f', '--find-links', + '-i', '--index-url', + '--pre', + '--trusted-host', + '--process-dependency-links', + '--extra-index-url'))): + line = line.rstrip() + if line not in emitted_options: + emitted_options.add(line) + yield line + continue + + if line.startswith('-e') or line.startswith('--editable'): + if line.startswith('-e'): + line = line[2:].strip() + else: + line = line[len('--editable'):].strip().lstrip('=') + line_req = install_req_from_editable( + line, + isolated=isolated, + wheel_cache=wheel_cache, + ) + else: + line_req = install_req_from_line( + COMMENT_RE.sub('', line).strip(), + isolated=isolated, + wheel_cache=wheel_cache, + ) + + if not line_req.name: + logger.info( + "Skipping line in requirement file [%s] because " + "it's not clear what it would install: %s", + req_file_path, line.strip(), + ) + logger.info( + " (add #egg=PackageName to the URL to avoid" + " this warning)" + ) + else: + line_req_canonical_name = canonicalize_name( + line_req.name) + if line_req_canonical_name not in installations: + # either it's not installed, or it is installed + # but has been processed already + if not req_files[line_req.name]: + logger.warning( + "Requirement file [%s] contains %s, but " + "package %r is not installed", + req_file_path, + COMMENT_RE.sub('', line).strip(), + line_req.name + ) + else: + req_files[line_req.name].append(req_file_path) + else: + yield str(installations[ + line_req_canonical_name]).rstrip() + del installations[line_req_canonical_name] + req_files[line_req.name].append(req_file_path) + + # Warn about requirements that were included multiple times (in a + # single requirements file or in different requirements files). + for name, files in six.iteritems(req_files): + if len(files) > 1: + logger.warning("Requirement %s included multiple times [%s]", + name, ', '.join(sorted(set(files)))) + + yield( + '## The following requirements were added by ' + 'pip freeze:' + ) + for installation in sorted( + installations.values(), key=lambda x: x.name.lower()): + if installation.canonical_name not in skip: + yield str(installation).rstrip() + + +def get_requirement_info(dist): + # type: (Distribution) -> RequirementInfo + """ + Compute and return values (req, editable, comments) for use in + FrozenRequirement.from_dist(). 
+ """ + if not dist_is_editable(dist): + return (None, False, []) + + location = os.path.normcase(os.path.abspath(dist.location)) + + from pip._internal.vcs import vcs, RemoteNotFoundError + vcs_backend = vcs.get_backend_for_dir(location) + + if vcs_backend is None: + req = dist.as_requirement() + logger.debug( + 'No VCS found for editable requirement "%s" in: %r', req, + location, + ) + comments = [ + '# Editable install with no version control ({})'.format(req) + ] + return (location, True, comments) + + try: + req = vcs_backend.get_src_requirement(location, dist.project_name) + except RemoteNotFoundError: + req = dist.as_requirement() + comments = [ + '# Editable {} install with no remote ({})'.format( + type(vcs_backend).__name__, req, + ) + ] + return (location, True, comments) + + except BadCommand: + logger.warning( + 'cannot determine version of editable source in %s ' + '(%s command not found in path)', + location, + vcs_backend.name, + ) + return (None, True, []) + + except InstallationError as exc: + logger.warning( + "Error when trying to get requirement for VCS system %s, " + "falling back to uneditable format", exc + ) + else: + if req is not None: + return (req, True, []) + + logger.warning( + 'Could not determine repository location of %s', location + ) + comments = ['## !! Could not determine repository location'] + + return (None, False, comments) + + +class FrozenRequirement(object): + def __init__(self, name, req, editable, comments=()): + # type: (str, Union[str, Requirement], bool, Iterable[str]) -> None + self.name = name + self.canonical_name = canonicalize_name(name) + self.req = req + self.editable = editable + self.comments = comments + + @classmethod + def from_dist(cls, dist): + # type: (Distribution) -> FrozenRequirement + req, editable, comments = get_requirement_info(dist) + if req is None: + req = dist.as_requirement() + + return cls(dist.project_name, req, editable, comments=comments) + + def __str__(self): + req = self.req + if self.editable: + req = '-e %s' % req + return '\n'.join(list(self.comments) + [str(req)]) + '\n' diff --git a/venv/lib/python3.7/site-packages/pip/_internal/operations/install/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/install/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/install/__init__.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/operations/install/__init__.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/operations/install/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/install/__pycache__/__init__.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/install/__pycache__/__init__.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/operations/install/__pycache__/__init__.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/operations/install/__pycache__/editable_legacy.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/install/__pycache__/editable_legacy.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/install/__pycache__/editable_legacy.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/operations/install/__pycache__/editable_legacy.cpython-37.pyc diff --git 
a/venv/lib/python3.7/site-packages/pip/_internal/operations/install/__pycache__/legacy.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/install/__pycache__/legacy.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/install/__pycache__/legacy.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/operations/install/__pycache__/legacy.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/operations/install/__pycache__/wheel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/install/__pycache__/wheel.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/install/__pycache__/wheel.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/operations/install/__pycache__/wheel.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/operations/install/editable_legacy.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/install/editable_legacy.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/install/editable_legacy.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/operations/install/editable_legacy.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/operations/install/legacy.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/install/legacy.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/install/legacy.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/operations/install/legacy.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/operations/install/wheel.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/install/wheel.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/operations/install/wheel.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/operations/install/wheel.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/prepare.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/prepare.py new file mode 100644 index 0000000..0b61f20 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/operations/prepare.py @@ -0,0 +1,591 @@ +"""Prepares a distribution for installation +""" + +# The following comment should be removed at some point in the future. 
+# mypy: strict-optional=False + +import logging +import mimetypes +import os +import shutil +import sys + +from pip._vendor import requests +from pip._vendor.six import PY2 + +from pip._internal.distributions import ( + make_distribution_for_install_requirement, +) +from pip._internal.distributions.installed import InstalledDistribution +from pip._internal.exceptions import ( + DirectoryUrlHashUnsupported, + HashMismatch, + HashUnpinned, + InstallationError, + PreviousBuildDirError, + VcsHashUnsupported, +) +from pip._internal.utils.filesystem import copy2_fixed +from pip._internal.utils.hashes import MissingHashes +from pip._internal.utils.logging import indent_log +from pip._internal.utils.marker_files import write_delete_marker_file +from pip._internal.utils.misc import ( + ask_path_exists, + backup_dir, + display_path, + hide_url, + path_to_display, + rmtree, +) +from pip._internal.utils.temp_dir import TempDirectory +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from pip._internal.utils.unpacking import unpack_file +from pip._internal.vcs import vcs + +if MYPY_CHECK_RUNNING: + from typing import ( + Callable, List, Optional, Tuple, + ) + + from mypy_extensions import TypedDict + + from pip._internal.distributions import AbstractDistribution + from pip._internal.index.package_finder import PackageFinder + from pip._internal.models.link import Link + from pip._internal.network.download import Downloader + from pip._internal.req.req_install import InstallRequirement + from pip._internal.req.req_tracker import RequirementTracker + from pip._internal.utils.hashes import Hashes + + if PY2: + CopytreeKwargs = TypedDict( + 'CopytreeKwargs', + { + 'ignore': Callable[[str, List[str]], List[str]], + 'symlinks': bool, + }, + total=False, + ) + else: + CopytreeKwargs = TypedDict( + 'CopytreeKwargs', + { + 'copy_function': Callable[[str, str], None], + 'ignore': Callable[[str, List[str]], List[str]], + 'ignore_dangling_symlinks': bool, + 'symlinks': bool, + }, + total=False, + ) + +logger = logging.getLogger(__name__) + + +def _get_prepared_distribution( + req, # type: InstallRequirement + req_tracker, # type: RequirementTracker + finder, # type: PackageFinder + build_isolation # type: bool +): + # type: (...) -> AbstractDistribution + """Prepare a distribution for installation. + """ + abstract_dist = make_distribution_for_install_requirement(req) + with req_tracker.track(req): + abstract_dist.prepare_distribution_metadata(finder, build_isolation) + return abstract_dist + + +def unpack_vcs_link(link, location): + # type: (Link, str) -> None + vcs_backend = vcs.get_backend_for_scheme(link.scheme) + assert vcs_backend is not None + vcs_backend.unpack(location, url=hide_url(link.url)) + + +def _copy_file(filename, location, link): + # type: (str, str, Link) -> None + copy = True + download_location = os.path.join(location, link.filename) + if os.path.exists(download_location): + response = ask_path_exists( + 'The file {} exists. 
(i)gnore, (w)ipe, (b)ackup, (a)abort'.format( + display_path(download_location) + ), + ('i', 'w', 'b', 'a'), + ) + if response == 'i': + copy = False + elif response == 'w': + logger.warning('Deleting %s', display_path(download_location)) + os.remove(download_location) + elif response == 'b': + dest_file = backup_dir(download_location) + logger.warning( + 'Backing up %s to %s', + display_path(download_location), + display_path(dest_file), + ) + shutil.move(download_location, dest_file) + elif response == 'a': + sys.exit(-1) + if copy: + shutil.copy(filename, download_location) + logger.info('Saved %s', display_path(download_location)) + + +def unpack_http_url( + link, # type: Link + location, # type: str + downloader, # type: Downloader + download_dir=None, # type: Optional[str] + hashes=None, # type: Optional[Hashes] +): + # type: (...) -> str + temp_dir = TempDirectory(kind="unpack", globally_managed=True) + # If a download dir is specified, is the file already downloaded there? + already_downloaded_path = None + if download_dir: + already_downloaded_path = _check_download_dir( + link, download_dir, hashes + ) + + if already_downloaded_path: + from_path = already_downloaded_path + content_type = mimetypes.guess_type(from_path)[0] + else: + # let's download to a tmp dir + from_path, content_type = _download_http_url( + link, downloader, temp_dir.path, hashes + ) + + # unpack the archive to the build dir location. even when only + # downloading archives, they have to be unpacked to parse dependencies + unpack_file(from_path, location, content_type) + + return from_path + + +def _copy2_ignoring_special_files(src, dest): + # type: (str, str) -> None + """Copying special files is not supported, but as a convenience to users + we skip errors copying them. This supports tools that may create e.g. + socket files in the project source directory. + """ + try: + copy2_fixed(src, dest) + except shutil.SpecialFileError as e: + # SpecialFileError may be raised due to either the source or + # destination. If the destination was the cause then we would actually + # care, but since the destination directory is deleted prior to + # copy we ignore all of them assuming it is caused by the source. + logger.warning( + "Ignoring special file error '%s' encountered copying %s to %s.", + str(e), + path_to_display(src), + path_to_display(dest), + ) + + +def _copy_source_tree(source, target): + # type: (str, str) -> None + def ignore(d, names): + # type: (str, List[str]) -> List[str] + # Pulling in those directories can potentially be very slow, + # exclude the following directories if they appear in the top + # level dir (and only it). + # See discussion at https://github.com/pypa/pip/pull/6770 + return ['.tox', '.nox'] if d == source else [] + + kwargs = dict(ignore=ignore, symlinks=True) # type: CopytreeKwargs + + if not PY2: + # Python 2 does not support copy_function, so we only ignore + # errors on special file copy in Python 3. + kwargs['copy_function'] = _copy2_ignoring_special_files + + shutil.copytree(source, target, **kwargs) + + +def unpack_file_url( + link, # type: Link + location, # type: str + download_dir=None, # type: Optional[str] + hashes=None # type: Optional[Hashes] +): + # type: (...) -> Optional[str] + """Unpack link into location. 
+ """ + link_path = link.file_path + # If it's a url to a local directory + if link.is_existing_dir(): + if os.path.isdir(location): + rmtree(location) + _copy_source_tree(link_path, location) + return None + + # If a download dir is specified, is the file already there and valid? + already_downloaded_path = None + if download_dir: + already_downloaded_path = _check_download_dir( + link, download_dir, hashes + ) + + if already_downloaded_path: + from_path = already_downloaded_path + else: + from_path = link_path + + # If --require-hashes is off, `hashes` is either empty, the + # link's embedded hash, or MissingHashes; it is required to + # match. If --require-hashes is on, we are satisfied by any + # hash in `hashes` matching: a URL-based or an option-based + # one; no internet-sourced hash will be in `hashes`. + if hashes: + hashes.check_against_path(from_path) + + content_type = mimetypes.guess_type(from_path)[0] + + # unpack the archive to the build dir location. even when only downloading + # archives, they have to be unpacked to parse dependencies + unpack_file(from_path, location, content_type) + + return from_path + + +def unpack_url( + link, # type: Link + location, # type: str + downloader, # type: Downloader + download_dir=None, # type: Optional[str] + hashes=None, # type: Optional[Hashes] +): + # type: (...) -> Optional[str] + """Unpack link into location, downloading if required. + + :param hashes: A Hashes object, one of whose embedded hashes must match, + or HashMismatch will be raised. If the Hashes is empty, no matches are + required, and unhashable types of requirements (like VCS ones, which + would ordinarily raise HashUnsupported) are allowed. + """ + # non-editable vcs urls + if link.is_vcs: + unpack_vcs_link(link, location) + return None + + # file urls + elif link.is_file: + return unpack_file_url(link, location, download_dir, hashes=hashes) + + # http urls + else: + return unpack_http_url( + link, + location, + downloader, + download_dir, + hashes=hashes, + ) + + +def _download_http_url( + link, # type: Link + downloader, # type: Downloader + temp_dir, # type: str + hashes, # type: Optional[Hashes] +): + # type: (...) -> Tuple[str, str] + """Download link url into temp_dir using provided session""" + download = downloader(link) + + file_path = os.path.join(temp_dir, download.filename) + with open(file_path, 'wb') as content_file: + for chunk in download.chunks: + content_file.write(chunk) + + if hashes: + hashes.check_against_path(file_path) + + return file_path, download.response.headers.get('content-type', '') + + +def _check_download_dir(link, download_dir, hashes): + # type: (Link, str, Optional[Hashes]) -> Optional[str] + """ Check download_dir for previously downloaded file with correct hash + If a correct file is found return its path else None + """ + download_path = os.path.join(download_dir, link.filename) + + if not os.path.exists(download_path): + return None + + # If already downloaded, does its hash match? + logger.info('File was already downloaded %s', download_path) + if hashes: + try: + hashes.check_against_path(download_path) + except HashMismatch: + logger.warning( + 'Previously-downloaded file %s has bad hash. 
' + 'Re-downloading.', + download_path + ) + os.unlink(download_path) + return None + return download_path + + +class RequirementPreparer(object): + """Prepares a Requirement + """ + + def __init__( + self, + build_dir, # type: str + download_dir, # type: Optional[str] + src_dir, # type: str + wheel_download_dir, # type: Optional[str] + build_isolation, # type: bool + req_tracker, # type: RequirementTracker + downloader, # type: Downloader + finder, # type: PackageFinder + require_hashes, # type: bool + use_user_site, # type: bool + ): + # type: (...) -> None + super(RequirementPreparer, self).__init__() + + self.src_dir = src_dir + self.build_dir = build_dir + self.req_tracker = req_tracker + self.downloader = downloader + self.finder = finder + + # Where still-packed archives should be written to. If None, they are + # not saved, and are deleted immediately after unpacking. + self.download_dir = download_dir + + # Where still-packed .whl files should be written to. If None, they are + # written to the download_dir parameter. Separate to download_dir to + # permit only keeping wheel archives for pip wheel. + self.wheel_download_dir = wheel_download_dir + + # NOTE + # download_dir and wheel_download_dir overlap semantically and may + # be combined if we're willing to have non-wheel archives present in + # the wheelhouse output by 'pip wheel'. + + # Is build isolation allowed? + self.build_isolation = build_isolation + + # Should hash-checking be required? + self.require_hashes = require_hashes + + # Should install in user site-packages? + self.use_user_site = use_user_site + + @property + def _download_should_save(self): + # type: () -> bool + if not self.download_dir: + return False + + if os.path.exists(self.download_dir): + return True + + logger.critical('Could not find download directory') + raise InstallationError( + "Could not find or access download directory '{}'" + .format(self.download_dir)) + + def prepare_linked_requirement( + self, + req, # type: InstallRequirement + ): + # type: (...) -> AbstractDistribution + """Prepare a requirement that would be obtained from req.link + """ + assert req.link + link = req.link + + # TODO: Breakup into smaller functions + if link.scheme == 'file': + path = link.file_path + logger.info('Processing %s', display_path(path)) + else: + logger.info('Collecting %s', req.req or req) + + with indent_log(): + # @@ if filesystem packages are not marked + # editable in a req, a non deterministic error + # occurs when the script attempts to unpack the + # build directory + # Since source_dir is only set for editable requirements. + assert req.source_dir is None + req.ensure_has_source_dir(self.build_dir) + # If a checkout exists, it's unwise to keep going. version + # inconsistencies are logged later, but do not fail the + # installation. + # FIXME: this won't upgrade when there's an existing + # package unpacked in `req.source_dir` + if os.path.exists(os.path.join(req.source_dir, 'setup.py')): + raise PreviousBuildDirError( + "pip can't proceed with requirements '{}' due to a" + " pre-existing build directory ({}). This is " + "likely due to a previous installation that failed" + ". pip is being responsible and not assuming it " + "can delete this. Please delete it and try again." + .format(req, req.source_dir) + ) + + # Now that we have the real link, we can tell what kind of + # requirements we have and raise some more informative errors + # than otherwise. (For example, we can raise VcsHashUnsupported + # for a VCS URL rather than HashMissing.) 
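# A minimal sketch of the hash checking exercised below (assumption: pip 20.x's
# Hashes helper accepts a mapping of hash name -> allowed hex digests, which is
# how it is consumed via check_against_path() and is_hash_allowed() in this
# diff; the file name and digest are placeholders):
#
#     from pip._internal.utils.hashes import Hashes
#
#     hashes = Hashes({'sha256': ['<expected-hex-digest>']})
#     hashes.check_against_path('example_pkg-1.0.tar.gz')  # raises HashMismatch on a bad file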
+ if self.require_hashes: + # We could check these first 2 conditions inside + # unpack_url and save repetition of conditions, but then + # we would report less-useful error messages for + # unhashable requirements, complaining that there's no + # hash provided. + if link.is_vcs: + raise VcsHashUnsupported() + elif link.is_existing_dir(): + raise DirectoryUrlHashUnsupported() + if not req.original_link and not req.is_pinned: + # Unpinned packages are asking for trouble when a new + # version is uploaded. This isn't a security check, but + # it saves users a surprising hash mismatch in the + # future. + # + # file:/// URLs aren't pinnable, so don't complain + # about them not being pinned. + raise HashUnpinned() + + hashes = req.hashes(trust_internet=not self.require_hashes) + if self.require_hashes and not hashes: + # Known-good hashes are missing for this requirement, so + # shim it with a facade object that will provoke hash + # computation and then raise a HashMissing exception + # showing the user what the hash should be. + hashes = MissingHashes() + + download_dir = self.download_dir + if link.is_wheel and self.wheel_download_dir: + # when doing 'pip wheel` we download wheels to a + # dedicated dir. + download_dir = self.wheel_download_dir + + try: + local_path = unpack_url( + link, req.source_dir, self.downloader, download_dir, + hashes=hashes, + ) + except requests.HTTPError as exc: + logger.critical( + 'Could not install requirement %s because of error %s', + req, + exc, + ) + raise InstallationError( + 'Could not install requirement {} because of HTTP ' + 'error {} for URL {}'.format(req, exc, link) + ) + + # For use in later processing, preserve the file path on the + # requirement. + if local_path: + req.local_file_path = local_path + + if link.is_wheel: + if download_dir: + # When downloading, we only unpack wheels to get + # metadata. + autodelete_unpacked = True + else: + # When installing a wheel, we use the unpacked + # wheel. + autodelete_unpacked = False + else: + # We always delete unpacked sdists after pip runs. + autodelete_unpacked = True + if autodelete_unpacked: + write_delete_marker_file(req.source_dir) + + abstract_dist = _get_prepared_distribution( + req, self.req_tracker, self.finder, self.build_isolation, + ) + + if download_dir: + if link.is_existing_dir(): + logger.info('Link is a directory, ignoring download_dir') + elif local_path and not os.path.exists( + os.path.join(download_dir, link.filename) + ): + _copy_file(local_path, download_dir, link) + + if self._download_should_save: + # Make a .zip of the source_dir we already created. + if link.is_vcs: + req.archive(self.download_dir) + return abstract_dist + + def prepare_editable_requirement( + self, + req, # type: InstallRequirement + ): + # type: (...) 
-> AbstractDistribution + """Prepare an editable requirement + """ + assert req.editable, "cannot prepare a non-editable req as editable" + + logger.info('Obtaining %s', req) + + with indent_log(): + if self.require_hashes: + raise InstallationError( + 'The editable requirement {} cannot be installed when ' + 'requiring hashes, because there is no single file to ' + 'hash.'.format(req) + ) + req.ensure_has_source_dir(self.src_dir) + req.update_editable(not self._download_should_save) + + abstract_dist = _get_prepared_distribution( + req, self.req_tracker, self.finder, self.build_isolation, + ) + + if self._download_should_save: + req.archive(self.download_dir) + req.check_if_exists(self.use_user_site) + + return abstract_dist + + def prepare_installed_requirement( + self, + req, # type: InstallRequirement + skip_reason # type: str + ): + # type: (...) -> AbstractDistribution + """Prepare an already-installed requirement + """ + assert req.satisfied_by, "req should have been satisfied but isn't" + assert skip_reason is not None, ( + "did not get skip reason skipped but req.satisfied_by " + "is set to {}".format(req.satisfied_by) + ) + logger.info( + 'Requirement %s: %s (%s)', + skip_reason, req, req.satisfied_by.version + ) + with indent_log(): + if self.require_hashes: + logger.debug( + 'Since it is already installed, we are trusting this ' + 'package without checking its hash. To ensure a ' + 'completely repeatable environment, install into an ' + 'empty virtualenv.' + ) + abstract_dist = InstalledDistribution(req) + + return abstract_dist diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/pep425tags.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/pep425tags.py new file mode 100644 index 0000000..a2386ee --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/pep425tags.py @@ -0,0 +1,167 @@ +"""Generate and work with PEP 425 Compatibility Tags.""" +from __future__ import absolute_import + +import logging +import re + +from pip._vendor.packaging.tags import ( + Tag, + compatible_tags, + cpython_tags, + generic_tags, + interpreter_name, + interpreter_version, + mac_platforms, +) + +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import List, Optional, Tuple + + from pip._vendor.packaging.tags import PythonVersion + +logger = logging.getLogger(__name__) + +_osx_arch_pat = re.compile(r'(.+)_(\d+)_(\d+)_(.+)') + + +def version_info_to_nodot(version_info): + # type: (Tuple[int, ...]) -> str + # Only use up to the first two numbers. + return ''.join(map(str, version_info[:2])) + + +def _mac_platforms(arch): + # type: (str) -> List[str] + match = _osx_arch_pat.match(arch) + if match: + name, major, minor, actual_arch = match.groups() + mac_version = (int(major), int(minor)) + arches = [ + # Since we have always only checked that the platform starts + # with "macosx", for backwards-compatibility we extract the + # actual prefix provided by the user in case they provided + # something like "macosxcustom_". It may be good to remove + # this as undocumented or deprecate it in the future. + '{}_{}'.format(name, arch[len('macosx_'):]) + for arch in mac_platforms(mac_version, actual_arch) + ] + else: + # arch pattern didn't match (?!) 
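# Worked examples for the helpers above (illustrative, assuming the vendored
# packaging.tags behaviour):
#
#     version_info_to_nodot((3, 7, 4))         # -> '37' (only major.minor are used)
#     _mac_platforms('macosx_10_9_x86_64')[0]  # -> 'macosx_10_9_x86_64'
#     _mac_platforms('macosx_11_arm64')        # -> ['macosx_11_arm64'] (pattern above does not match)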
+ arches = [arch] + return arches + + +def _custom_manylinux_platforms(arch): + # type: (str) -> List[str] + arches = [arch] + arch_prefix, arch_sep, arch_suffix = arch.partition('_') + if arch_prefix == 'manylinux2014': + # manylinux1/manylinux2010 wheels run on most manylinux2014 systems + # with the exception of wheels depending on ncurses. PEP 599 states + # manylinux1/manylinux2010 wheels should be considered + # manylinux2014 wheels: + # https://www.python.org/dev/peps/pep-0599/#backwards-compatibility-with-manylinux2010-wheels + if arch_suffix in {'i686', 'x86_64'}: + arches.append('manylinux2010' + arch_sep + arch_suffix) + arches.append('manylinux1' + arch_sep + arch_suffix) + elif arch_prefix == 'manylinux2010': + # manylinux1 wheels run on most manylinux2010 systems with the + # exception of wheels depending on ncurses. PEP 571 states + # manylinux1 wheels should be considered manylinux2010 wheels: + # https://www.python.org/dev/peps/pep-0571/#backwards-compatibility-with-manylinux1-wheels + arches.append('manylinux1' + arch_sep + arch_suffix) + return arches + + +def _get_custom_platforms(arch): + # type: (str) -> List[str] + arch_prefix, arch_sep, arch_suffix = arch.partition('_') + if arch.startswith('macosx'): + arches = _mac_platforms(arch) + elif arch_prefix in ['manylinux2014', 'manylinux2010']: + arches = _custom_manylinux_platforms(arch) + else: + arches = [arch] + return arches + + +def _get_python_version(version): + # type: (str) -> PythonVersion + if len(version) > 1: + return int(version[0]), int(version[1:]) + else: + return (int(version[0]),) + + +def _get_custom_interpreter(implementation=None, version=None): + # type: (Optional[str], Optional[str]) -> str + if implementation is None: + implementation = interpreter_name() + if version is None: + version = interpreter_version() + return "{}{}".format(implementation, version) + + +def get_supported( + version=None, # type: Optional[str] + platform=None, # type: Optional[str] + impl=None, # type: Optional[str] + abi=None # type: Optional[str] +): + # type: (...) -> List[Tag] + """Return a list of supported tags for each version specified in + `versions`. + + :param version: a string version, of the form "33" or "32", + or None. The version will be assumed to support our ABI. + :param platform: specify the exact platform you want valid + tags for, or None. If None, use the local system platform. + :param impl: specify the exact implementation you want valid + tags for, or None. If None, use the local interpreter impl. + :param abi: specify the exact abi you want valid + tags for, or None. If None, use the local interpreter abi. 
+ """ + supported = [] # type: List[Tag] + + python_version = None # type: Optional[PythonVersion] + if version is not None: + python_version = _get_python_version(version) + + interpreter = _get_custom_interpreter(impl, version) + + abis = None # type: Optional[List[str]] + if abi is not None: + abis = [abi] + + platforms = None # type: Optional[List[str]] + if platform is not None: + platforms = _get_custom_platforms(platform) + + is_cpython = (impl or interpreter_name()) == "cp" + if is_cpython: + supported.extend( + cpython_tags( + python_version=python_version, + abis=abis, + platforms=platforms, + ) + ) + else: + supported.extend( + generic_tags( + interpreter=interpreter, + abis=abis, + platforms=platforms, + ) + ) + supported.extend( + compatible_tags( + python_version=python_version, + interpreter=interpreter, + platforms=platforms, + ) + ) + + return supported diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/pyproject.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/pyproject.py new file mode 100644 index 0000000..cf614fd --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/pyproject.py @@ -0,0 +1,196 @@ +from __future__ import absolute_import + +import io +import os +import sys +from collections import namedtuple + +from pip._vendor import pytoml, six +from pip._vendor.packaging.requirements import InvalidRequirement, Requirement + +from pip._internal.exceptions import InstallationError +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import Any, Optional, List + + +def _is_list_of_str(obj): + # type: (Any) -> bool + return ( + isinstance(obj, list) and + all(isinstance(item, six.string_types) for item in obj) + ) + + +def make_pyproject_path(unpacked_source_directory): + # type: (str) -> str + path = os.path.join(unpacked_source_directory, 'pyproject.toml') + + # Python2 __file__ should not be unicode + if six.PY2 and isinstance(path, six.text_type): + path = path.encode(sys.getfilesystemencoding()) + + return path + + +BuildSystemDetails = namedtuple('BuildSystemDetails', [ + 'requires', 'backend', 'check', 'backend_path' +]) + + +def load_pyproject_toml( + use_pep517, # type: Optional[bool] + pyproject_toml, # type: str + setup_py, # type: str + req_name # type: str +): + # type: (...) -> Optional[BuildSystemDetails] + """Load the pyproject.toml file. + + Parameters: + use_pep517 - Has the user requested PEP 517 processing? None + means the user hasn't explicitly specified. + pyproject_toml - Location of the project's pyproject.toml file + setup_py - Location of the project's setup.py file + req_name - The name of the requirement we're processing (for + error reporting) + + Returns: + None if we should use the legacy code path, otherwise a tuple + ( + requirements from pyproject.toml, + name of PEP 517 backend, + requirements we should check are installed after setting + up the build environment + directory paths to import the backend from (backend-path), + relative to the project root. + ) + """ + has_pyproject = os.path.isfile(pyproject_toml) + has_setup = os.path.isfile(setup_py) + + if has_pyproject: + with io.open(pyproject_toml, encoding="utf-8") as f: + pp_toml = pytoml.load(f) + build_system = pp_toml.get("build-system") + else: + build_system = None + + # The following cases must use PEP 517 + # We check for use_pep517 being non-None and falsey because that means + # the user explicitly requested --no-use-pep517. 
The value 0 as + # opposed to False can occur when the value is provided via an + # environment variable or config file option (due to the quirk of + # strtobool() returning an integer in pip's configuration code). + if has_pyproject and not has_setup: + if use_pep517 is not None and not use_pep517: + raise InstallationError( + "Disabling PEP 517 processing is invalid: " + "project does not have a setup.py" + ) + use_pep517 = True + elif build_system and "build-backend" in build_system: + if use_pep517 is not None and not use_pep517: + raise InstallationError( + "Disabling PEP 517 processing is invalid: " + "project specifies a build backend of {} " + "in pyproject.toml".format( + build_system["build-backend"] + ) + ) + use_pep517 = True + + # If we haven't worked out whether to use PEP 517 yet, + # and the user hasn't explicitly stated a preference, + # we do so if the project has a pyproject.toml file. + elif use_pep517 is None: + use_pep517 = has_pyproject + + # At this point, we know whether we're going to use PEP 517. + assert use_pep517 is not None + + # If we're using the legacy code path, there is nothing further + # for us to do here. + if not use_pep517: + return None + + if build_system is None: + # Either the user has a pyproject.toml with no build-system + # section, or the user has no pyproject.toml, but has opted in + # explicitly via --use-pep517. + # In the absence of any explicit backend specification, we + # assume the setuptools backend that most closely emulates the + # traditional direct setup.py execution, and require wheel and + # a version of setuptools that supports that backend. + + build_system = { + "requires": ["setuptools>=40.8.0", "wheel"], + "build-backend": "setuptools.build_meta:__legacy__", + } + + # If we're using PEP 517, we have build system information (either + # from pyproject.toml, or defaulted by the code above). + # Note that at this point, we do not know if the user has actually + # specified a backend, though. + assert build_system is not None + + # Ensure that the build-system section in pyproject.toml conforms + # to PEP 518. + error_template = ( + "{package} has a pyproject.toml file that does not comply " + "with PEP 518: {reason}" + ) + + # Specifying the build-system table but not the requires key is invalid + if "requires" not in build_system: + raise InstallationError( + error_template.format(package=req_name, reason=( + "it has a 'build-system' table but not " + "'build-system.requires' which is mandatory in the table" + )) + ) + + # Error out if requires is not a list of strings + requires = build_system["requires"] + if not _is_list_of_str(requires): + raise InstallationError(error_template.format( + package=req_name, + reason="'build-system.requires' is not a list of strings.", + )) + + # Each requirement must be valid as per PEP 508 + for requirement in requires: + try: + Requirement(requirement) + except InvalidRequirement: + raise InstallationError( + error_template.format( + package=req_name, + reason=( + "'build-system.requires' contains an invalid " + "requirement: {!r}".format(requirement) + ), + ) + ) + + backend = build_system.get("build-backend") + backend_path = build_system.get("backend-path", []) + check = [] # type: List[str] + if backend is None: + # If the user didn't specify a backend, we assume they want to use + # the setuptools backend. But we can't be sure they have included + # a version of setuptools which supplies the backend, or wheel + # (which is needed by the backend) in their requirements. 
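# A rough, self-contained illustration of the PEP 518 checks applied above;
# the table contents are hypothetical examples, not taken from any project:
from packaging.requirements import Requirement, InvalidRequirement

example_build_system = {
    "requires": ["setuptools>=40.8.0", "wheel"],
    "build-backend": "setuptools.build_meta",
}

# 'requires' is mandatory and must be a list of valid PEP 508 strings.
assert "requires" in example_build_system
assert all(isinstance(r, str) for r in example_build_system["requires"])
for r in example_build_system["requires"]:
    try:
        Requirement(r)
    except InvalidRequirement:
        raise SystemExit("invalid requirement in build-system.requires: %r" % r)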
So we + # make a note to check that those requirements are present once + # we have set up the environment. + # This is quite a lot of work to check for a very specific case. But + # the problem is, that case is potentially quite common - projects that + # adopted PEP 518 early for the ability to specify requirements to + # execute setup.py, but never considered needing to mention the build + # tools themselves. The original PEP 518 code had a similar check (but + # implemented in a different way). + backend = "setuptools.build_meta:__legacy__" + check = ["setuptools>=40.8.0", "wheel"] + + return BuildSystemDetails(requires, backend, check, backend_path) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__init__.py new file mode 100644 index 0000000..d2d027a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__init__.py @@ -0,0 +1,92 @@ +# The following comment should be removed at some point in the future. +# mypy: strict-optional=False + +from __future__ import absolute_import + +import logging + +from pip._internal.utils.logging import indent_log +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +from .req_file import parse_requirements +from .req_install import InstallRequirement +from .req_set import RequirementSet + +if MYPY_CHECK_RUNNING: + from typing import Any, List, Sequence + +__all__ = [ + "RequirementSet", "InstallRequirement", + "parse_requirements", "install_given_reqs", +] + +logger = logging.getLogger(__name__) + + +class InstallationResult(object): + def __init__(self, name): + # type: (str) -> None + self.name = name + + def __repr__(self): + # type: () -> str + return "InstallationResult(name={!r})".format(self.name) + + +def install_given_reqs( + to_install, # type: List[InstallRequirement] + install_options, # type: List[str] + global_options=(), # type: Sequence[str] + *args, # type: Any + **kwargs # type: Any +): + # type: (...) -> List[InstallationResult] + """ + Install everything in the given list. 
+ + (to be called after having downloaded and unpacked the packages) + """ + + if to_install: + logger.info( + 'Installing collected packages: %s', + ', '.join([req.name for req in to_install]), + ) + + installed = [] + + with indent_log(): + for requirement in to_install: + if requirement.should_reinstall: + logger.info('Attempting uninstall: %s', requirement.name) + with indent_log(): + uninstalled_pathset = requirement.uninstall( + auto_confirm=True + ) + try: + requirement.install( + install_options, + global_options, + *args, + **kwargs + ) + except Exception: + should_rollback = ( + requirement.should_reinstall and + not requirement.install_succeeded + ) + # if install did not succeed, rollback previous uninstall + if should_rollback: + uninstalled_pathset.rollback() + raise + else: + should_commit = ( + requirement.should_reinstall and + requirement.install_succeeded + ) + if should_commit: + uninstalled_pathset.commit() + + installed.append(InstallationResult(requirement.name)) + + return installed diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..ff55707 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/constructors.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/constructors.cpython-37.pyc new file mode 100644 index 0000000..7b81cac Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/constructors.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/req_file.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/req_file.cpython-37.pyc new file mode 100644 index 0000000..b57d3e9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/req_file.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/req_install.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/req_install.cpython-37.pyc new file mode 100644 index 0000000..c437bfa Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/req_install.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/req_set.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/req_set.cpython-37.pyc new file mode 100644 index 0000000..fc02aa7 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/req_set.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/req_tracker.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/req_tracker.cpython-37.pyc new file mode 100644 index 0000000..69beb9a Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/req_tracker.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/req_uninstall.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/req_uninstall.cpython-37.pyc new file mode 100644 index 0000000..7b68907 Binary files /dev/null and 
b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/__pycache__/req_uninstall.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/req/constructors.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/constructors.py new file mode 100644 index 0000000..1f3cd8a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/constructors.py @@ -0,0 +1,436 @@ +"""Backing implementation for InstallRequirement's various constructors + +The idea here is that these formed a major chunk of InstallRequirement's size +so, moving them and support code dedicated to them outside of that class +helps creates for better understandability for the rest of the code. + +These are meant to be used elsewhere within pip to create instances of +InstallRequirement. +""" + +# The following comment should be removed at some point in the future. +# mypy: strict-optional=False + +import logging +import os +import re + +from pip._vendor.packaging.markers import Marker +from pip._vendor.packaging.requirements import InvalidRequirement, Requirement +from pip._vendor.packaging.specifiers import Specifier +from pip._vendor.pkg_resources import RequirementParseError, parse_requirements + +from pip._internal.exceptions import InstallationError +from pip._internal.models.index import PyPI, TestPyPI +from pip._internal.models.link import Link +from pip._internal.models.wheel import Wheel +from pip._internal.pyproject import make_pyproject_path +from pip._internal.req.req_install import InstallRequirement +from pip._internal.utils.filetypes import ARCHIVE_EXTENSIONS +from pip._internal.utils.misc import is_installable_dir, splitext +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from pip._internal.utils.urls import path_to_url +from pip._internal.vcs import is_url, vcs + +if MYPY_CHECK_RUNNING: + from typing import ( + Any, Dict, Optional, Set, Tuple, Union, + ) + from pip._internal.cache import WheelCache + + +__all__ = [ + "install_req_from_editable", "install_req_from_line", + "parse_editable" +] + +logger = logging.getLogger(__name__) +operators = Specifier._operators.keys() + + +def is_archive_file(name): + # type: (str) -> bool + """Return True if `name` is a considered as an archive file.""" + ext = splitext(name)[1].lower() + if ext in ARCHIVE_EXTENSIONS: + return True + return False + + +def _strip_extras(path): + # type: (str) -> Tuple[str, Optional[str]] + m = re.match(r'^(.+)(\[[^\]]+\])$', path) + extras = None + if m: + path_no_extras = m.group(1) + extras = m.group(2) + else: + path_no_extras = path + + return path_no_extras, extras + + +def convert_extras(extras): + # type: (Optional[str]) -> Set[str] + if not extras: + return set() + return Requirement("placeholder" + extras.lower()).extras + + +def parse_editable(editable_req): + # type: (str) -> Tuple[Optional[str], str, Optional[Set[str]]] + """Parses an editable requirement into: + - a requirement name + - an URL + - extras + - editable options + Accepted requirements: + svn+http://blahblah@rev#egg=Foobar[baz]&subdirectory=version_subdir + .[some_extra] + """ + + url = editable_req + + # If a file path is specified with extras, strip off the extras. + url_no_extras, extras = _strip_extras(url) + + if os.path.isdir(url_no_extras): + if not os.path.exists(os.path.join(url_no_extras, 'setup.py')): + msg = ( + 'File "setup.py" not found. 
Directory cannot be installed ' + 'in editable mode: {}'.format(os.path.abspath(url_no_extras)) + ) + pyproject_path = make_pyproject_path(url_no_extras) + if os.path.isfile(pyproject_path): + msg += ( + '\n(A "pyproject.toml" file was found, but editable ' + 'mode currently requires a setup.py based build.)' + ) + raise InstallationError(msg) + + # Treating it as code that has already been checked out + url_no_extras = path_to_url(url_no_extras) + + if url_no_extras.lower().startswith('file:'): + package_name = Link(url_no_extras).egg_fragment + if extras: + return ( + package_name, + url_no_extras, + Requirement("placeholder" + extras.lower()).extras, + ) + else: + return package_name, url_no_extras, None + + for version_control in vcs: + if url.lower().startswith('%s:' % version_control): + url = '%s+%s' % (version_control, url) + break + + if '+' not in url: + raise InstallationError( + '{} is not a valid editable requirement. ' + 'It should either be a path to a local project or a VCS URL ' + '(beginning with svn+, git+, hg+, or bzr+).'.format(editable_req) + ) + + vc_type = url.split('+', 1)[0].lower() + + if not vcs.get_backend(vc_type): + error_message = 'For --editable=%s only ' % editable_req + \ + ', '.join([backend.name + '+URL' for backend in vcs.backends]) + \ + ' is currently supported' + raise InstallationError(error_message) + + package_name = Link(url).egg_fragment + if not package_name: + raise InstallationError( + "Could not detect requirement name for '%s', please specify one " + "with #egg=your_package_name" % editable_req + ) + return package_name, url, None + + +def deduce_helpful_msg(req): + # type: (str) -> str + """Returns helpful msg in case requirements file does not exist, + or cannot be parsed. + + :params req: Requirements file path + """ + msg = "" + if os.path.exists(req): + msg = " It does exist." + # Try to parse and check if it is a requirements file. + try: + with open(req, 'r') as fp: + # parse first line only + next(parse_requirements(fp.read())) + msg += " The argument you provided " + \ + "(%s) appears to be a" % (req) + \ + " requirements file. If that is the" + \ + " case, use the '-r' flag to install" + \ + " the packages specified within it." + except RequirementParseError: + logger.debug("Cannot parse '%s' as requirements \ + file" % (req), exc_info=True) + else: + msg += " File '%s' does not exist." % (req) + return msg + + +class RequirementParts(object): + def __init__( + self, + requirement, # type: Optional[Requirement] + link, # type: Optional[Link] + markers, # type: Optional[Marker] + extras, # type: Set[str] + ): + self.requirement = requirement + self.link = link + self.markers = markers + self.extras = extras + + +def parse_req_from_editable(editable_req): + # type: (str) -> RequirementParts + name, url, extras_override = parse_editable(editable_req) + + if name is not None: + try: + req = Requirement(name) + except InvalidRequirement: + raise InstallationError("Invalid requirement: '%s'" % name) + else: + req = None + + link = Link(url) + + return RequirementParts(req, link, None, extras_override) + + +# ---- The actual constructors follow ---- + + +def install_req_from_editable( + editable_req, # type: str + comes_from=None, # type: Optional[str] + use_pep517=None, # type: Optional[bool] + isolated=False, # type: bool + options=None, # type: Optional[Dict[str, Any]] + wheel_cache=None, # type: Optional[WheelCache] + constraint=False # type: bool +): + # type: (...) 
-> InstallRequirement + + parts = parse_req_from_editable(editable_req) + + source_dir = parts.link.file_path if parts.link.scheme == 'file' else None + + return InstallRequirement( + parts.requirement, comes_from, source_dir=source_dir, + editable=True, + link=parts.link, + constraint=constraint, + use_pep517=use_pep517, + isolated=isolated, + options=options if options else {}, + wheel_cache=wheel_cache, + extras=parts.extras, + ) + + +def _looks_like_path(name): + # type: (str) -> bool + """Checks whether the string "looks like" a path on the filesystem. + + This does not check whether the target actually exists, only judge from the + appearance. + + Returns true if any of the following conditions is true: + * a path separator is found (either os.path.sep or os.path.altsep); + * a dot is found (which represents the current directory). + """ + if os.path.sep in name: + return True + if os.path.altsep is not None and os.path.altsep in name: + return True + if name.startswith("."): + return True + return False + + +def _get_url_from_path(path, name): + # type: (str, str) -> str + """ + First, it checks whether a provided path is an installable directory + (e.g. it has a setup.py). If it is, returns the path. + + If false, check if the path is an archive file (such as a .whl). + The function checks if the path is a file. If false, if the path has + an @, it will treat it as a PEP 440 URL requirement and return the path. + """ + if _looks_like_path(name) and os.path.isdir(path): + if is_installable_dir(path): + return path_to_url(path) + raise InstallationError( + "Directory %r is not installable. Neither 'setup.py' " + "nor 'pyproject.toml' found." % name + ) + if not is_archive_file(path): + return None + if os.path.isfile(path): + return path_to_url(path) + urlreq_parts = name.split('@', 1) + if len(urlreq_parts) >= 2 and not _looks_like_path(urlreq_parts[0]): + # If the path contains '@' and the part before it does not look + # like a path, try to treat it as a PEP 440 URL req instead. + return None + logger.warning( + 'Requirement %r looks like a filename, but the ' + 'file does not exist', + name + ) + return path_to_url(path) + + +def parse_req_from_line(name, line_source): + # type: (str, Optional[str]) -> RequirementParts + if is_url(name): + marker_sep = '; ' + else: + marker_sep = ';' + if marker_sep in name: + name, markers_as_string = name.split(marker_sep, 1) + markers_as_string = markers_as_string.strip() + if not markers_as_string: + markers = None + else: + markers = Marker(markers_as_string) + else: + markers = None + name = name.strip() + req_as_string = None + path = os.path.normpath(os.path.abspath(name)) + link = None + extras_as_string = None + + if is_url(name): + link = Link(name) + else: + p, extras_as_string = _strip_extras(path) + url = _get_url_from_path(p, name) + if url is not None: + link = Link(url) + + # it's a local file, dir, or url + if link: + # Handle relative file URLs + if link.scheme == 'file' and re.search(r'\.\./', link.url): + link = Link( + path_to_url(os.path.normpath(os.path.abspath(link.path)))) + # wheel file + if link.is_wheel: + wheel = Wheel(link.filename) # can raise InvalidWheelFilename + req_as_string = "%s==%s" % (wheel.name, wheel.version) + else: + # set the req to the egg fragment. 
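# Small sketch of the "#egg=" convention referred to above: the egg fragment
# supplies a name for an otherwise unnamed URL requirement (the URL below is
# a made-up example):
from urllib.parse import urlparse

url = "git+https://example.com/some/repo.git#egg=SomeProject"
fragment = urlparse(url).fragment          # "egg=SomeProject"
egg_name = fragment[len("egg="):] if fragment.startswith("egg=") else None
# egg_name == "SomeProject"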
when it's not there, this + # will become an 'unnamed' requirement + req_as_string = link.egg_fragment + + # a requirement specifier + else: + req_as_string = name + + extras = convert_extras(extras_as_string) + + def with_source(text): + # type: (str) -> str + if not line_source: + return text + return '{} (from {})'.format(text, line_source) + + if req_as_string is not None: + try: + req = Requirement(req_as_string) + except InvalidRequirement: + if os.path.sep in req_as_string: + add_msg = "It looks like a path." + add_msg += deduce_helpful_msg(req_as_string) + elif ('=' in req_as_string and + not any(op in req_as_string for op in operators)): + add_msg = "= is not a valid operator. Did you mean == ?" + else: + add_msg = '' + msg = with_source( + 'Invalid requirement: {!r}'.format(req_as_string) + ) + if add_msg: + msg += '\nHint: {}'.format(add_msg) + raise InstallationError(msg) + else: + req = None + + return RequirementParts(req, link, markers, extras) + + +def install_req_from_line( + name, # type: str + comes_from=None, # type: Optional[Union[str, InstallRequirement]] + use_pep517=None, # type: Optional[bool] + isolated=False, # type: bool + options=None, # type: Optional[Dict[str, Any]] + wheel_cache=None, # type: Optional[WheelCache] + constraint=False, # type: bool + line_source=None, # type: Optional[str] +): + # type: (...) -> InstallRequirement + """Creates an InstallRequirement from a name, which might be a + requirement, directory containing 'setup.py', filename, or URL. + + :param line_source: An optional string describing where the line is from, + for logging purposes in case of an error. + """ + parts = parse_req_from_line(name, line_source) + + return InstallRequirement( + parts.requirement, comes_from, link=parts.link, markers=parts.markers, + use_pep517=use_pep517, isolated=isolated, + options=options if options else {}, + wheel_cache=wheel_cache, + constraint=constraint, + extras=parts.extras, + ) + + +def install_req_from_req_string( + req_string, # type: str + comes_from=None, # type: Optional[InstallRequirement] + isolated=False, # type: bool + wheel_cache=None, # type: Optional[WheelCache] + use_pep517=None # type: Optional[bool] +): + # type: (...) -> InstallRequirement + try: + req = Requirement(req_string) + except InvalidRequirement: + raise InstallationError("Invalid requirement: '%s'" % req_string) + + domains_not_allowed = [ + PyPI.file_storage_domain, + TestPyPI.file_storage_domain, + ] + if (req.url and comes_from and comes_from.link and + comes_from.link.netloc in domains_not_allowed): + # Explicitly disallow pypi packages that depend on external urls + raise InstallationError( + "Packages installed from PyPI cannot depend on packages " + "which are not also hosted on PyPI.\n" + "%s depends on %s " % (comes_from.name, req) + ) + + return InstallRequirement( + req, comes_from, isolated=isolated, wheel_cache=wheel_cache, + use_pep517=use_pep517 + ) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/req/req_file.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/req_file.py new file mode 100644 index 0000000..8c78104 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/req_file.py @@ -0,0 +1,546 @@ +""" +Requirements file parsing +""" + +# The following comment should be removed at some point in the future. 
+# mypy: strict-optional=False + +from __future__ import absolute_import + +import optparse +import os +import re +import shlex +import sys + +from pip._vendor.six.moves import filterfalse +from pip._vendor.six.moves.urllib import parse as urllib_parse + +from pip._internal.cli import cmdoptions +from pip._internal.exceptions import ( + InstallationError, + RequirementsFileParseError, +) +from pip._internal.models.search_scope import SearchScope +from pip._internal.req.constructors import ( + install_req_from_editable, + install_req_from_line, +) +from pip._internal.utils.encoding import auto_decode +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from pip._internal.utils.urls import get_url_scheme + +if MYPY_CHECK_RUNNING: + from optparse import Values + from typing import ( + Any, Callable, Iterator, List, NoReturn, Optional, Text, Tuple, + ) + + from pip._internal.req import InstallRequirement + from pip._internal.cache import WheelCache + from pip._internal.index.package_finder import PackageFinder + from pip._internal.network.session import PipSession + + ReqFileLines = Iterator[Tuple[int, Text]] + + LineParser = Callable[[Text], Tuple[str, Values]] + + +__all__ = ['parse_requirements'] + +SCHEME_RE = re.compile(r'^(http|https|file):', re.I) +COMMENT_RE = re.compile(r'(^|\s+)#.*$') + +# Matches environment variable-style values in '${MY_VARIABLE_1}' with the +# variable name consisting of only uppercase letters, digits or the '_' +# (underscore). This follows the POSIX standard defined in IEEE Std 1003.1, +# 2013 Edition. +ENV_VAR_RE = re.compile(r'(?P\$\{(?P[A-Z0-9_]+)\})') + +SUPPORTED_OPTIONS = [ + cmdoptions.index_url, + cmdoptions.extra_index_url, + cmdoptions.no_index, + cmdoptions.constraints, + cmdoptions.requirements, + cmdoptions.editable, + cmdoptions.find_links, + cmdoptions.no_binary, + cmdoptions.only_binary, + cmdoptions.require_hashes, + cmdoptions.pre, + cmdoptions.trusted_host, + cmdoptions.always_unzip, # Deprecated +] # type: List[Callable[..., optparse.Option]] + +# options to be passed to requirements +SUPPORTED_OPTIONS_REQ = [ + cmdoptions.install_options, + cmdoptions.global_options, + cmdoptions.hash, +] # type: List[Callable[..., optparse.Option]] + +# the 'dest' string values +SUPPORTED_OPTIONS_REQ_DEST = [str(o().dest) for o in SUPPORTED_OPTIONS_REQ] + + +class ParsedLine(object): + def __init__( + self, + filename, # type: str + lineno, # type: int + comes_from, # type: str + args, # type: str + opts, # type: Values + constraint, # type: bool + ): + # type: (...) -> None + self.filename = filename + self.lineno = lineno + self.comes_from = comes_from + self.args = args + self.opts = opts + self.constraint = constraint + + +def parse_requirements( + filename, # type: str + session, # type: PipSession + finder=None, # type: Optional[PackageFinder] + comes_from=None, # type: Optional[str] + options=None, # type: Optional[optparse.Values] + constraint=False, # type: bool + wheel_cache=None, # type: Optional[WheelCache] + use_pep517=None # type: Optional[bool] +): + # type: (...) -> Iterator[InstallRequirement] + """Parse a requirements file and yield InstallRequirement instances. + + :param filename: Path or url of requirements file. + :param session: PipSession instance. + :param finder: Instance of pip.index.PackageFinder. + :param comes_from: Origin description of requirements. + :param options: cli options. + :param constraint: If true, parsing a constraint file rather than + requirements file. 
+ :param wheel_cache: Instance of pip.wheel.WheelCache + :param use_pep517: Value of the --use-pep517 option. + """ + skip_requirements_regex = ( + options.skip_requirements_regex if options else None + ) + line_parser = get_line_parser(finder) + parser = RequirementsFileParser( + session, line_parser, comes_from, skip_requirements_regex + ) + + for parsed_line in parser.parse(filename, constraint): + req = handle_line( + parsed_line, finder, options, session, wheel_cache, use_pep517 + ) + if req is not None: + yield req + + +def preprocess(content, skip_requirements_regex): + # type: (Text, Optional[str]) -> ReqFileLines + """Split, filter, and join lines, and return a line iterator + + :param content: the content of the requirements file + :param options: cli options + """ + lines_enum = enumerate(content.splitlines(), start=1) # type: ReqFileLines + lines_enum = join_lines(lines_enum) + lines_enum = ignore_comments(lines_enum) + if skip_requirements_regex: + lines_enum = skip_regex(lines_enum, skip_requirements_regex) + lines_enum = expand_env_variables(lines_enum) + return lines_enum + + +def handle_line( + line, # type: ParsedLine + finder=None, # type: Optional[PackageFinder] + options=None, # type: Optional[optparse.Values] + session=None, # type: Optional[PipSession] + wheel_cache=None, # type: Optional[WheelCache] + use_pep517=None, # type: Optional[bool] +): + # type: (...) -> Optional[InstallRequirement] + """Handle a single parsed requirements line; This can result in + creating/yielding requirements, or updating the finder. + + For lines that contain requirements, the only options that have an effect + are from SUPPORTED_OPTIONS_REQ, and they are scoped to the + requirement. Other options from SUPPORTED_OPTIONS may be present, but are + ignored. + + For lines that do not contain requirements, the only options that have an + effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may + be present, but are ignored. These lines may contain multiple options + (although our docs imply only one is supported), and all our parsed and + affect the finder. 
+ """ + + # preserve for the nested code path + line_comes_from = '%s %s (line %s)' % ( + '-c' if line.constraint else '-r', line.filename, line.lineno, + ) + + # return a line requirement + if line.args: + isolated = options.isolated_mode if options else False + if options: + cmdoptions.check_install_build_global(options, line.opts) + # get the options that apply to requirements + req_options = {} + for dest in SUPPORTED_OPTIONS_REQ_DEST: + if dest in line.opts.__dict__ and line.opts.__dict__[dest]: + req_options[dest] = line.opts.__dict__[dest] + line_source = 'line {} of {}'.format(line.lineno, line.filename) + return install_req_from_line( + line.args, + comes_from=line_comes_from, + use_pep517=use_pep517, + isolated=isolated, + options=req_options, + wheel_cache=wheel_cache, + constraint=line.constraint, + line_source=line_source, + ) + + # return an editable requirement + elif line.opts.editables: + isolated = options.isolated_mode if options else False + return install_req_from_editable( + line.opts.editables[0], comes_from=line_comes_from, + use_pep517=use_pep517, + constraint=line.constraint, isolated=isolated, + wheel_cache=wheel_cache + ) + + # percolate hash-checking option upward + elif line.opts.require_hashes: + options.require_hashes = line.opts.require_hashes + + # set finder options + elif finder: + find_links = finder.find_links + index_urls = finder.index_urls + if line.opts.index_url: + index_urls = [line.opts.index_url] + if line.opts.no_index is True: + index_urls = [] + if line.opts.extra_index_urls: + index_urls.extend(line.opts.extra_index_urls) + if line.opts.find_links: + # FIXME: it would be nice to keep track of the source + # of the find_links: support a find-links local path + # relative to a requirements file. + value = line.opts.find_links[0] + req_dir = os.path.dirname(os.path.abspath(line.filename)) + relative_to_reqs_file = os.path.join(req_dir, value) + if os.path.exists(relative_to_reqs_file): + value = relative_to_reqs_file + find_links.append(value) + + search_scope = SearchScope( + find_links=find_links, + index_urls=index_urls, + ) + finder.search_scope = search_scope + + if line.opts.pre: + finder.set_allow_all_prereleases() + + if session: + for host in line.opts.trusted_hosts or []: + source = 'line {} of {}'.format(line.lineno, line.filename) + session.add_trusted_host(host, source=source) + + return None + + +class RequirementsFileParser(object): + def __init__( + self, + session, # type: PipSession + line_parser, # type: LineParser + comes_from, # type: str + skip_requirements_regex, # type: Optional[str] + ): + # type: (...) -> None + self._session = session + self._line_parser = line_parser + self._comes_from = comes_from + self._skip_requirements_regex = skip_requirements_regex + + def parse(self, filename, constraint): + # type: (str, bool) -> Iterator[ParsedLine] + """Parse a given file, yielding parsed lines. 
+ """ + for line in self._parse_and_recurse(filename, constraint): + yield line + + def _parse_and_recurse(self, filename, constraint): + # type: (str, bool) -> Iterator[ParsedLine] + for line in self._parse_file(filename, constraint): + if ( + not line.args and + not line.opts.editables and + (line.opts.requirements or line.opts.constraints) + ): + # parse a nested requirements file + if line.opts.requirements: + req_path = line.opts.requirements[0] + nested_constraint = False + else: + req_path = line.opts.constraints[0] + nested_constraint = True + + # original file is over http + if SCHEME_RE.search(filename): + # do a url join so relative paths work + req_path = urllib_parse.urljoin(filename, req_path) + # original file and nested file are paths + elif not SCHEME_RE.search(req_path): + # do a join so relative paths work + req_path = os.path.join( + os.path.dirname(filename), req_path, + ) + + for inner_line in self._parse_and_recurse( + req_path, nested_constraint, + ): + yield inner_line + else: + yield line + + def _parse_file(self, filename, constraint): + # type: (str, bool) -> Iterator[ParsedLine] + _, content = get_file_content( + filename, self._session, comes_from=self._comes_from + ) + + lines_enum = preprocess(content, self._skip_requirements_regex) + + for line_number, line in lines_enum: + try: + args_str, opts = self._line_parser(line) + except OptionParsingError as e: + # add offending line + msg = 'Invalid requirement: %s\n%s' % (line, e.msg) + raise RequirementsFileParseError(msg) + + yield ParsedLine( + filename, + line_number, + self._comes_from, + args_str, + opts, + constraint, + ) + + +def get_line_parser(finder): + # type: (Optional[PackageFinder]) -> LineParser + def parse_line(line): + # type: (Text) -> Tuple[str, Values] + # Build new parser for each line since it accumulates appendable + # options. + parser = build_parser() + defaults = parser.get_default_values() + defaults.index_url = None + if finder: + defaults.format_control = finder.format_control + + args_str, options_str = break_args_options(line) + # Prior to 2.7.3, shlex cannot deal with unicode entries + if sys.version_info < (2, 7, 3): + # https://github.com/python/mypy/issues/1174 + options_str = options_str.encode('utf8') # type: ignore + + # https://github.com/python/mypy/issues/1174 + opts, _ = parser.parse_args( + shlex.split(options_str), defaults) # type: ignore + + return args_str, opts + + return parse_line + + +def break_args_options(line): + # type: (Text) -> Tuple[str, Text] + """Break up the line into an args and options string. We only want to shlex + (and then optparse) the options, not the args. args can contain markers + which are corrupted by shlex. + """ + tokens = line.split(' ') + args = [] + options = tokens[:] + for token in tokens: + if token.startswith('-') or token.startswith('--'): + break + else: + args.append(token) + options.pop(0) + return ' '.join(args), ' '.join(options) # type: ignore + + +class OptionParsingError(Exception): + def __init__(self, msg): + # type: (str) -> None + self.msg = msg + + +def build_parser(): + # type: () -> optparse.OptionParser + """ + Return a parser for parsing requirement lines + """ + parser = optparse.OptionParser(add_help_option=False) + + option_factories = SUPPORTED_OPTIONS + SUPPORTED_OPTIONS_REQ + for option_factory in option_factories: + option = option_factory() + parser.add_option(option) + + # By default optparse sys.exits on parsing errors. We want to wrap + # that in our own exception. 
+ def parser_exit(self, msg): + # type: (Any, str) -> NoReturn + raise OptionParsingError(msg) + # NOTE: mypy disallows assigning to a method + # https://github.com/python/mypy/issues/2427 + parser.exit = parser_exit # type: ignore + + return parser + + +def join_lines(lines_enum): + # type: (ReqFileLines) -> ReqFileLines + """Joins a line ending in '\' with the previous line (except when following + comments). The joined line takes on the index of the first line. + """ + primary_line_number = None + new_line = [] # type: List[Text] + for line_number, line in lines_enum: + if not line.endswith('\\') or COMMENT_RE.match(line): + if COMMENT_RE.match(line): + # this ensures comments are always matched later + line = ' ' + line + if new_line: + new_line.append(line) + yield primary_line_number, ''.join(new_line) + new_line = [] + else: + yield line_number, line + else: + if not new_line: + primary_line_number = line_number + new_line.append(line.strip('\\')) + + # last line contains \ + if new_line: + yield primary_line_number, ''.join(new_line) + + # TODO: handle space after '\'. + + +def ignore_comments(lines_enum): + # type: (ReqFileLines) -> ReqFileLines + """ + Strips comments and filter empty lines. + """ + for line_number, line in lines_enum: + line = COMMENT_RE.sub('', line) + line = line.strip() + if line: + yield line_number, line + + +def skip_regex(lines_enum, pattern): + # type: (ReqFileLines, str) -> ReqFileLines + """ + Skip lines that match the provided pattern + + Note: the regex pattern is only built once + """ + matcher = re.compile(pattern) + lines_enum = filterfalse(lambda e: matcher.search(e[1]), lines_enum) + return lines_enum + + +def expand_env_variables(lines_enum): + # type: (ReqFileLines) -> ReqFileLines + """Replace all environment variables that can be retrieved via `os.getenv`. + + The only allowed format for environment variables defined in the + requirement file is `${MY_VARIABLE_1}` to ensure two things: + + 1. Strings that contain a `$` aren't accidentally (partially) expanded. + 2. Ensure consistency across platforms for requirement files. + + These points are the result of a discussion on the `github pull + request #3514 `_. + + Valid characters in variable names follow the `POSIX standard + `_ and are limited + to uppercase letter, digits and the `_` (underscore). + """ + for line_number, line in lines_enum: + for env_var, var_name in ENV_VAR_RE.findall(line): + value = os.getenv(var_name) + if not value: + continue + + line = line.replace(env_var, value) + + yield line_number, line + + +def get_file_content(url, session, comes_from=None): + # type: (str, PipSession, Optional[str]) -> Tuple[str, Text] + """Gets the content of a file; it may be a filename, file: URL, or + http: URL. Returns (location, content). Content is unicode. + Respects # -*- coding: declarations on the retrieved files. + + :param url: File path or url. + :param session: PipSession instance. + :param comes_from: Origin description of requirements. 
+ """ + scheme = get_url_scheme(url) + + if scheme in ['http', 'https']: + # FIXME: catch some errors + resp = session.get(url) + resp.raise_for_status() + return resp.url, resp.text + + elif scheme == 'file': + if comes_from and comes_from.startswith('http'): + raise InstallationError( + 'Requirements file %s references URL %s, which is local' + % (comes_from, url)) + + path = url.split(':', 1)[1] + path = path.replace('\\', '/') + match = _url_slash_drive_re.match(path) + if match: + path = match.group(1) + ':' + path.split('|', 1)[1] + path = urllib_parse.unquote(path) + if path.startswith('/'): + path = '/' + path.lstrip('/') + url = path + + try: + with open(url, 'rb') as f: + content = auto_decode(f.read()) + except IOError as exc: + raise InstallationError( + 'Could not open requirements file: %s' % str(exc) + ) + return url, content + + +_url_slash_drive_re = re.compile(r'/*([a-z])\|', re.I) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/req/req_install.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/req_install.py new file mode 100644 index 0000000..22ac24b --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/req_install.py @@ -0,0 +1,830 @@ +# The following comment should be removed at some point in the future. +# mypy: strict-optional=False + +from __future__ import absolute_import + +import logging +import os +import shutil +import sys +import zipfile + +from pip._vendor import pkg_resources, six +from pip._vendor.packaging.requirements import Requirement +from pip._vendor.packaging.utils import canonicalize_name +from pip._vendor.packaging.version import Version +from pip._vendor.packaging.version import parse as parse_version +from pip._vendor.pep517.wrappers import Pep517HookCaller + +from pip._internal import pep425tags +from pip._internal.build_env import NoOpBuildEnvironment +from pip._internal.exceptions import InstallationError +from pip._internal.locations import get_scheme +from pip._internal.models.link import Link +from pip._internal.operations.build.metadata import generate_metadata +from pip._internal.operations.build.metadata_legacy import \ + generate_metadata as generate_metadata_legacy +from pip._internal.operations.install.editable_legacy import \ + install_editable as install_editable_legacy +from pip._internal.operations.install.legacy import install as install_legacy +from pip._internal.operations.install.wheel import install_wheel +from pip._internal.pyproject import load_pyproject_toml, make_pyproject_path +from pip._internal.req.req_uninstall import UninstallPathSet +from pip._internal.utils.deprecation import deprecated +from pip._internal.utils.hashes import Hashes +from pip._internal.utils.logging import indent_log +from pip._internal.utils.marker_files import ( + PIP_DELETE_MARKER_FILENAME, + has_delete_marker_file, + write_delete_marker_file, +) +from pip._internal.utils.misc import ( + ask_path_exists, + backup_dir, + display_path, + dist_in_site_packages, + dist_in_usersite, + get_installed_version, + hide_url, + redact_auth_from_url, + rmtree, +) +from pip._internal.utils.packaging import get_metadata +from pip._internal.utils.temp_dir import TempDirectory +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from pip._internal.utils.virtualenv import running_under_virtualenv +from pip._internal.vcs import vcs + +if MYPY_CHECK_RUNNING: + from typing import ( + Any, Dict, Iterable, List, Optional, Sequence, Union, + ) + from pip._internal.build_env import BuildEnvironment + from 
pip._internal.cache import WheelCache + from pip._internal.index.package_finder import PackageFinder + from pip._vendor.pkg_resources import Distribution + from pip._vendor.packaging.specifiers import SpecifierSet + from pip._vendor.packaging.markers import Marker + + +logger = logging.getLogger(__name__) + + +def _get_dist(metadata_directory): + # type: (str) -> Distribution + """Return a pkg_resources.Distribution for the provided + metadata directory. + """ + dist_dir = metadata_directory.rstrip(os.sep) + + # Determine the correct Distribution object type. + if dist_dir.endswith(".egg-info"): + dist_cls = pkg_resources.Distribution + else: + assert dist_dir.endswith(".dist-info") + dist_cls = pkg_resources.DistInfoDistribution + + # Build a PathMetadata object, from path to metadata. :wink: + base_dir, dist_dir_name = os.path.split(dist_dir) + dist_name = os.path.splitext(dist_dir_name)[0] + metadata = pkg_resources.PathMetadata(base_dir, dist_dir) + + return dist_cls( + base_dir, + project_name=dist_name, + metadata=metadata, + ) + + +class InstallRequirement(object): + """ + Represents something that may be installed later on, may have information + about where to fetch the relevant requirement and also contains logic for + installing the said requirement. + """ + + def __init__( + self, + req, # type: Optional[Requirement] + comes_from, # type: Optional[Union[str, InstallRequirement]] + source_dir=None, # type: Optional[str] + editable=False, # type: bool + link=None, # type: Optional[Link] + markers=None, # type: Optional[Marker] + use_pep517=None, # type: Optional[bool] + isolated=False, # type: bool + options=None, # type: Optional[Dict[str, Any]] + wheel_cache=None, # type: Optional[WheelCache] + constraint=False, # type: bool + extras=() # type: Iterable[str] + ): + # type: (...) -> None + assert req is None or isinstance(req, Requirement), req + self.req = req + self.comes_from = comes_from + self.constraint = constraint + if source_dir is None: + self.source_dir = None # type: Optional[str] + else: + self.source_dir = os.path.normpath(os.path.abspath(source_dir)) + self.editable = editable + + self._wheel_cache = wheel_cache + if link is None and req and req.url: + # PEP 508 URL requirement + link = Link(req.url) + self.link = self.original_link = link + # Path to any downloaded or already-existing package. + self.local_file_path = None # type: Optional[str] + if self.link and self.link.is_file: + self.local_file_path = self.link.file_path + + if extras: + self.extras = extras + elif req: + self.extras = { + pkg_resources.safe_extra(extra) for extra in req.extras + } + else: + self.extras = set() + if markers is None and req: + markers = req.marker + self.markers = markers + + # This holds the pkg_resources.Distribution object if this requirement + # is already available: + self.satisfied_by = None # type: Optional[Distribution] + # Whether the installation process should try to uninstall an existing + # distribution before installing this requirement. 
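# Quick sketch of the PEP 508 URL-requirement case handled above: when the
# parsed requirement carries a URL, that URL becomes the link (values below
# are examples only):
from packaging.requirements import Requirement

req = Requirement("SomeProject @ https://example.com/SomeProject-1.0.tar.gz")
print(req.name)    # "SomeProject"
print(req.url)     # "https://example.com/SomeProject-1.0.tar.gz"
# InstallRequirement wraps req.url in a Link when no explicit link was given.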
+ self.should_reinstall = False + # Temporary build location + self._temp_build_dir = None # type: Optional[TempDirectory] + # Set to True after successful installation + self.install_succeeded = None # type: Optional[bool] + self.options = options if options else {} + # Set to True after successful preparation of this requirement + self.prepared = False + self.is_direct = False + + self.isolated = isolated + self.build_env = NoOpBuildEnvironment() # type: BuildEnvironment + + # For PEP 517, the directory where we request the project metadata + # gets stored. We need this to pass to build_wheel, so the backend + # can ensure that the wheel matches the metadata (see the PEP for + # details). + self.metadata_directory = None # type: Optional[str] + + # The static build requirements (from pyproject.toml) + self.pyproject_requires = None # type: Optional[List[str]] + + # Build requirements that we will check are available + self.requirements_to_check = [] # type: List[str] + + # The PEP 517 backend we should use to build the project + self.pep517_backend = None # type: Optional[Pep517HookCaller] + + # Are we using PEP 517 for this requirement? + # After pyproject.toml has been loaded, the only valid values are True + # and False. Before loading, None is valid (meaning "use the default"). + # Setting an explicit value before loading pyproject.toml is supported, + # but after loading this flag should be treated as read only. + self.use_pep517 = use_pep517 + + def __str__(self): + # type: () -> str + if self.req: + s = str(self.req) + if self.link: + s += ' from %s' % redact_auth_from_url(self.link.url) + elif self.link: + s = redact_auth_from_url(self.link.url) + else: + s = '' + if self.satisfied_by is not None: + s += ' in %s' % display_path(self.satisfied_by.location) + if self.comes_from: + if isinstance(self.comes_from, six.string_types): + comes_from = self.comes_from # type: Optional[str] + else: + comes_from = self.comes_from.from_path() + if comes_from: + s += ' (from %s)' % comes_from + return s + + def __repr__(self): + # type: () -> str + return '<%s object: %s editable=%r>' % ( + self.__class__.__name__, str(self), self.editable) + + def format_debug(self): + # type: () -> str + """An un-tested helper for getting state, for debugging. + """ + attributes = vars(self) + names = sorted(attributes) + + state = ( + "{}={!r}".format(attr, attributes[attr]) for attr in sorted(names) + ) + return '<{name} object: {{{state}}}>'.format( + name=self.__class__.__name__, + state=", ".join(state), + ) + + def populate_link(self, finder, upgrade, require_hashes): + # type: (PackageFinder, bool, bool) -> None + """Ensure that if a link can be found for this, that it is found. + + Note that self.link may still be None - if Upgrade is False and the + requirement is already installed. + + If require_hashes is True, don't use the wheel cache, because cached + wheels, always built locally, have different hashes than the files + downloaded from the index server and thus throw false hash mismatches. + Furthermore, cached wheels at present have undeterministic contents due + to file modification times. 
+ """ + if self.link is None: + self.link = finder.find_requirement(self, upgrade) + if self._wheel_cache is not None and not require_hashes: + old_link = self.link + supported_tags = pep425tags.get_supported() + self.link = self._wheel_cache.get( + link=self.link, + package_name=self.name, + supported_tags=supported_tags, + ) + if old_link != self.link: + logger.debug('Using cached wheel link: %s', self.link) + + # Things that are valid for all kinds of requirements? + @property + def name(self): + # type: () -> Optional[str] + if self.req is None: + return None + return six.ensure_str(pkg_resources.safe_name(self.req.name)) + + @property + def specifier(self): + # type: () -> SpecifierSet + return self.req.specifier + + @property + def is_pinned(self): + # type: () -> bool + """Return whether I am pinned to an exact version. + + For example, some-package==1.2 is pinned; some-package>1.2 is not. + """ + specifiers = self.specifier + return (len(specifiers) == 1 and + next(iter(specifiers)).operator in {'==', '==='}) + + @property + def installed_version(self): + # type: () -> Optional[str] + return get_installed_version(self.name) + + def match_markers(self, extras_requested=None): + # type: (Optional[Iterable[str]]) -> bool + if not extras_requested: + # Provide an extra to safely evaluate the markers + # without matching any extra + extras_requested = ('',) + if self.markers is not None: + return any( + self.markers.evaluate({'extra': extra}) + for extra in extras_requested) + else: + return True + + @property + def has_hash_options(self): + # type: () -> bool + """Return whether any known-good hashes are specified as options. + + These activate --require-hashes mode; hashes specified as part of a + URL do not. + + """ + return bool(self.options.get('hashes', {})) + + def hashes(self, trust_internet=True): + # type: (bool) -> Hashes + """Return a hash-comparer that considers my option- and URL-based + hashes to be known-good. + + Hashes in URLs--ones embedded in the requirements file, not ones + downloaded from an index server--are almost peers with ones from + flags. They satisfy --require-hashes (whether it was implicitly or + explicitly activated) but do not activate it. md5 and sha224 are not + allowed in flags, which should nudge people toward good algos. We + always OR all hashes together, even ones from URLs. + + :param trust_internet: Whether to trust URL-based (#md5=...) hashes + downloaded from the internet, as by populate_link() + + """ + good_hashes = self.options.get('hashes', {}).copy() + link = self.link if trust_internet else self.original_link + if link and link.hash: + good_hashes.setdefault(link.hash_name, []).append(link.hash) + return Hashes(good_hashes) + + def from_path(self): + # type: () -> Optional[str] + """Format a nice indicator to show where this "comes from" + """ + if self.req is None: + return None + s = str(self.req) + if self.comes_from: + if isinstance(self.comes_from, six.string_types): + comes_from = self.comes_from + else: + comes_from = self.comes_from.from_path() + if comes_from: + s += '->' + comes_from + return s + + def ensure_build_location(self, build_dir): + # type: (str) -> str + assert build_dir is not None + if self._temp_build_dir is not None: + assert self._temp_build_dir.path + return self._temp_build_dir.path + if self.req is None: + # Some systems have /tmp as a symlink which confuses custom + # builds (such as numpy). Thus, we ensure that the real path + # is returned. 
+ self._temp_build_dir = TempDirectory(kind="req-build") + + return self._temp_build_dir.path + if self.editable: + name = self.name.lower() + else: + name = self.name + # FIXME: Is there a better place to create the build_dir? (hg and bzr + # need this) + if not os.path.exists(build_dir): + logger.debug('Creating directory %s', build_dir) + os.makedirs(build_dir) + write_delete_marker_file(build_dir) + return os.path.join(build_dir, name) + + def _set_requirement(self): + # type: () -> None + """Set requirement after generating metadata. + """ + assert self.req is None + assert self.metadata is not None + assert self.source_dir is not None + + # Construct a Requirement object from the generated metadata + if isinstance(parse_version(self.metadata["Version"]), Version): + op = "==" + else: + op = "===" + + self.req = Requirement( + "".join([ + self.metadata["Name"], + op, + self.metadata["Version"], + ]) + ) + + def warn_on_mismatching_name(self): + # type: () -> None + metadata_name = canonicalize_name(self.metadata["Name"]) + if canonicalize_name(self.req.name) == metadata_name: + # Everything is fine. + return + + # If we're here, there's a mismatch. Log a warning about it. + logger.warning( + 'Generating metadata for package %s ' + 'produced metadata for project name %s. Fix your ' + '#egg=%s fragments.', + self.name, metadata_name, self.name + ) + self.req = Requirement(metadata_name) + + def remove_temporary_source(self): + # type: () -> None + """Remove the source files from this requirement, if they are marked + for deletion""" + if self.source_dir and has_delete_marker_file(self.source_dir): + logger.debug('Removing source in %s', self.source_dir) + rmtree(self.source_dir) + self.source_dir = None + if self._temp_build_dir: + self._temp_build_dir.cleanup() + self._temp_build_dir = None + self.build_env.cleanup() + + def check_if_exists(self, use_user_site): + # type: (bool) -> None + """Find an installed distribution that satisfies or conflicts + with this requirement, and set self.satisfied_by or + self.should_reinstall appropriately. + """ + if self.req is None: + return + # get_distribution() will resolve the entire list of requirements + # anyway, and we've already determined that we need the requirement + # in question, so strip the marker so that we don't try to + # evaluate it. 
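# Sketch of the marker-stripping step described above: re-parse the
# requirement string and clear .marker so only the name and specifier are
# matched against installed distributions (requirement string is an example):
from packaging.requirements import Requirement

req = Requirement('somepkg>=1.0; python_version < "3.8"')
no_marker = Requirement(str(req))
no_marker.marker = None
# str(no_marker) -> 'somepkg>=1.0'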
+ no_marker = Requirement(str(self.req)) + no_marker.marker = None + try: + self.satisfied_by = pkg_resources.get_distribution(str(no_marker)) + except pkg_resources.DistributionNotFound: + return + except pkg_resources.VersionConflict: + existing_dist = pkg_resources.get_distribution( + self.req.name + ) + if use_user_site: + if dist_in_usersite(existing_dist): + self.should_reinstall = True + elif (running_under_virtualenv() and + dist_in_site_packages(existing_dist)): + raise InstallationError( + "Will not install to the user site because it will " + "lack sys.path precedence to %s in %s" % + (existing_dist.project_name, existing_dist.location) + ) + else: + self.should_reinstall = True + else: + if self.editable and self.satisfied_by: + self.should_reinstall = True + # when installing editables, nothing pre-existing should ever + # satisfy + self.satisfied_by = None + + # Things valid for wheels + @property + def is_wheel(self): + # type: () -> bool + if not self.link: + return False + return self.link.is_wheel + + # Things valid for sdists + @property + def unpacked_source_directory(self): + # type: () -> str + return os.path.join( + self.source_dir, + self.link and self.link.subdirectory_fragment or '') + + @property + def setup_py_path(self): + # type: () -> str + assert self.source_dir, "No source dir for %s" % self + setup_py = os.path.join(self.unpacked_source_directory, 'setup.py') + + # Python2 __file__ should not be unicode + if six.PY2 and isinstance(setup_py, six.text_type): + setup_py = setup_py.encode(sys.getfilesystemencoding()) + + return setup_py + + @property + def pyproject_toml_path(self): + # type: () -> str + assert self.source_dir, "No source dir for %s" % self + return make_pyproject_path(self.unpacked_source_directory) + + def load_pyproject_toml(self): + # type: () -> None + """Load the pyproject.toml file. + + After calling this routine, all of the attributes related to PEP 517 + processing for this requirement have been set. In particular, the + use_pep517 attribute can be used to determine whether we should + follow the PEP 517 or legacy (setup.py) code path. + """ + pyproject_toml_data = load_pyproject_toml( + self.use_pep517, + self.pyproject_toml_path, + self.setup_py_path, + str(self) + ) + + if pyproject_toml_data is None: + self.use_pep517 = False + return + + self.use_pep517 = True + requires, backend, check, backend_path = pyproject_toml_data + self.requirements_to_check = check + self.pyproject_requires = requires + self.pep517_backend = Pep517HookCaller( + self.unpacked_source_directory, backend, backend_path=backend_path, + ) + + def _generate_metadata(self): + # type: () -> str + """Invokes metadata generator functions, with the required arguments. + """ + if not self.use_pep517: + assert self.unpacked_source_directory + + return generate_metadata_legacy( + build_env=self.build_env, + setup_py_path=self.setup_py_path, + source_dir=self.unpacked_source_directory, + editable=self.editable, + isolated=self.isolated, + details=self.name or "from {}".format(self.link) + ) + + assert self.pep517_backend is not None + + return generate_metadata( + build_env=self.build_env, + backend=self.pep517_backend, + ) + + def prepare_metadata(self): + # type: () -> None + """Ensure that project metadata is available. + + Under PEP 517, call the backend hook to prepare the metadata. + Under legacy processing, call setup.py egg-info. 
+ """ + assert self.source_dir + + with indent_log(): + self.metadata_directory = self._generate_metadata() + + # Act on the newly generated metadata, based on the name and version. + if not self.name: + self._set_requirement() + else: + self.warn_on_mismatching_name() + + self.assert_source_matches_version() + + @property + def metadata(self): + # type: () -> Any + if not hasattr(self, '_metadata'): + self._metadata = get_metadata(self.get_dist()) + + return self._metadata + + def get_dist(self): + # type: () -> Distribution + return _get_dist(self.metadata_directory) + + def assert_source_matches_version(self): + # type: () -> None + assert self.source_dir + version = self.metadata['version'] + if self.req.specifier and version not in self.req.specifier: + logger.warning( + 'Requested %s, but installing version %s', + self, + version, + ) + else: + logger.debug( + 'Source in %s has version %s, which satisfies requirement %s', + display_path(self.source_dir), + version, + self, + ) + + # For both source distributions and editables + def ensure_has_source_dir(self, parent_dir): + # type: (str) -> None + """Ensure that a source_dir is set. + + This will create a temporary build dir if the name of the requirement + isn't known yet. + + :param parent_dir: The ideal pip parent_dir for the source_dir. + Generally src_dir for editables and build_dir for sdists. + :return: self.source_dir + """ + if self.source_dir is None: + self.source_dir = self.ensure_build_location(parent_dir) + + # For editable installations + def update_editable(self, obtain=True): + # type: (bool) -> None + if not self.link: + logger.debug( + "Cannot update repository at %s; repository location is " + "unknown", + self.source_dir, + ) + return + assert self.editable + assert self.source_dir + if self.link.scheme == 'file': + # Static paths don't get updated + return + assert '+' in self.link.url, "bad url: %r" % self.link.url + vc_type, url = self.link.url.split('+', 1) + vcs_backend = vcs.get_backend(vc_type) + if vcs_backend: + if not self.link.is_vcs: + reason = ( + "This form of VCS requirement is being deprecated: {}." + ).format( + self.link.url + ) + replacement = None + if self.link.url.startswith("git+git@"): + replacement = ( + "git+https://git@example.com/..., " + "git+ssh://git@example.com/..., " + "or the insecure git+git://git@example.com/..." + ) + deprecated(reason, replacement, gone_in="21.0", issue=7554) + hidden_url = hide_url(self.link.url) + if obtain: + vcs_backend.obtain(self.source_dir, url=hidden_url) + else: + vcs_backend.export(self.source_dir, url=hidden_url) + else: + assert 0, ( + 'Unexpected version control type (in %s): %s' + % (self.link, vc_type)) + + # Top-level Actions + def uninstall(self, auto_confirm=False, verbose=False): + # type: (bool, bool) -> Optional[UninstallPathSet] + """ + Uninstall the distribution currently satisfying this requirement. + + Prompts before removing or modifying files unless + ``auto_confirm`` is True. + + Refuses to delete or modify files outside of ``sys.prefix`` - + thus uninstallation within a virtual environment can only + modify that virtual environment, even if the virtualenv is + linked to global site-packages. 
+ + """ + assert self.req + try: + dist = pkg_resources.get_distribution(self.req.name) + except pkg_resources.DistributionNotFound: + logger.warning("Skipping %s as it is not installed.", self.name) + return None + else: + logger.info('Found existing installation: %s', dist) + + uninstalled_pathset = UninstallPathSet.from_dist(dist) + uninstalled_pathset.remove(auto_confirm, verbose) + return uninstalled_pathset + + def _get_archive_name(self, path, parentdir, rootdir): + # type: (str, str, str) -> str + + def _clean_zip_name(name, prefix): + # type: (str, str) -> str + assert name.startswith(prefix + os.path.sep), ( + "name %r doesn't start with prefix %r" % (name, prefix) + ) + name = name[len(prefix) + 1:] + name = name.replace(os.path.sep, '/') + return name + + path = os.path.join(parentdir, path) + name = _clean_zip_name(path, rootdir) + return self.name + '/' + name + + def archive(self, build_dir): + # type: (str) -> None + """Saves archive to provided build_dir. + + Used for saving downloaded VCS requirements as part of `pip download`. + """ + assert self.source_dir + + create_archive = True + archive_name = '%s-%s.zip' % (self.name, self.metadata["version"]) + archive_path = os.path.join(build_dir, archive_name) + + if os.path.exists(archive_path): + response = ask_path_exists( + 'The file %s exists. (i)gnore, (w)ipe, (b)ackup, (a)bort ' % + display_path(archive_path), ('i', 'w', 'b', 'a')) + if response == 'i': + create_archive = False + elif response == 'w': + logger.warning('Deleting %s', display_path(archive_path)) + os.remove(archive_path) + elif response == 'b': + dest_file = backup_dir(archive_path) + logger.warning( + 'Backing up %s to %s', + display_path(archive_path), + display_path(dest_file), + ) + shutil.move(archive_path, dest_file) + elif response == 'a': + sys.exit(-1) + + if not create_archive: + return + + zip_output = zipfile.ZipFile( + archive_path, 'w', zipfile.ZIP_DEFLATED, allowZip64=True, + ) + with zip_output: + dir = os.path.normcase( + os.path.abspath(self.unpacked_source_directory) + ) + for dirpath, dirnames, filenames in os.walk(dir): + if 'pip-egg-info' in dirnames: + dirnames.remove('pip-egg-info') + for dirname in dirnames: + dir_arcname = self._get_archive_name( + dirname, parentdir=dirpath, rootdir=dir, + ) + zipdir = zipfile.ZipInfo(dir_arcname + '/') + zipdir.external_attr = 0x1ED << 16 # 0o755 + zip_output.writestr(zipdir, '') + for filename in filenames: + if filename == PIP_DELETE_MARKER_FILENAME: + continue + file_arcname = self._get_archive_name( + filename, parentdir=dirpath, rootdir=dir, + ) + filename = os.path.join(dirpath, filename) + zip_output.write(filename, file_arcname) + + logger.info('Saved %s', display_path(archive_path)) + + def install( + self, + install_options, # type: List[str] + global_options=None, # type: Optional[Sequence[str]] + root=None, # type: Optional[str] + home=None, # type: Optional[str] + prefix=None, # type: Optional[str] + warn_script_location=True, # type: bool + use_user_site=False, # type: bool + pycompile=True # type: bool + ): + # type: (...) 
-> None + scheme = get_scheme( + self.name, + user=use_user_site, + home=home, + root=root, + isolated=self.isolated, + prefix=prefix, + ) + + global_options = global_options if global_options is not None else [] + if self.editable: + install_editable_legacy( + install_options, + global_options, + prefix=prefix, + home=home, + use_user_site=use_user_site, + name=self.name, + setup_py_path=self.setup_py_path, + isolated=self.isolated, + build_env=self.build_env, + unpacked_source_directory=self.unpacked_source_directory, + ) + self.install_succeeded = True + return + + if self.is_wheel: + assert self.local_file_path + install_wheel( + self.name, + self.local_file_path, + scheme=scheme, + req_description=str(self.req), + pycompile=pycompile, + warn_script_location=warn_script_location, + ) + self.install_succeeded = True + return + + install_legacy( + self, + install_options=install_options, + global_options=global_options, + root=root, + home=home, + prefix=prefix, + use_user_site=use_user_site, + pycompile=pycompile, + scheme=scheme, + ) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/req/req_set.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/req_set.py new file mode 100644 index 0000000..087ac59 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/req_set.py @@ -0,0 +1,209 @@ +# The following comment should be removed at some point in the future. +# mypy: strict-optional=False + +from __future__ import absolute_import + +import logging +from collections import OrderedDict + +from pip._vendor.packaging.utils import canonicalize_name + +from pip._internal import pep425tags +from pip._internal.exceptions import InstallationError +from pip._internal.models.wheel import Wheel +from pip._internal.utils.logging import indent_log +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import Dict, Iterable, List, Optional, Tuple + from pip._internal.req.req_install import InstallRequirement + + +logger = logging.getLogger(__name__) + + +class RequirementSet(object): + + def __init__(self, check_supported_wheels=True): + # type: (bool) -> None + """Create a RequirementSet. 
+ """ + + self.requirements = OrderedDict() # type: Dict[str, InstallRequirement] # noqa: E501 + self.check_supported_wheels = check_supported_wheels + + self.unnamed_requirements = [] # type: List[InstallRequirement] + self.successfully_downloaded = [] # type: List[InstallRequirement] + self.reqs_to_cleanup = [] # type: List[InstallRequirement] + + def __str__(self): + # type: () -> str + requirements = sorted( + (req for req in self.requirements.values() if not req.comes_from), + key=lambda req: canonicalize_name(req.name), + ) + return ' '.join(str(req.req) for req in requirements) + + def __repr__(self): + # type: () -> str + requirements = sorted( + self.requirements.values(), + key=lambda req: canonicalize_name(req.name), + ) + + format_string = '<{classname} object; {count} requirement(s): {reqs}>' + return format_string.format( + classname=self.__class__.__name__, + count=len(requirements), + reqs=', '.join(str(req.req) for req in requirements), + ) + + def add_unnamed_requirement(self, install_req): + # type: (InstallRequirement) -> None + assert not install_req.name + self.unnamed_requirements.append(install_req) + + def add_named_requirement(self, install_req): + # type: (InstallRequirement) -> None + assert install_req.name + + project_name = canonicalize_name(install_req.name) + self.requirements[project_name] = install_req + + def add_requirement( + self, + install_req, # type: InstallRequirement + parent_req_name=None, # type: Optional[str] + extras_requested=None # type: Optional[Iterable[str]] + ): + # type: (...) -> Tuple[List[InstallRequirement], Optional[InstallRequirement]] # noqa: E501 + """Add install_req as a requirement to install. + + :param parent_req_name: The name of the requirement that needed this + added. The name is used because when multiple unnamed requirements + resolve to the same name, we could otherwise end up with dependency + links that point outside the Requirements set. parent_req must + already be added. Note that None implies that this is a user + supplied requirement, vs an inferred one. + :param extras_requested: an iterable of extras used to evaluate the + environment markers. + :return: Additional requirements to scan. That is either [] if + the requirement is not applicable, or [install_req] if the + requirement is applicable and has just been added. + """ + # If the markers do not match, ignore this requirement. + if not install_req.match_markers(extras_requested): + logger.info( + "Ignoring %s: markers '%s' don't match your environment", + install_req.name, install_req.markers, + ) + return [], None + + # If the wheel is not supported, raise an error. + # Should check this after filtering out based on environment markers to + # allow specifying different wheels based on the environment/OS, in a + # single requirements file. + if install_req.link and install_req.link.is_wheel: + wheel = Wheel(install_req.link.filename) + tags = pep425tags.get_supported() + if (self.check_supported_wheels and not wheel.supported(tags)): + raise InstallationError( + "%s is not a supported wheel on this platform." % + wheel.filename + ) + + # This next bit is really a sanity check. + assert install_req.is_direct == (parent_req_name is None), ( + "a direct req shouldn't have a parent and also, " + "a non direct req should have a parent" + ) + + # Unnamed requirements are scanned again and the requirement won't be + # added as a dependency until after scanning. 
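Because the requirement mapping above is keyed through canonicalize_name(), differently spelled project names collapse to a single key. A small illustration, assuming the standalone packaging distribution is installed:

from packaging.utils import canonicalize_name

for name in ("Django", "zope.interface", "ruamel_yaml", "Pillow"):
    print(name, "->", canonicalize_name(name))
# Django -> django
# zope.interface -> zope-interface
# ruamel_yaml -> ruamel-yaml
# Pillow -> pillow
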
+ if not install_req.name: + self.add_unnamed_requirement(install_req) + return [install_req], None + + try: + existing_req = self.get_requirement(install_req.name) + except KeyError: + existing_req = None + + has_conflicting_requirement = ( + parent_req_name is None and + existing_req and + not existing_req.constraint and + existing_req.extras == install_req.extras and + existing_req.req.specifier != install_req.req.specifier + ) + if has_conflicting_requirement: + raise InstallationError( + "Double requirement given: %s (already in %s, name=%r)" + % (install_req, existing_req, install_req.name) + ) + + # When no existing requirement exists, add the requirement as a + # dependency and it will be scanned again after. + if not existing_req: + self.add_named_requirement(install_req) + # We'd want to rescan this requirement later + return [install_req], install_req + + # Assume there's no need to scan, and that we've already + # encountered this for scanning. + if install_req.constraint or not existing_req.constraint: + return [], existing_req + + does_not_satisfy_constraint = ( + install_req.link and + not ( + existing_req.link and + install_req.link.path == existing_req.link.path + ) + ) + if does_not_satisfy_constraint: + self.reqs_to_cleanup.append(install_req) + raise InstallationError( + "Could not satisfy constraints for '%s': " + "installation from path or url cannot be " + "constrained to a version" % install_req.name, + ) + # If we're now installing a constraint, mark the existing + # object for real installation. + existing_req.constraint = False + existing_req.extras = tuple(sorted( + set(existing_req.extras) | set(install_req.extras) + )) + logger.debug( + "Setting %s extras to: %s", + existing_req, existing_req.extras, + ) + # Return the existing requirement for addition to the parent and + # scanning again. + return [existing_req], existing_req + + def has_requirement(self, name): + # type: (str) -> bool + project_name = canonicalize_name(name) + + return ( + project_name in self.requirements and + not self.requirements[project_name].constraint + ) + + def get_requirement(self, name): + # type: (str) -> InstallRequirement + project_name = canonicalize_name(name) + + if project_name in self.requirements: + return self.requirements[project_name] + + raise KeyError("No project with the name %r" % name) + + def cleanup_files(self): + # type: () -> None + """Clean up files, remove builds.""" + logger.debug('Cleaning up...') + with indent_log(): + for req in self.reqs_to_cleanup: + req.remove_temporary_source() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/req/req_tracker.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/req_tracker.py new file mode 100644 index 0000000..84e0c04 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/req_tracker.py @@ -0,0 +1,150 @@ +# The following comment should be removed at some point in the future. 
+# mypy: strict-optional=False + +from __future__ import absolute_import + +import contextlib +import errno +import hashlib +import logging +import os + +from pip._vendor import contextlib2 + +from pip._internal.utils.temp_dir import TempDirectory +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from types import TracebackType + from typing import Dict, Iterator, Optional, Set, Type, Union + from pip._internal.req.req_install import InstallRequirement + from pip._internal.models.link import Link + +logger = logging.getLogger(__name__) + + +@contextlib.contextmanager +def update_env_context_manager(**changes): + # type: (str) -> Iterator[None] + target = os.environ + + # Save values from the target and change them. + non_existent_marker = object() + saved_values = {} # type: Dict[str, Union[object, str]] + for name, new_value in changes.items(): + try: + saved_values[name] = target[name] + except KeyError: + saved_values[name] = non_existent_marker + target[name] = new_value + + try: + yield + finally: + # Restore original values in the target. + for name, original_value in saved_values.items(): + if original_value is non_existent_marker: + del target[name] + else: + assert isinstance(original_value, str) # for mypy + target[name] = original_value + + +@contextlib.contextmanager +def get_requirement_tracker(): + # type: () -> Iterator[RequirementTracker] + root = os.environ.get('PIP_REQ_TRACKER') + with contextlib2.ExitStack() as ctx: + if root is None: + root = ctx.enter_context( + TempDirectory(kind='req-tracker') + ).path + ctx.enter_context(update_env_context_manager(PIP_REQ_TRACKER=root)) + logger.debug("Initialized build tracking at %s", root) + + with RequirementTracker(root) as tracker: + yield tracker + + +class RequirementTracker(object): + + def __init__(self, root): + # type: (str) -> None + self._root = root + self._entries = set() # type: Set[InstallRequirement] + logger.debug("Created build tracker: %s", self._root) + + def __enter__(self): + # type: () -> RequirementTracker + logger.debug("Entered build tracker: %s", self._root) + return self + + def __exit__( + self, + exc_type, # type: Optional[Type[BaseException]] + exc_val, # type: Optional[BaseException] + exc_tb # type: Optional[TracebackType] + ): + # type: (...) -> None + self.cleanup() + + def _entry_path(self, link): + # type: (Link) -> str + hashed = hashlib.sha224(link.url_without_fragment.encode()).hexdigest() + return os.path.join(self._root, hashed) + + def add(self, req): + # type: (InstallRequirement) -> None + """Add an InstallRequirement to build tracking. + """ + + # Get the file to write information about this requirement. + entry_path = self._entry_path(req.link) + + # Try reading from the file. If it exists and can be read from, a build + # is already in progress, so a LookupError is raised. + try: + with open(entry_path) as fp: + contents = fp.read() + except IOError as e: + # if the error is anything other than "file does not exist", raise. + if e.errno != errno.ENOENT: + raise + else: + message = '%s is already being built: %s' % (req.link, contents) + raise LookupError(message) + + # If we're here, req should really not be building already. + assert req not in self._entries + + # Start tracking this requirement. 
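The PIP_REQ_TRACKER hand-off above works by temporarily mutating os.environ and restoring it afterwards. A simplified, self-contained sketch of the same save-and-restore pattern (the helper name set_env is hypothetical):

import contextlib
import os

@contextlib.contextmanager
def set_env(**changes):
    """Apply environment changes, then restore the previous state on exit."""
    missing = object()
    saved = {name: os.environ.get(name, missing) for name in changes}
    os.environ.update(changes)
    try:
        yield
    finally:
        for name, old in saved.items():
            if old is missing:
                os.environ.pop(name, None)
            else:
                os.environ[name] = old

with set_env(PIP_REQ_TRACKER="/tmp/demo-tracker"):
    assert os.environ["PIP_REQ_TRACKER"] == "/tmp/demo-tracker"
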
+ with open(entry_path, 'w') as fp: + fp.write(str(req)) + self._entries.add(req) + + logger.debug('Added %s to build tracker %r', req, self._root) + + def remove(self, req): + # type: (InstallRequirement) -> None + """Remove an InstallRequirement from build tracking. + """ + + # Delete the created file and the corresponding entries. + os.unlink(self._entry_path(req.link)) + self._entries.remove(req) + + logger.debug('Removed %s from build tracker %r', req, self._root) + + def cleanup(self): + # type: () -> None + for req in set(self._entries): + self.remove(req) + + logger.debug("Removed build tracker: %r", self._root) + + @contextlib.contextmanager + def track(self, req): + # type: (InstallRequirement) -> Iterator[None] + self.add(req) + yield + self.remove(req) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/req/req_uninstall.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/req_uninstall.py new file mode 100644 index 0000000..5971b13 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/req/req_uninstall.py @@ -0,0 +1,644 @@ +from __future__ import absolute_import + +import csv +import functools +import logging +import os +import sys +import sysconfig + +from pip._vendor import pkg_resources + +from pip._internal.exceptions import UninstallationError +from pip._internal.locations import bin_py, bin_user +from pip._internal.utils.compat import WINDOWS, cache_from_source, uses_pycache +from pip._internal.utils.logging import indent_log +from pip._internal.utils.misc import ( + FakeFile, + ask, + dist_in_usersite, + dist_is_local, + egg_link_path, + is_local, + normalize_path, + renames, + rmtree, +) +from pip._internal.utils.temp_dir import AdjacentTempDirectory, TempDirectory +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import ( + Any, Callable, Dict, Iterable, Iterator, List, Optional, Set, Tuple, + ) + from pip._vendor.pkg_resources import Distribution + +logger = logging.getLogger(__name__) + + +def _script_names(dist, script_name, is_gui): + # type: (Distribution, str, bool) -> List[str] + """Create the fully qualified name of the files created by + {console,gui}_scripts for the given ``dist``. + Returns the list of file names + """ + if dist_in_usersite(dist): + bin_dir = bin_user + else: + bin_dir = bin_py + exe_name = os.path.join(bin_dir, script_name) + paths_to_remove = [exe_name] + if WINDOWS: + paths_to_remove.append(exe_name + '.exe') + paths_to_remove.append(exe_name + '.exe.manifest') + if is_gui: + paths_to_remove.append(exe_name + '-script.pyw') + else: + paths_to_remove.append(exe_name + '-script.py') + return paths_to_remove + + +def _unique(fn): + # type: (Callable[..., Iterator[Any]]) -> Callable[..., Iterator[Any]] + @functools.wraps(fn) + def unique(*args, **kw): + # type: (Any, Any) -> Iterator[Any] + seen = set() # type: Set[Any] + for item in fn(*args, **kw): + if item not in seen: + seen.add(item) + yield item + return unique + + +@_unique +def uninstallation_paths(dist): + # type: (Distribution) -> Iterator[str] + """ + Yield all the uninstallation paths for dist based on RECORD-without-.py[co] + + Yield paths to all the files in RECORD. For each .py file in RECORD, add + the .pyc and .pyo in the same directory. + + UninstallPathSet.add() takes care of the __pycache__ .py[co]. 
+ """ + r = csv.reader(FakeFile(dist.get_metadata_lines('RECORD'))) + for row in r: + path = os.path.join(dist.location, row[0]) + yield path + if path.endswith('.py'): + dn, fn = os.path.split(path) + base = fn[:-3] + path = os.path.join(dn, base + '.pyc') + yield path + path = os.path.join(dn, base + '.pyo') + yield path + + +def compact(paths): + # type: (Iterable[str]) -> Set[str] + """Compact a path set to contain the minimal number of paths + necessary to contain all paths in the set. If /a/path/ and + /a/path/to/a/file.txt are both in the set, leave only the + shorter path.""" + + sep = os.path.sep + short_paths = set() # type: Set[str] + for path in sorted(paths, key=len): + should_skip = any( + path.startswith(shortpath.rstrip("*")) and + path[len(shortpath.rstrip("*").rstrip(sep))] == sep + for shortpath in short_paths + ) + if not should_skip: + short_paths.add(path) + return short_paths + + +def compress_for_rename(paths): + # type: (Iterable[str]) -> Set[str] + """Returns a set containing the paths that need to be renamed. + + This set may include directories when the original sequence of paths + included every file on disk. + """ + case_map = dict((os.path.normcase(p), p) for p in paths) + remaining = set(case_map) + unchecked = sorted(set(os.path.split(p)[0] + for p in case_map.values()), key=len) + wildcards = set() # type: Set[str] + + def norm_join(*a): + # type: (str) -> str + return os.path.normcase(os.path.join(*a)) + + for root in unchecked: + if any(os.path.normcase(root).startswith(w) + for w in wildcards): + # This directory has already been handled. + continue + + all_files = set() # type: Set[str] + all_subdirs = set() # type: Set[str] + for dirname, subdirs, files in os.walk(root): + all_subdirs.update(norm_join(root, dirname, d) + for d in subdirs) + all_files.update(norm_join(root, dirname, f) + for f in files) + # If all the files we found are in our remaining set of files to + # remove, then remove them from the latter set and add a wildcard + # for the directory. + if not (all_files - remaining): + remaining.difference_update(all_files) + wildcards.add(root + os.sep) + + return set(map(case_map.__getitem__, remaining)) | wildcards + + +def compress_for_output_listing(paths): + # type: (Iterable[str]) -> Tuple[Set[str], Set[str]] + """Returns a tuple of 2 sets of which paths to display to user + + The first set contains paths that would be deleted. Files of a package + are not added and the top-level directory of the package has a '*' added + at the end - to signify that all it's contents are removed. + + The second set contains files that would have been skipped in the above + folders. + """ + + will_remove = set(paths) + will_skip = set() + + # Determine folders and files + folders = set() + files = set() + for path in will_remove: + if path.endswith(".pyc"): + continue + if path.endswith("__init__.py") or ".dist-info" in path: + folders.add(os.path.dirname(path)) + files.add(path) + + # probably this one https://github.com/python/mypy/issues/390 + _normcased_files = set(map(os.path.normcase, files)) # type: ignore + + folders = compact(folders) + + # This walks the tree using os.walk to not miss extra folders + # that might get added. + for folder in folders: + for dirpath, _, dirfiles in os.walk(folder): + for fname in dirfiles: + if fname.endswith(".pyc"): + continue + + file_ = os.path.join(dirpath, fname) + if (os.path.isfile(file_) and + os.path.normcase(file_) not in _normcased_files): + # We are skipping this file. Add it to the set. 
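To make the "keep only the shorter covering path" rule above concrete, this is what compact() is expected to do for a small POSIX-style input. Illustrative paths only; it assumes the module added in this hunk is importable as pip._internal.req.req_uninstall:

from pip._internal.req.req_uninstall import compact

paths = {
    "/site-packages/demo",
    "/site-packages/demo/__init__.py",
    "/site-packages/demo/sub/mod.py",
    "/site-packages/other.py",
}
print(sorted(compact(paths)))
# ['/site-packages/demo', '/site-packages/other.py']
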
+ will_skip.add(file_) + + will_remove = files | { + os.path.join(folder, "*") for folder in folders + } + + return will_remove, will_skip + + +class StashedUninstallPathSet(object): + """A set of file rename operations to stash files while + tentatively uninstalling them.""" + def __init__(self): + # type: () -> None + # Mapping from source file root to [Adjacent]TempDirectory + # for files under that directory. + self._save_dirs = {} # type: Dict[str, TempDirectory] + # (old path, new path) tuples for each move that may need + # to be undone. + self._moves = [] # type: List[Tuple[str, str]] + + def _get_directory_stash(self, path): + # type: (str) -> str + """Stashes a directory. + + Directories are stashed adjacent to their original location if + possible, or else moved/copied into the user's temp dir.""" + + try: + save_dir = AdjacentTempDirectory(path) # type: TempDirectory + except OSError: + save_dir = TempDirectory(kind="uninstall") + self._save_dirs[os.path.normcase(path)] = save_dir + + return save_dir.path + + def _get_file_stash(self, path): + # type: (str) -> str + """Stashes a file. + + If no root has been provided, one will be created for the directory + in the user's temp directory.""" + path = os.path.normcase(path) + head, old_head = os.path.dirname(path), None + save_dir = None + + while head != old_head: + try: + save_dir = self._save_dirs[head] + break + except KeyError: + pass + head, old_head = os.path.dirname(head), head + else: + # Did not find any suitable root + head = os.path.dirname(path) + save_dir = TempDirectory(kind='uninstall') + self._save_dirs[head] = save_dir + + relpath = os.path.relpath(path, head) + if relpath and relpath != os.path.curdir: + return os.path.join(save_dir.path, relpath) + return save_dir.path + + def stash(self, path): + # type: (str) -> str + """Stashes the directory or file and returns its new location. + Handle symlinks as files to avoid modifying the symlink targets. + """ + path_is_dir = os.path.isdir(path) and not os.path.islink(path) + if path_is_dir: + new_path = self._get_directory_stash(path) + else: + new_path = self._get_file_stash(path) + + self._moves.append((path, new_path)) + if (path_is_dir and os.path.isdir(new_path)): + # If we're moving a directory, we need to + # remove the destination first or else it will be + # moved to inside the existing directory. + # We just created new_path ourselves, so it will + # be removable. 
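The class above boils down to "move files into a temporary directory, then either delete them (commit) or move them back (rollback)". A stripped-down, self-contained sketch of that idea, using a hypothetical Stash class that ignores the adjacent-directory optimisation and basename collisions:

import os
import shutil
import tempfile

class Stash:
    """Move files aside so an uninstall can be rolled back (sketch only)."""

    def __init__(self):
        self._moves = []                                   # (original, stashed)
        self._tmp = tempfile.mkdtemp(prefix="uninstall-")  # stash location

    def stash(self, path):
        stashed = os.path.join(self._tmp, os.path.basename(path))
        shutil.move(path, stashed)
        self._moves.append((path, stashed))
        return stashed

    def rollback(self):
        # Undo every move, newest first.
        for original, stashed in reversed(self._moves):
            shutil.move(stashed, original)
        self._moves = []

    def commit(self):
        # The uninstall succeeded; the stashed copies can go.
        shutil.rmtree(self._tmp)
        self._moves = []
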
+ os.rmdir(new_path) + renames(path, new_path) + return new_path + + def commit(self): + # type: () -> None + """Commits the uninstall by removing stashed files.""" + for _, save_dir in self._save_dirs.items(): + save_dir.cleanup() + self._moves = [] + self._save_dirs = {} + + def rollback(self): + # type: () -> None + """Undoes the uninstall by moving stashed files back.""" + for p in self._moves: + logger.info("Moving to %s\n from %s", *p) + + for new_path, path in self._moves: + try: + logger.debug('Replacing %s from %s', new_path, path) + if os.path.isfile(new_path) or os.path.islink(new_path): + os.unlink(new_path) + elif os.path.isdir(new_path): + rmtree(new_path) + renames(path, new_path) + except OSError as ex: + logger.error("Failed to restore %s", new_path) + logger.debug("Exception: %s", ex) + + self.commit() + + @property + def can_rollback(self): + # type: () -> bool + return bool(self._moves) + + +class UninstallPathSet(object): + """A set of file paths to be removed in the uninstallation of a + requirement.""" + def __init__(self, dist): + # type: (Distribution) -> None + self.paths = set() # type: Set[str] + self._refuse = set() # type: Set[str] + self.pth = {} # type: Dict[str, UninstallPthEntries] + self.dist = dist + self._moved_paths = StashedUninstallPathSet() + + def _permitted(self, path): + # type: (str) -> bool + """ + Return True if the given path is one we are permitted to + remove/modify, False otherwise. + + """ + return is_local(path) + + def add(self, path): + # type: (str) -> None + head, tail = os.path.split(path) + + # we normalize the head to resolve parent directory symlinks, but not + # the tail, since we only want to uninstall symlinks, not their targets + path = os.path.join(normalize_path(head), os.path.normcase(tail)) + + if not os.path.exists(path): + return + if self._permitted(path): + self.paths.add(path) + else: + self._refuse.add(path) + + # __pycache__ files can show up after 'installed-files.txt' is created, + # due to imports + if os.path.splitext(path)[1] == '.py' and uses_pycache: + self.add(cache_from_source(path)) + + def add_pth(self, pth_file, entry): + # type: (str, str) -> None + pth_file = normalize_path(pth_file) + if self._permitted(pth_file): + if pth_file not in self.pth: + self.pth[pth_file] = UninstallPthEntries(pth_file) + self.pth[pth_file].add(entry) + else: + self._refuse.add(pth_file) + + def remove(self, auto_confirm=False, verbose=False): + # type: (bool, bool) -> None + """Remove paths in ``self.paths`` with confirmation (unless + ``auto_confirm`` is True).""" + + if not self.paths: + logger.info( + "Can't uninstall '%s'. 
No files were found to uninstall.", + self.dist.project_name, + ) + return + + dist_name_version = ( + self.dist.project_name + "-" + self.dist.version + ) + logger.info('Uninstalling %s:', dist_name_version) + + with indent_log(): + if auto_confirm or self._allowed_to_proceed(verbose): + moved = self._moved_paths + + for_rename = compress_for_rename(self.paths) + + for path in sorted(compact(for_rename)): + moved.stash(path) + logger.debug('Removing file or directory %s', path) + + for pth in self.pth.values(): + pth.remove() + + logger.info('Successfully uninstalled %s', dist_name_version) + + def _allowed_to_proceed(self, verbose): + # type: (bool) -> bool + """Display which files would be deleted and prompt for confirmation + """ + + def _display(msg, paths): + # type: (str, Iterable[str]) -> None + if not paths: + return + + logger.info(msg) + with indent_log(): + for path in sorted(compact(paths)): + logger.info(path) + + if not verbose: + will_remove, will_skip = compress_for_output_listing(self.paths) + else: + # In verbose mode, display all the files that are going to be + # deleted. + will_remove = set(self.paths) + will_skip = set() + + _display('Would remove:', will_remove) + _display('Would not remove (might be manually added):', will_skip) + _display('Would not remove (outside of prefix):', self._refuse) + if verbose: + _display('Will actually move:', compress_for_rename(self.paths)) + + return ask('Proceed (y/n)? ', ('y', 'n')) == 'y' + + def rollback(self): + # type: () -> None + """Rollback the changes previously made by remove().""" + if not self._moved_paths.can_rollback: + logger.error( + "Can't roll back %s; was not uninstalled", + self.dist.project_name, + ) + return + logger.info('Rolling back uninstall of %s', self.dist.project_name) + self._moved_paths.rollback() + for pth in self.pth.values(): + pth.rollback() + + def commit(self): + # type: () -> None + """Remove temporary save dir: rollback will no longer be possible.""" + self._moved_paths.commit() + + @classmethod + def from_dist(cls, dist): + # type: (Distribution) -> UninstallPathSet + dist_path = normalize_path(dist.location) + if not dist_is_local(dist): + logger.info( + "Not uninstalling %s at %s, outside environment %s", + dist.key, + dist_path, + sys.prefix, + ) + return cls(dist) + + if dist_path in {p for p in {sysconfig.get_path("stdlib"), + sysconfig.get_path("platstdlib")} + if p}: + logger.info( + "Not uninstalling %s at %s, as it is in the standard library.", + dist.key, + dist_path, + ) + return cls(dist) + + paths_to_remove = cls(dist) + develop_egg_link = egg_link_path(dist) + develop_egg_link_egg_info = '{}.egg-info'.format( + pkg_resources.to_filename(dist.project_name)) + egg_info_exists = dist.egg_info and os.path.exists(dist.egg_info) + # Special case for distutils installed package + distutils_egg_info = getattr(dist._provider, 'path', None) + + # Uninstall cases order do matter as in the case of 2 installs of the + # same package, pip needs to uninstall the currently detected version + if (egg_info_exists and dist.egg_info.endswith('.egg-info') and + not dist.egg_info.endswith(develop_egg_link_egg_info)): + # if dist.egg_info.endswith(develop_egg_link_egg_info), we + # are in fact in the develop_egg_link case + paths_to_remove.add(dist.egg_info) + if dist.has_metadata('installed-files.txt'): + for installed_file in dist.get_metadata( + 'installed-files.txt').splitlines(): + path = os.path.normpath( + os.path.join(dist.egg_info, installed_file) + ) + paths_to_remove.add(path) + # FIXME: 
need a test for this elif block + # occurs with --single-version-externally-managed/--record outside + # of pip + elif dist.has_metadata('top_level.txt'): + if dist.has_metadata('namespace_packages.txt'): + namespaces = dist.get_metadata('namespace_packages.txt') + else: + namespaces = [] + for top_level_pkg in [ + p for p + in dist.get_metadata('top_level.txt').splitlines() + if p and p not in namespaces]: + path = os.path.join(dist.location, top_level_pkg) + paths_to_remove.add(path) + paths_to_remove.add(path + '.py') + paths_to_remove.add(path + '.pyc') + paths_to_remove.add(path + '.pyo') + + elif distutils_egg_info: + raise UninstallationError( + "Cannot uninstall {!r}. It is a distutils installed project " + "and thus we cannot accurately determine which files belong " + "to it which would lead to only a partial uninstall.".format( + dist.project_name, + ) + ) + + elif dist.location.endswith('.egg'): + # package installed by easy_install + # We cannot match on dist.egg_name because it can slightly vary + # i.e. setuptools-0.6c11-py2.6.egg vs setuptools-0.6rc11-py2.6.egg + paths_to_remove.add(dist.location) + easy_install_egg = os.path.split(dist.location)[1] + easy_install_pth = os.path.join(os.path.dirname(dist.location), + 'easy-install.pth') + paths_to_remove.add_pth(easy_install_pth, './' + easy_install_egg) + + elif egg_info_exists and dist.egg_info.endswith('.dist-info'): + for path in uninstallation_paths(dist): + paths_to_remove.add(path) + + elif develop_egg_link: + # develop egg + with open(develop_egg_link, 'r') as fh: + link_pointer = os.path.normcase(fh.readline().strip()) + assert (link_pointer == dist.location), ( + 'Egg-link %s does not match installed location of %s ' + '(at %s)' % (link_pointer, dist.project_name, dist.location) + ) + paths_to_remove.add(develop_egg_link) + easy_install_pth = os.path.join(os.path.dirname(develop_egg_link), + 'easy-install.pth') + paths_to_remove.add_pth(easy_install_pth, dist.location) + + else: + logger.debug( + 'Not sure how to uninstall: %s - Check: %s', + dist, dist.location, + ) + + # find distutils scripts= scripts + if dist.has_metadata('scripts') and dist.metadata_isdir('scripts'): + for script in dist.metadata_listdir('scripts'): + if dist_in_usersite(dist): + bin_dir = bin_user + else: + bin_dir = bin_py + paths_to_remove.add(os.path.join(bin_dir, script)) + if WINDOWS: + paths_to_remove.add(os.path.join(bin_dir, script) + '.bat') + + # find console_scripts + _scripts_to_remove = [] + console_scripts = dist.get_entry_map(group='console_scripts') + for name in console_scripts.keys(): + _scripts_to_remove.extend(_script_names(dist, name, False)) + # find gui_scripts + gui_scripts = dist.get_entry_map(group='gui_scripts') + for name in gui_scripts.keys(): + _scripts_to_remove.extend(_script_names(dist, name, True)) + + for s in _scripts_to_remove: + paths_to_remove.add(s) + + return paths_to_remove + + +class UninstallPthEntries(object): + def __init__(self, pth_file): + # type: (str) -> None + if not os.path.isfile(pth_file): + raise UninstallationError( + "Cannot remove entries from nonexistent file %s" % pth_file + ) + self.file = pth_file + self.entries = set() # type: Set[str] + self._saved_lines = None # type: Optional[List[bytes]] + + def add(self, entry): + # type: (str) -> None + entry = os.path.normcase(entry) + # On Windows, os.path.normcase converts the entry to use + # backslashes. This is correct for entries that describe absolute + # paths outside of site-packages, but all the others use forward + # slashes. 
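The check described in the next comment hinges on os.path.splitdrive(). Its behaviour is easiest to see with the Windows flavour of the path module, which is importable on any platform; the paths are illustrative:

import ntpath  # Windows path semantics, usable everywhere for demonstration

print(ntpath.splitdrive(r"C:\Projects\pkg"))        # ('C:', '\\Projects\\pkg')
print(ntpath.splitdrive(r"\\server\share\folder"))  # ('\\\\server\\share', '\\folder')
print(ntpath.splitdrive("relative/path"))           # ('', 'relative/path')

Only entries whose drive component comes back empty have their backslashes rewritten to forward slashes.
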
+ # os.path.splitdrive is used instead of os.path.isabs because isabs + # treats non-absolute paths with drive letter markings like c:foo\bar + # as absolute paths. It also does not recognize UNC paths if they don't + # have more than "\\sever\share". Valid examples: "\\server\share\" or + # "\\server\share\folder". Python 2.7.8+ support UNC in splitdrive. + if WINDOWS and not os.path.splitdrive(entry)[0]: + entry = entry.replace('\\', '/') + self.entries.add(entry) + + def remove(self): + # type: () -> None + logger.debug('Removing pth entries from %s:', self.file) + with open(self.file, 'rb') as fh: + # windows uses '\r\n' with py3k, but uses '\n' with py2.x + lines = fh.readlines() + self._saved_lines = lines + if any(b'\r\n' in line for line in lines): + endline = '\r\n' + else: + endline = '\n' + # handle missing trailing newline + if lines and not lines[-1].endswith(endline.encode("utf-8")): + lines[-1] = lines[-1] + endline.encode("utf-8") + for entry in self.entries: + try: + logger.debug('Removing entry: %s', entry) + lines.remove((entry + endline).encode("utf-8")) + except ValueError: + pass + with open(self.file, 'wb') as fh: + fh.writelines(lines) + + def rollback(self): + # type: () -> bool + if self._saved_lines is None: + logger.error( + 'Cannot roll back changes to %s, none were made', self.file + ) + return False + logger.debug('Rolling %s back to previous state', self.file) + with open(self.file, 'wb') as fh: + fh.writelines(self._saved_lines) + return True diff --git a/venv/lib/python3.7/site-packages/pip/_internal/self_outdated_check.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/self_outdated_check.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/self_outdated_check.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/self_outdated_check.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..7752b69 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/appdirs.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/appdirs.cpython-37.pyc new file mode 100644 index 0000000..f556053 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/appdirs.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/compat.cpython-37.pyc new file mode 100644 index 0000000..5408e6b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/compat.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/deprecation.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/deprecation.cpython-37.pyc new file mode 100644 index 0000000..63265ef Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/deprecation.cpython-37.pyc differ diff --git 
a/venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/distutils_args.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/distutils_args.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/distutils_args.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/distutils_args.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/encoding.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/encoding.cpython-37.pyc new file mode 100644 index 0000000..7e920ec Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/encoding.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/entrypoints.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/entrypoints.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/entrypoints.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/entrypoints.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/filesystem.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/filesystem.cpython-37.pyc new file mode 100644 index 0000000..5a8e8a3 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/filesystem.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/filetypes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/filetypes.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/filetypes.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/filetypes.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/glibc.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/glibc.cpython-37.pyc new file mode 100644 index 0000000..f198514 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/glibc.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/hashes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/hashes.cpython-37.pyc new file mode 100644 index 0000000..a0b9dca Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/hashes.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/inject_securetransport.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/inject_securetransport.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/inject_securetransport.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/inject_securetransport.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/logging.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/logging.cpython-37.pyc new file mode 100644 index 0000000..17a347f Binary files /dev/null and 
b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/logging.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/marker_files.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/marker_files.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/marker_files.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/marker_files.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/misc.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/misc.cpython-37.pyc new file mode 100644 index 0000000..dabc40c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/misc.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/models.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/models.cpython-37.pyc new file mode 100644 index 0000000..2381ca6 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/models.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/packaging.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/packaging.cpython-37.pyc new file mode 100644 index 0000000..0d74a54 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/packaging.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/pkg_resources.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/pkg_resources.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/pkg_resources.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/pkg_resources.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/setuptools_build.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/setuptools_build.cpython-37.pyc new file mode 100644 index 0000000..3c0867f Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/setuptools_build.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/subprocess.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/subprocess.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/subprocess.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/subprocess.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/temp_dir.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/temp_dir.cpython-37.pyc new file mode 100644 index 0000000..1881f2b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/temp_dir.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/typing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/typing.cpython-37.pyc new file mode 100644 index 0000000..39da82e Binary files 
/dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/typing.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/ui.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/ui.cpython-37.pyc new file mode 100644 index 0000000..4171640 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/ui.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/unpacking.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/unpacking.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/unpacking.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/unpacking.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/urls.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/urls.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/urls.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/urls.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/virtualenv.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/virtualenv.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/virtualenv.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/virtualenv.cpython-37.pyc diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/wheel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/wheel.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/wheel.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/__pycache__/wheel.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/appdirs.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/appdirs.py new file mode 100644 index 0000000..251c5fd --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/appdirs.py @@ -0,0 +1,41 @@ +""" +This code wraps the vendored appdirs module to so the return values are +compatible for the current pip code base. + +The intention is to rewrite current usages gradually, keeping the tests pass, +and eventually drop this after all usages are changed. 
+""" + +from __future__ import absolute_import + +import os + +from pip._vendor import appdirs as _appdirs + +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import List + + +def user_cache_dir(appname): + # type: (str) -> str + return _appdirs.user_cache_dir(appname, appauthor=False) + + +def user_config_dir(appname, roaming=True): + # type: (str, bool) -> str + return _appdirs.user_config_dir(appname, appauthor=False, roaming=roaming) + + +def user_data_dir(appname, roaming=False): + # type: (str, bool) -> str + return _appdirs.user_data_dir(appname, appauthor=False, roaming=roaming) + + +def site_config_dirs(appname): + # type: (str) -> List[str] + dirval = _appdirs.site_config_dir(appname, appauthor=False, multipath=True) + if _appdirs.system not in ["win32", "darwin"]: + return dirval.split(os.pathsep) + return [dirval] diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/compat.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/compat.py new file mode 100644 index 0000000..6efa52a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/compat.py @@ -0,0 +1,269 @@ +"""Stuff that differs in different Python versions and platform +distributions.""" + +# The following comment should be removed at some point in the future. +# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import, division + +import codecs +import locale +import logging +import os +import shutil +import sys + +from pip._vendor.six import PY2, text_type + +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import Optional, Text, Tuple, Union + +try: + import ipaddress +except ImportError: + try: + from pip._vendor import ipaddress # type: ignore + except ImportError: + import ipaddr as ipaddress # type: ignore + ipaddress.ip_address = ipaddress.IPAddress # type: ignore + ipaddress.ip_network = ipaddress.IPNetwork # type: ignore + + +__all__ = [ + "ipaddress", "uses_pycache", "console_to_str", + "get_path_uid", "stdlib_pkgs", "WINDOWS", "samefile", "get_terminal_size", +] + + +logger = logging.getLogger(__name__) + +if PY2: + import imp + + try: + cache_from_source = imp.cache_from_source # type: ignore + except AttributeError: + # does not use __pycache__ + cache_from_source = None + + uses_pycache = cache_from_source is not None +else: + uses_pycache = True + from importlib.util import cache_from_source + + +if PY2: + # In Python 2.7, backslashreplace exists + # but does not support use for decoding. + # We implement our own replace handler for this + # situation, so that we can consistently use + # backslash replacement for all versions. 
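On Python 3 the standard backslashreplace error handler already covers the decoding case the comment above describes, which is why only the Python 2 branch below registers a custom handler. For example:

data = b"caf\xe9 \xff"                                  # not valid UTF-8
print(data.decode("utf-8", errors="backslashreplace"))  # caf\xe9 \xff
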
+ def backslashreplace_decode_fn(err): + raw_bytes = (err.object[i] for i in range(err.start, err.end)) + # Python 2 gave us characters - convert to numeric bytes + raw_bytes = (ord(b) for b in raw_bytes) + return u"".join(u"\\x%x" % c for c in raw_bytes), err.end + codecs.register_error( + "backslashreplace_decode", + backslashreplace_decode_fn, + ) + backslashreplace_decode = "backslashreplace_decode" +else: + backslashreplace_decode = "backslashreplace" + + +def has_tls(): + # type: () -> bool + try: + import _ssl # noqa: F401 # ignore unused + return True + except ImportError: + pass + + from pip._vendor.urllib3.util import IS_PYOPENSSL + return IS_PYOPENSSL + + +def str_to_display(data, desc=None): + # type: (Union[bytes, Text], Optional[str]) -> Text + """ + For display or logging purposes, convert a bytes object (or text) to + text (e.g. unicode in Python 2) safe for output. + + :param desc: An optional phrase describing the input data, for use in + the log message if a warning is logged. Defaults to "Bytes object". + + This function should never error out and so can take a best effort + approach. It is okay to be lossy if needed since the return value is + just for display. + + We assume the data is in the locale preferred encoding. If it won't + decode properly, we warn the user but decode as best we can. + + We also ensure that the output can be safely written to standard output + without encoding errors. + """ + if isinstance(data, text_type): + return data + + # Otherwise, data is a bytes object (str in Python 2). + # First, get the encoding we assume. This is the preferred + # encoding for the locale, unless that is not found, or + # it is ASCII, in which case assume UTF-8 + encoding = locale.getpreferredencoding() + if (not encoding) or codecs.lookup(encoding).name == "ascii": + encoding = "utf-8" + + # Now try to decode the data - if we fail, warn the user and + # decode with replacement. + try: + decoded_data = data.decode(encoding) + except UnicodeDecodeError: + if desc is None: + desc = 'Bytes object' + msg_format = '{} does not appear to be encoded as %s'.format(desc) + logger.warning(msg_format, encoding) + decoded_data = data.decode(encoding, errors=backslashreplace_decode) + + # Make sure we can print the output, by encoding it to the output + # encoding with replacement of unencodable characters, and then + # decoding again. + # We use stderr's encoding because it's less likely to be + # redirected and if we don't find an encoding we skip this + # step (on the assumption that output is wrapped by something + # that won't fail). + # The double getattr is to deal with the possibility that we're + # being called in a situation where sys.__stderr__ doesn't exist, + # or doesn't have an encoding attribute. Neither of these cases + # should occur in normal pip use, but there's no harm in checking + # in case people use pip in (unsupported) unusual situations. + output_encoding = getattr(getattr(sys, "__stderr__", None), + "encoding", None) + + if output_encoding: + output_encoded = decoded_data.encode( + output_encoding, + errors="backslashreplace" + ) + decoded_data = output_encoded.decode(output_encoding) + + return decoded_data + + +def console_to_str(data): + # type: (bytes) -> Text + """Return a string, safe for output, of subprocess output. + """ + return str_to_display(data, desc='Subprocess output') + + +def get_path_uid(path): + # type: (str) -> int + """ + Return path's uid. 
+ + Does not follow symlinks: + https://github.com/pypa/pip/pull/935#discussion_r5307003 + + Placed this function in compat due to differences on AIX and + Jython, that should eventually go away. + + :raises OSError: When path is a symlink or can't be read. + """ + if hasattr(os, 'O_NOFOLLOW'): + fd = os.open(path, os.O_RDONLY | os.O_NOFOLLOW) + file_uid = os.fstat(fd).st_uid + os.close(fd) + else: # AIX and Jython + # WARNING: time of check vulnerability, but best we can do w/o NOFOLLOW + if not os.path.islink(path): + # older versions of Jython don't have `os.fstat` + file_uid = os.stat(path).st_uid + else: + # raise OSError for parity with os.O_NOFOLLOW above + raise OSError( + "%s is a symlink; Will not return uid for symlinks" % path + ) + return file_uid + + +def expanduser(path): + # type: (str) -> str + """ + Expand ~ and ~user constructions. + + Includes a workaround for https://bugs.python.org/issue14768 + """ + expanded = os.path.expanduser(path) + if path.startswith('~/') and expanded.startswith('//'): + expanded = expanded[1:] + return expanded + + +# packages in the stdlib that may have installation metadata, but should not be +# considered 'installed'. this theoretically could be determined based on +# dist.location (py27:`sysconfig.get_paths()['stdlib']`, +# py26:sysconfig.get_config_vars('LIBDEST')), but fear platform variation may +# make this ineffective, so hard-coding +stdlib_pkgs = {"python", "wsgiref", "argparse"} + + +# windows detection, covers cpython and ironpython +WINDOWS = (sys.platform.startswith("win") or + (sys.platform == 'cli' and os.name == 'nt')) + + +def samefile(file1, file2): + # type: (str, str) -> bool + """Provide an alternative for os.path.samefile on Windows/Python2""" + if hasattr(os.path, 'samefile'): + return os.path.samefile(file1, file2) + else: + path1 = os.path.normcase(os.path.abspath(file1)) + path2 = os.path.normcase(os.path.abspath(file2)) + return path1 == path2 + + +if hasattr(shutil, 'get_terminal_size'): + def get_terminal_size(): + # type: () -> Tuple[int, int] + """ + Returns a tuple (x, y) representing the width(x) and the height(y) + in characters of the terminal window. + """ + return tuple(shutil.get_terminal_size()) # type: ignore +else: + def get_terminal_size(): + # type: () -> Tuple[int, int] + """ + Returns a tuple (x, y) representing the width(x) and the height(y) + in characters of the terminal window. + """ + def ioctl_GWINSZ(fd): + try: + import fcntl + import termios + import struct + cr = struct.unpack_from( + 'hh', + fcntl.ioctl(fd, termios.TIOCGWINSZ, '12345678') + ) + except Exception: + return None + if cr == (0, 0): + return None + return cr + cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2) + if not cr: + if sys.platform != "win32": + try: + fd = os.open(os.ctermid(), os.O_RDONLY) + cr = ioctl_GWINSZ(fd) + os.close(fd) + except Exception: + pass + if not cr: + cr = (os.environ.get('LINES', 25), os.environ.get('COLUMNS', 80)) + return int(cr[1]), int(cr[0]) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/deprecation.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/deprecation.py new file mode 100644 index 0000000..2f20cfd --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/deprecation.py @@ -0,0 +1,104 @@ +""" +A module that implements tooling to enable easy warnings about deprecations. +""" + +# The following comment should be removed at some point in the future. 
+# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import logging +import warnings + +from pip._vendor.packaging.version import parse + +from pip import __version__ as current_version +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import Any, Optional + + +DEPRECATION_MSG_PREFIX = "DEPRECATION: " + + +class PipDeprecationWarning(Warning): + pass + + +_original_showwarning = None # type: Any + + +# Warnings <-> Logging Integration +def _showwarning(message, category, filename, lineno, file=None, line=None): + if file is not None: + if _original_showwarning is not None: + _original_showwarning( + message, category, filename, lineno, file, line, + ) + elif issubclass(category, PipDeprecationWarning): + # We use a specially named logger which will handle all of the + # deprecation messages for pip. + logger = logging.getLogger("pip._internal.deprecations") + logger.warning(message) + else: + _original_showwarning( + message, category, filename, lineno, file, line, + ) + + +def install_warning_logger(): + # type: () -> None + # Enable our Deprecation Warnings + warnings.simplefilter("default", PipDeprecationWarning, append=True) + + global _original_showwarning + + if _original_showwarning is None: + _original_showwarning = warnings.showwarning + warnings.showwarning = _showwarning + + +def deprecated(reason, replacement, gone_in, issue=None): + # type: (str, Optional[str], Optional[str], Optional[int]) -> None + """Helper to deprecate existing functionality. + + reason: + Textual reason shown to the user about why this functionality has + been deprecated. + replacement: + Textual suggestion shown to the user about what alternative + functionality they can use. + gone_in: + The version of pip does this functionality should get removed in. + Raises errors if pip's current version is greater than or equal to + this. + issue: + Issue number on the tracker that would serve as a useful place for + users to find related discussion and provide feedback. + + Always pass replacement, gone_in and issue as keyword arguments for clarity + at the call site. + """ + + # Construct a nice message. + # This is eagerly formatted as we want it to get logged as if someone + # typed this entire message out. + sentences = [ + (reason, DEPRECATION_MSG_PREFIX + "{}"), + (gone_in, "pip {} will remove support for this functionality."), + (replacement, "A possible replacement is {}."), + (issue, ( + "You can find discussion regarding this at " + "https://github.com/pypa/pip/issues/{}." + )), + ] + message = " ".join( + template.format(val) for val, template in sentences if val is not None + ) + + # Raise as an error if it has to be removed. 
+ if gone_in is not None and parse(current_version) >= parse(gone_in): + raise PipDeprecationWarning(message) + + warnings.warn(message, category=PipDeprecationWarning, stacklevel=2) diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/distutils_args.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/distutils_args.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/distutils_args.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/distutils_args.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/encoding.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/encoding.py new file mode 100644 index 0000000..ab4d4b9 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/encoding.py @@ -0,0 +1,42 @@ +# The following comment should be removed at some point in the future. +# mypy: strict-optional=False + +import codecs +import locale +import re +import sys + +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import List, Tuple, Text + +BOMS = [ + (codecs.BOM_UTF8, 'utf-8'), + (codecs.BOM_UTF16, 'utf-16'), + (codecs.BOM_UTF16_BE, 'utf-16-be'), + (codecs.BOM_UTF16_LE, 'utf-16-le'), + (codecs.BOM_UTF32, 'utf-32'), + (codecs.BOM_UTF32_BE, 'utf-32-be'), + (codecs.BOM_UTF32_LE, 'utf-32-le'), +] # type: List[Tuple[bytes, Text]] + +ENCODING_RE = re.compile(br'coding[:=]\s*([-\w.]+)') + + +def auto_decode(data): + # type: (bytes) -> Text + """Check a bytes string for a BOM to correctly detect the encoding + + Fallback to locale.getpreferredencoding(False) like open() on Python3""" + for bom, encoding in BOMS: + if data.startswith(bom): + return data[len(bom):].decode(encoding) + # Lets check the first two lines as in PEP263 + for line in data.split(b'\n')[:2]: + if line[0:1] == b'#' and ENCODING_RE.search(line): + encoding = ENCODING_RE.search(line).groups()[0].decode('ascii') + return data.decode(encoding) + return data.decode( + locale.getpreferredencoding(False) or sys.getdefaultencoding(), + ) diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/entrypoints.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/entrypoints.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/entrypoints.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/entrypoints.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/filesystem.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/filesystem.py new file mode 100644 index 0000000..6f1537e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/filesystem.py @@ -0,0 +1,171 @@ +import errno +import os +import os.path +import random +import shutil +import stat +import sys +from contextlib import contextmanager +from tempfile import NamedTemporaryFile + +# NOTE: retrying is not annotated in typeshed as on 2017-07-17, which is +# why we ignore the type on this import. 
+from pip._vendor.retrying import retry # type: ignore +from pip._vendor.six import PY2 + +from pip._internal.utils.compat import get_path_uid +from pip._internal.utils.typing import MYPY_CHECK_RUNNING, cast + +if MYPY_CHECK_RUNNING: + from typing import BinaryIO, Iterator + + class NamedTemporaryFileResult(BinaryIO): + @property + def file(self): + # type: () -> BinaryIO + pass + + +def check_path_owner(path): + # type: (str) -> bool + # If we don't have a way to check the effective uid of this process, then + # we'll just assume that we own the directory. + if sys.platform == "win32" or not hasattr(os, "geteuid"): + return True + + assert os.path.isabs(path) + + previous = None + while path != previous: + if os.path.lexists(path): + # Check if path is writable by current user. + if os.geteuid() == 0: + # Special handling for root user in order to handle properly + # cases where users use sudo without -H flag. + try: + path_uid = get_path_uid(path) + except OSError: + return False + return path_uid == 0 + else: + return os.access(path, os.W_OK) + else: + previous, path = path, os.path.dirname(path) + return False # assume we don't own the path + + +def copy2_fixed(src, dest): + # type: (str, str) -> None + """Wrap shutil.copy2() but map errors copying socket files to + SpecialFileError as expected. + + See also https://bugs.python.org/issue37700. + """ + try: + shutil.copy2(src, dest) + except (OSError, IOError): + for f in [src, dest]: + try: + is_socket_file = is_socket(f) + except OSError: + # An error has already occurred. Another error here is not + # a problem and we can ignore it. + pass + else: + if is_socket_file: + raise shutil.SpecialFileError("`%s` is a socket" % f) + + raise + + +def is_socket(path): + # type: (str) -> bool + return stat.S_ISSOCK(os.lstat(path).st_mode) + + +@contextmanager +def adjacent_tmp_file(path): + # type: (str) -> Iterator[NamedTemporaryFileResult] + """Given a path to a file, open a temp file next to it securely and ensure + it is written to disk after the context reaches its end. + """ + with NamedTemporaryFile( + delete=False, + dir=os.path.dirname(path), + prefix=os.path.basename(path), + suffix='.tmp', + ) as f: + result = cast('NamedTemporaryFileResult', f) + try: + yield result + finally: + result.file.flush() + os.fsync(result.file.fileno()) + + +_replace_retry = retry(stop_max_delay=1000, wait_fixed=250) + +if PY2: + @_replace_retry + def replace(src, dest): + # type: (str, str) -> None + try: + os.rename(src, dest) + except OSError: + os.remove(dest) + os.rename(src, dest) + +else: + replace = _replace_retry(os.replace) + + +# test_writable_dir and _test_writable_dir_win are copied from Flit, +# with the author's agreement to also place them under pip's license. +def test_writable_dir(path): + # type: (str) -> bool + """Check if a directory is writable. + + Uses os.access() on POSIX, tries creating files on Windows. + """ + # If the directory doesn't exist, find the closest parent that does. 
+ while not os.path.isdir(path): + parent = os.path.dirname(path) + if parent == path: + break # Should never get here, but infinite loops are bad + path = parent + + if os.name == 'posix': + return os.access(path, os.W_OK) + + return _test_writable_dir_win(path) + + +def _test_writable_dir_win(path): + # type: (str) -> bool + # os.access doesn't work on Windows: http://bugs.python.org/issue2528 + # and we can't use tempfile: http://bugs.python.org/issue22107 + basename = 'accesstest_deleteme_fishfingers_custard_' + alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789' + for i in range(10): + name = basename + ''.join(random.choice(alphabet) for _ in range(6)) + file = os.path.join(path, name) + try: + fd = os.open(file, os.O_RDWR | os.O_CREAT | os.O_EXCL) + except OSError as e: + if e.errno == errno.EEXIST: + continue + if e.errno == errno.EPERM: + # This could be because there's a directory with the same name. + # But it's highly unlikely there's a directory called that, + # so we'll assume it's because the parent dir is not writable. + return False + raise + else: + os.close(fd) + os.unlink(file) + return True + + # This should never be reached + raise EnvironmentError( + 'Unexpected condition testing for writable directory' + ) diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/filetypes.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/filetypes.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/filetypes.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/filetypes.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/glibc.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/glibc.py new file mode 100644 index 0000000..3610424 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/glibc.py @@ -0,0 +1,98 @@ +# The following comment should be removed at some point in the future. +# mypy: strict-optional=False + +from __future__ import absolute_import + +import os +import sys + +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import Optional, Tuple + + +def glibc_version_string(): + # type: () -> Optional[str] + "Returns glibc version string, or None if not using glibc." + return glibc_version_string_confstr() or glibc_version_string_ctypes() + + +def glibc_version_string_confstr(): + # type: () -> Optional[str] + "Primary implementation of glibc_version_string using os.confstr." + # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely + # to be broken or missing. This strategy is used in the standard library + # platform module: + # https://github.com/python/cpython/blob/fcf1d003bf4f0100c9d0921ff3d70e1127ca1b71/Lib/platform.py#L175-L183 + if sys.platform == "win32": + return None + try: + # os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17": + _, version = os.confstr("CS_GNU_LIBC_VERSION").split() + except (AttributeError, OSError, ValueError): + # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)... + return None + return version + + +def glibc_version_string_ctypes(): + # type: () -> Optional[str] + "Fallback implementation of glibc_version_string using ctypes." + + try: + import ctypes + except ImportError: + return None + + # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen + # manpage says, "If filename is NULL, then the returned handle is for the + # main program". 
This way we can let the linker do the work to figure out + # which libc our process is actually using. + process_namespace = ctypes.CDLL(None) + try: + gnu_get_libc_version = process_namespace.gnu_get_libc_version + except AttributeError: + # Symbol doesn't exist -> therefore, we are not linked to + # glibc. + return None + + # Call gnu_get_libc_version, which returns a string like "2.5" + gnu_get_libc_version.restype = ctypes.c_char_p + version_str = gnu_get_libc_version() + # py2 / py3 compatibility: + if not isinstance(version_str, str): + version_str = version_str.decode("ascii") + + return version_str + + +# platform.libc_ver regularly returns completely nonsensical glibc +# versions. E.g. on my computer, platform says: +# +# ~$ python2.7 -c 'import platform; print(platform.libc_ver())' +# ('glibc', '2.7') +# ~$ python3.5 -c 'import platform; print(platform.libc_ver())' +# ('glibc', '2.9') +# +# But the truth is: +# +# ~$ ldd --version +# ldd (Debian GLIBC 2.22-11) 2.22 +# +# This is unfortunate, because it means that the linehaul data on libc +# versions that was generated by pip 8.1.2 and earlier is useless and +# misleading. Solution: instead of using platform, use our code that actually +# works. +def libc_ver(): + # type: () -> Tuple[str, str] + """Try to determine the glibc version + + Returns a tuple of strings (lib, version) which default to empty strings + in case the lookup fails. + """ + glibc_version = glibc_version_string() + if glibc_version is None: + return ("", "") + else: + return ("glibc", glibc_version) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/hashes.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/hashes.py new file mode 100644 index 0000000..4c41551 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/hashes.py @@ -0,0 +1,131 @@ +from __future__ import absolute_import + +import hashlib + +from pip._vendor.six import iteritems, iterkeys, itervalues + +from pip._internal.exceptions import ( + HashMismatch, + HashMissing, + InstallationError, +) +from pip._internal.utils.misc import read_chunks +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import ( + Dict, List, BinaryIO, NoReturn, Iterator + ) + from pip._vendor.six import PY3 + if PY3: + from hashlib import _Hash + else: + from hashlib import _hash as _Hash + + +# The recommended hash algo of the moment. Change this whenever the state of +# the art changes; it won't hurt backward compatibility. +FAVORITE_HASH = 'sha256' + + +# Names of hashlib algorithms allowed by the --hash option and ``pip hash`` +# Currently, those are the ones at least as collision-resistant as sha256. +STRONG_HASHES = ['sha256', 'sha384', 'sha512'] + + +class Hashes(object): + """A wrapper that builds multiple hashes at once and checks them against + known-good values + + """ + def __init__(self, hashes=None): + # type: (Dict[str, List[str]]) -> None + """ + :param hashes: A dict of algorithm names pointing to lists of allowed + hex digests + """ + self._allowed = {} if hashes is None else hashes + + @property + def digest_count(self): + # type: () -> int + return sum(len(digests) for digests in self._allowed.values()) + + def is_hash_allowed( + self, + hash_name, # type: str + hex_digest, # type: str + ): + # type: (...) 
-> bool + """Return whether the given hex digest is allowed.""" + return hex_digest in self._allowed.get(hash_name, []) + + def check_against_chunks(self, chunks): + # type: (Iterator[bytes]) -> None + """Check good hashes against ones built from iterable of chunks of + data. + + Raise HashMismatch if none match. + + """ + gots = {} + for hash_name in iterkeys(self._allowed): + try: + gots[hash_name] = hashlib.new(hash_name) + except (ValueError, TypeError): + raise InstallationError('Unknown hash name: %s' % hash_name) + + for chunk in chunks: + for hash in itervalues(gots): + hash.update(chunk) + + for hash_name, got in iteritems(gots): + if got.hexdigest() in self._allowed[hash_name]: + return + self._raise(gots) + + def _raise(self, gots): + # type: (Dict[str, _Hash]) -> NoReturn + raise HashMismatch(self._allowed, gots) + + def check_against_file(self, file): + # type: (BinaryIO) -> None + """Check good hashes against a file-like object + + Raise HashMismatch if none match. + + """ + return self.check_against_chunks(read_chunks(file)) + + def check_against_path(self, path): + # type: (str) -> None + with open(path, 'rb') as file: + return self.check_against_file(file) + + def __nonzero__(self): + # type: () -> bool + """Return whether I know any known-good hashes.""" + return bool(self._allowed) + + def __bool__(self): + # type: () -> bool + return self.__nonzero__() + + +class MissingHashes(Hashes): + """A workalike for Hashes used when we're missing a hash for a requirement + + It computes the actual hash of the requirement and raises a HashMissing + exception showing it to the user. + + """ + def __init__(self): + # type: () -> None + """Don't offer the ``hashes`` kwarg.""" + # Pass our favorite hash in to generate a "gotten hash". With the + # empty list, it will never match, so an error will always raise. + super(MissingHashes, self).__init__(hashes={FAVORITE_HASH: []}) + + def _raise(self, gots): + # type: (Dict[str, _Hash]) -> NoReturn + raise HashMissing(gots[FAVORITE_HASH].hexdigest()) diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/inject_securetransport.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/inject_securetransport.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/inject_securetransport.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/inject_securetransport.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/logging.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/logging.py new file mode 100644 index 0000000..7767111 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/logging.py @@ -0,0 +1,398 @@ +# The following comment should be removed at some point in the future. 
+# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import contextlib +import errno +import logging +import logging.handlers +import os +import sys +from logging import Filter, getLogger + +from pip._vendor.six import PY2 + +from pip._internal.utils.compat import WINDOWS +from pip._internal.utils.deprecation import DEPRECATION_MSG_PREFIX +from pip._internal.utils.misc import ensure_dir + +try: + import threading +except ImportError: + import dummy_threading as threading # type: ignore + + +try: + # Use "import as" and set colorama in the else clause to avoid mypy + # errors and get the following correct revealed type for colorama: + # `Union[_importlib_modulespec.ModuleType, None]` + # Otherwise, we get an error like the following in the except block: + # > Incompatible types in assignment (expression has type "None", + # variable has type Module) + # TODO: eliminate the need to use "import as" once mypy addresses some + # of its issues with conditional imports. Here is an umbrella issue: + # https://github.com/python/mypy/issues/1297 + from pip._vendor import colorama as _colorama +# Lots of different errors can come from this, including SystemError and +# ImportError. +except Exception: + colorama = None +else: + # Import Fore explicitly rather than accessing below as colorama.Fore + # to avoid the following error running mypy: + # > Module has no attribute "Fore" + # TODO: eliminate the need to import Fore once mypy addresses some of its + # issues with conditional imports. This particular case could be an + # instance of the following issue (but also see the umbrella issue above): + # https://github.com/python/mypy/issues/3500 + from pip._vendor.colorama import Fore + + colorama = _colorama + + +_log_state = threading.local() +_log_state.indentation = 0 +subprocess_logger = getLogger('pip.subprocessor') + + +class BrokenStdoutLoggingError(Exception): + """ + Raised if BrokenPipeError occurs for the stdout stream while logging. + """ + pass + + +# BrokenPipeError does not exist in Python 2 and, in addition, manifests +# differently in Windows and non-Windows. +if WINDOWS: + # In Windows, a broken pipe can show up as EINVAL rather than EPIPE: + # https://bugs.python.org/issue19612 + # https://bugs.python.org/issue30418 + if PY2: + def _is_broken_pipe_error(exc_class, exc): + """See the docstring for non-Windows Python 3 below.""" + return (exc_class is IOError and + exc.errno in (errno.EINVAL, errno.EPIPE)) + else: + # In Windows, a broken pipe IOError became OSError in Python 3. + def _is_broken_pipe_error(exc_class, exc): + """See the docstring for non-Windows Python 3 below.""" + return ((exc_class is BrokenPipeError) or # noqa: F821 + (exc_class is OSError and + exc.errno in (errno.EINVAL, errno.EPIPE))) +elif PY2: + def _is_broken_pipe_error(exc_class, exc): + """See the docstring for non-Windows Python 3 below.""" + return (exc_class is IOError and exc.errno == errno.EPIPE) +else: + # Then we are in the non-Windows Python 3 case. + def _is_broken_pipe_error(exc_class, exc): + """ + Return whether an exception is a broken pipe error. + + Args: + exc_class: an exception class. + exc: an exception instance. + """ + return (exc_class is BrokenPipeError) # noqa: F821 + + +@contextlib.contextmanager +def indent_log(num=2): + """ + A context manager which will cause the log output to be indented for any + log messages emitted inside it. 
+ """ + _log_state.indentation += num + try: + yield + finally: + _log_state.indentation -= num + + +def get_indentation(): + return getattr(_log_state, 'indentation', 0) + + +class IndentingFormatter(logging.Formatter): + + def __init__(self, *args, **kwargs): + """ + A logging.Formatter that obeys the indent_log() context manager. + + :param add_timestamp: A bool indicating output lines should be prefixed + with their record's timestamp. + """ + self.add_timestamp = kwargs.pop("add_timestamp", False) + super(IndentingFormatter, self).__init__(*args, **kwargs) + + def get_message_start(self, formatted, levelno): + """ + Return the start of the formatted log message (not counting the + prefix to add to each line). + """ + if levelno < logging.WARNING: + return '' + if formatted.startswith(DEPRECATION_MSG_PREFIX): + # Then the message already has a prefix. We don't want it to + # look like "WARNING: DEPRECATION: ...." + return '' + if levelno < logging.ERROR: + return 'WARNING: ' + + return 'ERROR: ' + + def format(self, record): + """ + Calls the standard formatter, but will indent all of the log message + lines by our current indentation level. + """ + formatted = super(IndentingFormatter, self).format(record) + message_start = self.get_message_start(formatted, record.levelno) + formatted = message_start + formatted + + prefix = '' + if self.add_timestamp: + # TODO: Use Formatter.default_time_format after dropping PY2. + t = self.formatTime(record, "%Y-%m-%dT%H:%M:%S") + prefix = '%s,%03d ' % (t, record.msecs) + prefix += " " * get_indentation() + formatted = "".join([ + prefix + line + for line in formatted.splitlines(True) + ]) + return formatted + + +def _color_wrap(*colors): + def wrapped(inp): + return "".join(list(colors) + [inp, colorama.Style.RESET_ALL]) + return wrapped + + +class ColorizedStreamHandler(logging.StreamHandler): + + # Don't build up a list of colors if we don't have colorama + if colorama: + COLORS = [ + # This needs to be in order from highest logging level to lowest. + (logging.ERROR, _color_wrap(Fore.RED)), + (logging.WARNING, _color_wrap(Fore.YELLOW)), + ] + else: + COLORS = [] + + def __init__(self, stream=None, no_color=None): + logging.StreamHandler.__init__(self, stream) + self._no_color = no_color + + if WINDOWS and colorama: + self.stream = colorama.AnsiToWin32(self.stream) + + def _using_stdout(self): + """ + Return whether the handler is using sys.stdout. + """ + if WINDOWS and colorama: + # Then self.stream is an AnsiToWin32 object. + return self.stream.wrapped is sys.stdout + + return self.stream is sys.stdout + + def should_color(self): + # Don't colorize things if we do not have colorama or if told not to + if not colorama or self._no_color: + return False + + real_stream = ( + self.stream if not isinstance(self.stream, colorama.AnsiToWin32) + else self.stream.wrapped + ) + + # If the stream is a tty we should color it + if hasattr(real_stream, "isatty") and real_stream.isatty(): + return True + + # If we have an ANSI term we should color it + if os.environ.get("TERM") == "ANSI": + return True + + # If anything else we should not color it + return False + + def format(self, record): + msg = logging.StreamHandler.format(self, record) + + if self.should_color(): + for level, color in self.COLORS: + if record.levelno >= level: + msg = color(msg) + break + + return msg + + # The logging module says handleError() can be customized. 
+ def handleError(self, record): + exc_class, exc = sys.exc_info()[:2] + # If a broken pipe occurred while calling write() or flush() on the + # stdout stream in logging's Handler.emit(), then raise our special + # exception so we can handle it in main() instead of logging the + # broken pipe error and continuing. + if (exc_class and self._using_stdout() and + _is_broken_pipe_error(exc_class, exc)): + raise BrokenStdoutLoggingError() + + return super(ColorizedStreamHandler, self).handleError(record) + + +class BetterRotatingFileHandler(logging.handlers.RotatingFileHandler): + + def _open(self): + ensure_dir(os.path.dirname(self.baseFilename)) + return logging.handlers.RotatingFileHandler._open(self) + + +class MaxLevelFilter(Filter): + + def __init__(self, level): + self.level = level + + def filter(self, record): + return record.levelno < self.level + + +class ExcludeLoggerFilter(Filter): + + """ + A logging Filter that excludes records from a logger (or its children). + """ + + def filter(self, record): + # The base Filter class allows only records from a logger (or its + # children). + return not super(ExcludeLoggerFilter, self).filter(record) + + +def setup_logging(verbosity, no_color, user_log_file): + """Configures and sets up all of the logging + + Returns the requested logging level, as its integer value. + """ + + # Determine the level to be logging at. + if verbosity >= 1: + level = "DEBUG" + elif verbosity == -1: + level = "WARNING" + elif verbosity == -2: + level = "ERROR" + elif verbosity <= -3: + level = "CRITICAL" + else: + level = "INFO" + + level_number = getattr(logging, level) + + # The "root" logger should match the "console" level *unless* we also need + # to log to a user log file. + include_user_log = user_log_file is not None + if include_user_log: + additional_log_file = user_log_file + root_level = "DEBUG" + else: + additional_log_file = "/dev/null" + root_level = level + + # Disable any logging besides WARNING unless we have DEBUG level logging + # enabled for vendored libraries. 
+ vendored_log_level = "WARNING" if level in ["INFO", "ERROR"] else "DEBUG" + + # Shorthands for clarity + log_streams = { + "stdout": "ext://sys.stdout", + "stderr": "ext://sys.stderr", + } + handler_classes = { + "stream": "pip._internal.utils.logging.ColorizedStreamHandler", + "file": "pip._internal.utils.logging.BetterRotatingFileHandler", + } + handlers = ["console", "console_errors", "console_subprocess"] + ( + ["user_log"] if include_user_log else [] + ) + + logging.config.dictConfig({ + "version": 1, + "disable_existing_loggers": False, + "filters": { + "exclude_warnings": { + "()": "pip._internal.utils.logging.MaxLevelFilter", + "level": logging.WARNING, + }, + "restrict_to_subprocess": { + "()": "logging.Filter", + "name": subprocess_logger.name, + }, + "exclude_subprocess": { + "()": "pip._internal.utils.logging.ExcludeLoggerFilter", + "name": subprocess_logger.name, + }, + }, + "formatters": { + "indent": { + "()": IndentingFormatter, + "format": "%(message)s", + }, + "indent_with_timestamp": { + "()": IndentingFormatter, + "format": "%(message)s", + "add_timestamp": True, + }, + }, + "handlers": { + "console": { + "level": level, + "class": handler_classes["stream"], + "no_color": no_color, + "stream": log_streams["stdout"], + "filters": ["exclude_subprocess", "exclude_warnings"], + "formatter": "indent", + }, + "console_errors": { + "level": "WARNING", + "class": handler_classes["stream"], + "no_color": no_color, + "stream": log_streams["stderr"], + "filters": ["exclude_subprocess"], + "formatter": "indent", + }, + # A handler responsible for logging to the console messages + # from the "subprocessor" logger. + "console_subprocess": { + "level": level, + "class": handler_classes["stream"], + "no_color": no_color, + "stream": log_streams["stderr"], + "filters": ["restrict_to_subprocess"], + "formatter": "indent", + }, + "user_log": { + "level": "DEBUG", + "class": handler_classes["file"], + "filename": additional_log_file, + "delay": True, + "formatter": "indent_with_timestamp", + }, + }, + "root": { + "level": root_level, + "handlers": handlers, + }, + "loggers": { + "pip._vendor": { + "level": vendored_log_level + } + }, + }) + + return level_number diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/marker_files.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/marker_files.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/marker_files.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/marker_files.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/misc.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/misc.py new file mode 100644 index 0000000..4a58160 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/misc.py @@ -0,0 +1,886 @@ +# The following comment should be removed at some point in the future. +# mypy: strict-optional=False +# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import contextlib +import errno +import getpass +import hashlib +import io +import logging +import os +import posixpath +import shutil +import stat +import sys +from collections import deque + +from pip._vendor import pkg_resources +# NOTE: retrying is not annotated in typeshed as on 2017-07-17, which is +# why we ignore the type on this import. 
+from pip._vendor.retrying import retry # type: ignore +from pip._vendor.six import PY2, text_type +from pip._vendor.six.moves import input +from pip._vendor.six.moves.urllib import parse as urllib_parse +from pip._vendor.six.moves.urllib.parse import unquote as urllib_unquote + +from pip import __version__ +from pip._internal.exceptions import CommandError +from pip._internal.locations import ( + get_major_minor_version, + site_packages, + user_site, +) +from pip._internal.utils.compat import ( + WINDOWS, + expanduser, + stdlib_pkgs, + str_to_display, +) +from pip._internal.utils.typing import MYPY_CHECK_RUNNING, cast +from pip._internal.utils.virtualenv import ( + running_under_virtualenv, + virtualenv_no_global, +) + +if PY2: + from io import BytesIO as StringIO +else: + from io import StringIO + +if MYPY_CHECK_RUNNING: + from typing import ( + Any, AnyStr, Container, Iterable, List, Optional, Text, + Tuple, Union, + ) + from pip._vendor.pkg_resources import Distribution + + VersionInfo = Tuple[int, int, int] + + +__all__ = ['rmtree', 'display_path', 'backup_dir', + 'ask', 'splitext', + 'format_size', 'is_installable_dir', + 'normalize_path', + 'renames', 'get_prog', + 'captured_stdout', 'ensure_dir', + 'get_installed_version', 'remove_auth_from_url'] + + +logger = logging.getLogger(__name__) + + +def get_pip_version(): + # type: () -> str + pip_pkg_dir = os.path.join(os.path.dirname(__file__), "..", "..") + pip_pkg_dir = os.path.abspath(pip_pkg_dir) + + return ( + 'pip {} from {} (python {})'.format( + __version__, pip_pkg_dir, get_major_minor_version(), + ) + ) + + +def normalize_version_info(py_version_info): + # type: (Tuple[int, ...]) -> Tuple[int, int, int] + """ + Convert a tuple of ints representing a Python version to one of length + three. + + :param py_version_info: a tuple of ints representing a Python version, + or None to specify no version. The tuple can have any length. + + :return: a tuple of length three if `py_version_info` is non-None. + Otherwise, return `py_version_info` unchanged (i.e. None). + """ + if len(py_version_info) < 3: + py_version_info += (3 - len(py_version_info)) * (0,) + elif len(py_version_info) > 3: + py_version_info = py_version_info[:3] + + return cast('VersionInfo', py_version_info) + + +def ensure_dir(path): + # type: (AnyStr) -> None + """os.path.makedirs without EEXIST.""" + try: + os.makedirs(path) + except OSError as e: + # Windows can raise spurious ENOTEMPTY errors. See #6426. + if e.errno != errno.EEXIST and e.errno != errno.ENOTEMPTY: + raise + + +def get_prog(): + # type: () -> str + try: + prog = os.path.basename(sys.argv[0]) + if prog in ('__main__.py', '-c'): + return "%s -m pip" % sys.executable + else: + return prog + except (AttributeError, TypeError, IndexError): + pass + return 'pip' + + +# Retry every half second for up to 3 seconds +@retry(stop_max_delay=3000, wait_fixed=500) +def rmtree(dir, ignore_errors=False): + # type: (str, bool) -> None + shutil.rmtree(dir, ignore_errors=ignore_errors, + onerror=rmtree_errorhandler) + + +def rmtree_errorhandler(func, path, exc_info): + """On Windows, the files in .svn are read-only, so when rmtree() tries to + remove them, an exception is thrown. 
We catch that here, remove the + read-only attribute, and hopefully continue without problems.""" + try: + has_attr_readonly = not (os.stat(path).st_mode & stat.S_IWRITE) + except (IOError, OSError): + # it's equivalent to os.path.exists + return + + if has_attr_readonly: + # convert to read/write + os.chmod(path, stat.S_IWRITE) + # use the original function to repeat the operation + func(path) + return + else: + raise + + +def path_to_display(path): + # type: (Optional[Union[str, Text]]) -> Optional[Text] + """ + Convert a bytes (or text) path to text (unicode in Python 2) for display + and logging purposes. + + This function should never error out. Also, this function is mainly needed + for Python 2 since in Python 3 str paths are already text. + """ + if path is None: + return None + if isinstance(path, text_type): + return path + # Otherwise, path is a bytes object (str in Python 2). + try: + display_path = path.decode(sys.getfilesystemencoding(), 'strict') + except UnicodeDecodeError: + # Include the full bytes to make troubleshooting easier, even though + # it may not be very human readable. + if PY2: + # Convert the bytes to a readable str representation using + # repr(), and then convert the str to unicode. + # Also, we add the prefix "b" to the repr() return value both + # to make the Python 2 output look like the Python 3 output, and + # to signal to the user that this is a bytes representation. + display_path = str_to_display('b{!r}'.format(path)) + else: + # Silence the "F821 undefined name 'ascii'" flake8 error since + # in Python 3 ascii() is a built-in. + display_path = ascii(path) # noqa: F821 + + return display_path + + +def display_path(path): + # type: (Union[str, Text]) -> str + """Gives the display value for a given path, making it relative to cwd + if possible.""" + path = os.path.normcase(os.path.abspath(path)) + if sys.version_info[0] == 2: + path = path.decode(sys.getfilesystemencoding(), 'replace') + path = path.encode(sys.getdefaultencoding(), 'replace') + if path.startswith(os.getcwd() + os.path.sep): + path = '.' 
+ path[len(os.getcwd()):] + return path + + +def backup_dir(dir, ext='.bak'): + # type: (str, str) -> str + """Figure out the name of a directory to back up the given dir to + (adding .bak, .bak2, etc)""" + n = 1 + extension = ext + while os.path.exists(dir + extension): + n += 1 + extension = ext + str(n) + return dir + extension + + +def ask_path_exists(message, options): + # type: (str, Iterable[str]) -> str + for action in os.environ.get('PIP_EXISTS_ACTION', '').split(): + if action in options: + return action + return ask(message, options) + + +def _check_no_input(message): + # type: (str) -> None + """Raise an error if no input is allowed.""" + if os.environ.get('PIP_NO_INPUT'): + raise Exception( + 'No input was expected ($PIP_NO_INPUT set); question: %s' % + message + ) + + +def ask(message, options): + # type: (str, Iterable[str]) -> str + """Ask the message interactively, with the given possible responses""" + while 1: + _check_no_input(message) + response = input(message) + response = response.strip().lower() + if response not in options: + print( + 'Your response (%r) was not one of the expected responses: ' + '%s' % (response, ', '.join(options)) + ) + else: + return response + + +def ask_input(message): + # type: (str) -> str + """Ask for input interactively.""" + _check_no_input(message) + return input(message) + + +def ask_password(message): + # type: (str) -> str + """Ask for a password interactively.""" + _check_no_input(message) + return getpass.getpass(message) + + +def format_size(bytes): + # type: (float) -> str + if bytes > 1000 * 1000: + return '%.1f MB' % (bytes / 1000.0 / 1000) + elif bytes > 10 * 1000: + return '%i kB' % (bytes / 1000) + elif bytes > 1000: + return '%.1f kB' % (bytes / 1000.0) + else: + return '%i bytes' % bytes + + +def is_installable_dir(path): + # type: (str) -> bool + """Is path is a directory containing setup.py or pyproject.toml? + """ + if not os.path.isdir(path): + return False + setup_py = os.path.join(path, 'setup.py') + if os.path.isfile(setup_py): + return True + pyproject_toml = os.path.join(path, 'pyproject.toml') + if os.path.isfile(pyproject_toml): + return True + return False + + +def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE): + """Yield pieces of data from a file-like object until EOF.""" + while True: + chunk = file.read(size) + if not chunk: + break + yield chunk + + +def normalize_path(path, resolve_symlinks=True): + # type: (str, bool) -> str + """ + Convert a path to its canonical, case-normalized, absolute version. + + """ + path = expanduser(path) + if resolve_symlinks: + path = os.path.realpath(path) + else: + path = os.path.abspath(path) + return os.path.normcase(path) + + +def splitext(path): + # type: (str) -> Tuple[str, str] + """Like os.path.splitext, but take off .tar too""" + base, ext = posixpath.splitext(path) + if base.lower().endswith('.tar'): + ext = base[-4:] + ext + base = base[:-4] + return base, ext + + +def renames(old, new): + # type: (str, str) -> None + """Like os.renames(), but handles renaming across devices.""" + # Implementation borrowed from os.renames(). + head, tail = os.path.split(new) + if head and tail and not os.path.exists(head): + os.makedirs(head) + + shutil.move(old, new) + + head, tail = os.path.split(old) + if head and tail: + try: + os.removedirs(head) + except OSError: + pass + + +def is_local(path): + # type: (str) -> bool + """ + Return True if path is within sys.prefix, if we're running in a virtualenv. + + If we're not in a virtualenv, all paths are considered "local." 
+ + Caution: this function assumes the head of path has been normalized + with normalize_path. + """ + if not running_under_virtualenv(): + return True + return path.startswith(normalize_path(sys.prefix)) + + +def dist_is_local(dist): + # type: (Distribution) -> bool + """ + Return True if given Distribution object is installed locally + (i.e. within current virtualenv). + + Always True if we're not in a virtualenv. + + """ + return is_local(dist_location(dist)) + + +def dist_in_usersite(dist): + # type: (Distribution) -> bool + """ + Return True if given Distribution is installed in user site. + """ + return dist_location(dist).startswith(normalize_path(user_site)) + + +def dist_in_site_packages(dist): + # type: (Distribution) -> bool + """ + Return True if given Distribution is installed in + sysconfig.get_python_lib(). + """ + return dist_location(dist).startswith(normalize_path(site_packages)) + + +def dist_is_editable(dist): + # type: (Distribution) -> bool + """ + Return True if given Distribution is an editable install. + """ + for path_item in sys.path: + egg_link = os.path.join(path_item, dist.project_name + '.egg-link') + if os.path.isfile(egg_link): + return True + return False + + +def get_installed_distributions( + local_only=True, # type: bool + skip=stdlib_pkgs, # type: Container[str] + include_editables=True, # type: bool + editables_only=False, # type: bool + user_only=False, # type: bool + paths=None # type: Optional[List[str]] +): + # type: (...) -> List[Distribution] + """ + Return a list of installed Distribution objects. + + If ``local_only`` is True (default), only return installations + local to the current virtualenv, if in a virtualenv. + + ``skip`` argument is an iterable of lower-case project names to + ignore; defaults to stdlib_pkgs + + If ``include_editables`` is False, don't report editables. + + If ``editables_only`` is True , only report editables. + + If ``user_only`` is True , only report installations in the user + site directory. + + If ``paths`` is set, only report the distributions present at the + specified list of locations. + """ + if paths: + working_set = pkg_resources.WorkingSet(paths) + else: + working_set = pkg_resources.working_set + + if local_only: + local_test = dist_is_local + else: + def local_test(d): + return True + + if include_editables: + def editable_test(d): + return True + else: + def editable_test(d): + return not dist_is_editable(d) + + if editables_only: + def editables_only_test(d): + return dist_is_editable(d) + else: + def editables_only_test(d): + return True + + if user_only: + user_test = dist_in_usersite + else: + def user_test(d): + return True + + return [d for d in working_set + if local_test(d) and + d.key not in skip and + editable_test(d) and + editables_only_test(d) and + user_test(d) + ] + + +def egg_link_path(dist): + # type: (Distribution) -> Optional[str] + """ + Return the path for the .egg-link file if it exists, otherwise, None. + + There's 3 scenarios: + 1) not in a virtualenv + try to find in site.USER_SITE, then site_packages + 2) in a no-global virtualenv + try to find in site_packages + 3) in a yes-global virtualenv + try to find in site_packages, then site.USER_SITE + (don't look in global location) + + For #1 and #3, there could be odd cases, where there's an egg-link in 2 + locations. + + This method will just return the first one found. 
+ """ + sites = [] + if running_under_virtualenv(): + sites.append(site_packages) + if not virtualenv_no_global() and user_site: + sites.append(user_site) + else: + if user_site: + sites.append(user_site) + sites.append(site_packages) + + for site in sites: + egglink = os.path.join(site, dist.project_name) + '.egg-link' + if os.path.isfile(egglink): + return egglink + return None + + +def dist_location(dist): + # type: (Distribution) -> str + """ + Get the site-packages location of this distribution. Generally + this is dist.location, except in the case of develop-installed + packages, where dist.location is the source code location, and we + want to know where the egg-link file is. + + The returned location is normalized (in particular, with symlinks removed). + """ + egg_link = egg_link_path(dist) + if egg_link: + return normalize_path(egg_link) + return normalize_path(dist.location) + + +def write_output(msg, *args): + # type: (str, str) -> None + logger.info(msg, *args) + + +class FakeFile(object): + """Wrap a list of lines in an object with readline() to make + ConfigParser happy.""" + def __init__(self, lines): + self._gen = (l for l in lines) + + def readline(self): + try: + try: + return next(self._gen) + except NameError: + return self._gen.next() + except StopIteration: + return '' + + def __iter__(self): + return self._gen + + +class StreamWrapper(StringIO): + + @classmethod + def from_stream(cls, orig_stream): + cls.orig_stream = orig_stream + return cls() + + # compileall.compile_dir() needs stdout.encoding to print to stdout + @property + def encoding(self): + return self.orig_stream.encoding + + +@contextlib.contextmanager +def captured_output(stream_name): + """Return a context manager used by captured_stdout/stdin/stderr + that temporarily replaces the sys stream *stream_name* with a StringIO. + + Taken from Lib/support/__init__.py in the CPython repo. + """ + orig_stdout = getattr(sys, stream_name) + setattr(sys, stream_name, StreamWrapper.from_stream(orig_stdout)) + try: + yield getattr(sys, stream_name) + finally: + setattr(sys, stream_name, orig_stdout) + + +def captured_stdout(): + """Capture the output of sys.stdout: + + with captured_stdout() as stdout: + print('hello') + self.assertEqual(stdout.getvalue(), 'hello\n') + + Taken from Lib/support/__init__.py in the CPython repo. + """ + return captured_output('stdout') + + +def captured_stderr(): + """ + See captured_stdout(). + """ + return captured_output('stderr') + + +class cached_property(object): + """A property that is only computed once per instance and then replaces + itself with an ordinary attribute. Deleting the attribute resets the + property. + + Source: https://github.com/bottlepy/bottle/blob/0.11.5/bottle.py#L175 + """ + + def __init__(self, func): + self.__doc__ = getattr(func, '__doc__') + self.func = func + + def __get__(self, obj, cls): + if obj is None: + # We're being accessed from the class itself, not from an object + return self + value = obj.__dict__[self.func.__name__] = self.func(obj) + return value + + +def get_installed_version(dist_name, working_set=None): + """Get the installed version of dist_name avoiding pkg_resources cache""" + # Create a requirement that we'll look for inside of setuptools. + req = pkg_resources.Requirement.parse(dist_name) + + if working_set is None: + # We want to avoid having this cached, so we need to construct a new + # working set each time. 
+ working_set = pkg_resources.WorkingSet() + + # Get the installed distribution from our working set + dist = working_set.find(req) + + # Check to see if we got an installed distribution or not, if we did + # we want to return it's version. + return dist.version if dist else None + + +def consume(iterator): + """Consume an iterable at C speed.""" + deque(iterator, maxlen=0) + + +# Simulates an enum +def enum(*sequential, **named): + enums = dict(zip(sequential, range(len(sequential))), **named) + reverse = {value: key for key, value in enums.items()} + enums['reverse_mapping'] = reverse + return type('Enum', (), enums) + + +def build_netloc(host, port): + # type: (str, Optional[int]) -> str + """ + Build a netloc from a host-port pair + """ + if port is None: + return host + if ':' in host: + # Only wrap host with square brackets when it is IPv6 + host = '[{}]'.format(host) + return '{}:{}'.format(host, port) + + +def build_url_from_netloc(netloc, scheme='https'): + # type: (str, str) -> str + """ + Build a full URL from a netloc. + """ + if netloc.count(':') >= 2 and '@' not in netloc and '[' not in netloc: + # It must be a bare IPv6 address, so wrap it with brackets. + netloc = '[{}]'.format(netloc) + return '{}://{}'.format(scheme, netloc) + + +def parse_netloc(netloc): + # type: (str) -> Tuple[str, Optional[int]] + """ + Return the host-port pair from a netloc. + """ + url = build_url_from_netloc(netloc) + parsed = urllib_parse.urlparse(url) + return parsed.hostname, parsed.port + + +def split_auth_from_netloc(netloc): + """ + Parse out and remove the auth information from a netloc. + + Returns: (netloc, (username, password)). + """ + if '@' not in netloc: + return netloc, (None, None) + + # Split from the right because that's how urllib.parse.urlsplit() + # behaves if more than one @ is present (which can be checked using + # the password attribute of urlsplit()'s return value). + auth, netloc = netloc.rsplit('@', 1) + if ':' in auth: + # Split from the left because that's how urllib.parse.urlsplit() + # behaves if more than one : is present (which again can be checked + # using the password attribute of the return value) + user_pass = auth.split(':', 1) + else: + user_pass = auth, None + + user_pass = tuple( + None if x is None else urllib_unquote(x) for x in user_pass + ) + + return netloc, user_pass + + +def redact_netloc(netloc): + # type: (str) -> str + """ + Replace the sensitive data in a netloc with "****", if it exists. + + For example: + - "user:pass@example.com" returns "user:****@example.com" + - "accesstoken@example.com" returns "****@example.com" + """ + netloc, (user, password) = split_auth_from_netloc(netloc) + if user is None: + return netloc + if password is None: + user = '****' + password = '' + else: + user = urllib_parse.quote(user) + password = ':****' + return '{user}{password}@{netloc}'.format(user=user, + password=password, + netloc=netloc) + + +def _transform_url(url, transform_netloc): + """Transform and replace netloc in a url. + + transform_netloc is a function taking the netloc and returning a + tuple. The first element of this tuple is the new netloc. The + entire tuple is returned. + + Returns a tuple containing the transformed url as item 0 and the + original tuple returned by transform_netloc as item 1. 
+ """ + purl = urllib_parse.urlsplit(url) + netloc_tuple = transform_netloc(purl.netloc) + # stripped url + url_pieces = ( + purl.scheme, netloc_tuple[0], purl.path, purl.query, purl.fragment + ) + surl = urllib_parse.urlunsplit(url_pieces) + return surl, netloc_tuple + + +def _get_netloc(netloc): + return split_auth_from_netloc(netloc) + + +def _redact_netloc(netloc): + return (redact_netloc(netloc),) + + +def split_auth_netloc_from_url(url): + # type: (str) -> Tuple[str, str, Tuple[str, str]] + """ + Parse a url into separate netloc, auth, and url with no auth. + + Returns: (url_without_auth, netloc, (username, password)) + """ + url_without_auth, (netloc, auth) = _transform_url(url, _get_netloc) + return url_without_auth, netloc, auth + + +def remove_auth_from_url(url): + # type: (str) -> str + """Return a copy of url with 'username:password@' removed.""" + # username/pass params are passed to subversion through flags + # and are not recognized in the url. + return _transform_url(url, _get_netloc)[0] + + +def redact_auth_from_url(url): + # type: (str) -> str + """Replace the password in a given url with ****.""" + return _transform_url(url, _redact_netloc)[0] + + +class HiddenText(object): + def __init__( + self, + secret, # type: str + redacted, # type: str + ): + # type: (...) -> None + self.secret = secret + self.redacted = redacted + + def __repr__(self): + # type: (...) -> str + return ''.format(str(self)) + + def __str__(self): + # type: (...) -> str + return self.redacted + + # This is useful for testing. + def __eq__(self, other): + # type: (Any) -> bool + if type(self) != type(other): + return False + + # The string being used for redaction doesn't also have to match, + # just the raw, original string. + return (self.secret == other.secret) + + # We need to provide an explicit __ne__ implementation for Python 2. + # TODO: remove this when we drop PY2 support. + def __ne__(self, other): + # type: (Any) -> bool + return not self == other + + +def hide_value(value): + # type: (str) -> HiddenText + return HiddenText(value, redacted='****') + + +def hide_url(url): + # type: (str) -> HiddenText + redacted = redact_auth_from_url(url) + return HiddenText(url, redacted=redacted) + + +def protect_pip_from_modification_on_windows(modifying_pip): + # type: (bool) -> None + """Protection of pip.exe from modification on Windows + + On Windows, any operation modifying pip should be run as: + python -m pip ... + """ + pip_names = [ + "pip.exe", + "pip{}.exe".format(sys.version_info[0]), + "pip{}.{}.exe".format(*sys.version_info[:2]) + ] + + # See https://github.com/pypa/pip/issues/1299 for more discussion + should_show_use_python_msg = ( + modifying_pip and + WINDOWS and + os.path.basename(sys.argv[0]) in pip_names + ) + + if should_show_use_python_msg: + new_command = [ + sys.executable, "-m", "pip" + ] + sys.argv[1:] + raise CommandError( + 'To modify pip, please run the following command:\n{}' + .format(" ".join(new_command)) + ) + + +def is_console_interactive(): + # type: () -> bool + """Is this console interactive? + """ + return sys.stdin is not None and sys.stdin.isatty() + + +def hash_file(path, blocksize=1 << 20): + # type: (str, int) -> Tuple[Any, int] + """Return (hash, length) for path using hashlib.sha256() + """ + + h = hashlib.sha256() + length = 0 + with open(path, 'rb') as f: + for block in read_chunks(f, size=blocksize): + length += len(block) + h.update(block) + return h, length + + +def is_wheel_installed(): + """ + Return whether the wheel package is installed. 
+ """ + try: + import wheel # noqa: F401 + except ImportError: + return False + + return True diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/models.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/models.py new file mode 100644 index 0000000..29e1441 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/models.py @@ -0,0 +1,42 @@ +"""Utilities for defining models +""" +# The following comment should be removed at some point in the future. +# mypy: disallow-untyped-defs=False + +import operator + + +class KeyBasedCompareMixin(object): + """Provides comparison capabilities that is based on a key + """ + + def __init__(self, key, defining_class): + self._compare_key = key + self._defining_class = defining_class + + def __hash__(self): + return hash(self._compare_key) + + def __lt__(self, other): + return self._compare(other, operator.__lt__) + + def __le__(self, other): + return self._compare(other, operator.__le__) + + def __gt__(self, other): + return self._compare(other, operator.__gt__) + + def __ge__(self, other): + return self._compare(other, operator.__ge__) + + def __eq__(self, other): + return self._compare(other, operator.__eq__) + + def __ne__(self, other): + return self._compare(other, operator.__ne__) + + def _compare(self, other, method): + if not isinstance(other, self._defining_class): + return NotImplemented + + return method(self._compare_key, other._compare_key) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/packaging.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/packaging.py new file mode 100644 index 0000000..68aa86e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/packaging.py @@ -0,0 +1,94 @@ +from __future__ import absolute_import + +import logging +from email.parser import FeedParser + +from pip._vendor import pkg_resources +from pip._vendor.packaging import specifiers, version + +from pip._internal.exceptions import NoneMetadataError +from pip._internal.utils.misc import display_path +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import Optional, Tuple + from email.message import Message + from pip._vendor.pkg_resources import Distribution + + +logger = logging.getLogger(__name__) + + +def check_requires_python(requires_python, version_info): + # type: (Optional[str], Tuple[int, ...]) -> bool + """ + Check if the given Python version matches a "Requires-Python" specifier. + + :param version_info: A 3-tuple of ints representing a Python + major-minor-micro version to check (e.g. `sys.version_info[:3]`). + + :return: `True` if the given Python version satisfies the requirement. + Otherwise, return `False`. + + :raises InvalidSpecifier: If `requires_python` has an invalid format. + """ + if requires_python is None: + # The package provides no information + return True + requires_python_specifier = specifiers.SpecifierSet(requires_python) + + python_version = version.parse('.'.join(map(str, version_info))) + return python_version in requires_python_specifier + + +def get_metadata(dist): + # type: (Distribution) -> Message + """ + :raises NoneMetadataError: if the distribution reports `has_metadata()` + True but `get_metadata()` returns None. 
+ """ + metadata_name = 'METADATA' + if (isinstance(dist, pkg_resources.DistInfoDistribution) and + dist.has_metadata(metadata_name)): + metadata = dist.get_metadata(metadata_name) + elif dist.has_metadata('PKG-INFO'): + metadata_name = 'PKG-INFO' + metadata = dist.get_metadata(metadata_name) + else: + logger.warning("No metadata found in %s", display_path(dist.location)) + metadata = '' + + if metadata is None: + raise NoneMetadataError(dist, metadata_name) + + feed_parser = FeedParser() + # The following line errors out if with a "NoneType" TypeError if + # passed metadata=None. + feed_parser.feed(metadata) + return feed_parser.close() + + +def get_requires_python(dist): + # type: (pkg_resources.Distribution) -> Optional[str] + """ + Return the "Requires-Python" metadata for a distribution, or None + if not present. + """ + pkg_info_dict = get_metadata(dist) + requires_python = pkg_info_dict.get('Requires-Python') + + if requires_python is not None: + # Convert to a str to satisfy the type checker, since requires_python + # can be a Header object. + requires_python = str(requires_python) + + return requires_python + + +def get_installer(dist): + # type: (Distribution) -> str + if dist.has_metadata('INSTALLER'): + for line in dist.get_metadata_lines('INSTALLER'): + if line.strip(): + return line.strip() + return '' diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/pkg_resources.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/pkg_resources.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/pkg_resources.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/pkg_resources.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/setuptools_build.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/setuptools_build.py new file mode 100644 index 0000000..4147a65 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/setuptools_build.py @@ -0,0 +1,181 @@ +import sys + +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import List, Optional, Sequence + +# Shim to wrap setup.py invocation with setuptools +# +# We set sys.argv[0] to the path to the underlying setup.py file so +# setuptools / distutils don't take the path to the setup.py to be "-c" when +# invoking via the shim. This avoids e.g. the following manifest_maker +# warning: "warning: manifest_maker: standard file '-c' not found". +_SETUPTOOLS_SHIM = ( + "import sys, setuptools, tokenize; sys.argv[0] = {0!r}; __file__={0!r};" + "f=getattr(tokenize, 'open', open)(__file__);" + "code=f.read().replace('\\r\\n', '\\n');" + "f.close();" + "exec(compile(code, __file__, 'exec'))" +) + + +def make_setuptools_shim_args( + setup_py_path, # type: str + global_options=None, # type: Sequence[str] + no_user_config=False, # type: bool + unbuffered_output=False # type: bool +): + # type: (...) -> List[str] + """ + Get setuptools command arguments with shim wrapped setup file invocation. + + :param setup_py_path: The path to setup.py to be wrapped. + :param global_options: Additional global options. + :param no_user_config: If True, disables personal user configuration. + :param unbuffered_output: If True, adds the unbuffered switch to the + argument list. 
+ """ + args = [sys.executable] + if unbuffered_output: + args += ["-u"] + args += ["-c", _SETUPTOOLS_SHIM.format(setup_py_path)] + if global_options: + args += global_options + if no_user_config: + args += ["--no-user-cfg"] + return args + + +def make_setuptools_bdist_wheel_args( + setup_py_path, # type: str + global_options, # type: Sequence[str] + build_options, # type: Sequence[str] + destination_dir, # type: str +): + # type: (...) -> List[str] + # NOTE: Eventually, we'd want to also -S to the flags here, when we're + # isolating. Currently, it breaks Python in virtualenvs, because it + # relies on site.py to find parts of the standard library outside the + # virtualenv. + args = make_setuptools_shim_args( + setup_py_path, + global_options=global_options, + unbuffered_output=True + ) + args += ["bdist_wheel", "-d", destination_dir] + args += build_options + return args + + +def make_setuptools_clean_args( + setup_py_path, # type: str + global_options, # type: Sequence[str] +): + # type: (...) -> List[str] + args = make_setuptools_shim_args( + setup_py_path, + global_options=global_options, + unbuffered_output=True + ) + args += ["clean", "--all"] + return args + + +def make_setuptools_develop_args( + setup_py_path, # type: str + global_options, # type: Sequence[str] + install_options, # type: Sequence[str] + no_user_config, # type: bool + prefix, # type: Optional[str] + home, # type: Optional[str] + use_user_site, # type: bool +): + # type: (...) -> List[str] + assert not (use_user_site and prefix) + + args = make_setuptools_shim_args( + setup_py_path, + global_options=global_options, + no_user_config=no_user_config, + ) + + args += ["develop", "--no-deps"] + + args += install_options + + if prefix: + args += ["--prefix", prefix] + if home is not None: + args += ["--home", home] + + if use_user_site: + args += ["--user", "--prefix="] + + return args + + +def make_setuptools_egg_info_args( + setup_py_path, # type: str + egg_info_dir, # type: Optional[str] + no_user_config, # type: bool +): + # type: (...) -> List[str] + args = make_setuptools_shim_args(setup_py_path) + if no_user_config: + args += ["--no-user-cfg"] + + args += ["egg_info"] + + if egg_info_dir: + args += ["--egg-base", egg_info_dir] + + return args + + +def make_setuptools_install_args( + setup_py_path, # type: str + global_options, # type: Sequence[str] + install_options, # type: Sequence[str] + record_filename, # type: str + root, # type: Optional[str] + prefix, # type: Optional[str] + header_dir, # type: Optional[str] + home, # type: Optional[str] + use_user_site, # type: bool + no_user_config, # type: bool + pycompile # type: bool +): + # type: (...) 
-> List[str] + assert not (use_user_site and prefix) + assert not (use_user_site and root) + + args = make_setuptools_shim_args( + setup_py_path, + global_options=global_options, + no_user_config=no_user_config, + unbuffered_output=True + ) + args += ["install", "--record", record_filename] + args += ["--single-version-externally-managed"] + + if root is not None: + args += ["--root", root] + if prefix is not None: + args += ["--prefix", prefix] + if home is not None: + args += ["--home", home] + if use_user_site: + args += ["--user", "--prefix="] + + if pycompile: + args += ["--compile"] + else: + args += ["--no-compile"] + + if header_dir: + args += ["--install-headers", header_dir] + + args += install_options + + return args diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/subprocess.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/subprocess.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/subprocess.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/subprocess.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/temp_dir.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/temp_dir.py new file mode 100644 index 0000000..65e41bc --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/temp_dir.py @@ -0,0 +1,250 @@ +from __future__ import absolute_import + +import errno +import itertools +import logging +import os.path +import tempfile +from contextlib import contextmanager + +from pip._vendor.contextlib2 import ExitStack + +from pip._internal.utils.misc import rmtree +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import Any, Dict, Iterator, Optional, TypeVar + + _T = TypeVar('_T', bound='TempDirectory') + + +logger = logging.getLogger(__name__) + + +_tempdir_manager = None # type: Optional[ExitStack] + + +@contextmanager +def global_tempdir_manager(): + # type: () -> Iterator[None] + global _tempdir_manager + with ExitStack() as stack: + old_tempdir_manager, _tempdir_manager = _tempdir_manager, stack + try: + yield + finally: + _tempdir_manager = old_tempdir_manager + + +class TempDirectoryTypeRegistry(object): + """Manages temp directory behavior + """ + + def __init__(self): + # type: () -> None + self._should_delete = {} # type: Dict[str, bool] + + def set_delete(self, kind, value): + # type: (str, bool) -> None + """Indicate whether a TempDirectory of the given kind should be + auto-deleted. + """ + self._should_delete[kind] = value + + def get_delete(self, kind): + # type: (str) -> bool + """Get configured auto-delete flag for a given TempDirectory type, + default True. + """ + return self._should_delete.get(kind, True) + + +_tempdir_registry = None # type: Optional[TempDirectoryTypeRegistry] + + +@contextmanager +def tempdir_registry(): + # type: () -> Iterator[TempDirectoryTypeRegistry] + """Provides a scoped global tempdir registry that can be used to dictate + whether directories should be deleted. + """ + global _tempdir_registry + old_tempdir_registry = _tempdir_registry + _tempdir_registry = TempDirectoryTypeRegistry() + try: + yield _tempdir_registry + finally: + _tempdir_registry = old_tempdir_registry + + +class TempDirectory(object): + """Helper class that owns and cleans up a temporary directory. + + This class can be used as a context manager or as an OO representation of a + temporary directory. 
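# --- Illustrative sketch; not part of the vendored pip sources. ---
# tempdir_registry() and global_tempdir_manager() above both follow one pattern:
# temporarily swap a module-level global for a scoped object, then restore the old
# value on exit. The pattern in isolation, with invented names:
from contextlib import contextmanager

_active_registry = None


@contextmanager
def scoped_registry():
    global _active_registry
    old_registry, _active_registry = _active_registry, {}
    try:
        yield _active_registry
    finally:
        _active_registry = old_registry


with scoped_registry() as registry:
    registry["build"] = False          # e.g. "do not auto-delete build temp dirs"
assert _active_registry is None        # the old global is restored afterwards
# --- end of sketch ---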
+ + Attributes: + path + Location to the created temporary directory + delete + Whether the directory should be deleted when exiting + (when used as a contextmanager) + + Methods: + cleanup() + Deletes the temporary directory + + When used as a context manager, if the delete attribute is True, on + exiting the context the temporary directory is deleted. + """ + + def __init__( + self, + path=None, # type: Optional[str] + delete=None, # type: Optional[bool] + kind="temp", # type: str + globally_managed=False, # type: bool + ): + super(TempDirectory, self).__init__() + + # If we were given an explicit directory, resolve delete option now. + # Otherwise we wait until cleanup and see what tempdir_registry says. + if path is not None and delete is None: + delete = False + + if path is None: + path = self._create(kind) + + self._path = path + self._deleted = False + self.delete = delete + self.kind = kind + + if globally_managed: + assert _tempdir_manager is not None + _tempdir_manager.enter_context(self) + + @property + def path(self): + # type: () -> str + assert not self._deleted, ( + "Attempted to access deleted path: {}".format(self._path) + ) + return self._path + + def __repr__(self): + # type: () -> str + return "<{} {!r}>".format(self.__class__.__name__, self.path) + + def __enter__(self): + # type: (_T) -> _T + return self + + def __exit__(self, exc, value, tb): + # type: (Any, Any, Any) -> None + if self.delete is not None: + delete = self.delete + elif _tempdir_registry: + delete = _tempdir_registry.get_delete(self.kind) + else: + delete = True + + if delete: + self.cleanup() + + def _create(self, kind): + # type: (str) -> str + """Create a temporary directory and store its path in self.path + """ + # We realpath here because some systems have their default tmpdir + # symlinked to another directory. This tends to confuse build + # scripts, so we canonicalize the path by traversing potential + # symlinks here. + path = os.path.realpath( + tempfile.mkdtemp(prefix="pip-{}-".format(kind)) + ) + logger.debug("Created temporary directory: {}".format(path)) + return path + + def cleanup(self): + # type: () -> None + """Remove the temporary directory created and reset state + """ + self._deleted = True + if os.path.exists(self._path): + rmtree(self._path) + + +class AdjacentTempDirectory(TempDirectory): + """Helper class that creates a temporary directory adjacent to a real one. + + Attributes: + original + The original directory to create a temp directory for. + path + After calling create() or entering, contains the full + path to the temporary directory. + delete + Whether the directory should be deleted when exiting + (when used as a contextmanager) + + """ + # The characters that may be used to name the temp directory + # We always prepend a ~ and then rotate through these until + # a usable name is found. + # pkg_resources raises a different error for .dist-info folder + # with leading '-' and invalid metadata + LEADING_CHARS = "-~.=%0123456789" + + def __init__(self, original, delete=None): + # type: (str, Optional[bool]) -> None + self.original = original.rstrip('/\\') + super(AdjacentTempDirectory, self).__init__(delete=delete) + + @classmethod + def _generate_names(cls, name): + # type: (str) -> Iterator[str] + """Generates a series of temporary names. + + The algorithm replaces the leading characters in the name + with ones that are valid filesystem characters, but are not + valid package names (for both Python and pip definitions of + package). 
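# --- Illustrative sketch; not part of the vendored pip sources. ---
# TempDirectory above is, at its core, a context manager around tempfile.mkdtemp()
# plus an explicit cleanup step. A stripped-down, stdlib-only version of the same
# lifecycle (the "pip-demo-" prefix is arbitrary):
import os
import shutil
import tempfile
from contextlib import contextmanager


@contextmanager
def scoped_temp_dir(kind="demo", delete=True):
    # realpath() for the same reason as TempDirectory._create(): some systems
    # symlink their default tmpdir, which can confuse build scripts.
    path = os.path.realpath(tempfile.mkdtemp(prefix="pip-{}-".format(kind)))
    try:
        yield path
    finally:
        if delete and os.path.exists(path):
            shutil.rmtree(path)


with scoped_temp_dir() as tmp:
    with open(os.path.join(tmp, "scratch.txt"), "w") as f:
        f.write("temporary data")
# The directory is gone once the with-block exits.
# --- end of sketch ---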
+ """ + for i in range(1, len(name)): + for candidate in itertools.combinations_with_replacement( + cls.LEADING_CHARS, i - 1): + new_name = '~' + ''.join(candidate) + name[i:] + if new_name != name: + yield new_name + + # If we make it this far, we will have to make a longer name + for i in range(len(cls.LEADING_CHARS)): + for candidate in itertools.combinations_with_replacement( + cls.LEADING_CHARS, i): + new_name = '~' + ''.join(candidate) + name + if new_name != name: + yield new_name + + def _create(self, kind): + # type: (str) -> str + root, name = os.path.split(self.original) + for candidate in self._generate_names(name): + path = os.path.join(root, candidate) + try: + os.mkdir(path) + except OSError as ex: + # Continue if the name exists already + if ex.errno != errno.EEXIST: + raise + else: + path = os.path.realpath(path) + break + else: + # Final fallback on the default behavior. + path = os.path.realpath( + tempfile.mkdtemp(prefix="pip-{}-".format(kind)) + ) + + logger.debug("Created temporary directory: {}".format(path)) + return path diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/typing.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/typing.py new file mode 100644 index 0000000..8505a29 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/typing.py @@ -0,0 +1,38 @@ +"""For neatly implementing static typing in pip. + +`mypy` - the static type analysis tool we use - uses the `typing` module, which +provides core functionality fundamental to mypy's functioning. + +Generally, `typing` would be imported at runtime and used in that fashion - +it acts as a no-op at runtime and does not have any run-time overhead by +design. + +As it turns out, `typing` is not vendorable - it uses separate sources for +Python 2/Python 3. Thus, this codebase can not expect it to be present. +To work around this, mypy allows the typing import to be behind a False-y +optional to prevent it from running at runtime and type-comments can be used +to remove the need for the types to be accessible directly during runtime. + +This module provides the False-y guard in a nicely named fashion so that a +curious maintainer can reach here to read this. + +In pip, all static-typing related imports should be guarded as follows: + + from pip._internal.utils.typing import MYPY_CHECK_RUNNING + + if MYPY_CHECK_RUNNING: + from typing import ... + +Ref: https://github.com/python/mypy/issues/3216 +""" + +MYPY_CHECK_RUNNING = False + + +if MYPY_CHECK_RUNNING: + from typing import cast +else: + # typing's cast() is needed at runtime, but we don't want to import typing. + # Thus, we use a dummy no-op version, which we tell mypy to ignore. + def cast(type_, value): # type: ignore + return value diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/ui.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/ui.py new file mode 100644 index 0000000..87782aa --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/ui.py @@ -0,0 +1,428 @@ +# The following comment should be removed at some point in the future. 
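# --- Illustrative sketch; not part of the vendored pip sources. ---
# The typing.py helper in the previous hunk exists so modules can carry type comments
# without importing `typing` at runtime; pip's own modules import MYPY_CHECK_RUNNING
# from pip._internal.utils.typing. A self-contained module written in that style
# (split_words() is invented for the example):
MYPY_CHECK_RUNNING = False              # pip centralizes this flag in utils/typing.py

if MYPY_CHECK_RUNNING:
    # Only evaluated by mypy; never executed at runtime.
    from typing import List, Optional


def split_words(text, limit=None):
    # type: (str, Optional[int]) -> List[str]
    words = text.split()
    return words if limit is None else words[:limit]


assert split_words("a b c", limit=2) == ["a", "b"]
# --- end of sketch ---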
+# mypy: strict-optional=False +# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import, division + +import contextlib +import itertools +import logging +import sys +import time +from signal import SIGINT, default_int_handler, signal + +from pip._vendor import six +from pip._vendor.progress import HIDE_CURSOR, SHOW_CURSOR +from pip._vendor.progress.bar import Bar, FillingCirclesBar, IncrementalBar +from pip._vendor.progress.spinner import Spinner + +from pip._internal.utils.compat import WINDOWS +from pip._internal.utils.logging import get_indentation +from pip._internal.utils.misc import format_size +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import Any, Iterator, IO + +try: + from pip._vendor import colorama +# Lots of different errors can come from this, including SystemError and +# ImportError. +except Exception: + colorama = None + +logger = logging.getLogger(__name__) + + +def _select_progress_class(preferred, fallback): + encoding = getattr(preferred.file, "encoding", None) + + # If we don't know what encoding this file is in, then we'll just assume + # that it doesn't support unicode and use the ASCII bar. + if not encoding: + return fallback + + # Collect all of the possible characters we want to use with the preferred + # bar. + characters = [ + getattr(preferred, "empty_fill", six.text_type()), + getattr(preferred, "fill", six.text_type()), + ] + characters += list(getattr(preferred, "phases", [])) + + # Try to decode the characters we're using for the bar using the encoding + # of the given file, if this works then we'll assume that we can use the + # fancier bar and if not we'll fall back to the plaintext bar. + try: + six.text_type().join(characters).encode(encoding) + except UnicodeEncodeError: + return fallback + else: + return preferred + + +_BaseBar = _select_progress_class(IncrementalBar, Bar) # type: Any + + +class InterruptibleMixin(object): + """ + Helper to ensure that self.finish() gets called on keyboard interrupt. + + This allows downloads to be interrupted without leaving temporary state + (like hidden cursors) behind. + + This class is similar to the progress library's existing SigIntMixin + helper, but as of version 1.2, that helper has the following problems: + + 1. It calls sys.exit(). + 2. It discards the existing SIGINT handler completely. + 3. It leaves its own handler in place even after an uninterrupted finish, + which will have unexpected delayed effects if the user triggers an + unrelated keyboard interrupt some time after a progress-displaying + download has already completed, for example. + """ + + def __init__(self, *args, **kwargs): + """ + Save the original SIGINT handler for later. + """ + super(InterruptibleMixin, self).__init__(*args, **kwargs) + + self.original_handler = signal(SIGINT, self.handle_sigint) + + # If signal() returns None, the previous handler was not installed from + # Python, and we cannot restore it. This probably should not happen, + # but if it does, we must restore something sensible instead, at least. + # The least bad option should be Python's default SIGINT handler, which + # just raises KeyboardInterrupt. + if self.original_handler is None: + self.original_handler = default_int_handler + + def finish(self): + """ + Restore the original SIGINT handler after finishing. + + This should happen regardless of whether the progress display finishes + normally, or gets interrupted. 
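# --- Illustrative sketch; not part of the vendored pip sources. ---
# _select_progress_class() above enables the fancy bar only if the output stream's
# encoding can represent the bar's characters. The "probe the encoding, fall back to
# ASCII" idea on its own:
import sys


def pick_bar_fill(stream=sys.stdout):
    fancy = u"\u2588"                       # full-block character used by nicer bars
    plain = u"#"                            # safe ASCII fallback
    encoding = getattr(stream, "encoding", None)
    if not encoding:
        return plain
    try:
        fancy.encode(encoding)
    except UnicodeEncodeError:
        return plain
    return fancy


print(pick_bar_fill())                      # block character on UTF-8, "#" otherwise
# --- end of sketch ---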
+ """ + super(InterruptibleMixin, self).finish() + signal(SIGINT, self.original_handler) + + def handle_sigint(self, signum, frame): + """ + Call self.finish() before delegating to the original SIGINT handler. + + This handler should only be in place while the progress display is + active. + """ + self.finish() + self.original_handler(signum, frame) + + +class SilentBar(Bar): + + def update(self): + pass + + +class BlueEmojiBar(IncrementalBar): + + suffix = "%(percent)d%%" + bar_prefix = " " + bar_suffix = " " + phases = (u"\U0001F539", u"\U0001F537", u"\U0001F535") # type: Any + + +class DownloadProgressMixin(object): + + def __init__(self, *args, **kwargs): + super(DownloadProgressMixin, self).__init__(*args, **kwargs) + self.message = (" " * (get_indentation() + 2)) + self.message + + @property + def downloaded(self): + return format_size(self.index) + + @property + def download_speed(self): + # Avoid zero division errors... + if self.avg == 0.0: + return "..." + return format_size(1 / self.avg) + "/s" + + @property + def pretty_eta(self): + if self.eta: + return "eta %s" % self.eta_td + return "" + + def iter(self, it): + for x in it: + yield x + self.next(len(x)) + self.finish() + + +class WindowsMixin(object): + + def __init__(self, *args, **kwargs): + # The Windows terminal does not support the hide/show cursor ANSI codes + # even with colorama. So we'll ensure that hide_cursor is False on + # Windows. + # This call needs to go before the super() call, so that hide_cursor + # is set in time. The base progress bar class writes the "hide cursor" + # code to the terminal in its init, so if we don't set this soon + # enough, we get a "hide" with no corresponding "show"... + if WINDOWS and self.hide_cursor: + self.hide_cursor = False + + super(WindowsMixin, self).__init__(*args, **kwargs) + + # Check if we are running on Windows and we have the colorama module, + # if we do then wrap our file with it. + if WINDOWS and colorama: + self.file = colorama.AnsiToWin32(self.file) + # The progress code expects to be able to call self.file.isatty() + # but the colorama.AnsiToWin32() object doesn't have that, so we'll + # add it. + self.file.isatty = lambda: self.file.wrapped.isatty() + # The progress code expects to be able to call self.file.flush() + # but the colorama.AnsiToWin32() object doesn't have that, so we'll + # add it. 
+ self.file.flush = lambda: self.file.wrapped.flush() + + +class BaseDownloadProgressBar(WindowsMixin, InterruptibleMixin, + DownloadProgressMixin): + + file = sys.stdout + message = "%(percent)d%%" + suffix = "%(downloaded)s %(download_speed)s %(pretty_eta)s" + +# NOTE: The "type: ignore" comments on the following classes are there to +# work around https://github.com/python/typing/issues/241 + + +class DefaultDownloadProgressBar(BaseDownloadProgressBar, + _BaseBar): + pass + + +class DownloadSilentBar(BaseDownloadProgressBar, SilentBar): # type: ignore + pass + + +class DownloadBar(BaseDownloadProgressBar, # type: ignore + Bar): + pass + + +class DownloadFillingCirclesBar(BaseDownloadProgressBar, # type: ignore + FillingCirclesBar): + pass + + +class DownloadBlueEmojiProgressBar(BaseDownloadProgressBar, # type: ignore + BlueEmojiBar): + pass + + +class DownloadProgressSpinner(WindowsMixin, InterruptibleMixin, + DownloadProgressMixin, Spinner): + + file = sys.stdout + suffix = "%(downloaded)s %(download_speed)s" + + def next_phase(self): + if not hasattr(self, "_phaser"): + self._phaser = itertools.cycle(self.phases) + return next(self._phaser) + + def update(self): + message = self.message % self + phase = self.next_phase() + suffix = self.suffix % self + line = ''.join([ + message, + " " if message else "", + phase, + " " if suffix else "", + suffix, + ]) + + self.writeln(line) + + +BAR_TYPES = { + "off": (DownloadSilentBar, DownloadSilentBar), + "on": (DefaultDownloadProgressBar, DownloadProgressSpinner), + "ascii": (DownloadBar, DownloadProgressSpinner), + "pretty": (DownloadFillingCirclesBar, DownloadProgressSpinner), + "emoji": (DownloadBlueEmojiProgressBar, DownloadProgressSpinner) +} + + +def DownloadProgressProvider(progress_bar, max=None): + if max is None or max == 0: + return BAR_TYPES[progress_bar][1]().iter + else: + return BAR_TYPES[progress_bar][0](max=max).iter + + +################################################################ +# Generic "something is happening" spinners +# +# We don't even try using progress.spinner.Spinner here because it's actually +# simpler to reimplement from scratch than to coerce their code into doing +# what we need. +################################################################ + +@contextlib.contextmanager +def hidden_cursor(file): + # type: (IO[Any]) -> Iterator[None] + # The Windows terminal does not support the hide/show cursor ANSI codes, + # even via colorama. So don't even try. + if WINDOWS: + yield + # We don't want to clutter the output with control characters if we're + # writing to a file, or if the user is running with --quiet. 
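# --- Illustrative sketch; not part of the vendored pip sources. ---
# DownloadProgressProvider() above is a dispatch table: BAR_TYPES maps the
# --progress-bar choice to a (bar, spinner) pair, and the spinner variant is picked
# whenever the total size is unknown. The same selection rule with toy classes:
class ToyBar(object):
    def __init__(self, max=None):
        self.max = max


class ToySpinner(object):
    pass


TOY_BAR_TYPES = {"on": (ToyBar, ToySpinner)}


def toy_progress(style, max=None):
    bar_cls, spinner_cls = TOY_BAR_TYPES[style]
    if max is None or max == 0:            # size unknown -> spinner
        return spinner_cls()
    return bar_cls(max=max)


assert isinstance(toy_progress("on", max=1024), ToyBar)
assert isinstance(toy_progress("on"), ToySpinner)
# --- end of sketch ---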
+ # See https://github.com/pypa/pip/issues/3418 + elif not file.isatty() or logger.getEffectiveLevel() > logging.INFO: + yield + else: + file.write(HIDE_CURSOR) + try: + yield + finally: + file.write(SHOW_CURSOR) + + +class RateLimiter(object): + def __init__(self, min_update_interval_seconds): + # type: (float) -> None + self._min_update_interval_seconds = min_update_interval_seconds + self._last_update = 0 # type: float + + def ready(self): + # type: () -> bool + now = time.time() + delta = now - self._last_update + return delta >= self._min_update_interval_seconds + + def reset(self): + # type: () -> None + self._last_update = time.time() + + +class SpinnerInterface(object): + def spin(self): + # type: () -> None + raise NotImplementedError() + + def finish(self, final_status): + # type: (str) -> None + raise NotImplementedError() + + +class InteractiveSpinner(SpinnerInterface): + def __init__(self, message, file=None, spin_chars="-\\|/", + # Empirically, 8 updates/second looks nice + min_update_interval_seconds=0.125): + self._message = message + if file is None: + file = sys.stdout + self._file = file + self._rate_limiter = RateLimiter(min_update_interval_seconds) + self._finished = False + + self._spin_cycle = itertools.cycle(spin_chars) + + self._file.write(" " * get_indentation() + self._message + " ... ") + self._width = 0 + + def _write(self, status): + assert not self._finished + # Erase what we wrote before by backspacing to the beginning, writing + # spaces to overwrite the old text, and then backspacing again + backup = "\b" * self._width + self._file.write(backup + " " * self._width + backup) + # Now we have a blank slate to add our status + self._file.write(status) + self._width = len(status) + self._file.flush() + self._rate_limiter.reset() + + def spin(self): + # type: () -> None + if self._finished: + return + if not self._rate_limiter.ready(): + return + self._write(next(self._spin_cycle)) + + def finish(self, final_status): + # type: (str) -> None + if self._finished: + return + self._write(final_status) + self._file.write("\n") + self._file.flush() + self._finished = True + + +# Used for dumb terminals, non-interactive installs (no tty), etc. +# We still print updates occasionally (once every 60 seconds by default) to +# act as a keep-alive for systems like Travis-CI that take lack-of-output as +# an indication that a task has frozen. +class NonInteractiveSpinner(SpinnerInterface): + def __init__(self, message, min_update_interval_seconds=60): + # type: (str, float) -> None + self._message = message + self._finished = False + self._rate_limiter = RateLimiter(min_update_interval_seconds) + self._update("started") + + def _update(self, status): + assert not self._finished + self._rate_limiter.reset() + logger.info("%s: %s", self._message, status) + + def spin(self): + # type: () -> None + if self._finished: + return + if not self._rate_limiter.ready(): + return + self._update("still running...") + + def finish(self, final_status): + # type: (str) -> None + if self._finished: + return + self._update("finished with status '%s'" % (final_status,)) + self._finished = True + + +@contextlib.contextmanager +def open_spinner(message): + # type: (str) -> Iterator[SpinnerInterface] + # Interactive spinner goes directly to sys.stdout rather than being routed + # through the logging system, but it acts like it has level INFO, + # i.e. it's only displayed if we're at level INFO or better. 
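# --- Illustrative sketch; not part of the vendored pip sources. ---
# RateLimiter above throttles how often the spinner redraws; the caller's pattern is
# "if ready(): do the work, then reset()". A self-contained equivalent:
import time


class SimpleRateLimiter(object):
    def __init__(self, min_interval_seconds):
        self._min_interval = min_interval_seconds
        self._last_update = 0.0

    def ready(self):
        return time.time() - self._last_update >= self._min_interval

    def reset(self):
        self._last_update = time.time()


limiter = SimpleRateLimiter(0.5)            # at most ~2 updates per second
for i in range(10):
    if limiter.ready():
        print("update", i)
        limiter.reset()
    time.sleep(0.1)
# --- end of sketch ---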
+ # Non-interactive spinner goes through the logging system, so it is always + # in sync with logging configuration. + if sys.stdout.isatty() and logger.getEffectiveLevel() <= logging.INFO: + spinner = InteractiveSpinner(message) # type: SpinnerInterface + else: + spinner = NonInteractiveSpinner(message) + try: + with hidden_cursor(sys.stdout): + yield spinner + except KeyboardInterrupt: + spinner.finish("canceled") + raise + except Exception: + spinner.finish("error") + raise + else: + spinner.finish("done") diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/unpacking.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/unpacking.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/unpacking.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/unpacking.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/urls.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/urls.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/urls.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/urls.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/virtualenv.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/virtualenv.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/virtualenv.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/virtualenv.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/utils/wheel.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/utils/wheel.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/utils/wheel.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/utils/wheel.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__init__.py new file mode 100644 index 0000000..2a4eb13 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__init__.py @@ -0,0 +1,15 @@ +# Expose a limited set of classes and functions so callers outside of +# the vcs package don't need to import deeper than `pip._internal.vcs`. +# (The test directory and imports protected by MYPY_CHECK_RUNNING may +# still need to import from a vcs sub-package.) +# Import all vcs modules to register each VCS in the VcsSupport object. 
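# --- Illustrative sketch; not part of the vendored pip sources. ---
# The imports that follow are there for their side effect: each backend module ends
# with a vcs.register(...) call, so merely importing it adds the backend to the
# shared registry. A toy version of that register-on-import pattern (names invented):
class DemoVcsRegistry(object):
    def __init__(self):
        self._backends = {}

    def register(self, cls):
        self._backends[cls.name] = cls()

    def get_backend(self, name):
        return self._backends.get(name)


demo_vcs = DemoVcsRegistry()


class DemoBackend(object):
    name = "demo"


# A real backend module ends with a call like this, so importing the module is
# enough to make the backend available through the registry.
demo_vcs.register(DemoBackend)
assert demo_vcs.get_backend("demo") is not None
# --- end of sketch ---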
+import pip._internal.vcs.bazaar +import pip._internal.vcs.git +import pip._internal.vcs.mercurial +import pip._internal.vcs.subversion # noqa: F401 +from pip._internal.vcs.versioncontrol import ( # noqa: F401 + RemoteNotFoundError, + is_url, + make_vcs_requirement_url, + vcs, +) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..3c4e491 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/bazaar.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/bazaar.cpython-37.pyc new file mode 100644 index 0000000..0e5b823 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/bazaar.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/git.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/git.cpython-37.pyc new file mode 100644 index 0000000..3ec7f24 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/git.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/mercurial.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/mercurial.cpython-37.pyc new file mode 100644 index 0000000..dbf91de Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/mercurial.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/subversion.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/subversion.cpython-37.pyc new file mode 100644 index 0000000..f2152c6 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/subversion.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/versioncontrol.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/versioncontrol.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/versioncontrol.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/__pycache__/versioncontrol.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/bazaar.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/bazaar.py new file mode 100644 index 0000000..347c06f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/bazaar.py @@ -0,0 +1,120 @@ +# The following comment should be removed at some point in the future. 
+# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import logging +import os + +from pip._vendor.six.moves.urllib import parse as urllib_parse + +from pip._internal.utils.misc import display_path, rmtree +from pip._internal.utils.subprocess import make_command +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from pip._internal.utils.urls import path_to_url +from pip._internal.vcs.versioncontrol import VersionControl, vcs + +if MYPY_CHECK_RUNNING: + from typing import Optional, Tuple + from pip._internal.utils.misc import HiddenText + from pip._internal.vcs.versioncontrol import AuthInfo, RevOptions + + +logger = logging.getLogger(__name__) + + +class Bazaar(VersionControl): + name = 'bzr' + dirname = '.bzr' + repo_name = 'branch' + schemes = ( + 'bzr', 'bzr+http', 'bzr+https', 'bzr+ssh', 'bzr+sftp', 'bzr+ftp', + 'bzr+lp', + ) + + def __init__(self, *args, **kwargs): + super(Bazaar, self).__init__(*args, **kwargs) + # This is only needed for python <2.7.5 + # Register lp but do not expose as a scheme to support bzr+lp. + if getattr(urllib_parse, 'uses_fragment', None): + urllib_parse.uses_fragment.extend(['lp']) + + @staticmethod + def get_base_rev_args(rev): + return ['-r', rev] + + def export(self, location, url): + # type: (str, HiddenText) -> None + """ + Export the Bazaar repository at the url to the destination location + """ + # Remove the location to make sure Bazaar can export it correctly + if os.path.exists(location): + rmtree(location) + + url, rev_options = self.get_url_rev_options(url) + self.run_command( + make_command('export', location, url, rev_options.to_args()), + show_stdout=False, + ) + + def fetch_new(self, dest, url, rev_options): + # type: (str, HiddenText, RevOptions) -> None + rev_display = rev_options.to_display() + logger.info( + 'Checking out %s%s to %s', + url, + rev_display, + display_path(dest), + ) + cmd_args = ( + make_command('branch', '-q', rev_options.to_args(), url, dest) + ) + self.run_command(cmd_args) + + def switch(self, dest, url, rev_options): + # type: (str, HiddenText, RevOptions) -> None + self.run_command(make_command('switch', url), cwd=dest) + + def update(self, dest, url, rev_options): + # type: (str, HiddenText, RevOptions) -> None + cmd_args = make_command('pull', '-q', rev_options.to_args()) + self.run_command(cmd_args, cwd=dest) + + @classmethod + def get_url_rev_and_auth(cls, url): + # type: (str) -> Tuple[str, Optional[str], AuthInfo] + # hotfix the URL scheme after removing bzr+ from bzr+ssh:// readd it + url, rev, user_pass = super(Bazaar, cls).get_url_rev_and_auth(url) + if url.startswith('ssh://'): + url = 'bzr+' + url + return url, rev, user_pass + + @classmethod + def get_remote_url(cls, location): + urls = cls.run_command(['info'], show_stdout=False, cwd=location) + for line in urls.splitlines(): + line = line.strip() + for x in ('checkout of branch: ', + 'parent branch: '): + if line.startswith(x): + repo = line.split(x)[1] + if cls._is_local_repository(repo): + return path_to_url(repo) + return repo + return None + + @classmethod + def get_revision(cls, location): + revision = cls.run_command( + ['revno'], show_stdout=False, cwd=location, + ) + return revision.splitlines()[-1] + + @classmethod + def is_commit_id_equal(cls, dest, name): + """Always assume the versions don't match""" + return False + + +vcs.register(Bazaar) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/git.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/git.py new file mode 100644 
index 0000000..7483303 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/git.py @@ -0,0 +1,389 @@ +# The following comment should be removed at some point in the future. +# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import logging +import os.path +import re + +from pip._vendor.packaging.version import parse as parse_version +from pip._vendor.six.moves.urllib import parse as urllib_parse +from pip._vendor.six.moves.urllib import request as urllib_request + +from pip._internal.exceptions import BadCommand +from pip._internal.utils.misc import display_path, hide_url +from pip._internal.utils.subprocess import make_command +from pip._internal.utils.temp_dir import TempDirectory +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from pip._internal.vcs.versioncontrol import ( + RemoteNotFoundError, + VersionControl, + find_path_to_setup_from_repo_root, + vcs, +) + +if MYPY_CHECK_RUNNING: + from typing import Optional, Tuple + from pip._internal.utils.misc import HiddenText + from pip._internal.vcs.versioncontrol import AuthInfo, RevOptions + + +urlsplit = urllib_parse.urlsplit +urlunsplit = urllib_parse.urlunsplit + + +logger = logging.getLogger(__name__) + + +HASH_REGEX = re.compile('^[a-fA-F0-9]{40}$') + + +def looks_like_hash(sha): + return bool(HASH_REGEX.match(sha)) + + +class Git(VersionControl): + name = 'git' + dirname = '.git' + repo_name = 'clone' + schemes = ( + 'git', 'git+http', 'git+https', 'git+ssh', 'git+git', 'git+file', + ) + # Prevent the user's environment variables from interfering with pip: + # https://github.com/pypa/pip/issues/1130 + unset_environ = ('GIT_DIR', 'GIT_WORK_TREE') + default_arg_rev = 'HEAD' + + @staticmethod + def get_base_rev_args(rev): + return [rev] + + def is_immutable_rev_checkout(self, url, dest): + # type: (str, str) -> bool + _, rev_options = self.get_url_rev_options(hide_url(url)) + if not rev_options.rev: + return False + if not self.is_commit_id_equal(dest, rev_options.rev): + # the current commit is different from rev, + # which means rev was something else than a commit hash + return False + # return False in the rare case rev is both a commit hash + # and a tag or a branch; we don't want to cache in that case + # because that branch/tag could point to something else in the future + is_tag_or_branch = bool( + self.get_revision_sha(dest, rev_options.rev)[0] + ) + return not is_tag_or_branch + + def get_git_version(self): + VERSION_PFX = 'git version ' + version = self.run_command(['version'], show_stdout=False) + if version.startswith(VERSION_PFX): + version = version[len(VERSION_PFX):].split()[0] + else: + version = '' + # get first 3 positions of the git version because + # on windows it is x.y.z.windows.t, and this parses as + # LegacyVersion which always smaller than a Version. + version = '.'.join(version.split('.')[:3]) + return parse_version(version) + + @classmethod + def get_current_branch(cls, location): + """ + Return the current branch, or None if HEAD isn't at a branch + (e.g. detached HEAD). + """ + # git-symbolic-ref exits with empty stdout if "HEAD" is a detached + # HEAD rather than a symbolic ref. In addition, the -q causes the + # command to exit with status code 1 instead of 128 in this case + # and to suppress the message to stderr. 
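# --- Illustrative sketch; not part of the vendored pip sources. ---
# get_git_version() above normalizes output such as "git version 2.24.1.windows.2"
# to its first three components before comparing. Roughly the same parsing in
# isolation, using the standalone `packaging` distribution (assumed installed)
# instead of pip's vendored copy:
from packaging.version import parse as parse_version


def parse_git_version(output):
    prefix = "git version "
    if not output.startswith(prefix):
        return parse_version("0")                    # unparseable -> lowest version
    raw = output[len(prefix):].split()[0]            # "2.24.1.windows.2"
    return parse_version(".".join(raw.split(".")[:3]))


assert parse_git_version("git version 2.24.1.windows.2") >= parse_version("1.9.0")
# --- end of sketch ---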
+ args = ['symbolic-ref', '-q', 'HEAD'] + output = cls.run_command( + args, extra_ok_returncodes=(1, ), show_stdout=False, cwd=location, + ) + ref = output.strip() + + if ref.startswith('refs/heads/'): + return ref[len('refs/heads/'):] + + return None + + def export(self, location, url): + # type: (str, HiddenText) -> None + """Export the Git repository at the url to the destination location""" + if not location.endswith('/'): + location = location + '/' + + with TempDirectory(kind="export") as temp_dir: + self.unpack(temp_dir.path, url=url) + self.run_command( + ['checkout-index', '-a', '-f', '--prefix', location], + show_stdout=False, cwd=temp_dir.path + ) + + @classmethod + def get_revision_sha(cls, dest, rev): + """ + Return (sha_or_none, is_branch), where sha_or_none is a commit hash + if the revision names a remote branch or tag, otherwise None. + + Args: + dest: the repository directory. + rev: the revision name. + """ + # Pass rev to pre-filter the list. + output = cls.run_command(['show-ref', rev], cwd=dest, + show_stdout=False, on_returncode='ignore') + refs = {} + for line in output.strip().splitlines(): + try: + sha, ref = line.split() + except ValueError: + # Include the offending line to simplify troubleshooting if + # this error ever occurs. + raise ValueError('unexpected show-ref line: {!r}'.format(line)) + + refs[ref] = sha + + branch_ref = 'refs/remotes/origin/{}'.format(rev) + tag_ref = 'refs/tags/{}'.format(rev) + + sha = refs.get(branch_ref) + if sha is not None: + return (sha, True) + + sha = refs.get(tag_ref) + + return (sha, False) + + @classmethod + def resolve_revision(cls, dest, url, rev_options): + # type: (str, HiddenText, RevOptions) -> RevOptions + """ + Resolve a revision to a new RevOptions object with the SHA1 of the + branch, tag, or ref if found. + + Args: + rev_options: a RevOptions object. + """ + rev = rev_options.arg_rev + # The arg_rev property's implementation for Git ensures that the + # rev return value is always non-None. + assert rev is not None + + sha, is_branch = cls.get_revision_sha(dest, rev) + + if sha is not None: + rev_options = rev_options.make_new(sha) + rev_options.branch_name = rev if is_branch else None + + return rev_options + + # Do not show a warning for the common case of something that has + # the form of a Git commit hash. + if not looks_like_hash(rev): + logger.warning( + "Did not find branch or tag '%s', assuming revision or ref.", + rev, + ) + + if not rev.startswith('refs/'): + return rev_options + + # If it looks like a ref, we have to fetch it explicitly. + cls.run_command( + make_command('fetch', '-q', url, rev_options.to_args()), + cwd=dest, + ) + # Change the revision to the SHA of the ref we fetched + sha = cls.get_revision(dest, rev='FETCH_HEAD') + rev_options = rev_options.make_new(sha) + + return rev_options + + @classmethod + def is_commit_id_equal(cls, dest, name): + """ + Return whether the current commit hash equals the given name. + + Args: + dest: the repository directory. + name: a string name. + """ + if not name: + # Then avoid an unnecessary subprocess call. + return False + + return cls.get_revision(dest) == name + + def fetch_new(self, dest, url, rev_options): + # type: (str, HiddenText, RevOptions) -> None + rev_display = rev_options.to_display() + logger.info('Cloning %s%s to %s', url, rev_display, display_path(dest)) + self.run_command(make_command('clone', '-q', url, dest)) + + if rev_options.rev: + # Then a specific revision was requested. 
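# --- Illustrative sketch; not part of the vendored pip sources. ---
# get_revision_sha() above turns `git show-ref <rev>` output into a {ref: sha} map
# and then checks the remote-branch and tag forms of the name. The parsing step on a
# canned sample (the SHAs are made up):
SAMPLE_SHOW_REF = (
    "1111111111111111111111111111111111111111 refs/remotes/origin/master\n"
    "2222222222222222222222222222222222222222 refs/tags/v1.0\n"
)


def classify_rev(show_ref_output, rev):
    refs = {}
    for line in show_ref_output.strip().splitlines():
        sha, ref = line.split()
        refs[ref] = sha
    branch_sha = refs.get("refs/remotes/origin/{}".format(rev))
    if branch_sha is not None:
        return branch_sha, True                     # (sha, is_branch)
    return refs.get("refs/tags/{}".format(rev)), False


assert classify_rev(SAMPLE_SHOW_REF, "master") == ("1" * 40, True)
assert classify_rev(SAMPLE_SHOW_REF, "v1.0") == ("2" * 40, False)
# --- end of sketch ---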
+ rev_options = self.resolve_revision(dest, url, rev_options) + branch_name = getattr(rev_options, 'branch_name', None) + if branch_name is None: + # Only do a checkout if the current commit id doesn't match + # the requested revision. + if not self.is_commit_id_equal(dest, rev_options.rev): + cmd_args = make_command( + 'checkout', '-q', rev_options.to_args(), + ) + self.run_command(cmd_args, cwd=dest) + elif self.get_current_branch(dest) != branch_name: + # Then a specific branch was requested, and that branch + # is not yet checked out. + track_branch = 'origin/{}'.format(branch_name) + cmd_args = [ + 'checkout', '-b', branch_name, '--track', track_branch, + ] + self.run_command(cmd_args, cwd=dest) + + #: repo may contain submodules + self.update_submodules(dest) + + def switch(self, dest, url, rev_options): + # type: (str, HiddenText, RevOptions) -> None + self.run_command( + make_command('config', 'remote.origin.url', url), + cwd=dest, + ) + cmd_args = make_command('checkout', '-q', rev_options.to_args()) + self.run_command(cmd_args, cwd=dest) + + self.update_submodules(dest) + + def update(self, dest, url, rev_options): + # type: (str, HiddenText, RevOptions) -> None + # First fetch changes from the default remote + if self.get_git_version() >= parse_version('1.9.0'): + # fetch tags in addition to everything else + self.run_command(['fetch', '-q', '--tags'], cwd=dest) + else: + self.run_command(['fetch', '-q'], cwd=dest) + # Then reset to wanted revision (maybe even origin/master) + rev_options = self.resolve_revision(dest, url, rev_options) + cmd_args = make_command('reset', '--hard', '-q', rev_options.to_args()) + self.run_command(cmd_args, cwd=dest) + #: update submodules + self.update_submodules(dest) + + @classmethod + def get_remote_url(cls, location): + """ + Return URL of the first remote encountered. + + Raises RemoteNotFoundError if the repository does not have a remote + url configured. + """ + # We need to pass 1 for extra_ok_returncodes since the command + # exits with return code 1 if there are no matching lines. + stdout = cls.run_command( + ['config', '--get-regexp', r'remote\..*\.url'], + extra_ok_returncodes=(1, ), show_stdout=False, cwd=location, + ) + remotes = stdout.splitlines() + try: + found_remote = remotes[0] + except IndexError: + raise RemoteNotFoundError + + for remote in remotes: + if remote.startswith('remote.origin.url '): + found_remote = remote + break + url = found_remote.split(' ')[1] + return url.strip() + + @classmethod + def get_revision(cls, location, rev=None): + if rev is None: + rev = 'HEAD' + current_rev = cls.run_command( + ['rev-parse', rev], show_stdout=False, cwd=location, + ) + return current_rev.strip() + + @classmethod + def get_subdirectory(cls, location): + """ + Return the path to setup.py, relative to the repo root. + Return None if setup.py is in the repo root. + """ + # find the repo root + git_dir = cls.run_command( + ['rev-parse', '--git-dir'], + show_stdout=False, cwd=location).strip() + if not os.path.isabs(git_dir): + git_dir = os.path.join(location, git_dir) + repo_root = os.path.abspath(os.path.join(git_dir, '..')) + return find_path_to_setup_from_repo_root(location, repo_root) + + @classmethod + def get_url_rev_and_auth(cls, url): + # type: (str) -> Tuple[str, Optional[str], AuthInfo] + """ + Prefixes stub URLs like 'user@hostname:user/repo.git' with 'ssh://'. + That's required because although they use SSH they sometimes don't + work with a ssh:// scheme (e.g. GitHub). But we need a scheme for + parsing. 
Hence we remove it again afterwards and return it as a stub. + """ + # Works around an apparent Git bug + # (see https://article.gmane.org/gmane.comp.version-control.git/146500) + scheme, netloc, path, query, fragment = urlsplit(url) + if scheme.endswith('file'): + initial_slashes = path[:-len(path.lstrip('/'))] + newpath = ( + initial_slashes + + urllib_request.url2pathname(path) + .replace('\\', '/').lstrip('/') + ) + url = urlunsplit((scheme, netloc, newpath, query, fragment)) + after_plus = scheme.find('+') + 1 + url = scheme[:after_plus] + urlunsplit( + (scheme[after_plus:], netloc, newpath, query, fragment), + ) + + if '://' not in url: + assert 'file:' not in url + url = url.replace('git+', 'git+ssh://') + url, rev, user_pass = super(Git, cls).get_url_rev_and_auth(url) + url = url.replace('ssh://', '') + else: + url, rev, user_pass = super(Git, cls).get_url_rev_and_auth(url) + + return url, rev, user_pass + + @classmethod + def update_submodules(cls, location): + if not os.path.exists(os.path.join(location, '.gitmodules')): + return + cls.run_command( + ['submodule', 'update', '--init', '--recursive', '-q'], + cwd=location, + ) + + @classmethod + def controls_location(cls, location): + if super(Git, cls).controls_location(location): + return True + try: + r = cls.run_command(['rev-parse'], + cwd=location, + show_stdout=False, + on_returncode='ignore', + log_failed_cmd=False) + return not r + except BadCommand: + logger.debug("could not determine if %s is under git control " + "because git is not available", location) + return False + + +vcs.register(Git) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/mercurial.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/mercurial.py new file mode 100644 index 0000000..d9b58cf --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/mercurial.py @@ -0,0 +1,155 @@ +# The following comment should be removed at some point in the future. 
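# --- Illustrative sketch; not part of the vendored pip sources. ---
# Git.get_url_rev_and_auth() above copes with SCP-like URLs such as
# "git+user@host:project/repo.git", which carry no scheme and therefore cannot be
# split by urllib directly. The temporary-scheme trick in isolation:
from urllib.parse import urlsplit

url = "git+user@host:project/repo.git"       # SCP-like form, no real scheme

# Without a scheme, urlsplit() cannot separate host from path:
assert urlsplit(url).netloc == ""

# Temporarily rewriting "git+" to "git+ssh://" gives urllib something parseable; pip
# strips the "ssh://" back out of the result once rev and auth have been extracted.
parts = urlsplit(url.replace("git+", "git+ssh://", 1))
print(parts.scheme, parts.netloc, parts.path)  # git+ssh user@host:project /repo.git
# --- end of sketch ---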
+# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import logging +import os + +from pip._vendor.six.moves import configparser + +from pip._internal.exceptions import BadCommand, InstallationError +from pip._internal.utils.misc import display_path +from pip._internal.utils.subprocess import make_command +from pip._internal.utils.temp_dir import TempDirectory +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from pip._internal.utils.urls import path_to_url +from pip._internal.vcs.versioncontrol import ( + VersionControl, + find_path_to_setup_from_repo_root, + vcs, +) + +if MYPY_CHECK_RUNNING: + from pip._internal.utils.misc import HiddenText + from pip._internal.vcs.versioncontrol import RevOptions + + +logger = logging.getLogger(__name__) + + +class Mercurial(VersionControl): + name = 'hg' + dirname = '.hg' + repo_name = 'clone' + schemes = ( + 'hg', 'hg+file', 'hg+http', 'hg+https', 'hg+ssh', 'hg+static-http', + ) + + @staticmethod + def get_base_rev_args(rev): + return [rev] + + def export(self, location, url): + # type: (str, HiddenText) -> None + """Export the Hg repository at the url to the destination location""" + with TempDirectory(kind="export") as temp_dir: + self.unpack(temp_dir.path, url=url) + + self.run_command( + ['archive', location], show_stdout=False, cwd=temp_dir.path + ) + + def fetch_new(self, dest, url, rev_options): + # type: (str, HiddenText, RevOptions) -> None + rev_display = rev_options.to_display() + logger.info( + 'Cloning hg %s%s to %s', + url, + rev_display, + display_path(dest), + ) + self.run_command(make_command('clone', '--noupdate', '-q', url, dest)) + self.run_command( + make_command('update', '-q', rev_options.to_args()), + cwd=dest, + ) + + def switch(self, dest, url, rev_options): + # type: (str, HiddenText, RevOptions) -> None + repo_config = os.path.join(dest, self.dirname, 'hgrc') + config = configparser.RawConfigParser() + try: + config.read(repo_config) + config.set('paths', 'default', url.secret) + with open(repo_config, 'w') as config_file: + config.write(config_file) + except (OSError, configparser.NoSectionError) as exc: + logger.warning( + 'Could not switch Mercurial repository to %s: %s', url, exc, + ) + else: + cmd_args = make_command('update', '-q', rev_options.to_args()) + self.run_command(cmd_args, cwd=dest) + + def update(self, dest, url, rev_options): + # type: (str, HiddenText, RevOptions) -> None + self.run_command(['pull', '-q'], cwd=dest) + cmd_args = make_command('update', '-q', rev_options.to_args()) + self.run_command(cmd_args, cwd=dest) + + @classmethod + def get_remote_url(cls, location): + url = cls.run_command( + ['showconfig', 'paths.default'], + show_stdout=False, cwd=location).strip() + if cls._is_local_repository(url): + url = path_to_url(url) + return url.strip() + + @classmethod + def get_revision(cls, location): + """ + Return the repository-local changeset revision number, as an integer. 
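# --- Illustrative sketch; not part of the vendored pip sources. ---
# Mercurial.switch() above retargets a checkout by rewriting the [paths] section of
# .hg/hgrc. The same config round trip with the Python 3 stdlib configparser, against
# an in-memory string instead of a real repository:
import configparser
import io

hgrc_text = "[paths]\ndefault = https://example.com/old/repo\n"

config = configparser.RawConfigParser()
config.read_string(hgrc_text)
config.set("paths", "default", "https://example.com/new/repo")

buf = io.StringIO()
config.write(buf)
print(buf.getvalue())
# [paths]
# default = https://example.com/new/repo
# --- end of sketch ---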
+ """ + current_revision = cls.run_command( + ['parents', '--template={rev}'], + show_stdout=False, cwd=location).strip() + return current_revision + + @classmethod + def get_requirement_revision(cls, location): + """ + Return the changeset identification hash, as a 40-character + hexadecimal string + """ + current_rev_hash = cls.run_command( + ['parents', '--template={node}'], + show_stdout=False, cwd=location).strip() + return current_rev_hash + + @classmethod + def is_commit_id_equal(cls, dest, name): + """Always assume the versions don't match""" + return False + + @classmethod + def get_subdirectory(cls, location): + """ + Return the path to setup.py, relative to the repo root. + Return None if setup.py is in the repo root. + """ + # find the repo root + repo_root = cls.run_command( + ['root'], show_stdout=False, cwd=location).strip() + if not os.path.isabs(repo_root): + repo_root = os.path.abspath(os.path.join(location, repo_root)) + return find_path_to_setup_from_repo_root(location, repo_root) + + @classmethod + def controls_location(cls, location): + if super(Mercurial, cls).controls_location(location): + return True + try: + cls.run_command( + ['identify'], + cwd=location, + show_stdout=False, + on_returncode='raise', + log_failed_cmd=False) + return True + except (BadCommand, InstallationError): + return False + + +vcs.register(Mercurial) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/subversion.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/subversion.py new file mode 100644 index 0000000..6c76d1a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/subversion.py @@ -0,0 +1,333 @@ +# The following comment should be removed at some point in the future. +# mypy: disallow-untyped-defs=False + +from __future__ import absolute_import + +import logging +import os +import re + +from pip._internal.utils.logging import indent_log +from pip._internal.utils.misc import ( + display_path, + is_console_interactive, + rmtree, + split_auth_from_netloc, +) +from pip._internal.utils.subprocess import make_command +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from pip._internal.vcs.versioncontrol import VersionControl, vcs + +_svn_xml_url_re = re.compile('url="([^"]+)"') +_svn_rev_re = re.compile(r'committed-rev="(\d+)"') +_svn_info_xml_rev_re = re.compile(r'\s*revision="(\d+)"') +_svn_info_xml_url_re = re.compile(r'(.*)') + + +if MYPY_CHECK_RUNNING: + from typing import Optional, Tuple + from pip._internal.utils.subprocess import CommandArgs + from pip._internal.utils.misc import HiddenText + from pip._internal.vcs.versioncontrol import AuthInfo, RevOptions + + +logger = logging.getLogger(__name__) + + +class Subversion(VersionControl): + name = 'svn' + dirname = '.svn' + repo_name = 'checkout' + schemes = ('svn', 'svn+ssh', 'svn+http', 'svn+https', 'svn+svn') + + @classmethod + def should_add_vcs_url_prefix(cls, remote_url): + return True + + @staticmethod + def get_base_rev_args(rev): + return ['-r', rev] + + @classmethod + def get_revision(cls, location): + """ + Return the maximum revision for all files under a given location + """ + # Note: taken from setuptools.command.egg_info + revision = 0 + + for base, dirs, files in os.walk(location): + if cls.dirname not in dirs: + dirs[:] = [] + continue # no sense walking uncontrolled subdirs + dirs.remove(cls.dirname) + entries_fn = os.path.join(base, cls.dirname, 'entries') + if not os.path.exists(entries_fn): + # FIXME: should we warn? 
+ continue + + dirurl, localrev = cls._get_svn_url_rev(base) + + if base == location: + base = dirurl + '/' # save the root url + elif not dirurl or not dirurl.startswith(base): + dirs[:] = [] + continue # not part of the same svn tree, skip it + revision = max(revision, localrev) + return revision + + @classmethod + def get_netloc_and_auth(cls, netloc, scheme): + """ + This override allows the auth information to be passed to svn via the + --username and --password options instead of via the URL. + """ + if scheme == 'ssh': + # The --username and --password options can't be used for + # svn+ssh URLs, so keep the auth information in the URL. + return super(Subversion, cls).get_netloc_and_auth(netloc, scheme) + + return split_auth_from_netloc(netloc) + + @classmethod + def get_url_rev_and_auth(cls, url): + # type: (str) -> Tuple[str, Optional[str], AuthInfo] + # hotfix the URL scheme after removing svn+ from svn+ssh:// readd it + url, rev, user_pass = super(Subversion, cls).get_url_rev_and_auth(url) + if url.startswith('ssh://'): + url = 'svn+' + url + return url, rev, user_pass + + @staticmethod + def make_rev_args(username, password): + # type: (Optional[str], Optional[HiddenText]) -> CommandArgs + extra_args = [] # type: CommandArgs + if username: + extra_args += ['--username', username] + if password: + extra_args += ['--password', password] + + return extra_args + + @classmethod + def get_remote_url(cls, location): + # In cases where the source is in a subdirectory, not alongside + # setup.py we have to look up in the location until we find a real + # setup.py + orig_location = location + while not os.path.exists(os.path.join(location, 'setup.py')): + last_location = location + location = os.path.dirname(location) + if location == last_location: + # We've traversed up to the root of the filesystem without + # finding setup.py + logger.warning( + "Could not find setup.py for directory %s (tried all " + "parent directories)", + orig_location, + ) + return None + + return cls._get_svn_url_rev(location)[0] + + @classmethod + def _get_svn_url_rev(cls, location): + from pip._internal.exceptions import InstallationError + + entries_path = os.path.join(location, cls.dirname, 'entries') + if os.path.exists(entries_path): + with open(entries_path) as f: + data = f.read() + else: # subversion >= 1.7 does not have the 'entries' file + data = '' + + if (data.startswith('8') or + data.startswith('9') or + data.startswith('10')): + data = list(map(str.splitlines, data.split('\n\x0c\n'))) + del data[0][0] # get rid of the '8' + url = data[0][3] + revs = [int(d[9]) for d in data if len(d) > 9 and d[9]] + [0] + elif data.startswith('= 1.7 + # Note that using get_remote_call_options is not necessary here + # because `svn info` is being run against a local directory. + # We don't need to worry about making sure interactive mode + # is being used to prompt for passwords, because passwords + # are only potentially needed for remote server requests. 
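# --- Illustrative sketch; not part of the vendored pip sources. ---
# Subversion.get_remote_url() above walks parent directories until it finds a
# setup.py, stopping once dirname() stops changing (i.e. at the filesystem root).
# That termination pattern on its own:
import os


def find_upwards(start, filename="setup.py"):
    location = os.path.abspath(start)
    while not os.path.exists(os.path.join(location, filename)):
        last_location = location
        location = os.path.dirname(location)
        if location == last_location:
            return None                    # reached the root without finding it
    return location


print(find_upwards(os.getcwd()))           # nearest ancestor with setup.py, or None
# --- end of sketch ---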
+ xml = cls.run_command( + ['info', '--xml', location], + show_stdout=False, + ) + url = _svn_info_xml_url_re.search(xml).group(1) + revs = [ + int(m.group(1)) for m in _svn_info_xml_rev_re.finditer(xml) + ] + except InstallationError: + url, revs = None, [] + + if revs: + rev = max(revs) + else: + rev = 0 + + return url, rev + + @classmethod + def is_commit_id_equal(cls, dest, name): + """Always assume the versions don't match""" + return False + + def __init__(self, use_interactive=None): + # type: (bool) -> None + if use_interactive is None: + use_interactive = is_console_interactive() + self.use_interactive = use_interactive + + # This member is used to cache the fetched version of the current + # ``svn`` client. + # Special value definitions: + # None: Not evaluated yet. + # Empty tuple: Could not parse version. + self._vcs_version = None # type: Optional[Tuple[int, ...]] + + super(Subversion, self).__init__() + + def call_vcs_version(self): + # type: () -> Tuple[int, ...] + """Query the version of the currently installed Subversion client. + + :return: A tuple containing the parts of the version information or + ``()`` if the version returned from ``svn`` could not be parsed. + :raises: BadCommand: If ``svn`` is not installed. + """ + # Example versions: + # svn, version 1.10.3 (r1842928) + # compiled Feb 25 2019, 14:20:39 on x86_64-apple-darwin17.0.0 + # svn, version 1.7.14 (r1542130) + # compiled Mar 28 2018, 08:49:13 on x86_64-pc-linux-gnu + version_prefix = 'svn, version ' + version = self.run_command(['--version'], show_stdout=False) + if not version.startswith(version_prefix): + return () + + version = version[len(version_prefix):].split()[0] + version_list = version.split('.') + try: + parsed_version = tuple(map(int, version_list)) + except ValueError: + return () + + return parsed_version + + def get_vcs_version(self): + # type: () -> Tuple[int, ...] + """Return the version of the currently installed Subversion client. + + If the version of the Subversion client has already been queried, + a cached value will be used. + + :return: A tuple containing the parts of the version information or + ``()`` if the version returned from ``svn`` could not be parsed. + :raises: BadCommand: If ``svn`` is not installed. + """ + if self._vcs_version is not None: + # Use cached version, if available. + # If parsing the version failed previously (empty tuple), + # do not attempt to parse it again. + return self._vcs_version + + vcs_version = self.call_vcs_version() + self._vcs_version = vcs_version + return vcs_version + + def get_remote_call_options(self): + # type: () -> CommandArgs + """Return options to be used on calls to Subversion that contact the server. + + These options are applicable for the following ``svn`` subcommands used + in this class. + + - checkout + - export + - switch + - update + + :return: A list of command line arguments to pass to ``svn``. + """ + if not self.use_interactive: + # --non-interactive switch is available since Subversion 0.14.4. + # Subversion < 1.8 runs in interactive mode by default. + return ['--non-interactive'] + + svn_version = self.get_vcs_version() + # By default, Subversion >= 1.8 runs in non-interactive mode if + # stdin is not a TTY. Since that is how pip invokes SVN, in + # call_subprocess(), pip must pass --force-interactive to ensure + # the user can be prompted for a password, if required. + # SVN added the --force-interactive option in SVN 1.8. Since + # e.g. 
RHEL/CentOS 7, which is supported until 2024, ships with + # SVN 1.7, pip should continue to support SVN 1.7. Therefore, pip + # can't safely add the option if the SVN version is < 1.8 (or unknown). + if svn_version >= (1, 8): + return ['--force-interactive'] + + return [] + + def export(self, location, url): + # type: (str, HiddenText) -> None + """Export the svn repository at the url to the destination location""" + url, rev_options = self.get_url_rev_options(url) + + logger.info('Exporting svn repository %s to %s', url, location) + with indent_log(): + if os.path.exists(location): + # Subversion doesn't like to check out over an existing + # directory --force fixes this, but was only added in svn 1.5 + rmtree(location) + cmd_args = make_command( + 'export', self.get_remote_call_options(), + rev_options.to_args(), url, location, + ) + self.run_command(cmd_args, show_stdout=False) + + def fetch_new(self, dest, url, rev_options): + # type: (str, HiddenText, RevOptions) -> None + rev_display = rev_options.to_display() + logger.info( + 'Checking out %s%s to %s', + url, + rev_display, + display_path(dest), + ) + cmd_args = make_command( + 'checkout', '-q', self.get_remote_call_options(), + rev_options.to_args(), url, dest, + ) + self.run_command(cmd_args) + + def switch(self, dest, url, rev_options): + # type: (str, HiddenText, RevOptions) -> None + cmd_args = make_command( + 'switch', self.get_remote_call_options(), rev_options.to_args(), + url, dest, + ) + self.run_command(cmd_args) + + def update(self, dest, url, rev_options): + # type: (str, HiddenText, RevOptions) -> None + cmd_args = make_command( + 'update', self.get_remote_call_options(), rev_options.to_args(), + dest, + ) + self.run_command(cmd_args) + + +vcs.register(Subversion) diff --git a/venv/lib/python3.7/site-packages/pip/_internal/vcs/versioncontrol.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/versioncontrol.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/vcs/versioncontrol.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/vcs/versioncontrol.py diff --git a/venv/lib/python3.7/site-packages/pip/_internal/wheel_builder.py b/venv.bak/lib/python3.7/site-packages/pip/_internal/wheel_builder.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_internal/wheel_builder.py rename to venv.bak/lib/python3.7/site-packages/pip/_internal/wheel_builder.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/__init__.py new file mode 100644 index 0000000..a0fcb8e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/__init__.py @@ -0,0 +1,109 @@ +""" +pip._vendor is for vendoring dependencies of pip to prevent needing pip to +depend on something external. + +Files inside of pip._vendor should be considered immutable and should only be +updated to versions from upstream. +""" +from __future__ import absolute_import + +import glob +import os.path +import sys + +# Downstream redistributors which have debundled our dependencies should also +# patch this value to be true. This will trigger the additional patching +# to cause things like "six" to be available as pip. +DEBUNDLED = False + +# By default, look in this directory for a bunch of .whl files which we will +# add to the beginning of sys.path before attempting to import anything. 
This +# is done to support downstream re-distributors like Debian and Fedora who +# wish to create their own Wheels for our dependencies to aid in debundling. +WHEEL_DIR = os.path.abspath(os.path.dirname(__file__)) + + +# Define a small helper function to alias our vendored modules to the real ones +# if the vendored ones do not exist. This idea of this was taken from +# https://github.com/kennethreitz/requests/pull/2567. +def vendored(modulename): + vendored_name = "{0}.{1}".format(__name__, modulename) + + try: + __import__(modulename, globals(), locals(), level=0) + except ImportError: + # We can just silently allow import failures to pass here. If we + # got to this point it means that ``import pip._vendor.whatever`` + # failed and so did ``import whatever``. Since we're importing this + # upfront in an attempt to alias imports, not erroring here will + # just mean we get a regular import error whenever pip *actually* + # tries to import one of these modules to use it, which actually + # gives us a better error message than we would have otherwise + # gotten. + pass + else: + sys.modules[vendored_name] = sys.modules[modulename] + base, head = vendored_name.rsplit(".", 1) + setattr(sys.modules[base], head, sys.modules[modulename]) + + +# If we're operating in a debundled setup, then we want to go ahead and trigger +# the aliasing of our vendored libraries as well as looking for wheels to add +# to our sys.path. This will cause all of this code to be a no-op typically +# however downstream redistributors can enable it in a consistent way across +# all platforms. +if DEBUNDLED: + # Actually look inside of WHEEL_DIR to find .whl files and add them to the + # front of our sys.path. + sys.path[:] = glob.glob(os.path.join(WHEEL_DIR, "*.whl")) + sys.path + + # Actually alias all of our vendored dependencies. + vendored("cachecontrol") + vendored("colorama") + vendored("contextlib2") + vendored("distlib") + vendored("distro") + vendored("html5lib") + vendored("six") + vendored("six.moves") + vendored("six.moves.urllib") + vendored("six.moves.urllib.parse") + vendored("packaging") + vendored("packaging.version") + vendored("packaging.specifiers") + vendored("pep517") + vendored("pkg_resources") + vendored("progress") + vendored("pytoml") + vendored("retrying") + vendored("requests") + vendored("requests.exceptions") + vendored("requests.packages") + vendored("requests.packages.urllib3") + vendored("requests.packages.urllib3._collections") + vendored("requests.packages.urllib3.connection") + vendored("requests.packages.urllib3.connectionpool") + vendored("requests.packages.urllib3.contrib") + vendored("requests.packages.urllib3.contrib.ntlmpool") + vendored("requests.packages.urllib3.contrib.pyopenssl") + vendored("requests.packages.urllib3.exceptions") + vendored("requests.packages.urllib3.fields") + vendored("requests.packages.urllib3.filepost") + vendored("requests.packages.urllib3.packages") + vendored("requests.packages.urllib3.packages.ordered_dict") + vendored("requests.packages.urllib3.packages.six") + vendored("requests.packages.urllib3.packages.ssl_match_hostname") + vendored("requests.packages.urllib3.packages.ssl_match_hostname." 
+ "_implementation") + vendored("requests.packages.urllib3.poolmanager") + vendored("requests.packages.urllib3.request") + vendored("requests.packages.urllib3.response") + vendored("requests.packages.urllib3.util") + vendored("requests.packages.urllib3.util.connection") + vendored("requests.packages.urllib3.util.request") + vendored("requests.packages.urllib3.util.response") + vendored("requests.packages.urllib3.util.retry") + vendored("requests.packages.urllib3.util.ssl_") + vendored("requests.packages.urllib3.util.timeout") + vendored("requests.packages.urllib3.util.url") + vendored("urllib3") diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..5ccc5f7 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/appdirs.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/appdirs.cpython-37.pyc new file mode 100644 index 0000000..af96f02 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/appdirs.cpython-37.pyc differ diff --git a/venv/lib/python3.7/site-packages/pip/_vendor/__pycache__/contextlib2.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/contextlib2.cpython-37.pyc similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_vendor/__pycache__/contextlib2.cpython-37.pyc rename to venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/contextlib2.cpython-37.pyc diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/distro.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/distro.cpython-37.pyc new file mode 100644 index 0000000..4a24cee Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/distro.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/ipaddress.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/ipaddress.cpython-37.pyc new file mode 100644 index 0000000..e2a98ad Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/ipaddress.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/pyparsing.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/pyparsing.cpython-37.pyc new file mode 100644 index 0000000..320ca21 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/pyparsing.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/retrying.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/retrying.cpython-37.pyc new file mode 100644 index 0000000..53ecee2 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/retrying.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/six.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/six.cpython-37.pyc new file mode 100644 index 0000000..278b1b4 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/__pycache__/six.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/appdirs.py 
b/venv.bak/lib/python3.7/site-packages/pip/_vendor/appdirs.py new file mode 100644 index 0000000..3a52b75 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/appdirs.py @@ -0,0 +1,639 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2005-2010 ActiveState Software Inc. +# Copyright (c) 2013 Eddy Petrișor + +"""Utilities for determining application-specific dirs. + +See for details and usage. +""" +# Dev Notes: +# - MSDN on where to store app data files: +# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120 +# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html +# - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html + +__version_info__ = (1, 4, 3) +__version__ = '.'.join(map(str, __version_info__)) + + +import sys +import os + +PY3 = sys.version_info[0] == 3 + +if PY3: + unicode = str + +if sys.platform.startswith('java'): + import platform + os_name = platform.java_ver()[3][0] + if os_name.startswith('Windows'): # "Windows XP", "Windows 7", etc. + system = 'win32' + elif os_name.startswith('Mac'): # "Mac OS X", etc. + system = 'darwin' + else: # "Linux", "SunOS", "FreeBSD", etc. + # Setting this to "linux2" is not ideal, but only Windows or Mac + # are actually checked for and the rest of the module expects + # *sys.platform* style strings. + system = 'linux2' +elif sys.platform == 'cli' and os.name == 'nt': + # Detect Windows in IronPython to match pip._internal.utils.compat.WINDOWS + # Discussion: + system = 'win32' +else: + system = sys.platform + + + +def user_data_dir(appname=None, appauthor=None, version=None, roaming=False): + r"""Return full path to the user-specific data dir for this application. + + "appname" is the name of application. + If None, just the system directory is returned. + "appauthor" (only used on Windows) is the name of the + appauthor or distributing body for this application. Typically + it is the owning company name. This falls back to appname. You may + pass False to disable it. + "version" is an optional version path element to append to the + path. You might want to use this if you want multiple versions + of your app to be able to run independently. If used, this + would typically be ".". + Only applied when appname is present. + "roaming" (boolean, default False) can be set True to use the Windows + roaming appdata directory. That means that for users on a Windows + network setup for roaming profiles, this user data will be + sync'd on login. See + + for a discussion of issues. + + Typical user data directories are: + Mac OS X: ~/Library/Application Support/ # or ~/.config/, if the other does not exist + Unix: ~/.local/share/ # or in $XDG_DATA_HOME, if defined + Win XP (not roaming): C:\Documents and Settings\\Application Data\\ + Win XP (roaming): C:\Documents and Settings\\Local Settings\Application Data\\ + Win 7 (not roaming): C:\Users\\AppData\Local\\ + Win 7 (roaming): C:\Users\\AppData\Roaming\\ + + For Unix, we follow the XDG spec and support $XDG_DATA_HOME. + That means, by default "~/.local/share/". 
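# A quick sketch of the defaults spelled out above (illustrative output,
# assuming a Linux machine with no XDG_* variables set; the vendored
# import path matches this tree):
from pip._vendor.appdirs import user_data_dir
print(user_data_dir("MyApp"))                 # e.g. /home/me/.local/share/MyApp
print(user_data_dir("MyApp", version="1.0"))  # e.g. /home/me/.local/share/MyApp/1.0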
+ """ + if system == "win32": + if appauthor is None: + appauthor = appname + const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA" + path = os.path.normpath(_get_win_folder(const)) + if appname: + if appauthor is not False: + path = os.path.join(path, appauthor, appname) + else: + path = os.path.join(path, appname) + elif system == 'darwin': + path = os.path.expanduser('~/Library/Application Support/') + if appname: + path = os.path.join(path, appname) + if not os.path.isdir(path): + path = os.path.expanduser('~/.config/') + if appname: + path = os.path.join(path, appname) + else: + path = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share")) + if appname: + path = os.path.join(path, appname) + if appname and version: + path = os.path.join(path, version) + return path + + +def site_data_dir(appname=None, appauthor=None, version=None, multipath=False): + r"""Return full path to the user-shared data dir for this application. + + "appname" is the name of application. + If None, just the system directory is returned. + "appauthor" (only used on Windows) is the name of the + appauthor or distributing body for this application. Typically + it is the owning company name. This falls back to appname. You may + pass False to disable it. + "version" is an optional version path element to append to the + path. You might want to use this if you want multiple versions + of your app to be able to run independently. If used, this + would typically be ".". + Only applied when appname is present. + "multipath" is an optional parameter only applicable to *nix + which indicates that the entire list of data dirs should be + returned. By default, the first item from XDG_DATA_DIRS is + returned, or '/usr/local/share/', + if XDG_DATA_DIRS is not set + + Typical site data directories are: + Mac OS X: /Library/Application Support/ + Unix: /usr/local/share/ or /usr/share/ + Win XP: C:\Documents and Settings\All Users\Application Data\\ + Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.) + Win 7: C:\ProgramData\\ # Hidden, but writeable on Win 7. + + For Unix, this is using the $XDG_DATA_DIRS[0] default. + + WARNING: Do not use this on Windows. See the Vista-Fail note above for why. + """ + if system == "win32": + if appauthor is None: + appauthor = appname + path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA")) + if appname: + if appauthor is not False: + path = os.path.join(path, appauthor, appname) + else: + path = os.path.join(path, appname) + elif system == 'darwin': + path = os.path.expanduser('/Library/Application Support') + if appname: + path = os.path.join(path, appname) + else: + # XDG default for $XDG_DATA_DIRS + # only first, if multipath is False + path = os.getenv('XDG_DATA_DIRS', + os.pathsep.join(['/usr/local/share', '/usr/share'])) + pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)] + if appname: + if version: + appname = os.path.join(appname, version) + pathlist = [os.path.join(x, appname) for x in pathlist] + + if multipath: + path = os.pathsep.join(pathlist) + else: + path = pathlist[0] + return path + + if appname and version: + path = os.path.join(path, version) + return path + + +def user_config_dir(appname=None, appauthor=None, version=None, roaming=False): + r"""Return full path to the user-specific config dir for this application. + + "appname" is the name of application. + If None, just the system directory is returned. 
+ "appauthor" (only used on Windows) is the name of the + appauthor or distributing body for this application. Typically + it is the owning company name. This falls back to appname. You may + pass False to disable it. + "version" is an optional version path element to append to the + path. You might want to use this if you want multiple versions + of your app to be able to run independently. If used, this + would typically be ".". + Only applied when appname is present. + "roaming" (boolean, default False) can be set True to use the Windows + roaming appdata directory. That means that for users on a Windows + network setup for roaming profiles, this user data will be + sync'd on login. See + + for a discussion of issues. + + Typical user config directories are: + Mac OS X: same as user_data_dir + Unix: ~/.config/ # or in $XDG_CONFIG_HOME, if defined + Win *: same as user_data_dir + + For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME. + That means, by default "~/.config/". + """ + if system in ["win32", "darwin"]: + path = user_data_dir(appname, appauthor, None, roaming) + else: + path = os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config")) + if appname: + path = os.path.join(path, appname) + if appname and version: + path = os.path.join(path, version) + return path + + +# for the discussion regarding site_config_dir locations +# see +def site_config_dir(appname=None, appauthor=None, version=None, multipath=False): + r"""Return full path to the user-shared data dir for this application. + + "appname" is the name of application. + If None, just the system directory is returned. + "appauthor" (only used on Windows) is the name of the + appauthor or distributing body for this application. Typically + it is the owning company name. This falls back to appname. You may + pass False to disable it. + "version" is an optional version path element to append to the + path. You might want to use this if you want multiple versions + of your app to be able to run independently. If used, this + would typically be ".". + Only applied when appname is present. + "multipath" is an optional parameter only applicable to *nix + which indicates that the entire list of config dirs should be + returned. By default, the first item from XDG_CONFIG_DIRS is + returned, or '/etc/xdg/', if XDG_CONFIG_DIRS is not set + + Typical site config directories are: + Mac OS X: same as site_data_dir + Unix: /etc/xdg/ or $XDG_CONFIG_DIRS[i]/ for each value in + $XDG_CONFIG_DIRS + Win *: same as site_data_dir + Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.) + + For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False + + WARNING: Do not use this on Windows. See the Vista-Fail note above for why. 
+ """ + if system in ["win32", "darwin"]: + path = site_data_dir(appname, appauthor) + if appname and version: + path = os.path.join(path, version) + else: + # XDG default for $XDG_CONFIG_DIRS (missing or empty) + # see + # only first, if multipath is False + path = os.getenv('XDG_CONFIG_DIRS') or '/etc/xdg' + pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep) if x] + if appname: + if version: + appname = os.path.join(appname, version) + pathlist = [os.path.join(x, appname) for x in pathlist] + # always look in /etc directly as well + pathlist.append('/etc') + + if multipath: + path = os.pathsep.join(pathlist) + else: + path = pathlist[0] + return path + + +def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True): + r"""Return full path to the user-specific cache dir for this application. + + "appname" is the name of application. + If None, just the system directory is returned. + "appauthor" (only used on Windows) is the name of the + appauthor or distributing body for this application. Typically + it is the owning company name. This falls back to appname. You may + pass False to disable it. + "version" is an optional version path element to append to the + path. You might want to use this if you want multiple versions + of your app to be able to run independently. If used, this + would typically be ".". + Only applied when appname is present. + "opinion" (boolean) can be False to disable the appending of + "Cache" to the base app data dir for Windows. See + discussion below. + + Typical user cache directories are: + Mac OS X: ~/Library/Caches/ + Unix: ~/.cache/ (XDG default) + Win XP: C:\Documents and Settings\\Local Settings\Application Data\\\Cache + Vista: C:\Users\\AppData\Local\\\Cache + + On Windows the only suggestion in the MSDN docs is that local settings go in + the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming + app data dir (the default returned by `user_data_dir` above). Apps typically + put cache data somewhere *under* the given dir here. Some examples: + ...\Mozilla\Firefox\Profiles\\Cache + ...\Acme\SuperApp\Cache\1.0 + OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value. + This can be disabled with the `opinion=False` option. + """ + if system == "win32": + if appauthor is None: + appauthor = appname + path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA")) + # When using Python 2, return paths as bytes on Windows like we do on + # other operating systems. See helper function docs for more details. + if not PY3 and isinstance(path, unicode): + path = _win_path_to_bytes(path) + if appname: + if appauthor is not False: + path = os.path.join(path, appauthor, appname) + else: + path = os.path.join(path, appname) + if opinion: + path = os.path.join(path, "Cache") + elif system == 'darwin': + path = os.path.expanduser('~/Library/Caches') + if appname: + path = os.path.join(path, appname) + else: + path = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache')) + if appname: + path = os.path.join(path, appname) + if appname and version: + path = os.path.join(path, version) + return path + + +def user_state_dir(appname=None, appauthor=None, version=None, roaming=False): + r"""Return full path to the user-specific state dir for this application. + + "appname" is the name of application. + If None, just the system directory is returned. + "appauthor" (only used on Windows) is the name of the + appauthor or distributing body for this application. 
Typically + it is the owning company name. This falls back to appname. You may + pass False to disable it. + "version" is an optional version path element to append to the + path. You might want to use this if you want multiple versions + of your app to be able to run independently. If used, this + would typically be ".". + Only applied when appname is present. + "roaming" (boolean, default False) can be set True to use the Windows + roaming appdata directory. That means that for users on a Windows + network setup for roaming profiles, this user data will be + sync'd on login. See + + for a discussion of issues. + + Typical user state directories are: + Mac OS X: same as user_data_dir + Unix: ~/.local/state/ # or in $XDG_STATE_HOME, if defined + Win *: same as user_data_dir + + For Unix, we follow this Debian proposal + to extend the XDG spec and support $XDG_STATE_HOME. + + That means, by default "~/.local/state/". + """ + if system in ["win32", "darwin"]: + path = user_data_dir(appname, appauthor, None, roaming) + else: + path = os.getenv('XDG_STATE_HOME', os.path.expanduser("~/.local/state")) + if appname: + path = os.path.join(path, appname) + if appname and version: + path = os.path.join(path, version) + return path + + +def user_log_dir(appname=None, appauthor=None, version=None, opinion=True): + r"""Return full path to the user-specific log dir for this application. + + "appname" is the name of application. + If None, just the system directory is returned. + "appauthor" (only used on Windows) is the name of the + appauthor or distributing body for this application. Typically + it is the owning company name. This falls back to appname. You may + pass False to disable it. + "version" is an optional version path element to append to the + path. You might want to use this if you want multiple versions + of your app to be able to run independently. If used, this + would typically be ".". + Only applied when appname is present. + "opinion" (boolean) can be False to disable the appending of + "Logs" to the base app data dir for Windows, and "log" to the + base cache dir for Unix. See discussion below. + + Typical user log directories are: + Mac OS X: ~/Library/Logs/ + Unix: ~/.cache//log # or under $XDG_CACHE_HOME if defined + Win XP: C:\Documents and Settings\\Local Settings\Application Data\\\Logs + Vista: C:\Users\\AppData\Local\\\Logs + + On Windows the only suggestion in the MSDN docs is that local settings + go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in + examples of what some windows apps use for a logs dir.) + + OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA` + value for Windows and appends "log" to the user cache dir for Unix. + This can be disabled with the `opinion=False` option. 
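# Sketch of the "opinion" flag described above (illustrative paths, assuming
# Linux defaults where the log dir hangs off the user cache dir):
from pip._vendor.appdirs import user_log_dir
print(user_log_dir("MyApp"))                 # e.g. /home/me/.cache/MyApp/log
print(user_log_dir("MyApp", opinion=False))  # e.g. /home/me/.cache/MyApp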
+ """ + if system == "darwin": + path = os.path.join( + os.path.expanduser('~/Library/Logs'), + appname) + elif system == "win32": + path = user_data_dir(appname, appauthor, version) + version = False + if opinion: + path = os.path.join(path, "Logs") + else: + path = user_cache_dir(appname, appauthor, version) + version = False + if opinion: + path = os.path.join(path, "log") + if appname and version: + path = os.path.join(path, version) + return path + + +class AppDirs(object): + """Convenience wrapper for getting application dirs.""" + def __init__(self, appname=None, appauthor=None, version=None, + roaming=False, multipath=False): + self.appname = appname + self.appauthor = appauthor + self.version = version + self.roaming = roaming + self.multipath = multipath + + @property + def user_data_dir(self): + return user_data_dir(self.appname, self.appauthor, + version=self.version, roaming=self.roaming) + + @property + def site_data_dir(self): + return site_data_dir(self.appname, self.appauthor, + version=self.version, multipath=self.multipath) + + @property + def user_config_dir(self): + return user_config_dir(self.appname, self.appauthor, + version=self.version, roaming=self.roaming) + + @property + def site_config_dir(self): + return site_config_dir(self.appname, self.appauthor, + version=self.version, multipath=self.multipath) + + @property + def user_cache_dir(self): + return user_cache_dir(self.appname, self.appauthor, + version=self.version) + + @property + def user_state_dir(self): + return user_state_dir(self.appname, self.appauthor, + version=self.version) + + @property + def user_log_dir(self): + return user_log_dir(self.appname, self.appauthor, + version=self.version) + + +#---- internal support stuff + +def _get_win_folder_from_registry(csidl_name): + """This is a fallback technique at best. I'm not sure if using the + registry for this guarantees us the correct answer for all CSIDL_* + names. + """ + if PY3: + import winreg as _winreg + else: + import _winreg + + shell_folder_name = { + "CSIDL_APPDATA": "AppData", + "CSIDL_COMMON_APPDATA": "Common AppData", + "CSIDL_LOCAL_APPDATA": "Local AppData", + }[csidl_name] + + key = _winreg.OpenKey( + _winreg.HKEY_CURRENT_USER, + r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders" + ) + dir, type = _winreg.QueryValueEx(key, shell_folder_name) + return dir + + +def _get_win_folder_with_pywin32(csidl_name): + from win32com.shell import shellcon, shell + dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0) + # Try to make this a unicode path because SHGetFolderPath does + # not return unicode strings when there is unicode data in the + # path. + try: + dir = unicode(dir) + + # Downgrade to short path name if have highbit chars. See + # . + has_high_char = False + for c in dir: + if ord(c) > 255: + has_high_char = True + break + if has_high_char: + try: + import win32api + dir = win32api.GetShortPathName(dir) + except ImportError: + pass + except UnicodeError: + pass + return dir + + +def _get_win_folder_with_ctypes(csidl_name): + import ctypes + + csidl_const = { + "CSIDL_APPDATA": 26, + "CSIDL_COMMON_APPDATA": 35, + "CSIDL_LOCAL_APPDATA": 28, + }[csidl_name] + + buf = ctypes.create_unicode_buffer(1024) + ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf) + + # Downgrade to short path name if have highbit chars. See + # . 
+ has_high_char = False + for c in buf: + if ord(c) > 255: + has_high_char = True + break + if has_high_char: + buf2 = ctypes.create_unicode_buffer(1024) + if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024): + buf = buf2 + + return buf.value + +def _get_win_folder_with_jna(csidl_name): + import array + from com.sun import jna + from com.sun.jna.platform import win32 + + buf_size = win32.WinDef.MAX_PATH * 2 + buf = array.zeros('c', buf_size) + shell = win32.Shell32.INSTANCE + shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf) + dir = jna.Native.toString(buf.tostring()).rstrip("\0") + + # Downgrade to short path name if have highbit chars. See + # . + has_high_char = False + for c in dir: + if ord(c) > 255: + has_high_char = True + break + if has_high_char: + buf = array.zeros('c', buf_size) + kernel = win32.Kernel32.INSTANCE + if kernel.GetShortPathName(dir, buf, buf_size): + dir = jna.Native.toString(buf.tostring()).rstrip("\0") + + return dir + +if system == "win32": + try: + from ctypes import windll + _get_win_folder = _get_win_folder_with_ctypes + except ImportError: + try: + import com.sun.jna + _get_win_folder = _get_win_folder_with_jna + except ImportError: + _get_win_folder = _get_win_folder_from_registry + + +def _win_path_to_bytes(path): + """Encode Windows paths to bytes. Only used on Python 2. + + Motivation is to be consistent with other operating systems where paths + are also returned as bytes. This avoids problems mixing bytes and Unicode + elsewhere in the codebase. For more details and discussion see + . + + If encoding using ASCII and MBCS fails, return the original Unicode path. + """ + for encoding in ('ASCII', 'MBCS'): + try: + return path.encode(encoding) + except (UnicodeEncodeError, LookupError): + pass + return path + + +#---- self test code + +if __name__ == "__main__": + appname = "MyApp" + appauthor = "MyCompany" + + props = ("user_data_dir", + "user_config_dir", + "user_cache_dir", + "user_state_dir", + "user_log_dir", + "site_data_dir", + "site_config_dir") + + print("-- app dirs %s --" % __version__) + + print("-- app dirs (with optional 'version')") + dirs = AppDirs(appname, appauthor, version="1.0") + for prop in props: + print("%s: %s" % (prop, getattr(dirs, prop))) + + print("\n-- app dirs (without optional 'version')") + dirs = AppDirs(appname, appauthor) + for prop in props: + print("%s: %s" % (prop, getattr(dirs, prop))) + + print("\n-- app dirs (without optional 'appauthor')") + dirs = AppDirs(appname) + for prop in props: + print("%s: %s" % (prop, getattr(dirs, prop))) + + print("\n-- app dirs (with disabled 'appauthor')") + dirs = AppDirs(appname, appauthor=False) + for prop in props: + print("%s: %s" % (prop, getattr(dirs, prop))) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__init__.py new file mode 100644 index 0000000..a1bbbbe --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__init__.py @@ -0,0 +1,11 @@ +"""CacheControl import Interface. + +Make it easy to import from cachecontrol without long namespaces. 
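# A minimal sketch of the short import path this module provides, using the
# CacheControl wrapper imported just below (the URL is a placeholder and
# network access is assumed):
from pip._vendor import requests
from pip._vendor.cachecontrol import CacheControl

sess = CacheControl(requests.Session())  # responses now cached in memory
resp = sess.get("https://example.org/")  # a repeat GET can be answered from cache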
+""" +__author__ = "Eric Larson" +__email__ = "eric@ionrock.org" +__version__ = "0.12.6" + +from .wrapper import CacheControl +from .adapter import CacheControlAdapter +from .controller import CacheController diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..e84e402 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/_cmd.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/_cmd.cpython-37.pyc new file mode 100644 index 0000000..2c36b4c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/_cmd.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/adapter.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/adapter.cpython-37.pyc new file mode 100644 index 0000000..1a7270c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/adapter.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/cache.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/cache.cpython-37.pyc new file mode 100644 index 0000000..81efc4d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/cache.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/compat.cpython-37.pyc new file mode 100644 index 0000000..afd87e9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/compat.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/controller.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/controller.cpython-37.pyc new file mode 100644 index 0000000..e0751f3 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/controller.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/filewrapper.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/filewrapper.cpython-37.pyc new file mode 100644 index 0000000..fb31e33 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/filewrapper.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/heuristics.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/heuristics.cpython-37.pyc new file mode 100644 index 0000000..f973730 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/heuristics.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/serialize.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/serialize.cpython-37.pyc new file mode 100644 index 
0000000..a0e8b4d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/serialize.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/wrapper.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/wrapper.cpython-37.pyc new file mode 100644 index 0000000..da1ec83 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/__pycache__/wrapper.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/_cmd.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/_cmd.py new file mode 100644 index 0000000..f1e0ad9 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/_cmd.py @@ -0,0 +1,57 @@ +import logging + +from pip._vendor import requests + +from pip._vendor.cachecontrol.adapter import CacheControlAdapter +from pip._vendor.cachecontrol.cache import DictCache +from pip._vendor.cachecontrol.controller import logger + +from argparse import ArgumentParser + + +def setup_logging(): + logger.setLevel(logging.DEBUG) + handler = logging.StreamHandler() + logger.addHandler(handler) + + +def get_session(): + adapter = CacheControlAdapter( + DictCache(), cache_etags=True, serializer=None, heuristic=None + ) + sess = requests.Session() + sess.mount("http://", adapter) + sess.mount("https://", adapter) + + sess.cache_controller = adapter.controller + return sess + + +def get_args(): + parser = ArgumentParser() + parser.add_argument("url", help="The URL to try and cache") + return parser.parse_args() + + +def main(args=None): + args = get_args() + sess = get_session() + + # Make a request to get a response + resp = sess.get(args.url) + + # Turn on logging + setup_logging() + + # try setting the cache + sess.cache_controller.cache_response(resp.request, resp.raw) + + # Now try to get it + if sess.cache_controller.cached_request(resp.request): + print("Cached!") + else: + print("Not cached :(") + + +if __name__ == "__main__": + main() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/adapter.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/adapter.py new file mode 100644 index 0000000..815650e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/adapter.py @@ -0,0 +1,133 @@ +import types +import functools +import zlib + +from pip._vendor.requests.adapters import HTTPAdapter + +from .controller import CacheController +from .cache import DictCache +from .filewrapper import CallbackFileWrapper + + +class CacheControlAdapter(HTTPAdapter): + invalidating_methods = {"PUT", "DELETE"} + + def __init__( + self, + cache=None, + cache_etags=True, + controller_class=None, + serializer=None, + heuristic=None, + cacheable_methods=None, + *args, + **kw + ): + super(CacheControlAdapter, self).__init__(*args, **kw) + self.cache = DictCache() if cache is None else cache + self.heuristic = heuristic + self.cacheable_methods = cacheable_methods or ("GET",) + + controller_factory = controller_class or CacheController + self.controller = controller_factory( + self.cache, cache_etags=cache_etags, serializer=serializer + ) + + def send(self, request, cacheable_methods=None, **kw): + """ + Send a request. Use the request information to see if it + exists in the cache and cache the response if we need to and can. 
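# Sketch mirroring get_session() in _cmd.py above: mount the adapter on a
# requests session so that send() can consult the cache for GET requests.
from pip._vendor import requests
from pip._vendor.cachecontrol.adapter import CacheControlAdapter
from pip._vendor.cachecontrol.cache import DictCache

adapter = CacheControlAdapter(DictCache(), cache_etags=True)
sess = requests.Session()
sess.mount("http://", adapter)
sess.mount("https://", adapter)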
+ """ + cacheable = cacheable_methods or self.cacheable_methods + if request.method in cacheable: + try: + cached_response = self.controller.cached_request(request) + except zlib.error: + cached_response = None + if cached_response: + return self.build_response(request, cached_response, from_cache=True) + + # check for etags and add headers if appropriate + request.headers.update(self.controller.conditional_headers(request)) + + resp = super(CacheControlAdapter, self).send(request, **kw) + + return resp + + def build_response( + self, request, response, from_cache=False, cacheable_methods=None + ): + """ + Build a response by making a request or using the cache. + + This will end up calling send and returning a potentially + cached response + """ + cacheable = cacheable_methods or self.cacheable_methods + if not from_cache and request.method in cacheable: + # Check for any heuristics that might update headers + # before trying to cache. + if self.heuristic: + response = self.heuristic.apply(response) + + # apply any expiration heuristics + if response.status == 304: + # We must have sent an ETag request. This could mean + # that we've been expired already or that we simply + # have an etag. In either case, we want to try and + # update the cache if that is the case. + cached_response = self.controller.update_cached_response( + request, response + ) + + if cached_response is not response: + from_cache = True + + # We are done with the server response, read a + # possible response body (compliant servers will + # not return one, but we cannot be 100% sure) and + # release the connection back to the pool. + response.read(decode_content=False) + response.release_conn() + + response = cached_response + + # We always cache the 301 responses + elif response.status == 301: + self.controller.cache_response(request, response) + else: + # Wrap the response file with a wrapper that will cache the + # response when the stream has been consumed. + response._fp = CallbackFileWrapper( + response._fp, + functools.partial( + self.controller.cache_response, request, response + ), + ) + if response.chunked: + super_update_chunk_length = response._update_chunk_length + + def _update_chunk_length(self): + super_update_chunk_length() + if self.chunk_left == 0: + self._fp._close() + + response._update_chunk_length = types.MethodType( + _update_chunk_length, response + ) + + resp = super(CacheControlAdapter, self).build_response(request, response) + + # See if we should invalidate the cache. + if request.method in self.invalidating_methods and resp.ok: + cache_url = self.controller.cache_url(request.url) + self.cache.delete(cache_url) + + # Give the request a from_cache attr to let people use it + resp.from_cache = from_cache + + return resp + + def close(self): + self.cache.close() + super(CacheControlAdapter, self).close() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/cache.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/cache.py new file mode 100644 index 0000000..94e0773 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/cache.py @@ -0,0 +1,39 @@ +""" +The cache object API for implementing caches. The default is a thread +safe in-memory dictionary. 
+""" +from threading import Lock + + +class BaseCache(object): + + def get(self, key): + raise NotImplementedError() + + def set(self, key, value): + raise NotImplementedError() + + def delete(self, key): + raise NotImplementedError() + + def close(self): + pass + + +class DictCache(BaseCache): + + def __init__(self, init_dict=None): + self.lock = Lock() + self.data = init_dict or {} + + def get(self, key): + return self.data.get(key, None) + + def set(self, key, value): + with self.lock: + self.data.update({key: value}) + + def delete(self, key): + with self.lock: + if key in self.data: + self.data.pop(key) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/__init__.py new file mode 100644 index 0000000..0e1658f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/__init__.py @@ -0,0 +1,2 @@ +from .file_cache import FileCache # noqa +from .redis_cache import RedisCache # noqa diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..f39d59f Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/__pycache__/file_cache.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/__pycache__/file_cache.cpython-37.pyc new file mode 100644 index 0000000..93d436e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/__pycache__/file_cache.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/__pycache__/redis_cache.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/__pycache__/redis_cache.cpython-37.pyc new file mode 100644 index 0000000..a6f33ab Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/__pycache__/redis_cache.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/file_cache.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/file_cache.py new file mode 100644 index 0000000..607b945 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/file_cache.py @@ -0,0 +1,146 @@ +import hashlib +import os +from textwrap import dedent + +from ..cache import BaseCache +from ..controller import CacheController + +try: + FileNotFoundError +except NameError: + # py2.X + FileNotFoundError = (IOError, OSError) + + +def _secure_open_write(filename, fmode): + # We only want to write to this file, so open it in write only mode + flags = os.O_WRONLY + + # os.O_CREAT | os.O_EXCL will fail if the file already exists, so we only + # will open *new* files. + # We specify this because we want to ensure that the mode we pass is the + # mode of the file. + flags |= os.O_CREAT | os.O_EXCL + + # Do not follow symlinks to prevent someone from making a symlink that + # we follow and insecurely open a cache file. 
+ if hasattr(os, "O_NOFOLLOW"): + flags |= os.O_NOFOLLOW + + # On Windows we'll mark this file as binary + if hasattr(os, "O_BINARY"): + flags |= os.O_BINARY + + # Before we open our file, we want to delete any existing file that is + # there + try: + os.remove(filename) + except (IOError, OSError): + # The file must not exist already, so we can just skip ahead to opening + pass + + # Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a + # race condition happens between the os.remove and this line, that an + # error will be raised. Because we utilize a lockfile this should only + # happen if someone is attempting to attack us. + fd = os.open(filename, flags, fmode) + try: + return os.fdopen(fd, "wb") + + except: + # An error occurred wrapping our FD in a file object + os.close(fd) + raise + + +class FileCache(BaseCache): + + def __init__( + self, + directory, + forever=False, + filemode=0o0600, + dirmode=0o0700, + use_dir_lock=None, + lock_class=None, + ): + + if use_dir_lock is not None and lock_class is not None: + raise ValueError("Cannot use use_dir_lock and lock_class together") + + try: + from lockfile import LockFile + from lockfile.mkdirlockfile import MkdirLockFile + except ImportError: + notice = dedent( + """ + NOTE: In order to use the FileCache you must have + lockfile installed. You can install it via pip: + pip install lockfile + """ + ) + raise ImportError(notice) + + else: + if use_dir_lock: + lock_class = MkdirLockFile + + elif lock_class is None: + lock_class = LockFile + + self.directory = directory + self.forever = forever + self.filemode = filemode + self.dirmode = dirmode + self.lock_class = lock_class + + @staticmethod + def encode(x): + return hashlib.sha224(x.encode()).hexdigest() + + def _fn(self, name): + # NOTE: This method should not change as some may depend on it. + # See: https://github.com/ionrock/cachecontrol/issues/63 + hashed = self.encode(name) + parts = list(hashed[:5]) + [hashed] + return os.path.join(self.directory, *parts) + + def get(self, key): + name = self._fn(key) + try: + with open(name, "rb") as fh: + return fh.read() + + except FileNotFoundError: + return None + + def set(self, key, value): + name = self._fn(key) + + # Make sure the directory exists + try: + os.makedirs(os.path.dirname(name), self.dirmode) + except (IOError, OSError): + pass + + with self.lock_class(name) as lock: + # Write our actual file + with _secure_open_write(lock.path, self.filemode) as fh: + fh.write(value) + + def delete(self, key): + name = self._fn(key) + if not self.forever: + try: + os.remove(name) + except FileNotFoundError: + pass + + +def url_to_file_path(url, filecache): + """Return the file cache path based on the URL. + + This does not ensure the file exists! 
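# Sketch of the helpers defined here: a FileCache rooted at a scratch
# directory plus url_to_file_path() to see where a given URL would be stored.
# The directory name and URL are placeholders, and constructing FileCache
# assumes the optional lockfile dependency is installed, as required above.
from pip._vendor.cachecontrol.caches.file_cache import FileCache, url_to_file_path

fc = FileCache(".web_cache")
print(url_to_file_path("https://example.org/index.html", fc))
# .web_cache/<first five hash chars as nested dirs>/<full sha224 digest>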
+ """ + key = CacheController.cache_url(url) + return filecache._fn(key) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/redis_cache.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/redis_cache.py new file mode 100644 index 0000000..ed705ce --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/caches/redis_cache.py @@ -0,0 +1,33 @@ +from __future__ import division + +from datetime import datetime +from pip._vendor.cachecontrol.cache import BaseCache + + +class RedisCache(BaseCache): + + def __init__(self, conn): + self.conn = conn + + def get(self, key): + return self.conn.get(key) + + def set(self, key, value, expires=None): + if not expires: + self.conn.set(key, value) + else: + expires = expires - datetime.utcnow() + self.conn.setex(key, int(expires.total_seconds()), value) + + def delete(self, key): + self.conn.delete(key) + + def clear(self): + """Helper for clearing all the keys in a database. Use with + caution!""" + for key in self.conn.keys(): + self.conn.delete(key) + + def close(self): + """Redis uses connection pooling, no need to close the connection.""" + pass diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/compat.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/compat.py new file mode 100644 index 0000000..33b5aed --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/compat.py @@ -0,0 +1,29 @@ +try: + from urllib.parse import urljoin +except ImportError: + from urlparse import urljoin + + +try: + import cPickle as pickle +except ImportError: + import pickle + + +# Handle the case where the requests module has been patched to not have +# urllib3 bundled as part of its source. +try: + from pip._vendor.requests.packages.urllib3.response import HTTPResponse +except ImportError: + from pip._vendor.urllib3.response import HTTPResponse + +try: + from pip._vendor.requests.packages.urllib3.util import is_fp_closed +except ImportError: + from pip._vendor.urllib3.util import is_fp_closed + +# Replicate some six behaviour +try: + text_type = unicode +except NameError: + text_type = str diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/controller.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/controller.py new file mode 100644 index 0000000..dafe55c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/controller.py @@ -0,0 +1,376 @@ +""" +The httplib2 algorithms ported for use with requests. +""" +import logging +import re +import calendar +import time +from email.utils import parsedate_tz + +from pip._vendor.requests.structures import CaseInsensitiveDict + +from .cache import DictCache +from .serialize import Serializer + + +logger = logging.getLogger(__name__) + +URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") + + +def parse_uri(uri): + """Parses a URI using the regex given in Appendix B of RFC 3986. + + (scheme, authority, path, query, fragment) = parse_uri(uri) + """ + groups = URI.match(uri).groups() + return (groups[1], groups[3], groups[4], groups[6], groups[8]) + + +class CacheController(object): + """An interface to see if request should cached or not. 
+ """ + + def __init__( + self, cache=None, cache_etags=True, serializer=None, status_codes=None + ): + self.cache = DictCache() if cache is None else cache + self.cache_etags = cache_etags + self.serializer = serializer or Serializer() + self.cacheable_status_codes = status_codes or (200, 203, 300, 301) + + @classmethod + def _urlnorm(cls, uri): + """Normalize the URL to create a safe key for the cache""" + (scheme, authority, path, query, fragment) = parse_uri(uri) + if not scheme or not authority: + raise Exception("Only absolute URIs are allowed. uri = %s" % uri) + + scheme = scheme.lower() + authority = authority.lower() + + if not path: + path = "/" + + # Could do syntax based normalization of the URI before + # computing the digest. See Section 6.2.2 of Std 66. + request_uri = query and "?".join([path, query]) or path + defrag_uri = scheme + "://" + authority + request_uri + + return defrag_uri + + @classmethod + def cache_url(cls, uri): + return cls._urlnorm(uri) + + def parse_cache_control(self, headers): + known_directives = { + # https://tools.ietf.org/html/rfc7234#section-5.2 + "max-age": (int, True), + "max-stale": (int, False), + "min-fresh": (int, True), + "no-cache": (None, False), + "no-store": (None, False), + "no-transform": (None, False), + "only-if-cached": (None, False), + "must-revalidate": (None, False), + "public": (None, False), + "private": (None, False), + "proxy-revalidate": (None, False), + "s-maxage": (int, True), + } + + cc_headers = headers.get("cache-control", headers.get("Cache-Control", "")) + + retval = {} + + for cc_directive in cc_headers.split(","): + if not cc_directive.strip(): + continue + + parts = cc_directive.split("=", 1) + directive = parts[0].strip() + + try: + typ, required = known_directives[directive] + except KeyError: + logger.debug("Ignoring unknown cache-control directive: %s", directive) + continue + + if not typ or not required: + retval[directive] = None + if typ: + try: + retval[directive] = typ(parts[1].strip()) + except IndexError: + if required: + logger.debug( + "Missing value for cache-control " "directive: %s", + directive, + ) + except ValueError: + logger.debug( + "Invalid value for cache-control directive " "%s, must be %s", + directive, + typ.__name__, + ) + + return retval + + def cached_request(self, request): + """ + Return a cached response if it exists in the cache, otherwise + return False. + """ + cache_url = self.cache_url(request.url) + logger.debug('Looking up "%s" in the cache', cache_url) + cc = self.parse_cache_control(request.headers) + + # Bail out if the request insists on fresh data + if "no-cache" in cc: + logger.debug('Request header has "no-cache", cache bypassed') + return False + + if "max-age" in cc and cc["max-age"] == 0: + logger.debug('Request header has "max_age" as 0, cache bypassed') + return False + + # Request allows serving from the cache, let's see if we find something + cache_data = self.cache.get(cache_url) + if cache_data is None: + logger.debug("No cache entry available") + return False + + # Check whether it can be deserialized + resp = self.serializer.loads(request, cache_data) + if not resp: + logger.warning("Cache entry deserialization failed, entry ignored") + return False + + # If we have a cached 301, return it immediately. We don't + # need to test our response for other headers b/c it is + # intrinsically "cacheable" as it is Permanent. 
+ # See: + # https://tools.ietf.org/html/rfc7231#section-6.4.2 + # + # Client can try to refresh the value by repeating the request + # with cache busting headers as usual (ie no-cache). + if resp.status == 301: + msg = ( + 'Returning cached "301 Moved Permanently" response ' + "(ignoring date and etag information)" + ) + logger.debug(msg) + return resp + + headers = CaseInsensitiveDict(resp.headers) + if not headers or "date" not in headers: + if "etag" not in headers: + # Without date or etag, the cached response can never be used + # and should be deleted. + logger.debug("Purging cached response: no date or etag") + self.cache.delete(cache_url) + logger.debug("Ignoring cached response: no date") + return False + + now = time.time() + date = calendar.timegm(parsedate_tz(headers["date"])) + current_age = max(0, now - date) + logger.debug("Current age based on date: %i", current_age) + + # TODO: There is an assumption that the result will be a + # urllib3 response object. This may not be best since we + # could probably avoid instantiating or constructing the + # response until we know we need it. + resp_cc = self.parse_cache_control(headers) + + # determine freshness + freshness_lifetime = 0 + + # Check the max-age pragma in the cache control header + if "max-age" in resp_cc: + freshness_lifetime = resp_cc["max-age"] + logger.debug("Freshness lifetime from max-age: %i", freshness_lifetime) + + # If there isn't a max-age, check for an expires header + elif "expires" in headers: + expires = parsedate_tz(headers["expires"]) + if expires is not None: + expire_time = calendar.timegm(expires) - date + freshness_lifetime = max(0, expire_time) + logger.debug("Freshness lifetime from expires: %i", freshness_lifetime) + + # Determine if we are setting freshness limit in the + # request. Note, this overrides what was in the response. + if "max-age" in cc: + freshness_lifetime = cc["max-age"] + logger.debug( + "Freshness lifetime from request max-age: %i", freshness_lifetime + ) + + if "min-fresh" in cc: + min_fresh = cc["min-fresh"] + # adjust our current age by our min fresh + current_age += min_fresh + logger.debug("Adjusted current age from min-fresh: %i", current_age) + + # Return entry if it is fresh enough + if freshness_lifetime > current_age: + logger.debug('The response is "fresh", returning cached response') + logger.debug("%i > %i", freshness_lifetime, current_age) + return resp + + # we're not fresh. If we don't have an Etag, clear it out + if "etag" not in headers: + logger.debug('The cached response is "stale" with no etag, purging') + self.cache.delete(cache_url) + + # return the original handler + return False + + def conditional_headers(self, request): + cache_url = self.cache_url(request.url) + resp = self.serializer.loads(request, self.cache.get(cache_url)) + new_headers = {} + + if resp: + headers = CaseInsensitiveDict(resp.headers) + + if "etag" in headers: + new_headers["If-None-Match"] = headers["ETag"] + + if "last-modified" in headers: + new_headers["If-Modified-Since"] = headers["Last-Modified"] + + return new_headers + + def cache_response(self, request, response, body=None, status_codes=None): + """ + Algorithm for caching requests. + + This assumes a requests Response object. 
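# Sketch of parse_cache_control() on a plain header dict: known directives come
# back as directive -> parsed value (None for valueless ones) and unknown
# directives are dropped with a debug message.
from pip._vendor.cachecontrol.controller import CacheController

cc = CacheController().parse_cache_control(
    {"cache-control": "max-age=3600, no-store, x-unknown=1"}
)
print(cc)  # {'max-age': 3600, 'no-store': None}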
+ """ + # From httplib2: Don't cache 206's since we aren't going to + # handle byte range requests + cacheable_status_codes = status_codes or self.cacheable_status_codes + if response.status not in cacheable_status_codes: + logger.debug( + "Status code %s not in %s", response.status, cacheable_status_codes + ) + return + + response_headers = CaseInsensitiveDict(response.headers) + + # If we've been given a body, our response has a Content-Length, that + # Content-Length is valid then we can check to see if the body we've + # been given matches the expected size, and if it doesn't we'll just + # skip trying to cache it. + if ( + body is not None + and "content-length" in response_headers + and response_headers["content-length"].isdigit() + and int(response_headers["content-length"]) != len(body) + ): + return + + cc_req = self.parse_cache_control(request.headers) + cc = self.parse_cache_control(response_headers) + + cache_url = self.cache_url(request.url) + logger.debug('Updating cache with response from "%s"', cache_url) + + # Delete it from the cache if we happen to have it stored there + no_store = False + if "no-store" in cc: + no_store = True + logger.debug('Response header has "no-store"') + if "no-store" in cc_req: + no_store = True + logger.debug('Request header has "no-store"') + if no_store and self.cache.get(cache_url): + logger.debug('Purging existing cache entry to honor "no-store"') + self.cache.delete(cache_url) + if no_store: + return + + # https://tools.ietf.org/html/rfc7234#section-4.1: + # A Vary header field-value of "*" always fails to match. + # Storing such a response leads to a deserialization warning + # during cache lookup and is not allowed to ever be served, + # so storing it can be avoided. + if "*" in response_headers.get("vary", ""): + logger.debug('Response header has "Vary: *"') + return + + # If we've been given an etag, then keep the response + if self.cache_etags and "etag" in response_headers: + logger.debug("Caching due to etag") + self.cache.set( + cache_url, self.serializer.dumps(request, response, body=body) + ) + + # Add to the cache any 301s. We do this before looking that + # the Date headers. + elif response.status == 301: + logger.debug("Caching permanant redirect") + self.cache.set(cache_url, self.serializer.dumps(request, response)) + + # Add to the cache if the response headers demand it. If there + # is no date header then we can't do anything about expiring + # the cache. + elif "date" in response_headers: + # cache when there is a max-age > 0 + if "max-age" in cc and cc["max-age"] > 0: + logger.debug("Caching b/c date exists and max-age > 0") + self.cache.set( + cache_url, self.serializer.dumps(request, response, body=body) + ) + + # If the request can expire, it means we should cache it + # in the meantime. + elif "expires" in response_headers: + if response_headers["expires"]: + logger.debug("Caching b/c of expires header") + self.cache.set( + cache_url, self.serializer.dumps(request, response, body=body) + ) + + def update_cached_response(self, request, response): + """On a 304 we will get a new set of headers that we want to + update our cached value with, assuming we have one. + + This should only ever be called when we've sent an ETag and + gotten a 304 as the response. 
+ """ + cache_url = self.cache_url(request.url) + + cached_response = self.serializer.loads(request, self.cache.get(cache_url)) + + if not cached_response: + # we didn't have a cached response + return response + + # Lets update our headers with the headers from the new request: + # http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1 + # + # The server isn't supposed to send headers that would make + # the cached body invalid. But... just in case, we'll be sure + # to strip out ones we know that might be problmatic due to + # typical assumptions. + excluded_headers = ["content-length"] + + cached_response.headers.update( + dict( + (k, v) + for k, v in response.headers.items() + if k.lower() not in excluded_headers + ) + ) + + # we want a 200 b/c we have content via the cache + cached_response.status = 200 + + # update our cache + self.cache.set(cache_url, self.serializer.dumps(request, cached_response)) + + return cached_response diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/filewrapper.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/filewrapper.py new file mode 100644 index 0000000..30ed4c5 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/filewrapper.py @@ -0,0 +1,80 @@ +from io import BytesIO + + +class CallbackFileWrapper(object): + """ + Small wrapper around a fp object which will tee everything read into a + buffer, and when that file is closed it will execute a callback with the + contents of that buffer. + + All attributes are proxied to the underlying file object. + + This class uses members with a double underscore (__) leading prefix so as + not to accidentally shadow an attribute. + """ + + def __init__(self, fp, callback): + self.__buf = BytesIO() + self.__fp = fp + self.__callback = callback + + def __getattr__(self, name): + # The vaguaries of garbage collection means that self.__fp is + # not always set. By using __getattribute__ and the private + # name[0] allows looking up the attribute value and raising an + # AttributeError when it doesn't exist. This stop thigns from + # infinitely recursing calls to getattr in the case where + # self.__fp hasn't been set. + # + # [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers + fp = self.__getattribute__("_CallbackFileWrapper__fp") + return getattr(fp, name) + + def __is_fp_closed(self): + try: + return self.__fp.fp is None + + except AttributeError: + pass + + try: + return self.__fp.closed + + except AttributeError: + pass + + # We just don't cache it then. + # TODO: Add some logging here... + return False + + def _close(self): + if self.__callback: + self.__callback(self.__buf.getvalue()) + + # We assign this to None here, because otherwise we can get into + # really tricky problems where the CPython interpreter dead locks + # because the callback is holding a reference to something which + # has a __del__ method. Setting this to None breaks the cycle + # and allows the garbage collector to do it's thing normally. + self.__callback = None + + def read(self, amt=None): + data = self.__fp.read(amt) + self.__buf.write(data) + if self.__is_fp_closed(): + self._close() + + return data + + def _safe_read(self, amt): + data = self.__fp._safe_read(amt) + if amt == 2 and data == b"\r\n": + # urllib executes this read to toss the CRLF at the end + # of the chunk. 
+ return data + + self.__buf.write(data) + if self.__is_fp_closed(): + self._close() + + return data diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/heuristics.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/heuristics.py new file mode 100644 index 0000000..6c0e979 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/heuristics.py @@ -0,0 +1,135 @@ +import calendar +import time + +from email.utils import formatdate, parsedate, parsedate_tz + +from datetime import datetime, timedelta + +TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT" + + +def expire_after(delta, date=None): + date = date or datetime.utcnow() + return date + delta + + +def datetime_to_header(dt): + return formatdate(calendar.timegm(dt.timetuple())) + + +class BaseHeuristic(object): + + def warning(self, response): + """ + Return a valid 1xx warning header value describing the cache + adjustments. + + The response is provided too allow warnings like 113 + http://tools.ietf.org/html/rfc7234#section-5.5.4 where we need + to explicitly say response is over 24 hours old. + """ + return '110 - "Response is Stale"' + + def update_headers(self, response): + """Update the response headers with any new headers. + + NOTE: This SHOULD always include some Warning header to + signify that the response was cached by the client, not + by way of the provided headers. + """ + return {} + + def apply(self, response): + updated_headers = self.update_headers(response) + + if updated_headers: + response.headers.update(updated_headers) + warning_header_value = self.warning(response) + if warning_header_value is not None: + response.headers.update({"Warning": warning_header_value}) + + return response + + +class OneDayCache(BaseHeuristic): + """ + Cache the response by providing an expires 1 day in the + future. + """ + + def update_headers(self, response): + headers = {} + + if "expires" not in response.headers: + date = parsedate(response.headers["date"]) + expires = expire_after(timedelta(days=1), date=datetime(*date[:6])) + headers["expires"] = datetime_to_header(expires) + headers["cache-control"] = "public" + return headers + + +class ExpiresAfter(BaseHeuristic): + """ + Cache **all** requests for a defined time period. + """ + + def __init__(self, **kw): + self.delta = timedelta(**kw) + + def update_headers(self, response): + expires = expire_after(self.delta) + return {"expires": datetime_to_header(expires), "cache-control": "public"} + + def warning(self, response): + tmpl = "110 - Automatically cached for %s. Response might be stale" + return tmpl % self.delta + + +class LastModified(BaseHeuristic): + """ + If there is no Expires header already, fall back on Last-Modified + using the heuristic from + http://tools.ietf.org/html/rfc7234#section-4.2.2 + to calculate a reasonable value. + + Firefox also does something like this per + https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ + http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397 + Unlike mozilla we limit this to 24-hr. 
+ """ + cacheable_by_default_statuses = { + 200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501 + } + + def update_headers(self, resp): + headers = resp.headers + + if "expires" in headers: + return {} + + if "cache-control" in headers and headers["cache-control"] != "public": + return {} + + if resp.status not in self.cacheable_by_default_statuses: + return {} + + if "date" not in headers or "last-modified" not in headers: + return {} + + date = calendar.timegm(parsedate_tz(headers["date"])) + last_modified = parsedate(headers["last-modified"]) + if date is None or last_modified is None: + return {} + + now = time.time() + current_age = max(0, now - date) + delta = date - calendar.timegm(last_modified) + freshness_lifetime = max(0, min(delta / 10, 24 * 3600)) + if freshness_lifetime <= current_age: + return {} + + expires = date + freshness_lifetime + return {"expires": time.strftime(TIME_FMT, time.gmtime(expires))} + + def warning(self, resp): + return None diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/serialize.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/serialize.py new file mode 100644 index 0000000..3b6ec2d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/serialize.py @@ -0,0 +1,188 @@ +import base64 +import io +import json +import zlib + +from pip._vendor import msgpack +from pip._vendor.requests.structures import CaseInsensitiveDict + +from .compat import HTTPResponse, pickle, text_type + + +def _b64_decode_bytes(b): + return base64.b64decode(b.encode("ascii")) + + +def _b64_decode_str(s): + return _b64_decode_bytes(s).decode("utf8") + + +class Serializer(object): + + def dumps(self, request, response, body=None): + response_headers = CaseInsensitiveDict(response.headers) + + if body is None: + body = response.read(decode_content=False) + + # NOTE: 99% sure this is dead code. I'm only leaving it + # here b/c I don't have a test yet to prove + # it. Basically, before using + # `cachecontrol.filewrapper.CallbackFileWrapper`, + # this made an effort to reset the file handle. The + # `CallbackFileWrapper` short circuits this code by + # setting the body as the content is consumed, the + # result being a `body` argument is *always* passed + # into cache_response, and in turn, + # `Serializer.dump`. + response._fp = io.BytesIO(body) + + # NOTE: This is all a bit weird, but it's really important that on + # Python 2.x these objects are unicode and not str, even when + # they contain only ascii. The problem here is that msgpack + # understands the difference between unicode and bytes and we + # have it set to differentiate between them, however Python 2 + # doesn't know the difference. Forcing these to unicode will be + # enough to have msgpack know the difference. 
+ data = { + u"response": { + u"body": body, + u"headers": dict( + (text_type(k), text_type(v)) for k, v in response.headers.items() + ), + u"status": response.status, + u"version": response.version, + u"reason": text_type(response.reason), + u"strict": response.strict, + u"decode_content": response.decode_content, + } + } + + # Construct our vary headers + data[u"vary"] = {} + if u"vary" in response_headers: + varied_headers = response_headers[u"vary"].split(",") + for header in varied_headers: + header = text_type(header).strip() + header_value = request.headers.get(header, None) + if header_value is not None: + header_value = text_type(header_value) + data[u"vary"][header] = header_value + + return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)]) + + def loads(self, request, data): + # Short circuit if we've been given an empty set of data + if not data: + return + + # Determine what version of the serializer the data was serialized + # with + try: + ver, data = data.split(b",", 1) + except ValueError: + ver = b"cc=0" + + # Make sure that our "ver" is actually a version and isn't a false + # positive from a , being in the data stream. + if ver[:3] != b"cc=": + data = ver + data + ver = b"cc=0" + + # Get the version number out of the cc=N + ver = ver.split(b"=", 1)[-1].decode("ascii") + + # Dispatch to the actual load method for the given version + try: + return getattr(self, "_loads_v{}".format(ver))(request, data) + + except AttributeError: + # This is a version we don't have a loads function for, so we'll + # just treat it as a miss and return None + return + + def prepare_response(self, request, cached): + """Verify our vary headers match and construct a real urllib3 + HTTPResponse object. + """ + # Special case the '*' Vary value as it means we cannot actually + # determine if the cached response is suitable for this request. + # This case is also handled in the controller code when creating + # a cache entry, but is left here for backwards compatibility. + if "*" in cached.get("vary", {}): + return + + # Ensure that the Vary headers for the cached response match our + # request + for header, value in cached.get("vary", {}).items(): + if request.headers.get(header, None) != value: + return + + body_raw = cached["response"].pop("body") + + headers = CaseInsensitiveDict(data=cached["response"]["headers"]) + if headers.get("transfer-encoding", "") == "chunked": + headers.pop("transfer-encoding") + + cached["response"]["headers"] = headers + + try: + body = io.BytesIO(body_raw) + except TypeError: + # This can happen if cachecontrol serialized to v1 format (pickle) + # using Python 2. A Python 2 str(byte string) will be unpickled as + # a Python 3 str (unicode string), which will cause the above to + # fail with: + # + # TypeError: 'str' does not support the buffer interface + body = io.BytesIO(body_raw.encode("utf8")) + + return HTTPResponse(body=body, preload_content=False, **cached["response"]) + + def _loads_v0(self, request, data): + # The original legacy cache data. This doesn't contain enough + # information to construct everything we need, so we'll treat this as + # a miss. 
+ return + + def _loads_v1(self, request, data): + try: + cached = pickle.loads(data) + except ValueError: + return + + return self.prepare_response(request, cached) + + def _loads_v2(self, request, data): + try: + cached = json.loads(zlib.decompress(data).decode("utf8")) + except (ValueError, zlib.error): + return + + # We need to decode the items that we've base64 encoded + cached["response"]["body"] = _b64_decode_bytes(cached["response"]["body"]) + cached["response"]["headers"] = dict( + (_b64_decode_str(k), _b64_decode_str(v)) + for k, v in cached["response"]["headers"].items() + ) + cached["response"]["reason"] = _b64_decode_str(cached["response"]["reason"]) + cached["vary"] = dict( + (_b64_decode_str(k), _b64_decode_str(v) if v is not None else v) + for k, v in cached["vary"].items() + ) + + return self.prepare_response(request, cached) + + def _loads_v3(self, request, data): + # Due to Python 2 encoding issues, it's impossible to know for sure + # exactly how to load v3 entries, thus we'll treat these as a miss so + # that they get rewritten out as v4 entries. + return + + def _loads_v4(self, request, data): + try: + cached = msgpack.loads(data, raw=False) + except ValueError: + return + + return self.prepare_response(request, cached) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/wrapper.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/wrapper.py new file mode 100644 index 0000000..d8e6fc6 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/cachecontrol/wrapper.py @@ -0,0 +1,29 @@ +from .adapter import CacheControlAdapter +from .cache import DictCache + + +def CacheControl( + sess, + cache=None, + cache_etags=True, + serializer=None, + heuristic=None, + controller_class=None, + adapter_class=None, + cacheable_methods=None, +): + + cache = DictCache() if cache is None else cache + adapter_class = adapter_class or CacheControlAdapter + adapter = adapter_class( + cache, + cache_etags=cache_etags, + serializer=serializer, + heuristic=heuristic, + controller_class=controller_class, + cacheable_methods=cacheable_methods, + ) + sess.mount("http://", adapter) + sess.mount("https://", adapter) + + return sess diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/__init__.py new file mode 100644 index 0000000..0d59a05 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/__init__.py @@ -0,0 +1,3 @@ +from .core import where + +__version__ = "2019.11.28" diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/__main__.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/__main__.py new file mode 100644 index 0000000..ae2aff5 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/__main__.py @@ -0,0 +1,2 @@ +from pip._vendor.certifi import where +print(where()) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..a006a64 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/__pycache__/__main__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/__pycache__/__main__.cpython-37.pyc new file mode 100644 index 0000000..7d84b7d Binary files 
/dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/__pycache__/__main__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/__pycache__/core.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/__pycache__/core.cpython-37.pyc new file mode 100644 index 0000000..13cb69f Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/__pycache__/core.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/cacert.pem b/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/cacert.pem new file mode 100644 index 0000000..a4758ef --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/cacert.pem @@ -0,0 +1,4602 @@ + +# Issuer: CN=GlobalSign Root CA O=GlobalSign nv-sa OU=Root CA +# Subject: CN=GlobalSign Root CA O=GlobalSign nv-sa OU=Root CA +# Label: "GlobalSign Root CA" +# Serial: 4835703278459707669005204 +# MD5 Fingerprint: 3e:45:52:15:09:51:92:e1:b7:5d:37:9f:b1:87:29:8a +# SHA1 Fingerprint: b1:bc:96:8b:d4:f4:9d:62:2a:a8:9a:81:f2:15:01:52:a4:1d:82:9c +# SHA256 Fingerprint: eb:d4:10:40:e4:bb:3e:c7:42:c9:e3:81:d3:1e:f2:a4:1a:48:b6:68:5c:96:e7:ce:f3:c1:df:6c:d4:33:1c:99 +-----BEGIN CERTIFICATE----- +MIIDdTCCAl2gAwIBAgILBAAAAAABFUtaw5QwDQYJKoZIhvcNAQEFBQAwVzELMAkG +A1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExEDAOBgNVBAsTB1Jv +b3QgQ0ExGzAZBgNVBAMTEkdsb2JhbFNpZ24gUm9vdCBDQTAeFw05ODA5MDExMjAw +MDBaFw0yODAxMjgxMjAwMDBaMFcxCzAJBgNVBAYTAkJFMRkwFwYDVQQKExBHbG9i +YWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRswGQYDVQQDExJHbG9iYWxT +aWduIFJvb3QgQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDaDuaZ +jc6j40+Kfvvxi4Mla+pIH/EqsLmVEQS98GPR4mdmzxzdzxtIK+6NiY6arymAZavp +xy0Sy6scTHAHoT0KMM0VjU/43dSMUBUc71DuxC73/OlS8pF94G3VNTCOXkNz8kHp +1Wrjsok6Vjk4bwY8iGlbKk3Fp1S4bInMm/k8yuX9ifUSPJJ4ltbcdG6TRGHRjcdG +snUOhugZitVtbNV4FpWi6cgKOOvyJBNPc1STE4U6G7weNLWLBYy5d4ux2x8gkasJ +U26Qzns3dLlwR5EiUWMWea6xrkEmCMgZK9FGqkjWZCrXgzT/LCrBbBlDSgeF59N8 +9iFo7+ryUp9/k5DPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8E +BTADAQH/MB0GA1UdDgQWBBRge2YaRQ2XyolQL30EzTSo//z9SzANBgkqhkiG9w0B +AQUFAAOCAQEA1nPnfE920I2/7LqivjTFKDK1fPxsnCwrvQmeU79rXqoRSLblCKOz +yj1hTdNGCbM+w6DjY1Ub8rrvrTnhQ7k4o+YviiY776BQVvnGCv04zcQLcFGUl5gE +38NflNUVyRRBnMRddWQVDf9VMOyGj/8N7yy5Y0b2qvzfvGn9LhJIZJrglfCm7ymP +AbEVtQwdpf5pLGkkeB6zpxxxYu7KyJesF12KwvhHhm4qxFYxldBniYUr+WymXUad +DKqC5JlR3XC321Y9YeRq4VzW9v493kHMB65jUr9TU/Qr6cf9tveCX4XSQRjbgbME +HMUfpIBvFSDJ3gyICh3WZlXi/EjJKSZp4A== +-----END CERTIFICATE----- + +# Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R2 +# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R2 +# Label: "GlobalSign Root CA - R2" +# Serial: 4835703278459682885658125 +# MD5 Fingerprint: 94:14:77:7e:3e:5e:fd:8f:30:bd:41:b0:cf:e7:d0:30 +# SHA1 Fingerprint: 75:e0:ab:b6:13:85:12:27:1c:04:f8:5f:dd:de:38:e4:b7:24:2e:fe +# SHA256 Fingerprint: ca:42:dd:41:74:5f:d0:b8:1e:b9:02:36:2c:f9:d8:bf:71:9d:a1:bd:1b:1e:fc:94:6f:5b:4c:99:f4:2c:1b:9e +-----BEGIN CERTIFICATE----- +MIIDujCCAqKgAwIBAgILBAAAAAABD4Ym5g0wDQYJKoZIhvcNAQEFBQAwTDEgMB4G +A1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjIxEzARBgNVBAoTCkdsb2JhbFNp +Z24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMDYxMjE1MDgwMDAwWhcNMjExMjE1 +MDgwMDAwWjBMMSAwHgYDVQQLExdHbG9iYWxTaWduIFJvb3QgQ0EgLSBSMjETMBEG +A1UEChMKR2xvYmFsU2lnbjETMBEGA1UEAxMKR2xvYmFsU2lnbjCCASIwDQYJKoZI +hvcNAQEBBQADggEPADCCAQoCggEBAKbPJA6+Lm8omUVCxKs+IVSbC9N/hHD6ErPL +v4dfxn+G07IwXNb9rfF73OX4YJYJkhD10FPe+3t+c4isUoh7SqbKSaZeqKeMWhG8 
+eoLrvozps6yWJQeXSpkqBy+0Hne/ig+1AnwblrjFuTosvNYSuetZfeLQBoZfXklq +tTleiDTsvHgMCJiEbKjNS7SgfQx5TfC4LcshytVsW33hoCmEofnTlEnLJGKRILzd +C9XZzPnqJworc5HGnRusyMvo4KD0L5CLTfuwNhv2GXqF4G3yYROIXJ/gkwpRl4pa +zq+r1feqCapgvdzZX99yqWATXgAByUr6P6TqBwMhAo6CygPCm48CAwEAAaOBnDCB +mTAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUm+IH +V2ccHsBqBt5ZtJot39wZhi4wNgYDVR0fBC8wLTAroCmgJ4YlaHR0cDovL2NybC5n +bG9iYWxzaWduLm5ldC9yb290LXIyLmNybDAfBgNVHSMEGDAWgBSb4gdXZxwewGoG +3lm0mi3f3BmGLjANBgkqhkiG9w0BAQUFAAOCAQEAmYFThxxol4aR7OBKuEQLq4Gs +J0/WwbgcQ3izDJr86iw8bmEbTUsp9Z8FHSbBuOmDAGJFtqkIk7mpM0sYmsL4h4hO +291xNBrBVNpGP+DTKqttVCL1OmLNIG+6KYnX3ZHu01yiPqFbQfXf5WRDLenVOavS +ot+3i9DAgBkcRcAtjOj4LaR0VknFBbVPFd5uRHg5h6h+u/N5GJG79G+dwfCMNYxd +AfvDbbnvRG15RjF+Cv6pgsH/76tuIMRQyV+dTZsXjAzlAcmgQWpzU/qlULRuJQ/7 +TBj0/VLZjmmx6BEP3ojY+x1J96relc8geMJgEtslQIxq/H5COEBkEveegeGTLg== +-----END CERTIFICATE----- + +# Issuer: CN=VeriSign Class 3 Public Primary Certification Authority - G3 O=VeriSign, Inc. OU=VeriSign Trust Network/(c) 1999 VeriSign, Inc. - For authorized use only +# Subject: CN=VeriSign Class 3 Public Primary Certification Authority - G3 O=VeriSign, Inc. OU=VeriSign Trust Network/(c) 1999 VeriSign, Inc. - For authorized use only +# Label: "Verisign Class 3 Public Primary Certification Authority - G3" +# Serial: 206684696279472310254277870180966723415 +# MD5 Fingerprint: cd:68:b6:a7:c7:c4:ce:75:e0:1d:4f:57:44:61:92:09 +# SHA1 Fingerprint: 13:2d:0d:45:53:4b:69:97:cd:b2:d5:c3:39:e2:55:76:60:9b:5c:c6 +# SHA256 Fingerprint: eb:04:cf:5e:b1:f3:9a:fa:76:2f:2b:b1:20:f2:96:cb:a5:20:c1:b9:7d:b1:58:95:65:b8:1c:b9:a1:7b:72:44 +-----BEGIN CERTIFICATE----- +MIIEGjCCAwICEQCbfgZJoz5iudXukEhxKe9XMA0GCSqGSIb3DQEBBQUAMIHKMQsw +CQYDVQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZl +cmlTaWduIFRydXN0IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAxOTk5IFZlcmlTaWdu +LCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxRTBDBgNVBAMTPFZlcmlT +aWduIENsYXNzIDMgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3Jp +dHkgLSBHMzAeFw05OTEwMDEwMDAwMDBaFw0zNjA3MTYyMzU5NTlaMIHKMQswCQYD +VQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZlcmlT +aWduIFRydXN0IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAxOTk5IFZlcmlTaWduLCBJ +bmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxRTBDBgNVBAMTPFZlcmlTaWdu +IENsYXNzIDMgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkg +LSBHMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMu6nFL8eB8aHm8b +N3O9+MlrlBIwT/A2R/XQkQr1F8ilYcEWQE37imGQ5XYgwREGfassbqb1EUGO+i2t +KmFZpGcmTNDovFJbcCAEWNF6yaRpvIMXZK0Fi7zQWM6NjPXr8EJJC52XJ2cybuGu +kxUccLwgTS8Y3pKI6GyFVxEa6X7jJhFUokWWVYPKMIno3Nij7SqAP395ZVc+FSBm +CC+Vk7+qRy+oRpfwEuL+wgorUeZ25rdGt+INpsyow0xZVYnm6FNcHOqd8GIWC6fJ +Xwzw3sJ2zq/3avL6QaaiMxTJ5Xpj055iN9WFZZ4O5lMkdBteHRJTW8cs54NJOxWu +imi5V5cCAwEAATANBgkqhkiG9w0BAQUFAAOCAQEAERSWwauSCPc/L8my/uRan2Te +2yFPhpk0djZX3dAVL8WtfxUfN2JzPtTnX84XA9s1+ivbrmAJXx5fj267Cz3qWhMe +DGBvtcC1IyIuBwvLqXTLR7sdwdela8wv0kL9Sd2nic9TutoAWii/gt/4uhMdUIaC +/Y4wjylGsB49Ndo4YhYYSq3mtlFs3q9i6wHQHiT+eo8SGhJouPtmmRQURVyu565p +F4ErWjfJXir0xuKhXFSbplQAz/DxwceYMBo7Nhbbo27q/a2ywtrvAkcTisDxszGt +TxzhT5yvDwyd93gN2PQ1VoDat20Xj50egWTh/sVFuq1ruQp6Tk9LhO5L8X3dEQ== +-----END CERTIFICATE----- + +# Issuer: CN=Entrust.net Certification Authority (2048) O=Entrust.net OU=www.entrust.net/CPS_2048 incorp. by ref. (limits liab.)/(c) 1999 Entrust.net Limited +# Subject: CN=Entrust.net Certification Authority (2048) O=Entrust.net OU=www.entrust.net/CPS_2048 incorp. by ref. 
(limits liab.)/(c) 1999 Entrust.net Limited +# Label: "Entrust.net Premium 2048 Secure Server CA" +# Serial: 946069240 +# MD5 Fingerprint: ee:29:31:bc:32:7e:9a:e6:e8:b5:f7:51:b4:34:71:90 +# SHA1 Fingerprint: 50:30:06:09:1d:97:d4:f5:ae:39:f7:cb:e7:92:7d:7d:65:2d:34:31 +# SHA256 Fingerprint: 6d:c4:71:72:e0:1c:bc:b0:bf:62:58:0d:89:5f:e2:b8:ac:9a:d4:f8:73:80:1e:0c:10:b9:c8:37:d2:1e:b1:77 +-----BEGIN CERTIFICATE----- +MIIEKjCCAxKgAwIBAgIEOGPe+DANBgkqhkiG9w0BAQUFADCBtDEUMBIGA1UEChML +RW50cnVzdC5uZXQxQDA+BgNVBAsUN3d3dy5lbnRydXN0Lm5ldC9DUFNfMjA0OCBp +bmNvcnAuIGJ5IHJlZi4gKGxpbWl0cyBsaWFiLikxJTAjBgNVBAsTHChjKSAxOTk5 +IEVudHJ1c3QubmV0IExpbWl0ZWQxMzAxBgNVBAMTKkVudHJ1c3QubmV0IENlcnRp +ZmljYXRpb24gQXV0aG9yaXR5ICgyMDQ4KTAeFw05OTEyMjQxNzUwNTFaFw0yOTA3 +MjQxNDE1MTJaMIG0MRQwEgYDVQQKEwtFbnRydXN0Lm5ldDFAMD4GA1UECxQ3d3d3 +LmVudHJ1c3QubmV0L0NQU18yMDQ4IGluY29ycC4gYnkgcmVmLiAobGltaXRzIGxp +YWIuKTElMCMGA1UECxMcKGMpIDE5OTkgRW50cnVzdC5uZXQgTGltaXRlZDEzMDEG +A1UEAxMqRW50cnVzdC5uZXQgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkgKDIwNDgp +MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArU1LqRKGsuqjIAcVFmQq +K0vRvwtKTY7tgHalZ7d4QMBzQshowNtTK91euHaYNZOLGp18EzoOH1u3Hs/lJBQe +sYGpjX24zGtLA/ECDNyrpUAkAH90lKGdCCmziAv1h3edVc3kw37XamSrhRSGlVuX +MlBvPci6Zgzj/L24ScF2iUkZ/cCovYmjZy/Gn7xxGWC4LeksyZB2ZnuU4q941mVT +XTzWnLLPKQP5L6RQstRIzgUyVYr9smRMDuSYB3Xbf9+5CFVghTAp+XtIpGmG4zU/ +HoZdenoVve8AjhUiVBcAkCaTvA5JaJG/+EfTnZVCwQ5N328mz8MYIWJmQ3DW1cAH +4QIDAQABo0IwQDAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNV +HQ4EFgQUVeSB0RGAvtiJuQijMfmhJAkWuXAwDQYJKoZIhvcNAQEFBQADggEBADub +j1abMOdTmXx6eadNl9cZlZD7Bh/KM3xGY4+WZiT6QBshJ8rmcnPyT/4xmf3IDExo +U8aAghOY+rat2l098c5u9hURlIIM7j+VrxGrD9cv3h8Dj1csHsm7mhpElesYT6Yf +zX1XEC+bBAlahLVu2B064dae0Wx5XnkcFMXj0EyTO2U87d89vqbllRrDtRnDvV5b +u/8j72gZyxKTJ1wDLW8w0B62GqzeWvfRqqgnpv55gcR5mTNXuhKwqeBCbJPKVt7+ +bYQLCIt+jerXmCHG8+c8eS9enNFMFY3h7CI3zJpDC5fcgJCNs2ebb0gIFVbPv/Er +fF6adulZkMV8gzURZVE= +-----END CERTIFICATE----- + +# Issuer: CN=Baltimore CyberTrust Root O=Baltimore OU=CyberTrust +# Subject: CN=Baltimore CyberTrust Root O=Baltimore OU=CyberTrust +# Label: "Baltimore CyberTrust Root" +# Serial: 33554617 +# MD5 Fingerprint: ac:b6:94:a5:9c:17:e0:d7:91:52:9b:b1:97:06:a6:e4 +# SHA1 Fingerprint: d4:de:20:d0:5e:66:fc:53:fe:1a:50:88:2c:78:db:28:52:ca:e4:74 +# SHA256 Fingerprint: 16:af:57:a9:f6:76:b0:ab:12:60:95:aa:5e:ba:de:f2:2a:b3:11:19:d6:44:ac:95:cd:4b:93:db:f3:f2:6a:eb +-----BEGIN CERTIFICATE----- +MIIDdzCCAl+gAwIBAgIEAgAAuTANBgkqhkiG9w0BAQUFADBaMQswCQYDVQQGEwJJ +RTESMBAGA1UEChMJQmFsdGltb3JlMRMwEQYDVQQLEwpDeWJlclRydXN0MSIwIAYD +VQQDExlCYWx0aW1vcmUgQ3liZXJUcnVzdCBSb290MB4XDTAwMDUxMjE4NDYwMFoX +DTI1MDUxMjIzNTkwMFowWjELMAkGA1UEBhMCSUUxEjAQBgNVBAoTCUJhbHRpbW9y +ZTETMBEGA1UECxMKQ3liZXJUcnVzdDEiMCAGA1UEAxMZQmFsdGltb3JlIEN5YmVy +VHJ1c3QgUm9vdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAKMEuyKr +mD1X6CZymrV51Cni4eiVgLGw41uOKymaZN+hXe2wCQVt2yguzmKiYv60iNoS6zjr +IZ3AQSsBUnuId9Mcj8e6uYi1agnnc+gRQKfRzMpijS3ljwumUNKoUMMo6vWrJYeK +mpYcqWe4PwzV9/lSEy/CG9VwcPCPwBLKBsua4dnKM3p31vjsufFoREJIE9LAwqSu +XmD+tqYF/LTdB1kC1FkYmGP1pWPgkAx9XbIGevOF6uvUA65ehD5f/xXtabz5OTZy +dc93Uk3zyZAsuT3lySNTPx8kmCFcB5kpvcY67Oduhjprl3RjM71oGDHweI12v/ye +jl0qhqdNkNwnGjkCAwEAAaNFMEMwHQYDVR0OBBYEFOWdWTCCR1jMrPoIVDaGezq1 +BE3wMBIGA1UdEwEB/wQIMAYBAf8CAQMwDgYDVR0PAQH/BAQDAgEGMA0GCSqGSIb3 +DQEBBQUAA4IBAQCFDF2O5G9RaEIFoN27TyclhAO992T9Ldcw46QQF+vaKSm2eT92 +9hkTI7gQCvlYpNRhcL0EYWoSihfVCr3FvDB81ukMJY2GQE/szKN+OMY3EU/t3Wgx +jkzSswF07r51XgdIGn9w/xZchMB5hbgF/X++ZRGjD8ACtPhSNzkE1akxehi/oCr0 +Epn3o0WC4zxe9Z2etciefC7IpJ5OCBRLbf1wbWsaY71k5h+3zvDyny67G7fyUIhz 
+ksLi4xaNmjICq44Y3ekQEe5+NauQrz4wlHrQMz2nZQ/1/I6eYs9HRCwBXbsdtTLS +R9I4LtD+gdwyah617jzV/OeBHRnDJELqYzmp +-----END CERTIFICATE----- + +# Issuer: CN=AddTrust External CA Root O=AddTrust AB OU=AddTrust External TTP Network +# Subject: CN=AddTrust External CA Root O=AddTrust AB OU=AddTrust External TTP Network +# Label: "AddTrust External Root" +# Serial: 1 +# MD5 Fingerprint: 1d:35:54:04:85:78:b0:3f:42:42:4d:bf:20:73:0a:3f +# SHA1 Fingerprint: 02:fa:f3:e2:91:43:54:68:60:78:57:69:4d:f5:e4:5b:68:85:18:68 +# SHA256 Fingerprint: 68:7f:a4:51:38:22:78:ff:f0:c8:b1:1f:8d:43:d5:76:67:1c:6e:b2:bc:ea:b4:13:fb:83:d9:65:d0:6d:2f:f2 +-----BEGIN CERTIFICATE----- +MIIENjCCAx6gAwIBAgIBATANBgkqhkiG9w0BAQUFADBvMQswCQYDVQQGEwJTRTEU +MBIGA1UEChMLQWRkVHJ1c3QgQUIxJjAkBgNVBAsTHUFkZFRydXN0IEV4dGVybmFs +IFRUUCBOZXR3b3JrMSIwIAYDVQQDExlBZGRUcnVzdCBFeHRlcm5hbCBDQSBSb290 +MB4XDTAwMDUzMDEwNDgzOFoXDTIwMDUzMDEwNDgzOFowbzELMAkGA1UEBhMCU0Ux +FDASBgNVBAoTC0FkZFRydXN0IEFCMSYwJAYDVQQLEx1BZGRUcnVzdCBFeHRlcm5h +bCBUVFAgTmV0d29yazEiMCAGA1UEAxMZQWRkVHJ1c3QgRXh0ZXJuYWwgQ0EgUm9v +dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALf3GjPm8gAELTngTlvt +H7xsD821+iO2zt6bETOXpClMfZOfvUq8k+0DGuOPz+VtUFrWlymUWoCwSXrbLpX9 +uMq/NzgtHj6RQa1wVsfwTz/oMp50ysiQVOnGXw94nZpAPA6sYapeFI+eh6FqUNzX +mk6vBbOmcZSccbNQYArHE504B4YCqOmoaSYYkKtMsE8jqzpPhNjfzp/haW+710LX +a0Tkx63ubUFfclpxCDezeWWkWaCUN/cALw3CknLa0Dhy2xSoRcRdKn23tNbE7qzN +E0S3ySvdQwAl+mG5aWpYIxG3pzOPVnVZ9c0p10a3CitlttNCbxWyuHv77+ldU9U0 +WicCAwEAAaOB3DCB2TAdBgNVHQ4EFgQUrb2YejS0Jvf6xCZU7wO94CTLVBowCwYD +VR0PBAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wgZkGA1UdIwSBkTCBjoAUrb2YejS0 +Jvf6xCZU7wO94CTLVBqhc6RxMG8xCzAJBgNVBAYTAlNFMRQwEgYDVQQKEwtBZGRU +cnVzdCBBQjEmMCQGA1UECxMdQWRkVHJ1c3QgRXh0ZXJuYWwgVFRQIE5ldHdvcmsx +IjAgBgNVBAMTGUFkZFRydXN0IEV4dGVybmFsIENBIFJvb3SCAQEwDQYJKoZIhvcN +AQEFBQADggEBALCb4IUlwtYj4g+WBpKdQZic2YR5gdkeWxQHIzZlj7DYd7usQWxH +YINRsPkyPef89iYTx4AWpb9a/IfPeHmJIZriTAcKhjW88t5RxNKWt9x+Tu5w/Rw5 +6wwCURQtjr0W4MHfRnXnJK3s9EK0hZNwEGe6nQY1ShjTK3rMUUKhemPR5ruhxSvC +Nr4TDea9Y355e6cJDUCrat2PisP29owaQgVR1EX1n6diIWgVIEM8med8vSTYqZEX +c4g/VhsxOBi0cQ+azcgOno4uG+GMmIPLHzHxREzGBHNJdmAPx/i9F4BrLunMTA5a +mnkPIAou1Z5jJh5VkpTYghdae9C8x49OhgQ= +-----END CERTIFICATE----- + +# Issuer: CN=Entrust Root Certification Authority O=Entrust, Inc. OU=www.entrust.net/CPS is incorporated by reference/(c) 2006 Entrust, Inc. +# Subject: CN=Entrust Root Certification Authority O=Entrust, Inc. OU=www.entrust.net/CPS is incorporated by reference/(c) 2006 Entrust, Inc. 
+# Label: "Entrust Root Certification Authority" +# Serial: 1164660820 +# MD5 Fingerprint: d6:a5:c3:ed:5d:dd:3e:00:c1:3d:87:92:1f:1d:3f:e4 +# SHA1 Fingerprint: b3:1e:b1:b7:40:e3:6c:84:02:da:dc:37:d4:4d:f5:d4:67:49:52:f9 +# SHA256 Fingerprint: 73:c1:76:43:4f:1b:c6:d5:ad:f4:5b:0e:76:e7:27:28:7c:8d:e5:76:16:c1:e6:e6:14:1a:2b:2c:bc:7d:8e:4c +-----BEGIN CERTIFICATE----- +MIIEkTCCA3mgAwIBAgIERWtQVDANBgkqhkiG9w0BAQUFADCBsDELMAkGA1UEBhMC +VVMxFjAUBgNVBAoTDUVudHJ1c3QsIEluYy4xOTA3BgNVBAsTMHd3dy5lbnRydXN0 +Lm5ldC9DUFMgaXMgaW5jb3Jwb3JhdGVkIGJ5IHJlZmVyZW5jZTEfMB0GA1UECxMW +KGMpIDIwMDYgRW50cnVzdCwgSW5jLjEtMCsGA1UEAxMkRW50cnVzdCBSb290IENl +cnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTA2MTEyNzIwMjM0MloXDTI2MTEyNzIw +NTM0MlowgbAxCzAJBgNVBAYTAlVTMRYwFAYDVQQKEw1FbnRydXN0LCBJbmMuMTkw +NwYDVQQLEzB3d3cuZW50cnVzdC5uZXQvQ1BTIGlzIGluY29ycG9yYXRlZCBieSBy +ZWZlcmVuY2UxHzAdBgNVBAsTFihjKSAyMDA2IEVudHJ1c3QsIEluYy4xLTArBgNV +BAMTJEVudHJ1c3QgUm9vdCBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTCCASIwDQYJ +KoZIhvcNAQEBBQADggEPADCCAQoCggEBALaVtkNC+sZtKm9I35RMOVcF7sN5EUFo +Nu3s/poBj6E4KPz3EEZmLk0eGrEaTsbRwJWIsMn/MYszA9u3g3s+IIRe7bJWKKf4 +4LlAcTfFy0cOlypowCKVYhXbR9n10Cv/gkvJrT7eTNuQgFA/CYqEAOwwCj0Yzfv9 +KlmaI5UXLEWeH25DeW0MXJj+SKfFI0dcXv1u5x609mhF0YaDW6KKjbHjKYD+JXGI +rb68j6xSlkuqUY3kEzEZ6E5Nn9uss2rVvDlUccp6en+Q3X0dgNmBu1kmwhH+5pPi +94DkZfs0Nw4pgHBNrziGLp5/V6+eF67rHMsoIV+2HNjnogQi+dPa2MsCAwEAAaOB +sDCBrTAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zArBgNVHRAEJDAi +gA8yMDA2MTEyNzIwMjM0MlqBDzIwMjYxMTI3MjA1MzQyWjAfBgNVHSMEGDAWgBRo +kORnpKZTgMeGZqTx90tD+4S9bTAdBgNVHQ4EFgQUaJDkZ6SmU4DHhmak8fdLQ/uE +vW0wHQYJKoZIhvZ9B0EABBAwDhsIVjcuMTo0LjADAgSQMA0GCSqGSIb3DQEBBQUA +A4IBAQCT1DCw1wMgKtD5Y+iRDAUgqV8ZyntyTtSx29CW+1RaGSwMCPeyvIWonX9t +O1KzKtvn1ISMY/YPyyYBkVBs9F8U4pN0wBOeMDpQ47RgxRzwIkSNcUesyBrJ6Zua +AGAT/3B+XxFNSRuzFVJ7yVTav52Vr2ua2J7p8eRDjeIRRDq/r72DQnNSi6q7pynP +9WQcCk3RvKqsnyrQ/39/2n3qse0wJcGE2jTSW3iDVuycNsMm4hH2Z0kdkquM++v/ +eu6FSqdQgPCnXEqULl8FmTxSQeDNtGPPAUO6nIPcj2A781q0tHuu2guQOHXvgR1m +0vdXcDazv/wor3ElhVsT/h5/WrQ8 +-----END CERTIFICATE----- + +# Issuer: CN=GeoTrust Global CA O=GeoTrust Inc. +# Subject: CN=GeoTrust Global CA O=GeoTrust Inc. 
+# Label: "GeoTrust Global CA" +# Serial: 144470 +# MD5 Fingerprint: f7:75:ab:29:fb:51:4e:b7:77:5e:ff:05:3c:99:8e:f5 +# SHA1 Fingerprint: de:28:f4:a4:ff:e5:b9:2f:a3:c5:03:d1:a3:49:a7:f9:96:2a:82:12 +# SHA256 Fingerprint: ff:85:6a:2d:25:1d:cd:88:d3:66:56:f4:50:12:67:98:cf:ab:aa:de:40:79:9c:72:2d:e4:d2:b5:db:36:a7:3a +-----BEGIN CERTIFICATE----- +MIIDVDCCAjygAwIBAgIDAjRWMA0GCSqGSIb3DQEBBQUAMEIxCzAJBgNVBAYTAlVT +MRYwFAYDVQQKEw1HZW9UcnVzdCBJbmMuMRswGQYDVQQDExJHZW9UcnVzdCBHbG9i +YWwgQ0EwHhcNMDIwNTIxMDQwMDAwWhcNMjIwNTIxMDQwMDAwWjBCMQswCQYDVQQG +EwJVUzEWMBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEbMBkGA1UEAxMSR2VvVHJ1c3Qg +R2xvYmFsIENBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA2swYYzD9 +9BcjGlZ+W988bDjkcbd4kdS8odhM+KhDtgPpTSEHCIjaWC9mOSm9BXiLnTjoBbdq +fnGk5sRgprDvgOSJKA+eJdbtg/OtppHHmMlCGDUUna2YRpIuT8rxh0PBFpVXLVDv +iS2Aelet8u5fa9IAjbkU+BQVNdnARqN7csiRv8lVK83Qlz6cJmTM386DGXHKTubU +1XupGc1V3sjs0l44U+VcT4wt/lAjNvxm5suOpDkZALeVAjmRCw7+OC7RHQWa9k0+ +bw8HHa8sHo9gOeL6NlMTOdReJivbPagUvTLrGAMoUgRx5aszPeE4uwc2hGKceeoW +MPRfwCvocWvk+QIDAQABo1MwUTAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTA +ephojYn7qwVkDBF9qn1luMrMTjAfBgNVHSMEGDAWgBTAephojYn7qwVkDBF9qn1l +uMrMTjANBgkqhkiG9w0BAQUFAAOCAQEANeMpauUvXVSOKVCUn5kaFOSPeCpilKIn +Z57QzxpeR+nBsqTP3UEaBU6bS+5Kb1VSsyShNwrrZHYqLizz/Tt1kL/6cdjHPTfS +tQWVYrmm3ok9Nns4d0iXrKYgjy6myQzCsplFAMfOEVEiIuCl6rYVSAlk6l5PdPcF +PseKUgzbFbS9bZvlxrFUaKnjaZC2mqUPuLk/IH2uSrW4nOQdtqvmlKXBx4Ot2/Un +hw4EbNX/3aBd7YdStysVAq45pmp06drE57xNNB6pXE0zX5IJL4hmXXeXxx12E6nV +5fEWCRE11azbJHFwLJhWC9kXtNHjUStedejV0NxPNO3CBWaAocvmMw== +-----END CERTIFICATE----- + +# Issuer: CN=GeoTrust Universal CA O=GeoTrust Inc. +# Subject: CN=GeoTrust Universal CA O=GeoTrust Inc. +# Label: "GeoTrust Universal CA" +# Serial: 1 +# MD5 Fingerprint: 92:65:58:8b:a2:1a:31:72:73:68:5c:b4:a5:7a:07:48 +# SHA1 Fingerprint: e6:21:f3:35:43:79:05:9a:4b:68:30:9d:8a:2f:74:22:15:87:ec:79 +# SHA256 Fingerprint: a0:45:9b:9f:63:b2:25:59:f5:fa:5d:4c:6d:b3:f9:f7:2f:f1:93:42:03:35:78:f0:73:bf:1d:1b:46:cb:b9:12 +-----BEGIN CERTIFICATE----- +MIIFaDCCA1CgAwIBAgIBATANBgkqhkiG9w0BAQUFADBFMQswCQYDVQQGEwJVUzEW +MBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEeMBwGA1UEAxMVR2VvVHJ1c3QgVW5pdmVy +c2FsIENBMB4XDTA0MDMwNDA1MDAwMFoXDTI5MDMwNDA1MDAwMFowRTELMAkGA1UE +BhMCVVMxFjAUBgNVBAoTDUdlb1RydXN0IEluYy4xHjAcBgNVBAMTFUdlb1RydXN0 +IFVuaXZlcnNhbCBDQTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAKYV +VaCjxuAfjJ0hUNfBvitbtaSeodlyWL0AG0y/YckUHUWCq8YdgNY96xCcOq9tJPi8 +cQGeBvV8Xx7BDlXKg5pZMK4ZyzBIle0iN430SppyZj6tlcDgFgDgEB8rMQ7XlFTT +QjOgNB0eRXbdT8oYN+yFFXoZCPzVx5zw8qkuEKmS5j1YPakWaDwvdSEYfyh3peFh +F7em6fgemdtzbvQKoiFs7tqqhZJmr/Z6a4LauiIINQ/PQvE1+mrufislzDoR5G2v +c7J2Ha3QsnhnGqQ5HFELZ1aD/ThdDc7d8Lsrlh/eezJS/R27tQahsiFepdaVaH/w +mZ7cRQg+59IJDTWU3YBOU5fXtQlEIGQWFwMCTFMNaN7VqnJNk22CDtucvc+081xd +VHppCZbW2xHBjXWotM85yM48vCR85mLK4b19p71XZQvk/iXttmkQ3CgaRr0BHdCX +teGYO8A3ZNY9lO4L4fUorgtWv3GLIylBjobFS1J72HGrH4oVpjuDWtdYAVHGTEHZ +f9hBZ3KiKN9gg6meyHv8U3NyWfWTehd2Ds735VzZC1U0oqpbtWpU5xPKV+yXbfRe +Bi9Fi1jUIxaS5BZuKGNZMN9QAZxjiRqf2xeUgnA3wySemkfWWspOqGmJch+RbNt+ +nhutxx9z3SxPGWX9f5NAEC7S8O08ni4oPmkmM8V7AgMBAAGjYzBhMA8GA1UdEwEB +/wQFMAMBAf8wHQYDVR0OBBYEFNq7LqqwDLiIJlF0XG0D08DYj3rWMB8GA1UdIwQY +MBaAFNq7LqqwDLiIJlF0XG0D08DYj3rWMA4GA1UdDwEB/wQEAwIBhjANBgkqhkiG +9w0BAQUFAAOCAgEAMXjmx7XfuJRAyXHEqDXsRh3ChfMoWIawC/yOsjmPRFWrZIRc +aanQmjg8+uUfNeVE44B5lGiku8SfPeE0zTBGi1QrlaXv9z+ZhP015s8xxtxqv6fX +IwjhmF7DWgh2qaavdy+3YL1ERmrvl/9zlcGO6JP7/TG37FcREUWbMPEaiDnBTzyn +ANXH/KttgCJwpQzgXQQpAvvLoJHRfNbDflDVnVi+QTjruXU8FdmbyUqDWcDaU/0z +uzYYm4UPFd3uLax2k7nZAY1IEKj79TiG8dsKxr2EoyNB3tZ3b4XUhRxQ4K5RirqN 
+Pnbiucon8l+f725ZDQbYKxek0nxru18UGkiPGkzns0ccjkxFKyDuSN/n3QmOGKja +QI2SJhFTYXNd673nxE0pN2HrrDktZy4W1vUAg4WhzH92xH3kt0tm7wNFYGm2DFKW +koRepqO1pD4r2czYG0eq8kTaT/kD6PAUyz/zg97QwVTjt+gKN02LIFkDMBmhLMi9 +ER/frslKxfMnZmaGrGiR/9nmUxwPi1xpZQomyB40w11Re9epnAahNt3ViZS82eQt +DF4JbAiXfKM9fJP/P6EUp8+1Xevb2xzEdt+Iub1FBZUbrvxGakyvSOPOrg/Sfuvm +bJxPgWp6ZKy7PtXny3YuxadIwVyQD8vIP/rmMuGNG2+k5o7Y+SlIis5z/iw= +-----END CERTIFICATE----- + +# Issuer: CN=GeoTrust Universal CA 2 O=GeoTrust Inc. +# Subject: CN=GeoTrust Universal CA 2 O=GeoTrust Inc. +# Label: "GeoTrust Universal CA 2" +# Serial: 1 +# MD5 Fingerprint: 34:fc:b8:d0:36:db:9e:14:b3:c2:f2:db:8f:e4:94:c7 +# SHA1 Fingerprint: 37:9a:19:7b:41:85:45:35:0c:a6:03:69:f3:3c:2e:af:47:4f:20:79 +# SHA256 Fingerprint: a0:23:4f:3b:c8:52:7c:a5:62:8e:ec:81:ad:5d:69:89:5d:a5:68:0d:c9:1d:1c:b8:47:7f:33:f8:78:b9:5b:0b +-----BEGIN CERTIFICATE----- +MIIFbDCCA1SgAwIBAgIBATANBgkqhkiG9w0BAQUFADBHMQswCQYDVQQGEwJVUzEW +MBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEgMB4GA1UEAxMXR2VvVHJ1c3QgVW5pdmVy +c2FsIENBIDIwHhcNMDQwMzA0MDUwMDAwWhcNMjkwMzA0MDUwMDAwWjBHMQswCQYD +VQQGEwJVUzEWMBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEgMB4GA1UEAxMXR2VvVHJ1 +c3QgVW5pdmVyc2FsIENBIDIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoIC +AQCzVFLByT7y2dyxUxpZKeexw0Uo5dfR7cXFS6GqdHtXr0om/Nj1XqduGdt0DE81 +WzILAePb63p3NeqqWuDW6KFXlPCQo3RWlEQwAx5cTiuFJnSCegx2oG9NzkEtoBUG +FF+3Qs17j1hhNNwqCPkuwwGmIkQcTAeC5lvO0Ep8BNMZcyfwqph/Lq9O64ceJHdq +XbboW0W63MOhBW9Wjo8QJqVJwy7XQYci4E+GymC16qFjwAGXEHm9ADwSbSsVsaxL +se4YuU6W3Nx2/zu+z18DwPw76L5GG//aQMJS9/7jOvdqdzXQ2o3rXhhqMcceujwb +KNZrVMaqW9eiLBsZzKIC9ptZvTdrhrVtgrrY6slWvKk2WP0+GfPtDCapkzj4T8Fd +IgbQl+rhrcZV4IErKIM6+vR7IVEAvlI4zs1meaj0gVbi0IMJR1FbUGrP20gaXT73 +y/Zl92zxlfgCOzJWgjl6W70viRu/obTo/3+NjN8D8WBOWBFM66M/ECuDmgFz2ZRt +hAAnZqzwcEAJQpKtT5MNYQlRJNiS1QuUYbKHsu3/mjX/hVTK7URDrBs8FmtISgoc +QIgfksILAAX/8sgCSqSqqcyZlpwvWOB94b67B9xfBHJcMTTD7F8t4D1kkCLm0ey4 +Lt1ZrtmhN79UNdxzMk+MBB4zsslG8dhcyFVQyWi9qLo2CQIDAQABo2MwYTAPBgNV +HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR281Xh+qQ2+/CfXGJx7Tz0RzgQKzAfBgNV +HSMEGDAWgBR281Xh+qQ2+/CfXGJx7Tz0RzgQKzAOBgNVHQ8BAf8EBAMCAYYwDQYJ +KoZIhvcNAQEFBQADggIBAGbBxiPz2eAubl/oz66wsCVNK/g7WJtAJDday6sWSf+z +dXkzoS9tcBc0kf5nfo/sm+VegqlVHy/c1FEHEv6sFj4sNcZj/NwQ6w2jqtB8zNHQ +L1EuxBRa3ugZ4T7GzKQp5y6EqgYweHZUcyiYWTjgAA1i00J9IZ+uPTqM1fp3DRgr +Fg5fNuH8KrUwJM/gYwx7WBr+mbpCErGR9Hxo4sjoryzqyX6uuyo9DRXcNJW2GHSo +ag/HtPQTxORb7QrSpJdMKu0vbBKJPfEncKpqA1Ihn0CoZ1Dy81of398j9tx4TuaY +T1U6U+Pv8vSfx3zYWK8pIpe44L2RLrB27FcRz+8pRPPphXpgY+RdM4kX2TGq2tbz +GDVyz4crL2MjhF2EjD9XoIj8mZEoJmmZ1I+XRL6O1UixpCgp8RW04eWe3fiPpm8m +1wk8OhwRDqZsN/etRIcsKMfYdIKz0G9KV7s1KSegi+ghp4dkNl3M2Basx7InQJJV +OCiNUW7dFGdTbHFcJoRNdVq2fmBWqU2t+5sel/MN2dKXVHfaPRK34B7vCAas+YWH +6aLcr34YEoP9VhdBLtUpgn2Z9DH2canPLAEnpQW5qrJITirvn5NSUZU8UnOOVkwX +QMAJKOSLakhT2+zNVVXxxvjpoixMptEmX36vWkzaH6byHCx+rgIW0lbQL1dTR+iS +-----END CERTIFICATE----- + +# Issuer: CN=AAA Certificate Services O=Comodo CA Limited +# Subject: CN=AAA Certificate Services O=Comodo CA Limited +# Label: "Comodo AAA Services root" +# Serial: 1 +# MD5 Fingerprint: 49:79:04:b0:eb:87:19:ac:47:b0:bc:11:51:9b:74:d0 +# SHA1 Fingerprint: d1:eb:23:a4:6d:17:d6:8f:d9:25:64:c2:f1:f1:60:17:64:d8:e3:49 +# SHA256 Fingerprint: d7:a7:a0:fb:5d:7e:27:31:d7:71:e9:48:4e:bc:de:f7:1d:5f:0c:3e:0a:29:48:78:2b:c8:3e:e0:ea:69:9e:f4 +-----BEGIN CERTIFICATE----- +MIIEMjCCAxqgAwIBAgIBATANBgkqhkiG9w0BAQUFADB7MQswCQYDVQQGEwJHQjEb +MBkGA1UECAwSR3JlYXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHDAdTYWxmb3JkMRow +GAYDVQQKDBFDb21vZG8gQ0EgTGltaXRlZDEhMB8GA1UEAwwYQUFBIENlcnRpZmlj +YXRlIFNlcnZpY2VzMB4XDTA0MDEwMTAwMDAwMFoXDTI4MTIzMTIzNTk1OVowezEL 
+MAkGA1UEBhMCR0IxGzAZBgNVBAgMEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4GA1UE +BwwHU2FsZm9yZDEaMBgGA1UECgwRQ29tb2RvIENBIExpbWl0ZWQxITAfBgNVBAMM +GEFBQSBDZXJ0aWZpY2F0ZSBTZXJ2aWNlczCCASIwDQYJKoZIhvcNAQEBBQADggEP +ADCCAQoCggEBAL5AnfRu4ep2hxxNRUSOvkbIgwadwSr+GB+O5AL686tdUIoWMQua +BtDFcCLNSS1UY8y2bmhGC1Pqy0wkwLxyTurxFa70VJoSCsN6sjNg4tqJVfMiWPPe +3M/vg4aijJRPn2jymJBGhCfHdr/jzDUsi14HZGWCwEiwqJH5YZ92IFCokcdmtet4 +YgNW8IoaE+oxox6gmf049vYnMlhvB/VruPsUK6+3qszWY19zjNoFmag4qMsXeDZR +rOme9Hg6jc8P2ULimAyrL58OAd7vn5lJ8S3frHRNG5i1R8XlKdH5kBjHYpy+g8cm +ez6KJcfA3Z3mNWgQIJ2P2N7Sw4ScDV7oL8kCAwEAAaOBwDCBvTAdBgNVHQ4EFgQU +oBEKIz6W8Qfs4q8p74Klf9AwpLQwDgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQF +MAMBAf8wewYDVR0fBHQwcjA4oDagNIYyaHR0cDovL2NybC5jb21vZG9jYS5jb20v +QUFBQ2VydGlmaWNhdGVTZXJ2aWNlcy5jcmwwNqA0oDKGMGh0dHA6Ly9jcmwuY29t +b2RvLm5ldC9BQUFDZXJ0aWZpY2F0ZVNlcnZpY2VzLmNybDANBgkqhkiG9w0BAQUF +AAOCAQEACFb8AvCb6P+k+tZ7xkSAzk/ExfYAWMymtrwUSWgEdujm7l3sAg9g1o1Q +GE8mTgHj5rCl7r+8dFRBv/38ErjHT1r0iWAFf2C3BUrz9vHCv8S5dIa2LX1rzNLz +Rt0vxuBqw8M0Ayx9lt1awg6nCpnBBYurDC/zXDrPbDdVCYfeU0BsWO/8tqtlbgT2 +G9w84FoVxp7Z8VlIMCFlA2zs6SFz7JsDoeA3raAVGI/6ugLOpyypEBMs1OUIJqsi +l2D4kF501KKaU73yqWjgom7C12yxow+ev+to51byrvLjKzg6CYG1a4XXvi3tPxq3 +smPi9WIsgtRqAEFQ8TmDn5XpNpaYbg== +-----END CERTIFICATE----- + +# Issuer: CN=QuoVadis Root Certification Authority O=QuoVadis Limited OU=Root Certification Authority +# Subject: CN=QuoVadis Root Certification Authority O=QuoVadis Limited OU=Root Certification Authority +# Label: "QuoVadis Root CA" +# Serial: 985026699 +# MD5 Fingerprint: 27:de:36:fe:72:b7:00:03:00:9d:f4:f0:1e:6c:04:24 +# SHA1 Fingerprint: de:3f:40:bd:50:93:d3:9b:6c:60:f6:da:bc:07:62:01:00:89:76:c9 +# SHA256 Fingerprint: a4:5e:de:3b:bb:f0:9c:8a:e1:5c:72:ef:c0:72:68:d6:93:a2:1c:99:6f:d5:1e:67:ca:07:94:60:fd:6d:88:73 +-----BEGIN CERTIFICATE----- +MIIF0DCCBLigAwIBAgIEOrZQizANBgkqhkiG9w0BAQUFADB/MQswCQYDVQQGEwJC +TTEZMBcGA1UEChMQUXVvVmFkaXMgTGltaXRlZDElMCMGA1UECxMcUm9vdCBDZXJ0 +aWZpY2F0aW9uIEF1dGhvcml0eTEuMCwGA1UEAxMlUXVvVmFkaXMgUm9vdCBDZXJ0 +aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0wMTAzMTkxODMzMzNaFw0yMTAzMTcxODMz +MzNaMH8xCzAJBgNVBAYTAkJNMRkwFwYDVQQKExBRdW9WYWRpcyBMaW1pdGVkMSUw +IwYDVQQLExxSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MS4wLAYDVQQDEyVR +dW9WYWRpcyBSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIIBIjANBgkqhkiG +9w0BAQEFAAOCAQ8AMIIBCgKCAQEAv2G1lVO6V/z68mcLOhrfEYBklbTRvM16z/Yp +li4kVEAkOPcahdxYTMukJ0KX0J+DisPkBgNbAKVRHnAEdOLB1Dqr1607BxgFjv2D +rOpm2RgbaIr1VxqYuvXtdj182d6UajtLF8HVj71lODqV0D1VNk7feVcxKh7YWWVJ +WCCYfqtffp/p1k3sg3Spx2zY7ilKhSoGFPlU5tPaZQeLYzcS19Dsw3sgQUSj7cug +F+FxZc4dZjH3dgEZyH0DWLaVSR2mEiboxgx24ONmy+pdpibu5cxfvWenAScOospU +xbF6lR1xHkopigPcakXBpBlebzbNw6Kwt/5cOOJSvPhEQ+aQuwIDAQABo4ICUjCC +Ak4wPQYIKwYBBQUHAQEEMTAvMC0GCCsGAQUFBzABhiFodHRwczovL29jc3AucXVv +dmFkaXNvZmZzaG9yZS5jb20wDwYDVR0TAQH/BAUwAwEB/zCCARoGA1UdIASCAREw +ggENMIIBCQYJKwYBBAG+WAABMIH7MIHUBggrBgEFBQcCAjCBxxqBxFJlbGlhbmNl +IG9uIHRoZSBRdW9WYWRpcyBSb290IENlcnRpZmljYXRlIGJ5IGFueSBwYXJ0eSBh +c3N1bWVzIGFjY2VwdGFuY2Ugb2YgdGhlIHRoZW4gYXBwbGljYWJsZSBzdGFuZGFy +ZCB0ZXJtcyBhbmQgY29uZGl0aW9ucyBvZiB1c2UsIGNlcnRpZmljYXRpb24gcHJh +Y3RpY2VzLCBhbmQgdGhlIFF1b1ZhZGlzIENlcnRpZmljYXRlIFBvbGljeS4wIgYI +KwYBBQUHAgEWFmh0dHA6Ly93d3cucXVvdmFkaXMuYm0wHQYDVR0OBBYEFItLbe3T +KbkGGew5Oanwl4Rqy+/fMIGuBgNVHSMEgaYwgaOAFItLbe3TKbkGGew5Oanwl4Rq +y+/foYGEpIGBMH8xCzAJBgNVBAYTAkJNMRkwFwYDVQQKExBRdW9WYWRpcyBMaW1p +dGVkMSUwIwYDVQQLExxSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MS4wLAYD +VQQDEyVRdW9WYWRpcyBSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5ggQ6tlCL +MA4GA1UdDwEB/wQEAwIBBjANBgkqhkiG9w0BAQUFAAOCAQEAitQUtf70mpKnGdSk 
+fnIYj9lofFIk3WdvOXrEql494liwTXCYhGHoG+NpGA7O+0dQoE7/8CQfvbLO9Sf8 +7C9TqnN7Az10buYWnuulLsS/VidQK2K6vkscPFVcQR0kvoIgR13VRH56FmjffU1R +cHhXHTMe/QKZnAzNCgVPx7uOpHX6Sm2xgI4JVrmcGmD+XcHXetwReNDWXcG31a0y +mQM6isxUJTkxgXsTIlG6Rmyhu576BGxJJnSP0nPrzDCi5upZIof4l/UO/erMkqQW +xFIY6iHOsfHmhIHluqmGKPJDWl0Snawe2ajlCmqnf6CHKc/yiU3U7MXi5nrQNiOK +SnQ2+Q== +-----END CERTIFICATE----- + +# Issuer: CN=QuoVadis Root CA 2 O=QuoVadis Limited +# Subject: CN=QuoVadis Root CA 2 O=QuoVadis Limited +# Label: "QuoVadis Root CA 2" +# Serial: 1289 +# MD5 Fingerprint: 5e:39:7b:dd:f8:ba:ec:82:e9:ac:62:ba:0c:54:00:2b +# SHA1 Fingerprint: ca:3a:fb:cf:12:40:36:4b:44:b2:16:20:88:80:48:39:19:93:7c:f7 +# SHA256 Fingerprint: 85:a0:dd:7d:d7:20:ad:b7:ff:05:f8:3d:54:2b:20:9d:c7:ff:45:28:f7:d6:77:b1:83:89:fe:a5:e5:c4:9e:86 +-----BEGIN CERTIFICATE----- +MIIFtzCCA5+gAwIBAgICBQkwDQYJKoZIhvcNAQEFBQAwRTELMAkGA1UEBhMCQk0x +GTAXBgNVBAoTEFF1b1ZhZGlzIExpbWl0ZWQxGzAZBgNVBAMTElF1b1ZhZGlzIFJv +b3QgQ0EgMjAeFw0wNjExMjQxODI3MDBaFw0zMTExMjQxODIzMzNaMEUxCzAJBgNV +BAYTAkJNMRkwFwYDVQQKExBRdW9WYWRpcyBMaW1pdGVkMRswGQYDVQQDExJRdW9W +YWRpcyBSb290IENBIDIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCa +GMpLlA0ALa8DKYrwD4HIrkwZhR0In6spRIXzL4GtMh6QRr+jhiYaHv5+HBg6XJxg +Fyo6dIMzMH1hVBHL7avg5tKifvVrbxi3Cgst/ek+7wrGsxDp3MJGF/hd/aTa/55J +WpzmM+Yklvc/ulsrHHo1wtZn/qtmUIttKGAr79dgw8eTvI02kfN/+NsRE8Scd3bB +rrcCaoF6qUWD4gXmuVbBlDePSHFjIuwXZQeVikvfj8ZaCuWw419eaxGrDPmF60Tp ++ARz8un+XJiM9XOva7R+zdRcAitMOeGylZUtQofX1bOQQ7dsE/He3fbE+Ik/0XX1 +ksOR1YqI0JDs3G3eicJlcZaLDQP9nL9bFqyS2+r+eXyt66/3FsvbzSUr5R/7mp/i +Ucw6UwxI5g69ybR2BlLmEROFcmMDBOAENisgGQLodKcftslWZvB1JdxnwQ5hYIiz +PtGo/KPaHbDRsSNU30R2be1B2MGyIrZTHN81Hdyhdyox5C315eXbyOD/5YDXC2Og +/zOhD7osFRXql7PSorW+8oyWHhqPHWykYTe5hnMz15eWniN9gqRMgeKh0bpnX5UH +oycR7hYQe7xFSkyyBNKr79X9DFHOUGoIMfmR2gyPZFwDwzqLID9ujWc9Otb+fVuI +yV77zGHcizN300QyNQliBJIWENieJ0f7OyHj+OsdWwIDAQABo4GwMIGtMA8GA1Ud +EwEB/wQFMAMBAf8wCwYDVR0PBAQDAgEGMB0GA1UdDgQWBBQahGK8SEwzJQTU7tD2 +A8QZRtGUazBuBgNVHSMEZzBlgBQahGK8SEwzJQTU7tD2A8QZRtGUa6FJpEcwRTEL +MAkGA1UEBhMCQk0xGTAXBgNVBAoTEFF1b1ZhZGlzIExpbWl0ZWQxGzAZBgNVBAMT +ElF1b1ZhZGlzIFJvb3QgQ0EgMoICBQkwDQYJKoZIhvcNAQEFBQADggIBAD4KFk2f +BluornFdLwUvZ+YTRYPENvbzwCYMDbVHZF34tHLJRqUDGCdViXh9duqWNIAXINzn +g/iN/Ae42l9NLmeyhP3ZRPx3UIHmfLTJDQtyU/h2BwdBR5YM++CCJpNVjP4iH2Bl +fF/nJrP3MpCYUNQ3cVX2kiF495V5+vgtJodmVjB3pjd4M1IQWK4/YY7yarHvGH5K +WWPKjaJW1acvvFYfzznB4vsKqBUsfU16Y8Zsl0Q80m/DShcK+JDSV6IZUaUtl0Ha +B0+pUNqQjZRG4T7wlP0QADj1O+hA4bRuVhogzG9Yje0uRY/W6ZM/57Es3zrWIozc +hLsib9D45MY56QSIPMO661V6bYCZJPVsAfv4l7CUW+v90m/xd2gNNWQjrLhVoQPR +TUIZ3Ph1WVaj+ahJefivDrkRoHy3au000LYmYjgahwz46P0u05B/B5EqHdZ+XIWD +mbA4CD/pXvk1B+TJYm5Xf6dQlfe6yJvmjqIBxdZmv3lh8zwc4bmCXF2gw+nYSL0Z +ohEUGW6yhhtoPkg3Goi3XZZenMfvJ2II4pEZXNLxId26F0KCl3GBUzGpn/Z9Yr9y +4aOTHcyKJloJONDO1w2AFrR4pTqHTI2KpdVGl/IsELm8VCLAAVBpQ570su9t+Oza +8eOx79+Rj1QqCyXBJhnEUhAFZdWCEOrCMc0u +-----END CERTIFICATE----- + +# Issuer: CN=QuoVadis Root CA 3 O=QuoVadis Limited +# Subject: CN=QuoVadis Root CA 3 O=QuoVadis Limited +# Label: "QuoVadis Root CA 3" +# Serial: 1478 +# MD5 Fingerprint: 31:85:3c:62:94:97:63:b9:aa:fd:89:4e:af:6f:e0:cf +# SHA1 Fingerprint: 1f:49:14:f7:d8:74:95:1d:dd:ae:02:c0:be:fd:3a:2d:82:75:51:85 +# SHA256 Fingerprint: 18:f1:fc:7f:20:5d:f8:ad:dd:eb:7f:e0:07:dd:57:e3:af:37:5a:9c:4d:8d:73:54:6b:f4:f1:fe:d1:e1:8d:35 +-----BEGIN CERTIFICATE----- +MIIGnTCCBIWgAwIBAgICBcYwDQYJKoZIhvcNAQEFBQAwRTELMAkGA1UEBhMCQk0x +GTAXBgNVBAoTEFF1b1ZhZGlzIExpbWl0ZWQxGzAZBgNVBAMTElF1b1ZhZGlzIFJv +b3QgQ0EgMzAeFw0wNjExMjQxOTExMjNaFw0zMTExMjQxOTA2NDRaMEUxCzAJBgNV 
+BAYTAkJNMRkwFwYDVQQKExBRdW9WYWRpcyBMaW1pdGVkMRswGQYDVQQDExJRdW9W +YWRpcyBSb290IENBIDMwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDM +V0IWVJzmmNPTTe7+7cefQzlKZbPoFog02w1ZkXTPkrgEQK0CSzGrvI2RaNggDhoB +4hp7Thdd4oq3P5kazethq8Jlph+3t723j/z9cI8LoGe+AaJZz3HmDyl2/7FWeUUr +H556VOijKTVopAFPD6QuN+8bv+OPEKhyq1hX51SGyMnzW9os2l2ObjyjPtr7guXd +8lyyBTNvijbO0BNO/79KDDRMpsMhvVAEVeuxu537RR5kFd5VAYwCdrXLoT9Cabwv +vWhDFlaJKjdhkf2mrk7AyxRllDdLkgbvBNDInIjbC3uBr7E9KsRlOni27tyAsdLT +mZw67mtaa7ONt9XOnMK+pUsvFrGeaDsGb659n/je7Mwpp5ijJUMv7/FfJuGITfhe +btfZFG4ZM2mnO4SJk8RTVROhUXhA+LjJou57ulJCg54U7QVSWllWp5f8nT8KKdjc +T5EOE7zelaTfi5m+rJsziO+1ga8bxiJTyPbH7pcUsMV8eFLI8M5ud2CEpukqdiDt +WAEXMJPpGovgc2PZapKUSU60rUqFxKMiMPwJ7Wgic6aIDFUhWMXhOp8q3crhkODZ +c6tsgLjoC2SToJyMGf+z0gzskSaHirOi4XCPLArlzW1oUevaPwV/izLmE1xr/l9A +4iLItLRkT9a6fUg+qGkM17uGcclzuD87nSVL2v9A6wIDAQABo4IBlTCCAZEwDwYD +VR0TAQH/BAUwAwEB/zCB4QYDVR0gBIHZMIHWMIHTBgkrBgEEAb5YAAMwgcUwgZMG +CCsGAQUFBwICMIGGGoGDQW55IHVzZSBvZiB0aGlzIENlcnRpZmljYXRlIGNvbnN0 +aXR1dGVzIGFjY2VwdGFuY2Ugb2YgdGhlIFF1b1ZhZGlzIFJvb3QgQ0EgMyBDZXJ0 +aWZpY2F0ZSBQb2xpY3kgLyBDZXJ0aWZpY2F0aW9uIFByYWN0aWNlIFN0YXRlbWVu +dC4wLQYIKwYBBQUHAgEWIWh0dHA6Ly93d3cucXVvdmFkaXNnbG9iYWwuY29tL2Nw +czALBgNVHQ8EBAMCAQYwHQYDVR0OBBYEFPLAE+CCQz777i9nMpY1XNu4ywLQMG4G +A1UdIwRnMGWAFPLAE+CCQz777i9nMpY1XNu4ywLQoUmkRzBFMQswCQYDVQQGEwJC +TTEZMBcGA1UEChMQUXVvVmFkaXMgTGltaXRlZDEbMBkGA1UEAxMSUXVvVmFkaXMg +Um9vdCBDQSAzggIFxjANBgkqhkiG9w0BAQUFAAOCAgEAT62gLEz6wPJv92ZVqyM0 +7ucp2sNbtrCD2dDQ4iH782CnO11gUyeim/YIIirnv6By5ZwkajGxkHon24QRiSem +d1o417+shvzuXYO8BsbRd2sPbSQvS3pspweWyuOEn62Iix2rFo1bZhfZFvSLgNLd ++LJ2w/w4E6oM3kJpK27zPOuAJ9v1pkQNn1pVWQvVDVJIxa6f8i+AxeoyUDUSly7B +4f/xI4hROJ/yZlZ25w9Rl6VSDE1JUZU2Pb+iSwwQHYaZTKrzchGT5Or2m9qoXadN +t54CrnMAyNojA+j56hl0YgCUyyIgvpSnWbWCar6ZeXqp8kokUvd0/bpO5qgdAm6x +DYBEwa7TIzdfu4V8K5Iu6H6li92Z4b8nby1dqnuH/grdS/yO9SbkbnBCbjPsMZ57 +k8HkyWkaPcBrTiJt7qtYTcbQQcEr6k8Sh17rRdhs9ZgC06DYVYoGmRmioHfRMJ6s +zHXug/WwYjnPbFfiTNKRCw51KBuav/0aQ/HKd/s7j2G4aSgWQgRecCocIdiP4b0j +Wy10QJLZYxkNc91pvGJHvOB0K7Lrfb5BG7XARsWhIstfTsEokt4YutUqKLsRixeT +mJlglFwjz1onl14LBQaTNx47aTbrqZ5hHY8y2o4M1nQ+ewkk2gF3R8Q7zTSMmfXK +4SVhM7JZG+Ju1zdXtg2pEto= +-----END CERTIFICATE----- + +# Issuer: O=SECOM Trust.net OU=Security Communication RootCA1 +# Subject: O=SECOM Trust.net OU=Security Communication RootCA1 +# Label: "Security Communication Root CA" +# Serial: 0 +# MD5 Fingerprint: f1:bc:63:6a:54:e0:b5:27:f5:cd:e7:1a:e3:4d:6e:4a +# SHA1 Fingerprint: 36:b1:2b:49:f9:81:9e:d7:4c:9e:bc:38:0f:c6:56:8f:5d:ac:b2:f7 +# SHA256 Fingerprint: e7:5e:72:ed:9f:56:0e:ec:6e:b4:80:00:73:a4:3f:c3:ad:19:19:5a:39:22:82:01:78:95:97:4a:99:02:6b:6c +-----BEGIN CERTIFICATE----- +MIIDWjCCAkKgAwIBAgIBADANBgkqhkiG9w0BAQUFADBQMQswCQYDVQQGEwJKUDEY +MBYGA1UEChMPU0VDT00gVHJ1c3QubmV0MScwJQYDVQQLEx5TZWN1cml0eSBDb21t +dW5pY2F0aW9uIFJvb3RDQTEwHhcNMDMwOTMwMDQyMDQ5WhcNMjMwOTMwMDQyMDQ5 +WjBQMQswCQYDVQQGEwJKUDEYMBYGA1UEChMPU0VDT00gVHJ1c3QubmV0MScwJQYD +VQQLEx5TZWN1cml0eSBDb21tdW5pY2F0aW9uIFJvb3RDQTEwggEiMA0GCSqGSIb3 +DQEBAQUAA4IBDwAwggEKAoIBAQCzs/5/022x7xZ8V6UMbXaKL0u/ZPtM7orw8yl8 +9f/uKuDp6bpbZCKamm8sOiZpUQWZJtzVHGpxxpp9Hp3dfGzGjGdnSj74cbAZJ6kJ +DKaVv0uMDPpVmDvY6CKhS3E4eayXkmmziX7qIWgGmBSWh9JhNrxtJ1aeV+7AwFb9 +Ms+k2Y7CI9eNqPPYJayX5HA49LY6tJ07lyZDo6G8SVlyTCMwhwFY9k6+HGhWZq/N +QV3Is00qVUarH9oe4kA92819uZKAnDfdDJZkndwi92SL32HeFZRSFaB9UslLqCHJ +xrHty8OVYNEP8Ktw+N/LTX7s1vqr2b1/VPKl6Xn62dZ2JChzAgMBAAGjPzA9MB0G +A1UdDgQWBBSgc0mZaNyFW2XjmygvV5+9M7wHSDALBgNVHQ8EBAMCAQYwDwYDVR0T +AQH/BAUwAwEB/zANBgkqhkiG9w0BAQUFAAOCAQEAaECpqLvkT115swW1F7NgE+vG 
+kl3g0dNq/vu+m22/xwVtWSDEHPC32oRYAmP6SBbvT6UL90qY8j+eG61Ha2POCEfr +Uj94nK9NrvjVT8+amCoQQTlSxN3Zmw7vkwGusi7KaEIkQmywszo+zenaSMQVy+n5 +Bw+SUEmK3TGXX8npN6o7WWWXlDLJs58+OmJYxUmtYg5xpTKqL8aJdkNAExNnPaJU +JRDL8Try2frbSVa7pv6nQTXD4IhhyYjH3zYQIphZ6rBK+1YWc26sTfcioU+tHXot +RSflMMFe8toTyyVCUZVHA4xsIcx0Qu1T/zOLjw9XARYvz6buyXAiFL39vmwLAw== +-----END CERTIFICATE----- + +# Issuer: CN=Sonera Class2 CA O=Sonera +# Subject: CN=Sonera Class2 CA O=Sonera +# Label: "Sonera Class 2 Root CA" +# Serial: 29 +# MD5 Fingerprint: a3:ec:75:0f:2e:88:df:fa:48:01:4e:0b:5c:48:6f:fb +# SHA1 Fingerprint: 37:f7:6d:e6:07:7c:90:c5:b1:3e:93:1a:b7:41:10:b4:f2:e4:9a:27 +# SHA256 Fingerprint: 79:08:b4:03:14:c1:38:10:0b:51:8d:07:35:80:7f:fb:fc:f8:51:8a:00:95:33:71:05:ba:38:6b:15:3d:d9:27 +-----BEGIN CERTIFICATE----- +MIIDIDCCAgigAwIBAgIBHTANBgkqhkiG9w0BAQUFADA5MQswCQYDVQQGEwJGSTEP +MA0GA1UEChMGU29uZXJhMRkwFwYDVQQDExBTb25lcmEgQ2xhc3MyIENBMB4XDTAx +MDQwNjA3Mjk0MFoXDTIxMDQwNjA3Mjk0MFowOTELMAkGA1UEBhMCRkkxDzANBgNV +BAoTBlNvbmVyYTEZMBcGA1UEAxMQU29uZXJhIENsYXNzMiBDQTCCASIwDQYJKoZI +hvcNAQEBBQADggEPADCCAQoCggEBAJAXSjWdyvANlsdE+hY3/Ei9vX+ALTU74W+o +Z6m/AxxNjG8yR9VBaKQTBME1DJqEQ/xcHf+Js+gXGM2RX/uJ4+q/Tl18GybTdXnt +5oTjV+WtKcT0OijnpXuENmmz/V52vaMtmdOQTiMofRhj8VQ7Jp12W5dCsv+u8E7s +3TmVToMGf+dJQMjFAbJUWmYdPfz56TwKnoG4cPABi+QjVHzIrviQHgCWctRUz2Ej +vOr7nQKV0ba5cTppCD8PtOFCx4j1P5iop7oc4HFx71hXgVB6XGt0Rg6DA5jDjqhu +8nYybieDwnPz3BjotJPqdURrBGAgcVeHnfO+oJAjPYok4doh28MCAwEAAaMzMDEw +DwYDVR0TAQH/BAUwAwEB/zARBgNVHQ4ECgQISqCqWITTXjwwCwYDVR0PBAQDAgEG +MA0GCSqGSIb3DQEBBQUAA4IBAQBazof5FnIVV0sd2ZvnoiYw7JNn39Yt0jSv9zil +zqsWuasvfDXLrNAPtEwr/IDva4yRXzZ299uzGxnq9LIR/WFxRL8oszodv7ND6J+/ +3DEIcbCdjdY0RzKQxmUk96BKfARzjzlvF4xytb1LyHr4e4PDKE6cCepnP7JnBBvD +FNr450kkkdAdavphOe9r5yF1BgfYErQhIHBCcYHaPJo2vqZbDWpsmh+Re/n570K6 +Tk6ezAyNlNzZRZxe7EJQY670XcSxEtzKO6gunRRaBXW37Ndj4ro1tgQIkejanZz2 +ZrUYrAqmVCY0M9IbwdR/GjqOC6oybtv8TyWf2TLHllpwrN9M +-----END CERTIFICATE----- + +# Issuer: CN=XRamp Global Certification Authority O=XRamp Security Services Inc OU=www.xrampsecurity.com +# Subject: CN=XRamp Global Certification Authority O=XRamp Security Services Inc OU=www.xrampsecurity.com +# Label: "XRamp Global CA Root" +# Serial: 107108908803651509692980124233745014957 +# MD5 Fingerprint: a1:0b:44:b3:ca:10:d8:00:6e:9d:0f:d8:0f:92:0a:d1 +# SHA1 Fingerprint: b8:01:86:d1:eb:9c:86:a5:41:04:cf:30:54:f3:4c:52:b7:e5:58:c6 +# SHA256 Fingerprint: ce:cd:dc:90:50:99:d8:da:df:c5:b1:d2:09:b7:37:cb:e2:c1:8c:fb:2c:10:c0:ff:0b:cf:0d:32:86:fc:1a:a2 +-----BEGIN CERTIFICATE----- +MIIEMDCCAxigAwIBAgIQUJRs7Bjq1ZxN1ZfvdY+grTANBgkqhkiG9w0BAQUFADCB +gjELMAkGA1UEBhMCVVMxHjAcBgNVBAsTFXd3dy54cmFtcHNlY3VyaXR5LmNvbTEk +MCIGA1UEChMbWFJhbXAgU2VjdXJpdHkgU2VydmljZXMgSW5jMS0wKwYDVQQDEyRY +UmFtcCBHbG9iYWwgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkwHhcNMDQxMTAxMTcx +NDA0WhcNMzUwMTAxMDUzNzE5WjCBgjELMAkGA1UEBhMCVVMxHjAcBgNVBAsTFXd3 +dy54cmFtcHNlY3VyaXR5LmNvbTEkMCIGA1UEChMbWFJhbXAgU2VjdXJpdHkgU2Vy +dmljZXMgSW5jMS0wKwYDVQQDEyRYUmFtcCBHbG9iYWwgQ2VydGlmaWNhdGlvbiBB +dXRob3JpdHkwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCYJB69FbS6 +38eMpSe2OAtp87ZOqCwuIR1cRN8hXX4jdP5efrRKt6atH67gBhbim1vZZ3RrXYCP +KZ2GG9mcDZhtdhAoWORlsH9KmHmf4MMxfoArtYzAQDsRhtDLooY2YKTVMIJt2W7Q +DxIEM5dfT2Fa8OT5kavnHTu86M/0ay00fOJIYRyO82FEzG+gSqmUsE3a56k0enI4 +qEHMPJQRfevIpoy3hsvKMzvZPTeL+3o+hiznc9cKV6xkmxnr9A8ECIqsAxcZZPRa +JSKNNCyy9mgdEm3Tih4U2sSPpuIjhdV6Db1q4Ons7Be7QhtnqiXtRYMh/MHJfNVi +PvryxS3T/dRlAgMBAAGjgZ8wgZwwEwYJKwYBBAGCNxQCBAYeBABDAEEwCwYDVR0P +BAQDAgGGMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFMZPoj0GY4QJnM5i5ASs 
+jVy16bYbMDYGA1UdHwQvMC0wK6ApoCeGJWh0dHA6Ly9jcmwueHJhbXBzZWN1cml0 +eS5jb20vWEdDQS5jcmwwEAYJKwYBBAGCNxUBBAMCAQEwDQYJKoZIhvcNAQEFBQAD +ggEBAJEVOQMBG2f7Shz5CmBbodpNl2L5JFMn14JkTpAuw0kbK5rc/Kh4ZzXxHfAR +vbdI4xD2Dd8/0sm2qlWkSLoC295ZLhVbO50WfUfXN+pfTXYSNrsf16GBBEYgoyxt +qZ4Bfj8pzgCT3/3JknOJiWSe5yvkHJEs0rnOfc5vMZnT5r7SHpDwCRR5XCOrTdLa +IR9NmXmd4c8nnxCbHIgNsIpkQTG4DmyQJKSbXHGPurt+HBvbaoAPIbzp26a3QPSy +i6mx5O+aGtA9aZnuqCij4Tyz8LIRnM98QObd50N9otg6tamN8jSZxNQQ4Qb9CYQQ +O+7ETPTsJ3xCwnR8gooJybQDJbw= +-----END CERTIFICATE----- + +# Issuer: O=The Go Daddy Group, Inc. OU=Go Daddy Class 2 Certification Authority +# Subject: O=The Go Daddy Group, Inc. OU=Go Daddy Class 2 Certification Authority +# Label: "Go Daddy Class 2 CA" +# Serial: 0 +# MD5 Fingerprint: 91:de:06:25:ab:da:fd:32:17:0c:bb:25:17:2a:84:67 +# SHA1 Fingerprint: 27:96:ba:e6:3f:18:01:e2:77:26:1b:a0:d7:77:70:02:8f:20:ee:e4 +# SHA256 Fingerprint: c3:84:6b:f2:4b:9e:93:ca:64:27:4c:0e:c6:7c:1e:cc:5e:02:4f:fc:ac:d2:d7:40:19:35:0e:81:fe:54:6a:e4 +-----BEGIN CERTIFICATE----- +MIIEADCCAuigAwIBAgIBADANBgkqhkiG9w0BAQUFADBjMQswCQYDVQQGEwJVUzEh +MB8GA1UEChMYVGhlIEdvIERhZGR5IEdyb3VwLCBJbmMuMTEwLwYDVQQLEyhHbyBE +YWRkeSBDbGFzcyAyIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTA0MDYyOTE3 +MDYyMFoXDTM0MDYyOTE3MDYyMFowYzELMAkGA1UEBhMCVVMxITAfBgNVBAoTGFRo +ZSBHbyBEYWRkeSBHcm91cCwgSW5jLjExMC8GA1UECxMoR28gRGFkZHkgQ2xhc3Mg +MiBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTCCASAwDQYJKoZIhvcNAQEBBQADggEN +ADCCAQgCggEBAN6d1+pXGEmhW+vXX0iG6r7d/+TvZxz0ZWizV3GgXne77ZtJ6XCA +PVYYYwhv2vLM0D9/AlQiVBDYsoHUwHU9S3/Hd8M+eKsaA7Ugay9qK7HFiH7Eux6w +wdhFJ2+qN1j3hybX2C32qRe3H3I2TqYXP2WYktsqbl2i/ojgC95/5Y0V4evLOtXi +EqITLdiOr18SPaAIBQi2XKVlOARFmR6jYGB0xUGlcmIbYsUfb18aQr4CUWWoriMY +avx4A6lNf4DD+qta/KFApMoZFv6yyO9ecw3ud72a9nmYvLEHZ6IVDd2gWMZEewo+ +YihfukEHU1jPEX44dMX4/7VpkI+EdOqXG68CAQOjgcAwgb0wHQYDVR0OBBYEFNLE +sNKR1EwRcbNhyz2h/t2oatTjMIGNBgNVHSMEgYUwgYKAFNLEsNKR1EwRcbNhyz2h +/t2oatTjoWekZTBjMQswCQYDVQQGEwJVUzEhMB8GA1UEChMYVGhlIEdvIERhZGR5 +IEdyb3VwLCBJbmMuMTEwLwYDVQQLEyhHbyBEYWRkeSBDbGFzcyAyIENlcnRpZmlj +YXRpb24gQXV0aG9yaXR5ggEAMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQAD +ggEBADJL87LKPpH8EsahB4yOd6AzBhRckB4Y9wimPQoZ+YeAEW5p5JYXMP80kWNy +OO7MHAGjHZQopDH2esRU1/blMVgDoszOYtuURXO1v0XJJLXVggKtI3lpjbi2Tc7P +TMozI+gciKqdi0FuFskg5YmezTvacPd+mSYgFFQlq25zheabIZ0KbIIOqPjCDPoQ +HmyW74cNxA9hi63ugyuV+I6ShHI56yDqg+2DzZduCLzrTia2cyvk0/ZM/iZx4mER +dEr/VxqHD3VILs9RaRegAhJhldXRQLIQTO7ErBBDpqWeCtWVYpoNz4iCxTIM5Cuf +ReYNnyicsbkqWletNw+vHX/bvZ8= +-----END CERTIFICATE----- + +# Issuer: O=Starfield Technologies, Inc. OU=Starfield Class 2 Certification Authority +# Subject: O=Starfield Technologies, Inc. 
OU=Starfield Class 2 Certification Authority +# Label: "Starfield Class 2 CA" +# Serial: 0 +# MD5 Fingerprint: 32:4a:4b:bb:c8:63:69:9b:be:74:9a:c6:dd:1d:46:24 +# SHA1 Fingerprint: ad:7e:1c:28:b0:64:ef:8f:60:03:40:20:14:c3:d0:e3:37:0e:b5:8a +# SHA256 Fingerprint: 14:65:fa:20:53:97:b8:76:fa:a6:f0:a9:95:8e:55:90:e4:0f:cc:7f:aa:4f:b7:c2:c8:67:75:21:fb:5f:b6:58 +-----BEGIN CERTIFICATE----- +MIIEDzCCAvegAwIBAgIBADANBgkqhkiG9w0BAQUFADBoMQswCQYDVQQGEwJVUzEl +MCMGA1UEChMcU3RhcmZpZWxkIFRlY2hub2xvZ2llcywgSW5jLjEyMDAGA1UECxMp +U3RhcmZpZWxkIENsYXNzIDIgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkwHhcNMDQw +NjI5MTczOTE2WhcNMzQwNjI5MTczOTE2WjBoMQswCQYDVQQGEwJVUzElMCMGA1UE +ChMcU3RhcmZpZWxkIFRlY2hub2xvZ2llcywgSW5jLjEyMDAGA1UECxMpU3RhcmZp +ZWxkIENsYXNzIDIgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkwggEgMA0GCSqGSIb3 +DQEBAQUAA4IBDQAwggEIAoIBAQC3Msj+6XGmBIWtDBFk385N78gDGIc/oav7PKaf +8MOh2tTYbitTkPskpD6E8J7oX+zlJ0T1KKY/e97gKvDIr1MvnsoFAZMej2YcOadN ++lq2cwQlZut3f+dZxkqZJRRU6ybH838Z1TBwj6+wRir/resp7defqgSHo9T5iaU0 +X9tDkYI22WY8sbi5gv2cOj4QyDvvBmVmepsZGD3/cVE8MC5fvj13c7JdBmzDI1aa +K4UmkhynArPkPw2vCHmCuDY96pzTNbO8acr1zJ3o/WSNF4Azbl5KXZnJHoe0nRrA +1W4TNSNe35tfPe/W93bC6j67eA0cQmdrBNj41tpvi/JEoAGrAgEDo4HFMIHCMB0G +A1UdDgQWBBS/X7fRzt0fhvRbVazc1xDCDqmI5zCBkgYDVR0jBIGKMIGHgBS/X7fR +zt0fhvRbVazc1xDCDqmI56FspGowaDELMAkGA1UEBhMCVVMxJTAjBgNVBAoTHFN0 +YXJmaWVsZCBUZWNobm9sb2dpZXMsIEluYy4xMjAwBgNVBAsTKVN0YXJmaWVsZCBD +bGFzcyAyIENlcnRpZmljYXRpb24gQXV0aG9yaXR5ggEAMAwGA1UdEwQFMAMBAf8w +DQYJKoZIhvcNAQEFBQADggEBAAWdP4id0ckaVaGsafPzWdqbAYcaT1epoXkJKtv3 +L7IezMdeatiDh6GX70k1PncGQVhiv45YuApnP+yz3SFmH8lU+nLMPUxA2IGvd56D +eruix/U0F47ZEUD0/CwqTRV/p2JdLiXTAAsgGh1o+Re49L2L7ShZ3U0WixeDyLJl +xy16paq8U4Zt3VekyvggQQto8PT7dL5WXXp59fkdheMtlb71cZBDzI0fmgAKhynp +VSJYACPq4xJDKVtHCN2MQWplBqjlIapBtJUhlbl90TSrE9atvNziPTnNvT51cKEY +WQPJIrSPnNVeKtelttQKbfi3QBFGmh95DmK/D5fs4C8fF5Q= +-----END CERTIFICATE----- + +# Issuer: O=Government Root Certification Authority +# Subject: O=Government Root Certification Authority +# Label: "Taiwan GRCA" +# Serial: 42023070807708724159991140556527066870 +# MD5 Fingerprint: 37:85:44:53:32:45:1f:20:f0:f3:95:e1:25:c4:43:4e +# SHA1 Fingerprint: f4:8b:11:bf:de:ab:be:94:54:20:71:e6:41:de:6b:be:88:2b:40:b9 +# SHA256 Fingerprint: 76:00:29:5e:ef:e8:5b:9e:1f:d6:24:db:76:06:2a:aa:ae:59:81:8a:54:d2:77:4c:d4:c0:b2:c0:11:31:e1:b3 +-----BEGIN CERTIFICATE----- +MIIFcjCCA1qgAwIBAgIQH51ZWtcvwgZEpYAIaeNe9jANBgkqhkiG9w0BAQUFADA/ +MQswCQYDVQQGEwJUVzEwMC4GA1UECgwnR292ZXJubWVudCBSb290IENlcnRpZmlj +YXRpb24gQXV0aG9yaXR5MB4XDTAyMTIwNTEzMjMzM1oXDTMyMTIwNTEzMjMzM1ow +PzELMAkGA1UEBhMCVFcxMDAuBgNVBAoMJ0dvdmVybm1lbnQgUm9vdCBDZXJ0aWZp +Y2F0aW9uIEF1dGhvcml0eTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIB +AJoluOzMonWoe/fOW1mKydGGEghU7Jzy50b2iPN86aXfTEc2pBsBHH8eV4qNw8XR +IePaJD9IK/ufLqGU5ywck9G/GwGHU5nOp/UKIXZ3/6m3xnOUT0b3EEk3+qhZSV1q +gQdW8or5BtD3cCJNtLdBuTK4sfCxw5w/cP1T3YGq2GN49thTbqGsaoQkclSGxtKy +yhwOeYHWtXBiCAEuTk8O1RGvqa/lmr/czIdtJuTJV6L7lvnM4T9TjGxMfptTCAts +F/tnyMKtsc2AtJfcdgEWFelq16TheEfOhtX7MfP6Mb40qij7cEwdScevLJ1tZqa2 +jWR+tSBqnTuBto9AAGdLiYa4zGX+FVPpBMHWXx1E1wovJ5pGfaENda1UhhXcSTvx +ls4Pm6Dso3pdvtUqdULle96ltqqvKKyskKw4t9VoNSZ63Pc78/1Fm9G7Q3hub/FC +VGqY8A2tl+lSXunVanLeavcbYBT0peS2cWeqH+riTcFCQP5nRhc4L0c/cZyu5SHK +YS1tB6iEfC3uUSXxY5Ce/eFXiGvviiNtsea9P63RPZYLhY3Naye7twWb7LuRqQoH +EgKXTiCQ8P8NHuJBO9NAOueNXdpm5AKwB1KYXA6OM5zCppX7VRluTI6uSw+9wThN +Xo+EHWbNxWCWtFJaBYmOlXqYwZE8lSOyDvR5tMl8wUohAgMBAAGjajBoMB0GA1Ud +DgQWBBTMzO/MKWCkO7GStjz6MmKPrCUVOzAMBgNVHRMEBTADAQH/MDkGBGcqBwAE +MTAvMC0CAQAwCQYFKw4DAhoFADAHBgVnKgMAAAQUA5vwIhP/lSg209yewDL7MTqK 
+UWUwDQYJKoZIhvcNAQEFBQADggIBAECASvomyc5eMN1PhnR2WPWus4MzeKR6dBcZ +TulStbngCnRiqmjKeKBMmo4sIy7VahIkv9Ro04rQ2JyftB8M3jh+Vzj8jeJPXgyf +qzvS/3WXy6TjZwj/5cAWtUgBfen5Cv8b5Wppv3ghqMKnI6mGq3ZW6A4M9hPdKmaK +ZEk9GhiHkASfQlK3T8v+R0F2Ne//AHY2RTKbxkaFXeIksB7jSJaYV0eUVXoPQbFE +JPPB/hprv4j9wabak2BegUqZIJxIZhm1AHlUD7gsL0u8qV1bYH+Mh6XgUmMqvtg7 +hUAV/h62ZT/FS9p+tXo1KaMuephgIqP0fSdOLeq0dDzpD6QzDxARvBMB1uUO07+1 +EqLhRSPAzAhuYbeJq4PjJB7mXQfnHyA+z2fI56wwbSdLaG5LKlwCCDTb+HbkZ6Mm +nD+iMsJKxYEYMRBWqoTvLQr/uB930r+lWKBi5NdLkXWNiYCYfm3LU05er/ayl4WX +udpVBrkk7tfGOB5jGxI7leFYrPLfhNVfmS8NVVvmONsuP3LpSIXLuykTjx44Vbnz +ssQwmSNOXfJIoRIM3BKQCZBUkQM8R+XVyWXgt0t97EfTsws+rZ7QdAAO671RrcDe +LMDDav7v3Aun+kbfYNucpllQdSNpc5Oy+fwC00fmcc4QAu4njIT/rEUNE1yDMuAl +pYYsfPQS +-----END CERTIFICATE----- + +# Issuer: CN=DigiCert Assured ID Root CA O=DigiCert Inc OU=www.digicert.com +# Subject: CN=DigiCert Assured ID Root CA O=DigiCert Inc OU=www.digicert.com +# Label: "DigiCert Assured ID Root CA" +# Serial: 17154717934120587862167794914071425081 +# MD5 Fingerprint: 87:ce:0b:7b:2a:0e:49:00:e1:58:71:9b:37:a8:93:72 +# SHA1 Fingerprint: 05:63:b8:63:0d:62:d7:5a:bb:c8:ab:1e:4b:df:b5:a8:99:b2:4d:43 +# SHA256 Fingerprint: 3e:90:99:b5:01:5e:8f:48:6c:00:bc:ea:9d:11:1e:e7:21:fa:ba:35:5a:89:bc:f1:df:69:56:1e:3d:c6:32:5c +-----BEGIN CERTIFICATE----- +MIIDtzCCAp+gAwIBAgIQDOfg5RfYRv6P5WD8G/AwOTANBgkqhkiG9w0BAQUFADBl +MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3 +d3cuZGlnaWNlcnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJv +b3QgQ0EwHhcNMDYxMTEwMDAwMDAwWhcNMzExMTEwMDAwMDAwWjBlMQswCQYDVQQG +EwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNl +cnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJvb3QgQ0EwggEi +MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCtDhXO5EOAXLGH87dg+XESpa7c +JpSIqvTO9SA5KFhgDPiA2qkVlTJhPLWxKISKityfCgyDF3qPkKyK53lTXDGEKvYP +mDI2dsze3Tyoou9q+yHyUmHfnyDXH+Kx2f4YZNISW1/5WBg1vEfNoTb5a3/UsDg+ +wRvDjDPZ2C8Y/igPs6eD1sNuRMBhNZYW/lmci3Zt1/GiSw0r/wty2p5g0I6QNcZ4 +VYcgoc/lbQrISXwxmDNsIumH0DJaoroTghHtORedmTpyoeb6pNnVFzF1roV9Iq4/ +AUaG9ih5yLHa5FcXxH4cDrC0kqZWs72yl+2qp/C3xag/lRbQ/6GW6whfGHdPAgMB +AAGjYzBhMA4GA1UdDwEB/wQEAwIBhjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQW +BBRF66Kv9JLLgjEtUYunpyGd823IDzAfBgNVHSMEGDAWgBRF66Kv9JLLgjEtUYun +pyGd823IDzANBgkqhkiG9w0BAQUFAAOCAQEAog683+Lt8ONyc3pklL/3cmbYMuRC +dWKuh+vy1dneVrOfzM4UKLkNl2BcEkxY5NM9g0lFWJc1aRqoR+pWxnmrEthngYTf +fwk8lOa4JiwgvT2zKIn3X/8i4peEH+ll74fg38FnSbNd67IJKusm7Xi+fT8r87cm +NW1fiQG2SVufAQWbqz0lwcy2f8Lxb4bG+mRo64EtlOtCt/qMHt1i8b5QZ7dsvfPx +H2sMNgcWfzd8qVttevESRmCD1ycEvkvOl77DZypoEd+A5wwzZr8TDRRu838fYxAe ++o0bJW1sj6W3YQGx0qMmoRBxna3iw/nDmVG3KwcIzi7mULKn+gpFL6Lw8g== +-----END CERTIFICATE----- + +# Issuer: CN=DigiCert Global Root CA O=DigiCert Inc OU=www.digicert.com +# Subject: CN=DigiCert Global Root CA O=DigiCert Inc OU=www.digicert.com +# Label: "DigiCert Global Root CA" +# Serial: 10944719598952040374951832963794454346 +# MD5 Fingerprint: 79:e4:a9:84:0d:7d:3a:96:d7:c0:4f:e2:43:4c:89:2e +# SHA1 Fingerprint: a8:98:5d:3a:65:e5:e5:c4:b2:d7:d6:6d:40:c6:dd:2f:b1:9c:54:36 +# SHA256 Fingerprint: 43:48:a0:e9:44:4c:78:cb:26:5e:05:8d:5e:89:44:b4:d8:4f:96:62:bd:26:db:25:7f:89:34:a4:43:c7:01:61 +-----BEGIN CERTIFICATE----- +MIIDrzCCApegAwIBAgIQCDvgVpBCRrGhdWrJWZHHSjANBgkqhkiG9w0BAQUFADBh +MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3 +d3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBD +QTAeFw0wNjExMTAwMDAwMDBaFw0zMTExMTAwMDAwMDBaMGExCzAJBgNVBAYTAlVT +MRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5j 
+b20xIDAeBgNVBAMTF0RpZ2lDZXJ0IEdsb2JhbCBSb290IENBMIIBIjANBgkqhkiG +9w0BAQEFAAOCAQ8AMIIBCgKCAQEA4jvhEXLeqKTTo1eqUKKPC3eQyaKl7hLOllsB +CSDMAZOnTjC3U/dDxGkAV53ijSLdhwZAAIEJzs4bg7/fzTtxRuLWZscFs3YnFo97 +nh6Vfe63SKMI2tavegw5BmV/Sl0fvBf4q77uKNd0f3p4mVmFaG5cIzJLv07A6Fpt +43C/dxC//AH2hdmoRBBYMql1GNXRor5H4idq9Joz+EkIYIvUX7Q6hL+hqkpMfT7P +T19sdl6gSzeRntwi5m3OFBqOasv+zbMUZBfHWymeMr/y7vrTC0LUq7dBMtoM1O/4 +gdW7jVg/tRvoSSiicNoxBN33shbyTApOB6jtSj1etX+jkMOvJwIDAQABo2MwYTAO +BgNVHQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUA95QNVbR +TLtm8KPiGxvDl7I90VUwHwYDVR0jBBgwFoAUA95QNVbRTLtm8KPiGxvDl7I90VUw +DQYJKoZIhvcNAQEFBQADggEBAMucN6pIExIK+t1EnE9SsPTfrgT1eXkIoyQY/Esr +hMAtudXH/vTBH1jLuG2cenTnmCmrEbXjcKChzUyImZOMkXDiqw8cvpOp/2PV5Adg +06O/nVsJ8dWO41P0jmP6P6fbtGbfYmbW0W5BjfIttep3Sp+dWOIrWcBAI+0tKIJF +PnlUkiaY4IBIqDfv8NZ5YBberOgOzW6sRBc4L0na4UU+Krk2U886UAb3LujEV0ls +YSEY1QSteDwsOoBrp+uvFRTp2InBuThs4pFsiv9kuXclVzDAGySj4dzp30d8tbQk +CAUw7C29C79Fv1C5qfPrmAESrciIxpg0X40KPMbp1ZWVbd4= +-----END CERTIFICATE----- + +# Issuer: CN=DigiCert High Assurance EV Root CA O=DigiCert Inc OU=www.digicert.com +# Subject: CN=DigiCert High Assurance EV Root CA O=DigiCert Inc OU=www.digicert.com +# Label: "DigiCert High Assurance EV Root CA" +# Serial: 3553400076410547919724730734378100087 +# MD5 Fingerprint: d4:74:de:57:5c:39:b2:d3:9c:85:83:c5:c0:65:49:8a +# SHA1 Fingerprint: 5f:b7:ee:06:33:e2:59:db:ad:0c:4c:9a:e6:d3:8f:1a:61:c7:dc:25 +# SHA256 Fingerprint: 74:31:e5:f4:c3:c1:ce:46:90:77:4f:0b:61:e0:54:40:88:3b:a9:a0:1e:d0:0b:a6:ab:d7:80:6e:d3:b1:18:cf +-----BEGIN CERTIFICATE----- +MIIDxTCCAq2gAwIBAgIQAqxcJmoLQJuPC3nyrkYldzANBgkqhkiG9w0BAQUFADBs +MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3 +d3cuZGlnaWNlcnQuY29tMSswKQYDVQQDEyJEaWdpQ2VydCBIaWdoIEFzc3VyYW5j +ZSBFViBSb290IENBMB4XDTA2MTExMDAwMDAwMFoXDTMxMTExMDAwMDAwMFowbDEL +MAkGA1UEBhMCVVMxFTATBgNVBAoTDERpZ2lDZXJ0IEluYzEZMBcGA1UECxMQd3d3 +LmRpZ2ljZXJ0LmNvbTErMCkGA1UEAxMiRGlnaUNlcnQgSGlnaCBBc3N1cmFuY2Ug +RVYgUm9vdCBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMbM5XPm ++9S75S0tMqbf5YE/yc0lSbZxKsPVlDRnogocsF9ppkCxxLeyj9CYpKlBWTrT3JTW +PNt0OKRKzE0lgvdKpVMSOO7zSW1xkX5jtqumX8OkhPhPYlG++MXs2ziS4wblCJEM +xChBVfvLWokVfnHoNb9Ncgk9vjo4UFt3MRuNs8ckRZqnrG0AFFoEt7oT61EKmEFB +Ik5lYYeBQVCmeVyJ3hlKV9Uu5l0cUyx+mM0aBhakaHPQNAQTXKFx01p8VdteZOE3 +hzBWBOURtCmAEvF5OYiiAhF8J2a3iLd48soKqDirCmTCv2ZdlYTBoSUeh10aUAsg +EsxBu24LUTi4S8sCAwEAAaNjMGEwDgYDVR0PAQH/BAQDAgGGMA8GA1UdEwEB/wQF +MAMBAf8wHQYDVR0OBBYEFLE+w2kD+L9HAdSYJhoIAu9jZCvDMB8GA1UdIwQYMBaA +FLE+w2kD+L9HAdSYJhoIAu9jZCvDMA0GCSqGSIb3DQEBBQUAA4IBAQAcGgaX3Nec +nzyIZgYIVyHbIUf4KmeqvxgydkAQV8GK83rZEWWONfqe/EW1ntlMMUu4kehDLI6z +eM7b41N5cdblIZQB2lWHmiRk9opmzN6cN82oNLFpmyPInngiK3BD41VHMWEZ71jF +hS9OMPagMRYjyOfiZRYzy78aG6A9+MpeizGLYAiJLQwGXFK3xPkKmNEVX58Svnw2 +Yzi9RKR/5CYrCsSXaQ3pjOLAEFe4yHYSkVXySGnYvCoCWw9E1CAx2/S6cCZdkGCe +vEsXCS+0yx5DaMkHJ8HSXPfqIbloEpw8nL+e/IBcm2PN7EeqJSdnoDfzAIJ9VNep ++OkuE6N36B9K +-----END CERTIFICATE----- + +# Issuer: CN=DST Root CA X3 O=Digital Signature Trust Co. +# Subject: CN=DST Root CA X3 O=Digital Signature Trust Co. 
+# Label: "DST Root CA X3" +# Serial: 91299735575339953335919266965803778155 +# MD5 Fingerprint: 41:03:52:dc:0f:f7:50:1b:16:f0:02:8e:ba:6f:45:c5 +# SHA1 Fingerprint: da:c9:02:4f:54:d8:f6:df:94:93:5f:b1:73:26:38:ca:6a:d7:7c:13 +# SHA256 Fingerprint: 06:87:26:03:31:a7:24:03:d9:09:f1:05:e6:9b:cf:0d:32:e1:bd:24:93:ff:c6:d9:20:6d:11:bc:d6:77:07:39 +-----BEGIN CERTIFICATE----- +MIIDSjCCAjKgAwIBAgIQRK+wgNajJ7qJMDmGLvhAazANBgkqhkiG9w0BAQUFADA/ +MSQwIgYDVQQKExtEaWdpdGFsIFNpZ25hdHVyZSBUcnVzdCBDby4xFzAVBgNVBAMT +DkRTVCBSb290IENBIFgzMB4XDTAwMDkzMDIxMTIxOVoXDTIxMDkzMDE0MDExNVow +PzEkMCIGA1UEChMbRGlnaXRhbCBTaWduYXR1cmUgVHJ1c3QgQ28uMRcwFQYDVQQD +Ew5EU1QgUm9vdCBDQSBYMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEB +AN+v6ZdQCINXtMxiZfaQguzH0yxrMMpb7NnDfcdAwRgUi+DoM3ZJKuM/IUmTrE4O +rz5Iy2Xu/NMhD2XSKtkyj4zl93ewEnu1lcCJo6m67XMuegwGMoOifooUMM0RoOEq +OLl5CjH9UL2AZd+3UWODyOKIYepLYYHsUmu5ouJLGiifSKOeDNoJjj4XLh7dIN9b +xiqKqy69cK3FCxolkHRyxXtqqzTWMIn/5WgTe1QLyNau7Fqckh49ZLOMxt+/yUFw +7BZy1SbsOFU5Q9D8/RhcQPGX69Wam40dutolucbY38EVAjqr2m7xPi71XAicPNaD +aeQQmxkqtilX4+U9m5/wAl0CAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNV +HQ8BAf8EBAMCAQYwHQYDVR0OBBYEFMSnsaR7LHH62+FLkHX/xBVghYkQMA0GCSqG +SIb3DQEBBQUAA4IBAQCjGiybFwBcqR7uKGY3Or+Dxz9LwwmglSBd49lZRNI+DT69 +ikugdB/OEIKcdBodfpga3csTS7MgROSR6cz8faXbauX+5v3gTt23ADq1cEmv8uXr +AvHRAosZy5Q6XkjEGB5YGV8eAlrwDPGxrancWYaLbumR9YbK+rlmM6pZW87ipxZz +R8srzJmwN0jP41ZL9c8PDHIyh8bwRLtTcm1D9SZImlJnt1ir/md2cXjbDaJWFBM5 +JDGFoqgCWjBH4d1QB7wCCZAA62RjYJsWvIjJEubSfZGL+T0yjWW06XyxV3bqxbYo +Ob8VZRzI9neWagqNdwvYkQsEjgfbKbYK7p2CNTUQ +-----END CERTIFICATE----- + +# Issuer: CN=SwissSign Gold CA - G2 O=SwissSign AG +# Subject: CN=SwissSign Gold CA - G2 O=SwissSign AG +# Label: "SwissSign Gold CA - G2" +# Serial: 13492815561806991280 +# MD5 Fingerprint: 24:77:d9:a8:91:d1:3b:fa:88:2d:c2:ff:f8:cd:33:93 +# SHA1 Fingerprint: d8:c5:38:8a:b7:30:1b:1b:6e:d4:7a:e6:45:25:3a:6f:9f:1a:27:61 +# SHA256 Fingerprint: 62:dd:0b:e9:b9:f5:0a:16:3e:a0:f8:e7:5c:05:3b:1e:ca:57:ea:55:c8:68:8f:64:7c:68:81:f2:c8:35:7b:95 +-----BEGIN CERTIFICATE----- +MIIFujCCA6KgAwIBAgIJALtAHEP1Xk+wMA0GCSqGSIb3DQEBBQUAMEUxCzAJBgNV +BAYTAkNIMRUwEwYDVQQKEwxTd2lzc1NpZ24gQUcxHzAdBgNVBAMTFlN3aXNzU2ln +biBHb2xkIENBIC0gRzIwHhcNMDYxMDI1MDgzMDM1WhcNMzYxMDI1MDgzMDM1WjBF +MQswCQYDVQQGEwJDSDEVMBMGA1UEChMMU3dpc3NTaWduIEFHMR8wHQYDVQQDExZT +d2lzc1NpZ24gR29sZCBDQSAtIEcyMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIIC +CgKCAgEAr+TufoskDhJuqVAtFkQ7kpJcyrhdhJJCEyq8ZVeCQD5XJM1QiyUqt2/8 +76LQwB8CJEoTlo8jE+YoWACjR8cGp4QjK7u9lit/VcyLwVcfDmJlD909Vopz2q5+ +bbqBHH5CjCA12UNNhPqE21Is8w4ndwtrvxEvcnifLtg+5hg3Wipy+dpikJKVyh+c +6bM8K8vzARO/Ws/BtQpgvd21mWRTuKCWs2/iJneRjOBiEAKfNA+k1ZIzUd6+jbqE +emA8atufK+ze3gE/bk3lUIbLtK/tREDFylqM2tIrfKjuvqblCqoOpd8FUrdVxyJd +MmqXl2MT28nbeTZ7hTpKxVKJ+STnnXepgv9VHKVxaSvRAiTysybUa9oEVeXBCsdt +MDeQKuSeFDNeFhdVxVu1yzSJkvGdJo+hB9TGsnhQ2wwMC3wLjEHXuendjIj3o02y +MszYF9rNt85mndT9Xv+9lz4pded+p2JYryU0pUHHPbwNUMoDAw8IWh+Vc3hiv69y +FGkOpeUDDniOJihC8AcLYiAQZzlG+qkDzAQ4embvIIO1jEpWjpEA/I5cgt6IoMPi +aG59je883WX0XaxR7ySArqpWl2/5rX3aYT+YdzylkbYcjCbaZaIJbcHiVOO5ykxM +gI93e2CaHt+28kgeDrpOVG2Y4OGiGqJ3UM/EY5LsRxmd6+ZrzsECAwEAAaOBrDCB +qTAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUWyV7 +lqRlUX64OfPAeGZe6Drn8O4wHwYDVR0jBBgwFoAUWyV7lqRlUX64OfPAeGZe6Drn +8O4wRgYDVR0gBD8wPTA7BglghXQBWQECAQEwLjAsBggrBgEFBQcCARYgaHR0cDov +L3JlcG9zaXRvcnkuc3dpc3NzaWduLmNvbS8wDQYJKoZIhvcNAQEFBQADggIBACe6 +45R88a7A3hfm5djV9VSwg/S7zV4Fe0+fdWavPOhWfvxyeDgD2StiGwC5+OlgzczO +UYrHUDFu4Up+GC9pWbY9ZIEr44OE5iKHjn3g7gKZYbge9LgriBIWhMIxkziWMaa5 +O1M/wySTVltpkuzFwbs4AOPsF6m43Md8AYOfMke6UiI0HTJ6CVanfCU2qT1L2sCC 
+bwq7EsiHSycR+R4tx5M/nttfJmtS2S6K8RTGRI0Vqbe/vd6mGu6uLftIdxf+u+yv +GPUqUfA5hJeVbG4bwyvEdGB5JbAKJ9/fXtI5z0V9QkvfsywexcZdylU6oJxpmo/a +77KwPJ+HbBIrZXAVUjEaJM9vMSNQH4xPjyPDdEFjHFWoFN0+4FFQz/EbMFYOkrCC +hdiDyyJkvC24JdVUorgG6q2SpCSgwYa1ShNqR88uC1aVVMvOmttqtKay20EIhid3 +92qgQmwLOM7XdVAyksLfKzAiSNDVQTglXaTpXZ/GlHXQRf0wl0OPkKsKx4ZzYEpp +Ld6leNcG2mqeSz53OiATIgHQv2ieY2BrNU0LbbqhPcCT4H8js1WtciVORvnSFu+w +ZMEBnunKoGqYDs/YYPIvSbjkQuE4NRb0yG5P94FW6LqjviOvrv1vA+ACOzB2+htt +Qc8Bsem4yWb02ybzOqR08kkkW8mw0FfB+j564ZfJ +-----END CERTIFICATE----- + +# Issuer: CN=SwissSign Silver CA - G2 O=SwissSign AG +# Subject: CN=SwissSign Silver CA - G2 O=SwissSign AG +# Label: "SwissSign Silver CA - G2" +# Serial: 5700383053117599563 +# MD5 Fingerprint: e0:06:a1:c9:7d:cf:c9:fc:0d:c0:56:75:96:d8:62:13 +# SHA1 Fingerprint: 9b:aa:e5:9f:56:ee:21:cb:43:5a:be:25:93:df:a7:f0:40:d1:1d:cb +# SHA256 Fingerprint: be:6c:4d:a2:bb:b9:ba:59:b6:f3:93:97:68:37:42:46:c3:c0:05:99:3f:a9:8f:02:0d:1d:ed:be:d4:8a:81:d5 +-----BEGIN CERTIFICATE----- +MIIFvTCCA6WgAwIBAgIITxvUL1S7L0swDQYJKoZIhvcNAQEFBQAwRzELMAkGA1UE +BhMCQ0gxFTATBgNVBAoTDFN3aXNzU2lnbiBBRzEhMB8GA1UEAxMYU3dpc3NTaWdu +IFNpbHZlciBDQSAtIEcyMB4XDTA2MTAyNTA4MzI0NloXDTM2MTAyNTA4MzI0Nlow +RzELMAkGA1UEBhMCQ0gxFTATBgNVBAoTDFN3aXNzU2lnbiBBRzEhMB8GA1UEAxMY +U3dpc3NTaWduIFNpbHZlciBDQSAtIEcyMIICIjANBgkqhkiG9w0BAQEFAAOCAg8A +MIICCgKCAgEAxPGHf9N4Mfc4yfjDmUO8x/e8N+dOcbpLj6VzHVxumK4DV644N0Mv +Fz0fyM5oEMF4rhkDKxD6LHmD9ui5aLlV8gREpzn5/ASLHvGiTSf5YXu6t+WiE7br +YT7QbNHm+/pe7R20nqA1W6GSy/BJkv6FCgU+5tkL4k+73JU3/JHpMjUi0R86TieF +nbAVlDLaYQ1HTWBCrpJH6INaUFjpiou5XaHc3ZlKHzZnu0jkg7Y360g6rw9njxcH +6ATK72oxh9TAtvmUcXtnZLi2kUpCe2UuMGoM9ZDulebyzYLs2aFK7PayS+VFheZt +eJMELpyCbTapxDFkH4aDCyr0NQp4yVXPQbBH6TCfmb5hqAaEuSh6XzjZG6k4sIN/ +c8HDO0gqgg8hm7jMqDXDhBuDsz6+pJVpATqJAHgE2cn0mRmrVn5bi4Y5FZGkECwJ +MoBgs5PAKrYYC51+jUnyEEp/+dVGLxmSo5mnJqy7jDzmDrxHB9xzUfFwZC8I+bRH +HTBsROopN4WSaGa8gzj+ezku01DwH/teYLappvonQfGbGHLy9YR0SslnxFSuSGTf +jNFusB3hB48IHpmccelM2KX3RxIfdNFRnobzwqIjQAtz20um53MGjMGg6cFZrEb6 +5i/4z3GcRm25xBWNOHkDRUjvxF3XCO6HOSKGsg0PWEP3calILv3q1h8CAwEAAaOB +rDCBqTAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQU +F6DNweRBtjpbO8tFnb0cwpj6hlgwHwYDVR0jBBgwFoAUF6DNweRBtjpbO8tFnb0c +wpj6hlgwRgYDVR0gBD8wPTA7BglghXQBWQEDAQEwLjAsBggrBgEFBQcCARYgaHR0 +cDovL3JlcG9zaXRvcnkuc3dpc3NzaWduLmNvbS8wDQYJKoZIhvcNAQEFBQADggIB +AHPGgeAn0i0P4JUw4ppBf1AsX19iYamGamkYDHRJ1l2E6kFSGG9YrVBWIGrGvShp +WJHckRE1qTodvBqlYJ7YH39FkWnZfrt4csEGDyrOj4VwYaygzQu4OSlWhDJOhrs9 +xCrZ1x9y7v5RoSJBsXECYxqCsGKrXlcSH9/L3XWgwF15kIwb4FDm3jH+mHtwX6WQ +2K34ArZv02DdQEsixT2tOnqfGhpHkXkzuoLcMmkDlm4fS/Bx/uNncqCxv1yL5PqZ +IseEuRuNI5c/7SXgz2W79WEE790eslpBIlqhn10s6FvJbakMDHiqYMZWjwFaDGi8 +aRl5xB9+lwW/xekkUV7U1UtT7dkjWjYDZaPBA61BMPNGG4WQr2W11bHkFlt4dR2X +em1ZqSqPe97Dh4kQmUlzeMg9vVE1dCrV8X5pGyq7O70luJpaPXJhkGaH7gzWTdQR +dAtq/gsD/KNVV4n+SsuuWxcFyPKNIzFTONItaj+CuY0IavdeQXRuwxF+B6wpYJE/ +OMpXEA29MC/HpeZBoNquBYeaoKRlbEwJDIm6uNO5wJOKMPqN5ZprFQFOZ6raYlY+ +hAhm0sQ2fac+EPyI4NSA5QC9qvNOBqN6avlicuMJT+ubDgEj8Z+7fNzcbBGXJbLy +tGMU0gYqZ4yD9c7qB9iaah7s5Aq7KkzrCWA5zspi2C5u +-----END CERTIFICATE----- + +# Issuer: CN=GeoTrust Primary Certification Authority O=GeoTrust Inc. +# Subject: CN=GeoTrust Primary Certification Authority O=GeoTrust Inc. 
+# Label: "GeoTrust Primary Certification Authority" +# Serial: 32798226551256963324313806436981982369 +# MD5 Fingerprint: 02:26:c3:01:5e:08:30:37:43:a9:d0:7d:cf:37:e6:bf +# SHA1 Fingerprint: 32:3c:11:8e:1b:f7:b8:b6:52:54:e2:e2:10:0d:d6:02:90:37:f0:96 +# SHA256 Fingerprint: 37:d5:10:06:c5:12:ea:ab:62:64:21:f1:ec:8c:92:01:3f:c5:f8:2a:e9:8e:e5:33:eb:46:19:b8:de:b4:d0:6c +-----BEGIN CERTIFICATE----- +MIIDfDCCAmSgAwIBAgIQGKy1av1pthU6Y2yv2vrEoTANBgkqhkiG9w0BAQUFADBY +MQswCQYDVQQGEwJVUzEWMBQGA1UEChMNR2VvVHJ1c3QgSW5jLjExMC8GA1UEAxMo +R2VvVHJ1c3QgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0wNjEx +MjcwMDAwMDBaFw0zNjA3MTYyMzU5NTlaMFgxCzAJBgNVBAYTAlVTMRYwFAYDVQQK +Ew1HZW9UcnVzdCBJbmMuMTEwLwYDVQQDEyhHZW9UcnVzdCBQcmltYXJ5IENlcnRp +ZmljYXRpb24gQXV0aG9yaXR5MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC +AQEAvrgVe//UfH1nrYNke8hCUy3f9oQIIGHWAVlqnEQRr+92/ZV+zmEwu3qDXwK9 +AWbK7hWNb6EwnL2hhZ6UOvNWiAAxz9juapYC2e0DjPt1befquFUWBRaa9OBesYjA +ZIVcFU2Ix7e64HXprQU9nceJSOC7KMgD4TCTZF5SwFlwIjVXiIrxlQqD17wxcwE0 +7e9GceBrAqg1cmuXm2bgyxx5X9gaBGgeRwLmnWDiNpcB3841kt++Z8dtd1k7j53W +kBWUvEI0EME5+bEnPn7WinXFsq+W06Lem+SYvn3h6YGttm/81w7a4DSwDRp35+MI +mO9Y+pyEtzavwt+s0vQQBnBxNQIDAQABo0IwQDAPBgNVHRMBAf8EBTADAQH/MA4G +A1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQULNVQQZcVi/CPNmFbSvtr2ZnJM5IwDQYJ +KoZIhvcNAQEFBQADggEBAFpwfyzdtzRP9YZRqSa+S7iq8XEN3GHHoOo0Hnp3DwQ1 +6CePbJC/kRYkRj5KTs4rFtULUh38H2eiAkUxT87z+gOneZ1TatnaYzr4gNfTmeGl +4b7UVXGYNTq+k+qurUKykG/g/CFNNWMziUnWm07Kx+dOCQD32sfvmWKZd7aVIl6K +oKv0uHiYyjgZmclynnjNS6yvGaBzEi38wkG6gZHaFloxt/m0cYASSJlyc1pZU8Fj +UjPtp8nSOQJw+uCxQmYpqptR7TBUIhRf2asdweSU8Pj1K/fqynhG1riR/aYNKxoU +AT6A8EKglQdebc3MS6RFjasS6LPeWuWgfOgPIh1a6Vk= +-----END CERTIFICATE----- + +# Issuer: CN=thawte Primary Root CA O=thawte, Inc. OU=Certification Services Division/(c) 2006 thawte, Inc. - For authorized use only +# Subject: CN=thawte Primary Root CA O=thawte, Inc. OU=Certification Services Division/(c) 2006 thawte, Inc. 
- For authorized use only +# Label: "thawte Primary Root CA" +# Serial: 69529181992039203566298953787712940909 +# MD5 Fingerprint: 8c:ca:dc:0b:22:ce:f5:be:72:ac:41:1a:11:a8:d8:12 +# SHA1 Fingerprint: 91:c6:d6:ee:3e:8a:c8:63:84:e5:48:c2:99:29:5c:75:6c:81:7b:81 +# SHA256 Fingerprint: 8d:72:2f:81:a9:c1:13:c0:79:1d:f1:36:a2:96:6d:b2:6c:95:0a:97:1d:b4:6b:41:99:f4:ea:54:b7:8b:fb:9f +-----BEGIN CERTIFICATE----- +MIIEIDCCAwigAwIBAgIQNE7VVyDV7exJ9C/ON9srbTANBgkqhkiG9w0BAQUFADCB +qTELMAkGA1UEBhMCVVMxFTATBgNVBAoTDHRoYXd0ZSwgSW5jLjEoMCYGA1UECxMf +Q2VydGlmaWNhdGlvbiBTZXJ2aWNlcyBEaXZpc2lvbjE4MDYGA1UECxMvKGMpIDIw +MDYgdGhhd3RlLCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxHzAdBgNV +BAMTFnRoYXd0ZSBQcmltYXJ5IFJvb3QgQ0EwHhcNMDYxMTE3MDAwMDAwWhcNMzYw +NzE2MjM1OTU5WjCBqTELMAkGA1UEBhMCVVMxFTATBgNVBAoTDHRoYXd0ZSwgSW5j +LjEoMCYGA1UECxMfQ2VydGlmaWNhdGlvbiBTZXJ2aWNlcyBEaXZpc2lvbjE4MDYG +A1UECxMvKGMpIDIwMDYgdGhhd3RlLCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNl +IG9ubHkxHzAdBgNVBAMTFnRoYXd0ZSBQcmltYXJ5IFJvb3QgQ0EwggEiMA0GCSqG +SIb3DQEBAQUAA4IBDwAwggEKAoIBAQCsoPD7gFnUnMekz52hWXMJEEUMDSxuaPFs +W0hoSVk3/AszGcJ3f8wQLZU0HObrTQmnHNK4yZc2AreJ1CRfBsDMRJSUjQJib+ta +3RGNKJpchJAQeg29dGYvajig4tVUROsdB58Hum/u6f1OCyn1PoSgAfGcq/gcfomk +6KHYcWUNo1F77rzSImANuVud37r8UVsLr5iy6S7pBOhih94ryNdOwUxkHt3Ph1i6 +Sk/KaAcdHJ1KxtUvkcx8cXIcxcBn6zL9yZJclNqFwJu/U30rCfSMnZEfl2pSy94J +NqR32HuHUETVPm4pafs5SSYeCaWAe0At6+gnhcn+Yf1+5nyXHdWdAgMBAAGjQjBA +MA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgEGMB0GA1UdDgQWBBR7W0XP +r87Lev0xkhpqtvNG61dIUDANBgkqhkiG9w0BAQUFAAOCAQEAeRHAS7ORtvzw6WfU +DW5FvlXok9LOAz/t2iWwHVfLHjp2oEzsUHboZHIMpKnxuIvW1oeEuzLlQRHAd9mz +YJ3rG9XRbkREqaYB7FViHXe4XI5ISXycO1cRrK1zN44veFyQaEfZYGDm/Ac9IiAX +xPcW6cTYcvnIc3zfFi8VqT79aie2oetaupgf1eNNZAqdE8hhuvU5HIe6uL17In/2 +/qxAeeWsEG89jxt5dovEN7MhGITlNgDrYyCZuen+MwS7QcjBAvlEYyCegc5C09Y/ +LHbTY5xZ3Y+m4Q6gLkH3LpVHz7z9M/P2C2F+fpErgUfCJzDupxBdN49cOSvkBPB7 +jVaMaA== +-----END CERTIFICATE----- + +# Issuer: CN=VeriSign Class 3 Public Primary Certification Authority - G5 O=VeriSign, Inc. OU=VeriSign Trust Network/(c) 2006 VeriSign, Inc. - For authorized use only +# Subject: CN=VeriSign Class 3 Public Primary Certification Authority - G5 O=VeriSign, Inc. OU=VeriSign Trust Network/(c) 2006 VeriSign, Inc. 
- For authorized use only +# Label: "VeriSign Class 3 Public Primary Certification Authority - G5" +# Serial: 33037644167568058970164719475676101450 +# MD5 Fingerprint: cb:17:e4:31:67:3e:e2:09:fe:45:57:93:f3:0a:fa:1c +# SHA1 Fingerprint: 4e:b6:d5:78:49:9b:1c:cf:5f:58:1e:ad:56:be:3d:9b:67:44:a5:e5 +# SHA256 Fingerprint: 9a:cf:ab:7e:43:c8:d8:80:d0:6b:26:2a:94:de:ee:e4:b4:65:99:89:c3:d0:ca:f1:9b:af:64:05:e4:1a:b7:df +-----BEGIN CERTIFICATE----- +MIIE0zCCA7ugAwIBAgIQGNrRniZ96LtKIVjNzGs7SjANBgkqhkiG9w0BAQUFADCB +yjELMAkGA1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMR8wHQYDVQQL +ExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMTowOAYDVQQLEzEoYykgMjAwNiBWZXJp +U2lnbiwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5MUUwQwYDVQQDEzxW +ZXJpU2lnbiBDbGFzcyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0 +aG9yaXR5IC0gRzUwHhcNMDYxMTA4MDAwMDAwWhcNMzYwNzE2MjM1OTU5WjCByjEL +MAkGA1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMR8wHQYDVQQLExZW +ZXJpU2lnbiBUcnVzdCBOZXR3b3JrMTowOAYDVQQLEzEoYykgMjAwNiBWZXJpU2ln +biwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5MUUwQwYDVQQDEzxWZXJp +U2lnbiBDbGFzcyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9y +aXR5IC0gRzUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCvJAgIKXo1 +nmAMqudLO07cfLw8RRy7K+D+KQL5VwijZIUVJ/XxrcgxiV0i6CqqpkKzj/i5Vbex +t0uz/o9+B1fs70PbZmIVYc9gDaTY3vjgw2IIPVQT60nKWVSFJuUrjxuf6/WhkcIz +SdhDY2pSS9KP6HBRTdGJaXvHcPaz3BJ023tdS1bTlr8Vd6Gw9KIl8q8ckmcY5fQG +BO+QueQA5N06tRn/Arr0PO7gi+s3i+z016zy9vA9r911kTMZHRxAy3QkGSGT2RT+ +rCpSx4/VBEnkjWNHiDxpg8v+R70rfk/Fla4OndTRQ8Bnc+MUCH7lP59zuDMKz10/ +NIeWiu5T6CUVAgMBAAGjgbIwga8wDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8E +BAMCAQYwbQYIKwYBBQUHAQwEYTBfoV2gWzBZMFcwVRYJaW1hZ2UvZ2lmMCEwHzAH +BgUrDgMCGgQUj+XTGoasjY5rw8+AatRIGCx7GS4wJRYjaHR0cDovL2xvZ28udmVy +aXNpZ24uY29tL3ZzbG9nby5naWYwHQYDVR0OBBYEFH/TZafC3ey78DAJ80M5+gKv +MzEzMA0GCSqGSIb3DQEBBQUAA4IBAQCTJEowX2LP2BqYLz3q3JktvXf2pXkiOOzE +p6B4Eq1iDkVwZMXnl2YtmAl+X6/WzChl8gGqCBpH3vn5fJJaCGkgDdk+bW48DW7Y +5gaRQBi5+MHt39tBquCWIMnNZBU4gcmU7qKEKQsTb47bDN0lAtukixlE0kF6BWlK +WE9gyn6CagsCqiUXObXbf+eEZSqVir2G3l6BFoMtEMze/aiCKm0oHw0LxOXnGiYZ +4fQRbxC1lfznQgUy286dUV4otp6F01vvpX1FQHKOtw5rDgb7MzVIcbidJ4vEZV8N +hnacRHr2lVz2XTIIM6RUthg/aFzyQkqFOFSDX9HoLPKsEdao7WNq +-----END CERTIFICATE----- + +# Issuer: CN=SecureTrust CA O=SecureTrust Corporation +# Subject: CN=SecureTrust CA O=SecureTrust Corporation +# Label: "SecureTrust CA" +# Serial: 17199774589125277788362757014266862032 +# MD5 Fingerprint: dc:32:c3:a7:6d:25:57:c7:68:09:9d:ea:2d:a9:a2:d1 +# SHA1 Fingerprint: 87:82:c6:c3:04:35:3b:cf:d2:96:92:d2:59:3e:7d:44:d9:34:ff:11 +# SHA256 Fingerprint: f1:c1:b5:0a:e5:a2:0d:d8:03:0e:c9:f6:bc:24:82:3d:d3:67:b5:25:57:59:b4:e7:1b:61:fc:e9:f7:37:5d:73 +-----BEGIN CERTIFICATE----- +MIIDuDCCAqCgAwIBAgIQDPCOXAgWpa1Cf/DrJxhZ0DANBgkqhkiG9w0BAQUFADBI +MQswCQYDVQQGEwJVUzEgMB4GA1UEChMXU2VjdXJlVHJ1c3QgQ29ycG9yYXRpb24x +FzAVBgNVBAMTDlNlY3VyZVRydXN0IENBMB4XDTA2MTEwNzE5MzExOFoXDTI5MTIz +MTE5NDA1NVowSDELMAkGA1UEBhMCVVMxIDAeBgNVBAoTF1NlY3VyZVRydXN0IENv +cnBvcmF0aW9uMRcwFQYDVQQDEw5TZWN1cmVUcnVzdCBDQTCCASIwDQYJKoZIhvcN +AQEBBQADggEPADCCAQoCggEBAKukgeWVzfX2FI7CT8rU4niVWJxB4Q2ZQCQXOZEz +Zum+4YOvYlyJ0fwkW2Gz4BERQRwdbvC4u/jep4G6pkjGnx29vo6pQT64lO0pGtSO +0gMdA+9tDWccV9cGrcrI9f4Or2YlSASWC12juhbDCE/RRvgUXPLIXgGZbf2IzIao +wW8xQmxSPmjL8xk037uHGFaAJsTQ3MBv396gwpEWoGQRS0S8Hvbn+mPeZqx2pHGj +7DaUaHp3pLHnDi+BeuK1cobvomuL8A/b01k/unK8RCSc43Oz969XL0Imnal0ugBS +8kvNU3xHCzaFDmapCJcWNFfBZveA4+1wVMeT4C4oFVmHursCAwEAAaOBnTCBmjAT +BgkrBgEEAYI3FAIEBh4EAEMAQTALBgNVHQ8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB +/zAdBgNVHQ4EFgQUQjK2FvoE/f5dS3rD/fdMQB1aQ68wNAYDVR0fBC0wKzApoCeg 
+JYYjaHR0cDovL2NybC5zZWN1cmV0cnVzdC5jb20vU1RDQS5jcmwwEAYJKwYBBAGC +NxUBBAMCAQAwDQYJKoZIhvcNAQEFBQADggEBADDtT0rhWDpSclu1pqNlGKa7UTt3 +6Z3q059c4EVlew3KW+JwULKUBRSuSceNQQcSc5R+DCMh/bwQf2AQWnL1mA6s7Ll/ +3XpvXdMc9P+IBWlCqQVxyLesJugutIxq/3HcuLHfmbx8IVQr5Fiiu1cprp6poxkm +D5kuCLDv/WnPmRoJjeOnnyvJNjR7JLN4TJUXpAYmHrZkUjZfYGfZnMUFdAvnZyPS +CPyI6a6Lf+Ew9Dd+/cYy2i2eRDAwbO4H3tI0/NL/QPZL9GZGBlSm8jIKYyYwa5vR +3ItHuuG51WLQoqD0ZwV4KWMabwTW+MZMo5qxN7SN5ShLHZ4swrhovO0C7jE= +-----END CERTIFICATE----- + +# Issuer: CN=Secure Global CA O=SecureTrust Corporation +# Subject: CN=Secure Global CA O=SecureTrust Corporation +# Label: "Secure Global CA" +# Serial: 9751836167731051554232119481456978597 +# MD5 Fingerprint: cf:f4:27:0d:d4:ed:dc:65:16:49:6d:3d:da:bf:6e:de +# SHA1 Fingerprint: 3a:44:73:5a:e5:81:90:1f:24:86:61:46:1e:3b:9c:c4:5f:f5:3a:1b +# SHA256 Fingerprint: 42:00:f5:04:3a:c8:59:0e:bb:52:7d:20:9e:d1:50:30:29:fb:cb:d4:1c:a1:b5:06:ec:27:f1:5a:de:7d:ac:69 +-----BEGIN CERTIFICATE----- +MIIDvDCCAqSgAwIBAgIQB1YipOjUiolN9BPI8PjqpTANBgkqhkiG9w0BAQUFADBK +MQswCQYDVQQGEwJVUzEgMB4GA1UEChMXU2VjdXJlVHJ1c3QgQ29ycG9yYXRpb24x +GTAXBgNVBAMTEFNlY3VyZSBHbG9iYWwgQ0EwHhcNMDYxMTA3MTk0MjI4WhcNMjkx +MjMxMTk1MjA2WjBKMQswCQYDVQQGEwJVUzEgMB4GA1UEChMXU2VjdXJlVHJ1c3Qg +Q29ycG9yYXRpb24xGTAXBgNVBAMTEFNlY3VyZSBHbG9iYWwgQ0EwggEiMA0GCSqG +SIb3DQEBAQUAA4IBDwAwggEKAoIBAQCvNS7YrGxVaQZx5RNoJLNP2MwhR/jxYDiJ +iQPpvepeRlMJ3Fz1Wuj3RSoC6zFh1ykzTM7HfAo3fg+6MpjhHZevj8fcyTiW89sa +/FHtaMbQbqR8JNGuQsiWUGMu4P51/pinX0kuleM5M2SOHqRfkNJnPLLZ/kG5VacJ +jnIFHovdRIWCQtBJwB1g8NEXLJXr9qXBkqPFwqcIYA1gBBCWeZ4WNOaptvolRTnI +HmX5k/Wq8VLcmZg9pYYaDDUz+kulBAYVHDGA76oYa8J719rO+TMg1fW9ajMtgQT7 +sFzUnKPiXB3jqUJ1XnvUd+85VLrJChgbEplJL4hL/VBi0XPnj3pDAgMBAAGjgZ0w +gZowEwYJKwYBBAGCNxQCBAYeBABDAEEwCwYDVR0PBAQDAgGGMA8GA1UdEwEB/wQF +MAMBAf8wHQYDVR0OBBYEFK9EBMJBfkiD2045AuzshHrmzsmkMDQGA1UdHwQtMCsw +KaAnoCWGI2h0dHA6Ly9jcmwuc2VjdXJldHJ1c3QuY29tL1NHQ0EuY3JsMBAGCSsG +AQQBgjcVAQQDAgEAMA0GCSqGSIb3DQEBBQUAA4IBAQBjGghAfaReUw132HquHw0L +URYD7xh8yOOvaliTFGCRsoTciE6+OYo68+aCiV0BN7OrJKQVDpI1WkpEXk5X+nXO +H0jOZvQ8QCaSmGwb7iRGDBezUqXbpZGRzzfTb+cnCDpOGR86p1hcF895P4vkp9Mm +I50mD1hp/Ed+stCNi5O/KU9DaXR2Z0vPB4zmAve14bRDtUstFJ/53CYNv6ZHdAbY +iNE6KTCEztI5gGIbqMdXSbxqVVFnFUq+NQfk1XWYN3kwFNspnWzFacxHVaIw98xc +f8LDmBxrThaA63p4ZUWiABqvDA1VZDRIuJK58bRQKfJPIx/abKwfROHdI3hRW8cW +-----END CERTIFICATE----- + +# Issuer: CN=COMODO Certification Authority O=COMODO CA Limited +# Subject: CN=COMODO Certification Authority O=COMODO CA Limited +# Label: "COMODO Certification Authority" +# Serial: 104350513648249232941998508985834464573 +# MD5 Fingerprint: 5c:48:dc:f7:42:72:ec:56:94:6d:1c:cc:71:35:80:75 +# SHA1 Fingerprint: 66:31:bf:9e:f7:4f:9e:b6:c9:d5:a6:0c:ba:6a:be:d1:f7:bd:ef:7b +# SHA256 Fingerprint: 0c:2c:d6:3d:f7:80:6f:a3:99:ed:e8:09:11:6b:57:5b:f8:79:89:f0:65:18:f9:80:8c:86:05:03:17:8b:af:66 +-----BEGIN CERTIFICATE----- +MIIEHTCCAwWgAwIBAgIQToEtioJl4AsC7j41AkblPTANBgkqhkiG9w0BAQUFADCB +gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G +A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV +BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0wNjEyMDEwMDAw +MDBaFw0yOTEyMzEyMzU5NTlaMIGBMQswCQYDVQQGEwJHQjEbMBkGA1UECBMSR3Jl +YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P +RE8gQ0EgTGltaXRlZDEnMCUGA1UEAxMeQ09NT0RPIENlcnRpZmljYXRpb24gQXV0 +aG9yaXR5MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0ECLi3LjkRv3 +UcEbVASY06m/weaKXTuH+7uIzg3jLz8GlvCiKVCZrts7oVewdFFxze1CkU1B/qnI +2GqGd0S7WWaXUF601CxwRM/aN5VCaTwwxHGzUvAhTaHYujl8HJ6jJJ3ygxaYqhZ8 
+Q5sVW7euNJH+1GImGEaaP+vB+fGQV+useg2L23IwambV4EajcNxo2f8ESIl33rXp ++2dtQem8Ob0y2WIC8bGoPW43nOIv4tOiJovGuFVDiOEjPqXSJDlqR6sA1KGzqSX+ +DT+nHbrTUcELpNqsOO9VUCQFZUaTNE8tja3G1CEZ0o7KBWFxB3NH5YoZEr0ETc5O +nKVIrLsm9wIDAQABo4GOMIGLMB0GA1UdDgQWBBQLWOWLxkwVN6RAqTCpIb5HNlpW +/zAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zBJBgNVHR8EQjBAMD6g +PKA6hjhodHRwOi8vY3JsLmNvbW9kb2NhLmNvbS9DT01PRE9DZXJ0aWZpY2F0aW9u +QXV0aG9yaXR5LmNybDANBgkqhkiG9w0BAQUFAAOCAQEAPpiem/Yb6dc5t3iuHXIY +SdOH5EOC6z/JqvWote9VfCFSZfnVDeFs9D6Mk3ORLgLETgdxb8CPOGEIqB6BCsAv +IC9Bi5HcSEW88cbeunZrM8gALTFGTO3nnc+IlP8zwFboJIYmuNg4ON8qa90SzMc/ +RxdMosIGlgnW2/4/PEZB31jiVg88O8EckzXZOFKs7sjsLjBOlDW0JB9LeGna8gI4 +zJVSk/BwJVmcIGfE7vmLV2H0knZ9P4SNVbfo5azV8fUZVqZa+5Acr5Pr5RzUZ5dd +BA6+C4OmF4O5MBKgxTMVBbkN+8cFduPYSo38NBejxiEovjBFMR7HeL5YYTisO+IB +ZQ== +-----END CERTIFICATE----- + +# Issuer: CN=Network Solutions Certificate Authority O=Network Solutions L.L.C. +# Subject: CN=Network Solutions Certificate Authority O=Network Solutions L.L.C. +# Label: "Network Solutions Certificate Authority" +# Serial: 116697915152937497490437556386812487904 +# MD5 Fingerprint: d3:f3:a6:16:c0:fa:6b:1d:59:b1:2d:96:4d:0e:11:2e +# SHA1 Fingerprint: 74:f8:a3:c3:ef:e7:b3:90:06:4b:83:90:3c:21:64:60:20:e5:df:ce +# SHA256 Fingerprint: 15:f0:ba:00:a3:ac:7a:f3:ac:88:4c:07:2b:10:11:a0:77:bd:77:c0:97:f4:01:64:b2:f8:59:8a:bd:83:86:0c +-----BEGIN CERTIFICATE----- +MIID5jCCAs6gAwIBAgIQV8szb8JcFuZHFhfjkDFo4DANBgkqhkiG9w0BAQUFADBi +MQswCQYDVQQGEwJVUzEhMB8GA1UEChMYTmV0d29yayBTb2x1dGlvbnMgTC5MLkMu +MTAwLgYDVQQDEydOZXR3b3JrIFNvbHV0aW9ucyBDZXJ0aWZpY2F0ZSBBdXRob3Jp +dHkwHhcNMDYxMjAxMDAwMDAwWhcNMjkxMjMxMjM1OTU5WjBiMQswCQYDVQQGEwJV +UzEhMB8GA1UEChMYTmV0d29yayBTb2x1dGlvbnMgTC5MLkMuMTAwLgYDVQQDEydO +ZXR3b3JrIFNvbHV0aW9ucyBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwggEiMA0GCSqG +SIb3DQEBAQUAA4IBDwAwggEKAoIBAQDkvH6SMG3G2I4rC7xGzuAnlt7e+foS0zwz +c7MEL7xxjOWftiJgPl9dzgn/ggwbmlFQGiaJ3dVhXRncEg8tCqJDXRfQNJIg6nPP +OCwGJgl6cvf6UDL4wpPTaaIjzkGxzOTVHzbRijr4jGPiFFlp7Q3Tf2vouAPlT2rl +mGNpSAW+Lv8ztumXWWn4Zxmuk2GWRBXTcrA/vGp97Eh/jcOrqnErU2lBUzS1sLnF +BgrEsEX1QV1uiUV7PTsmjHTC5dLRfbIR1PtYMiKagMnc/Qzpf14Dl847ABSHJ3A4 +qY5usyd2mFHgBeMhqxrVhSI8KbWaFsWAqPS7azCPL0YCorEMIuDTAgMBAAGjgZcw +gZQwHQYDVR0OBBYEFCEwyfsA106Y2oeqKtCnLrFAMadMMA4GA1UdDwEB/wQEAwIB +BjAPBgNVHRMBAf8EBTADAQH/MFIGA1UdHwRLMEkwR6BFoEOGQWh0dHA6Ly9jcmwu +bmV0c29sc3NsLmNvbS9OZXR3b3JrU29sdXRpb25zQ2VydGlmaWNhdGVBdXRob3Jp +dHkuY3JsMA0GCSqGSIb3DQEBBQUAA4IBAQC7rkvnt1frf6ott3NHhWrB5KUd5Oc8 +6fRZZXe1eltajSU24HqXLjjAV2CDmAaDn7l2em5Q4LqILPxFzBiwmZVRDuwduIj/ +h1AcgsLj4DKAv6ALR8jDMe+ZZzKATxcheQxpXN5eNK4CtSbqUN9/GGUsyfJj4akH +/nxxH2szJGoeBfcFaMBqEssuXmHLrijTfsK0ZpEmXzwuJF/LWA/rKOyvEZbz3Htv +wKeI8lN3s2Berq4o2jUsbzRF0ybh3uxbTydrFny9RAQYgrOJeRcQcT16ohZO9QHN +pGxlaKFJdlxDydi8NmdspZS11My5vWo1ViHe2MPr+8ukYEywVaCge1ey +-----END CERTIFICATE----- + +# Issuer: CN=COMODO ECC Certification Authority O=COMODO CA Limited +# Subject: CN=COMODO ECC Certification Authority O=COMODO CA Limited +# Label: "COMODO ECC Certification Authority" +# Serial: 41578283867086692638256921589707938090 +# MD5 Fingerprint: 7c:62:ff:74:9d:31:53:5e:68:4a:d5:78:aa:1e:bf:23 +# SHA1 Fingerprint: 9f:74:4e:9f:2b:4d:ba:ec:0f:31:2c:50:b6:56:3b:8e:2d:93:c3:11 +# SHA256 Fingerprint: 17:93:92:7a:06:14:54:97:89:ad:ce:2f:8f:34:f7:f0:b6:6d:0f:3a:e3:a3:b8:4d:21:ec:15:db:ba:4f:ad:c7 +-----BEGIN CERTIFICATE----- +MIICiTCCAg+gAwIBAgIQH0evqmIAcFBUTAGem2OZKjAKBggqhkjOPQQDAzCBhTEL +MAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4GA1UE +BxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxKzApBgNVBAMT 
+IkNPTU9ETyBFQ0MgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkwHhcNMDgwMzA2MDAw +MDAwWhcNMzgwMTE4MjM1OTU5WjCBhTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdy +ZWF0ZXIgTWFuY2hlc3RlcjEQMA4GA1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09N +T0RPIENBIExpbWl0ZWQxKzApBgNVBAMTIkNPTU9ETyBFQ0MgQ2VydGlmaWNhdGlv +biBBdXRob3JpdHkwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAAQDR3svdcmCFYX7deSR +FtSrYpn1PlILBs5BAH+X4QokPB0BBO490o0JlwzgdeT6+3eKKvUDYEs2ixYjFq0J +cfRK9ChQtP6IHG4/bC8vCVlbpVsLM5niwz2J+Wos77LTBumjQjBAMB0GA1UdDgQW +BBR1cacZSBm8nZ3qQUfflMRId5nTeTAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/ +BAUwAwEB/zAKBggqhkjOPQQDAwNoADBlAjEA7wNbeqy3eApyt4jf/7VGFAkK+qDm +fQjGGoe9GKhzvSbKYAydzpmfz1wPMOG+FDHqAjAU9JM8SaczepBGR7NjfRObTrdv +GDeAU/7dIOA1mjbRxwG55tzd8/8dLDoWV9mSOdY= +-----END CERTIFICATE----- + +# Issuer: CN=OISTE WISeKey Global Root GA CA O=WISeKey OU=Copyright (c) 2005/OISTE Foundation Endorsed +# Subject: CN=OISTE WISeKey Global Root GA CA O=WISeKey OU=Copyright (c) 2005/OISTE Foundation Endorsed +# Label: "OISTE WISeKey Global Root GA CA" +# Serial: 86718877871133159090080555911823548314 +# MD5 Fingerprint: bc:6c:51:33:a7:e9:d3:66:63:54:15:72:1b:21:92:93 +# SHA1 Fingerprint: 59:22:a1:e1:5a:ea:16:35:21:f8:98:39:6a:46:46:b0:44:1b:0f:a9 +# SHA256 Fingerprint: 41:c9:23:86:6a:b4:ca:d6:b7:ad:57:80:81:58:2e:02:07:97:a6:cb:df:4f:ff:78:ce:83:96:b3:89:37:d7:f5 +-----BEGIN CERTIFICATE----- +MIID8TCCAtmgAwIBAgIQQT1yx/RrH4FDffHSKFTfmjANBgkqhkiG9w0BAQUFADCB +ijELMAkGA1UEBhMCQ0gxEDAOBgNVBAoTB1dJU2VLZXkxGzAZBgNVBAsTEkNvcHly +aWdodCAoYykgMjAwNTEiMCAGA1UECxMZT0lTVEUgRm91bmRhdGlvbiBFbmRvcnNl +ZDEoMCYGA1UEAxMfT0lTVEUgV0lTZUtleSBHbG9iYWwgUm9vdCBHQSBDQTAeFw0w +NTEyMTExNjAzNDRaFw0zNzEyMTExNjA5NTFaMIGKMQswCQYDVQQGEwJDSDEQMA4G +A1UEChMHV0lTZUtleTEbMBkGA1UECxMSQ29weXJpZ2h0IChjKSAyMDA1MSIwIAYD +VQQLExlPSVNURSBGb3VuZGF0aW9uIEVuZG9yc2VkMSgwJgYDVQQDEx9PSVNURSBX +SVNlS2V5IEdsb2JhbCBSb290IEdBIENBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8A +MIIBCgKCAQEAy0+zAJs9Nt350UlqaxBJH+zYK7LG+DKBKUOVTJoZIyEVRd7jyBxR +VVuuk+g3/ytr6dTqvirdqFEr12bDYVxgAsj1znJ7O7jyTmUIms2kahnBAbtzptf2 +w93NvKSLtZlhuAGio9RN1AU9ka34tAhxZK9w8RxrfvbDd50kc3vkDIzh2TbhmYsF +mQvtRTEJysIA2/dyoJaqlYfQjse2YXMNdmaM3Bu0Y6Kff5MTMPGhJ9vZ/yxViJGg +4E8HsChWjBgbl0SOid3gF27nKu+POQoxhILYQBRJLnpB5Kf+42TMwVlxSywhp1t9 +4B3RLoGbw9ho972WG6xwsRYUC9tguSYBBQIDAQABo1EwTzALBgNVHQ8EBAMCAYYw +DwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUswN+rja8sHnR3JQmthG+IbJphpQw +EAYJKwYBBAGCNxUBBAMCAQAwDQYJKoZIhvcNAQEFBQADggEBAEuh/wuHbrP5wUOx +SPMowB0uyQlB+pQAHKSkq0lPjz0e701vvbyk9vImMMkQyh2I+3QZH4VFvbBsUfk2 +ftv1TDI6QU9bR8/oCy22xBmddMVHxjtqD6wU2zz0c5ypBd8A3HR4+vg1YFkCExh8 +vPtNsCBtQ7tgMHpnM1zFmdH4LTlSc/uMqpclXHLZCB6rTjzjgTGfA6b7wP4piFXa +hNVQA7bihKOmNqoROgHhGEvWRGizPflTdISzRpFGlgC3gCy24eMQ4tui5yiPAZZi +Fj4A4xylNoEYokxSdsARo27mHbrjWr42U8U+dY+GaSlYU7Wcu2+fXMUY7N0v4ZjJ +/L7fCg0= +-----END CERTIFICATE----- + +# Issuer: CN=Certigna O=Dhimyotis +# Subject: CN=Certigna O=Dhimyotis +# Label: "Certigna" +# Serial: 18364802974209362175 +# MD5 Fingerprint: ab:57:a6:5b:7d:42:82:19:b5:d8:58:26:28:5e:fd:ff +# SHA1 Fingerprint: b1:2e:13:63:45:86:a4:6f:1a:b2:60:68:37:58:2d:c4:ac:fd:94:97 +# SHA256 Fingerprint: e3:b6:a2:db:2e:d7:ce:48:84:2f:7a:c5:32:41:c7:b7:1d:54:14:4b:fb:40:c1:1f:3f:1d:0b:42:f5:ee:a1:2d +-----BEGIN CERTIFICATE----- +MIIDqDCCApCgAwIBAgIJAP7c4wEPyUj/MA0GCSqGSIb3DQEBBQUAMDQxCzAJBgNV +BAYTAkZSMRIwEAYDVQQKDAlEaGlteW90aXMxETAPBgNVBAMMCENlcnRpZ25hMB4X +DTA3MDYyOTE1MTMwNVoXDTI3MDYyOTE1MTMwNVowNDELMAkGA1UEBhMCRlIxEjAQ +BgNVBAoMCURoaW15b3RpczERMA8GA1UEAwwIQ2VydGlnbmEwggEiMA0GCSqGSIb3 +DQEBAQUAA4IBDwAwggEKAoIBAQDIaPHJ1tazNHUmgh7stL7qXOEm7RFHYeGifBZ4 
+QCHkYJ5ayGPhxLGWkv8YbWkj4Sti993iNi+RB7lIzw7sebYs5zRLcAglozyHGxny +gQcPOJAZ0xH+hrTy0V4eHpbNgGzOOzGTtvKg0KmVEn2lmsxryIRWijOp5yIVUxbw +zBfsV1/pogqYCd7jX5xv3EjjhQsVWqa6n6xI4wmy9/Qy3l40vhx4XUJbzg4ij02Q +130yGLMLLGq/jj8UEYkgDncUtT2UCIf3JR7VsmAA7G8qKCVuKj4YYxclPz5EIBb2 +JsglrgVKtOdjLPOMFlN+XPsRGgjBRmKfIrjxwo1p3Po6WAbfAgMBAAGjgbwwgbkw +DwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUGu3+QTmQtCRZvgHyUtVF9lo53BEw +ZAYDVR0jBF0wW4AUGu3+QTmQtCRZvgHyUtVF9lo53BGhOKQ2MDQxCzAJBgNVBAYT +AkZSMRIwEAYDVQQKDAlEaGlteW90aXMxETAPBgNVBAMMCENlcnRpZ25hggkA/tzj +AQ/JSP8wDgYDVR0PAQH/BAQDAgEGMBEGCWCGSAGG+EIBAQQEAwIABzANBgkqhkiG +9w0BAQUFAAOCAQEAhQMeknH2Qq/ho2Ge6/PAD/Kl1NqV5ta+aDY9fm4fTIrv0Q8h +bV6lUmPOEvjvKtpv6zf+EwLHyzs+ImvaYS5/1HI93TDhHkxAGYwP15zRgzB7mFnc +fca5DClMoTOi62c6ZYTTluLtdkVwj7Ur3vkj1kluPBS1xp81HlDQwY9qcEQCYsuu +HWhBp6pX6FOqB9IG9tUUBguRA3UsbHK1YZWaDYu5Def131TN3ubY1gkIl2PlwS6w +t0QmwCbAr1UwnjvVNioZBPRcHv/PLLf/0P2HQBHVESO7SMAhqaQoLf0V+LBOK/Qw +WyH8EZE0vkHve52Xdf+XlcCWWC/qu0bXu+TZLg== +-----END CERTIFICATE----- + +# Issuer: CN=Cybertrust Global Root O=Cybertrust, Inc +# Subject: CN=Cybertrust Global Root O=Cybertrust, Inc +# Label: "Cybertrust Global Root" +# Serial: 4835703278459682877484360 +# MD5 Fingerprint: 72:e4:4a:87:e3:69:40:80:77:ea:bc:e3:f4:ff:f0:e1 +# SHA1 Fingerprint: 5f:43:e5:b1:bf:f8:78:8c:ac:1c:c7:ca:4a:9a:c6:22:2b:cc:34:c6 +# SHA256 Fingerprint: 96:0a:df:00:63:e9:63:56:75:0c:29:65:dd:0a:08:67:da:0b:9c:bd:6e:77:71:4a:ea:fb:23:49:ab:39:3d:a3 +-----BEGIN CERTIFICATE----- +MIIDoTCCAomgAwIBAgILBAAAAAABD4WqLUgwDQYJKoZIhvcNAQEFBQAwOzEYMBYG +A1UEChMPQ3liZXJ0cnVzdCwgSW5jMR8wHQYDVQQDExZDeWJlcnRydXN0IEdsb2Jh +bCBSb290MB4XDTA2MTIxNTA4MDAwMFoXDTIxMTIxNTA4MDAwMFowOzEYMBYGA1UE +ChMPQ3liZXJ0cnVzdCwgSW5jMR8wHQYDVQQDExZDeWJlcnRydXN0IEdsb2JhbCBS +b290MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA+Mi8vRRQZhP/8NN5 +7CPytxrHjoXxEnOmGaoQ25yiZXRadz5RfVb23CO21O1fWLE3TdVJDm71aofW0ozS +J8bi/zafmGWgE07GKmSb1ZASzxQG9Dvj1Ci+6A74q05IlG2OlTEQXO2iLb3VOm2y +HLtgwEZLAfVJrn5GitB0jaEMAs7u/OePuGtm839EAL9mJRQr3RAwHQeWP032a7iP +t3sMpTjr3kfb1V05/Iin89cqdPHoWqI7n1C6poxFNcJQZZXcY4Lv3b93TZxiyWNz +FtApD0mpSPCzqrdsxacwOUBdrsTiXSZT8M4cIwhhqJQZugRiQOwfOHB3EgZxpzAY +XSUnpQIDAQABo4GlMIGiMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/ +MB0GA1UdDgQWBBS2CHsNesysIEyGVjJez6tuhS1wVzA/BgNVHR8EODA2MDSgMqAw +hi5odHRwOi8vd3d3Mi5wdWJsaWMtdHJ1c3QuY29tL2NybC9jdC9jdHJvb3QuY3Js +MB8GA1UdIwQYMBaAFLYIew16zKwgTIZWMl7Pq26FLXBXMA0GCSqGSIb3DQEBBQUA +A4IBAQBW7wojoFROlZfJ+InaRcHUowAl9B8Tq7ejhVhpwjCt2BWKLePJzYFa+HMj +Wqd8BfP9IjsO0QbE2zZMcwSO5bAi5MXzLqXZI+O4Tkogp24CJJ8iYGd7ix1yCcUx +XOl5n4BHPa2hCwcUPUf/A2kaDAtE52Mlp3+yybh2hO0j9n0Hq0V+09+zv+mKts2o +omcrUtW3ZfA5TGOgkXmTUg9U3YO7n9GPp1Nzw8v/MOx8BLjYRB+TX3EJIrduPuoc +A06dGiBh+4E37F78CkWr1+cXVdCg6mCbpvbjjFspwgZgFJ0tl0ypkxWdYcQBX0jW +WL1WMRJOEcgh4LMRkWXbtKaIOM5V +-----END CERTIFICATE----- + +# Issuer: O=Chunghwa Telecom Co., Ltd. OU=ePKI Root Certification Authority +# Subject: O=Chunghwa Telecom Co., Ltd. 
OU=ePKI Root Certification Authority +# Label: "ePKI Root Certification Authority" +# Serial: 28956088682735189655030529057352760477 +# MD5 Fingerprint: 1b:2e:00:ca:26:06:90:3d:ad:fe:6f:15:68:d3:6b:b3 +# SHA1 Fingerprint: 67:65:0d:f1:7e:8e:7e:5b:82:40:a4:f4:56:4b:cf:e2:3d:69:c6:f0 +# SHA256 Fingerprint: c0:a6:f4:dc:63:a2:4b:fd:cf:54:ef:2a:6a:08:2a:0a:72:de:35:80:3e:2f:f5:ff:52:7a:e5:d8:72:06:df:d5 +-----BEGIN CERTIFICATE----- +MIIFsDCCA5igAwIBAgIQFci9ZUdcr7iXAF7kBtK8nTANBgkqhkiG9w0BAQUFADBe +MQswCQYDVQQGEwJUVzEjMCEGA1UECgwaQ2h1bmdod2EgVGVsZWNvbSBDby4sIEx0 +ZC4xKjAoBgNVBAsMIWVQS0kgUm9vdCBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAe +Fw0wNDEyMjAwMjMxMjdaFw0zNDEyMjAwMjMxMjdaMF4xCzAJBgNVBAYTAlRXMSMw +IQYDVQQKDBpDaHVuZ2h3YSBUZWxlY29tIENvLiwgTHRkLjEqMCgGA1UECwwhZVBL +SSBSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIICIjANBgkqhkiG9w0BAQEF +AAOCAg8AMIICCgKCAgEA4SUP7o3biDN1Z82tH306Tm2d0y8U82N0ywEhajfqhFAH +SyZbCUNsIZ5qyNUD9WBpj8zwIuQf5/dqIjG3LBXy4P4AakP/h2XGtRrBp0xtInAh +ijHyl3SJCRImHJ7K2RKilTza6We/CKBk49ZCt0Xvl/T29de1ShUCWH2YWEtgvM3X +DZoTM1PRYfl61dd4s5oz9wCGzh1NlDivqOx4UXCKXBCDUSH3ET00hl7lSM2XgYI1 +TBnsZfZrxQWh7kcT1rMhJ5QQCtkkO7q+RBNGMD+XPNjX12ruOzjjK9SXDrkb5wdJ +fzcq+Xd4z1TtW0ado4AOkUPB1ltfFLqfpo0kR0BZv3I4sjZsN/+Z0V0OWQqraffA +sgRFelQArr5T9rXn4fg8ozHSqf4hUmTFpmfwdQcGlBSBVcYn5AGPF8Fqcde+S/uU +WH1+ETOxQvdibBjWzwloPn9s9h6PYq2lY9sJpx8iQkEeb5mKPtf5P0B6ebClAZLS +nT0IFaUQAS2zMnaolQ2zepr7BxB4EW/hj8e6DyUadCrlHJhBmd8hh+iVBmoKs2pH +dmX2Os+PYhcZewoozRrSgx4hxyy/vv9haLdnG7t4TY3OZ+XkwY63I2binZB1NJip +NiuKmpS5nezMirH4JYlcWrYvjB9teSSnUmjDhDXiZo1jDiVN1Rmy5nk3pyKdVDEC +AwEAAaNqMGgwHQYDVR0OBBYEFB4M97Zn8uGSJglFwFU5Lnc/QkqiMAwGA1UdEwQF +MAMBAf8wOQYEZyoHAAQxMC8wLQIBADAJBgUrDgMCGgUAMAcGBWcqAwAABBRFsMLH +ClZ87lt4DJX5GFPBphzYEDANBgkqhkiG9w0BAQUFAAOCAgEACbODU1kBPpVJufGB +uvl2ICO1J2B01GqZNF5sAFPZn/KmsSQHRGoqxqWOeBLoR9lYGxMqXnmbnwoqZ6Yl +PwZpVnPDimZI+ymBV3QGypzqKOg4ZyYr8dW1P2WT+DZdjo2NQCCHGervJ8A9tDkP +JXtoUHRVnAxZfVo9QZQlUgjgRywVMRnVvwdVxrsStZf0X4OFunHB2WyBEXYKCrC/ +gpf36j36+uwtqSiUO1bd0lEursC9CBWMd1I0ltabrNMdjmEPNXubrjlpC2JgQCA2 +j6/7Nu4tCEoduL+bXPjqpRugc6bY+G7gMwRfaKonh+3ZwZCc7b3jajWvY9+rGNm6 +5ulK6lCKD2GTHuItGeIwlDWSXQ62B68ZgI9HkFFLLk3dheLSClIKF5r8GrBQAuUB +o2M3IUxExJtRmREOc5wGj1QupyheRDmHVi03vYVElOEMSyycw5KFNGHLD7ibSkNS +/jQ6fbjpKdx2qcgw+BRxgMYeNkh0IkFch4LoGHGLQYlE535YW6i4jRPpp2zDR+2z +Gp1iro2C6pSe3VkQw63d4k3jMdXH7OjysP6SHhYKGvzZ8/gntsm+HbRsZJB/9OTE +W9c3rkIO3aQab3yIVMUWbuF6aC74Or8NpDyJO3inTmODBCEIZ43ygknQW/2xzQ+D +hNQ+IIX3Sj0rnP0qCglN6oH4EZw= +-----END CERTIFICATE----- + +# Issuer: O=certSIGN OU=certSIGN ROOT CA +# Subject: O=certSIGN OU=certSIGN ROOT CA +# Label: "certSIGN ROOT CA" +# Serial: 35210227249154 +# MD5 Fingerprint: 18:98:c0:d6:e9:3a:fc:f9:b0:f5:0c:f7:4b:01:44:17 +# SHA1 Fingerprint: fa:b7:ee:36:97:26:62:fb:2d:b0:2a:f6:bf:03:fd:e8:7c:4b:2f:9b +# SHA256 Fingerprint: ea:a9:62:c4:fa:4a:6b:af:eb:e4:15:19:6d:35:1c:cd:88:8d:4f:53:f3:fa:8a:e6:d7:c4:66:a9:4e:60:42:bb +-----BEGIN CERTIFICATE----- +MIIDODCCAiCgAwIBAgIGIAYFFnACMA0GCSqGSIb3DQEBBQUAMDsxCzAJBgNVBAYT +AlJPMREwDwYDVQQKEwhjZXJ0U0lHTjEZMBcGA1UECxMQY2VydFNJR04gUk9PVCBD +QTAeFw0wNjA3MDQxNzIwMDRaFw0zMTA3MDQxNzIwMDRaMDsxCzAJBgNVBAYTAlJP +MREwDwYDVQQKEwhjZXJ0U0lHTjEZMBcGA1UECxMQY2VydFNJR04gUk9PVCBDQTCC +ASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALczuX7IJUqOtdu0KBuqV5Do +0SLTZLrTk+jUrIZhQGpgV2hUhE28alQCBf/fm5oqrl0Hj0rDKH/v+yv6efHHrfAQ +UySQi2bJqIirr1qjAOm+ukbuW3N7LBeCgV5iLKECZbO9xSsAfsT8AzNXDe3i+s5d +RdY4zTW2ssHQnIFKquSyAVwdj1+ZxLGt24gh65AIgoDzMKND5pCCrlUoSe1b16kQ +OA7+j0xbm0bqQfWwCHTD0IgztnzXdN/chNFDDnU5oSVAKOp4yw4sLjmdjItuFhwv 
+JoIQ4uNllAoEwF73XVv4EOLQunpL+943AAAaWyjj0pxzPjKHmKHJUS/X3qwzs08C +AwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAcYwHQYDVR0O +BBYEFOCMm9slSbPxfIbWskKHC9BroNnkMA0GCSqGSIb3DQEBBQUAA4IBAQA+0hyJ +LjX8+HXd5n9liPRyTMks1zJO890ZeUe9jjtbkw9QSSQTaxQGcu8J06Gh40CEyecY +MnQ8SG4Pn0vU9x7Tk4ZkVJdjclDVVc/6IJMCopvDI5NOFlV2oHB5bc0hH88vLbwZ +44gx+FkagQnIl6Z0x2DEW8xXjrJ1/RsCCdtZb3KTafcxQdaIOL+Hsr0Wefmq5L6I +Jd1hJyMctTEHBDa0GpC9oHRxUIltvBTjD4au8as+x6AJzKNI0eDbZOeStc+vckNw +i/nDhDwTqn6Sm1dTk/pwwpEOMfmbZ13pljheX7NzTogVZ96edhBiIL5VaZVDADlN +9u6wWk5JRFRYX0KD +-----END CERTIFICATE----- + +# Issuer: CN=GeoTrust Primary Certification Authority - G3 O=GeoTrust Inc. OU=(c) 2008 GeoTrust Inc. - For authorized use only +# Subject: CN=GeoTrust Primary Certification Authority - G3 O=GeoTrust Inc. OU=(c) 2008 GeoTrust Inc. - For authorized use only +# Label: "GeoTrust Primary Certification Authority - G3" +# Serial: 28809105769928564313984085209975885599 +# MD5 Fingerprint: b5:e8:34:36:c9:10:44:58:48:70:6d:2e:83:d4:b8:05 +# SHA1 Fingerprint: 03:9e:ed:b8:0b:e7:a0:3c:69:53:89:3b:20:d2:d9:32:3a:4c:2a:fd +# SHA256 Fingerprint: b4:78:b8:12:25:0d:f8:78:63:5c:2a:a7:ec:7d:15:5e:aa:62:5e:e8:29:16:e2:cd:29:43:61:88:6c:d1:fb:d4 +-----BEGIN CERTIFICATE----- +MIID/jCCAuagAwIBAgIQFaxulBmyeUtB9iepwxgPHzANBgkqhkiG9w0BAQsFADCB +mDELMAkGA1UEBhMCVVMxFjAUBgNVBAoTDUdlb1RydXN0IEluYy4xOTA3BgNVBAsT +MChjKSAyMDA4IEdlb1RydXN0IEluYy4gLSBGb3IgYXV0aG9yaXplZCB1c2Ugb25s +eTE2MDQGA1UEAxMtR2VvVHJ1c3QgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhv +cml0eSAtIEczMB4XDTA4MDQwMjAwMDAwMFoXDTM3MTIwMTIzNTk1OVowgZgxCzAJ +BgNVBAYTAlVTMRYwFAYDVQQKEw1HZW9UcnVzdCBJbmMuMTkwNwYDVQQLEzAoYykg +MjAwOCBHZW9UcnVzdCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxNjA0 +BgNVBAMTLUdlb1RydXN0IFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkg +LSBHMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBANziXmJYHTNXOTIz ++uvLh4yn1ErdBojqZI4xmKU4kB6Yzy5jK/BGvESyiaHAKAxJcCGVn2TAppMSAmUm +hsalifD614SgcK9PGpc/BkTVyetyEH3kMSj7HGHmKAdEc5IiaacDiGydY8hS2pgn +5whMcD60yRLBxWeDXTPzAxHsatBT4tG6NmCUgLthY2xbF37fQJQeqw3CIShwiP/W +JmxsYAQlTlV+fe+/lEjetx3dcI0FX4ilm/LC7urRQEFtYjgdVgbFA0dRIBn8exAL +DmKudlW/X3e+PkkBUz2YJQN2JFodtNuJ6nnltrM7P7pMKEF/BqxqjsHQ9gUdfeZC +huOl1UcCAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAQYw +HQYDVR0OBBYEFMR5yo6hTgMdHNxr2zFblD4/MH8tMA0GCSqGSIb3DQEBCwUAA4IB +AQAtxRPPVoB7eni9n64smefv2t+UXglpp+duaIy9cr5HqQ6XErhK8WTTOd8lNNTB +zU6B8A8ExCSzNJbGpqow32hhc9f5joWJ7w5elShKKiePEI4ufIbEAp7aDHdlDkQN +kv39sxY2+hENHYwOB4lqKVb3cvTdFZx3NWZXqxNT2I7BQMXXExZacse3aQHEerGD +AWh9jUGhlBjBJVz88P6DAod8DQ3PLghcSkANPuyBYeYk28rgDi0Hsj5W3I31QYUH +SJsMC8tJP33st/3LjWeJGqvtux6jAAgIFyqCXDFdRootD4abdNlF+9RAsXqqaC2G +spki4cErx5z481+oghLrGREt +-----END CERTIFICATE----- + +# Issuer: CN=thawte Primary Root CA - G2 O=thawte, Inc. OU=(c) 2007 thawte, Inc. - For authorized use only +# Subject: CN=thawte Primary Root CA - G2 O=thawte, Inc. OU=(c) 2007 thawte, Inc. 
- For authorized use only +# Label: "thawte Primary Root CA - G2" +# Serial: 71758320672825410020661621085256472406 +# MD5 Fingerprint: 74:9d:ea:60:24:c4:fd:22:53:3e:cc:3a:72:d9:29:4f +# SHA1 Fingerprint: aa:db:bc:22:23:8f:c4:01:a1:27:bb:38:dd:f4:1d:db:08:9e:f0:12 +# SHA256 Fingerprint: a4:31:0d:50:af:18:a6:44:71:90:37:2a:86:af:af:8b:95:1f:fb:43:1d:83:7f:1e:56:88:b4:59:71:ed:15:57 +-----BEGIN CERTIFICATE----- +MIICiDCCAg2gAwIBAgIQNfwmXNmET8k9Jj1Xm67XVjAKBggqhkjOPQQDAzCBhDEL +MAkGA1UEBhMCVVMxFTATBgNVBAoTDHRoYXd0ZSwgSW5jLjE4MDYGA1UECxMvKGMp +IDIwMDcgdGhhd3RlLCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxJDAi +BgNVBAMTG3RoYXd0ZSBQcmltYXJ5IFJvb3QgQ0EgLSBHMjAeFw0wNzExMDUwMDAw +MDBaFw0zODAxMTgyMzU5NTlaMIGEMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMdGhh +d3RlLCBJbmMuMTgwNgYDVQQLEy8oYykgMjAwNyB0aGF3dGUsIEluYy4gLSBGb3Ig +YXV0aG9yaXplZCB1c2Ugb25seTEkMCIGA1UEAxMbdGhhd3RlIFByaW1hcnkgUm9v +dCBDQSAtIEcyMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEotWcgnuVnfFSeIf+iha/ +BebfowJPDQfGAFG6DAJSLSKkQjnE/o/qycG+1E3/n3qe4rF8mq2nhglzh9HnmuN6 +papu+7qzcMBniKI11KOasf2twu8x+qi58/sIxpHR+ymVo0IwQDAPBgNVHRMBAf8E +BTADAQH/MA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUmtgAMADna3+FGO6Lts6K +DPgR4bswCgYIKoZIzj0EAwMDaQAwZgIxAN344FdHW6fmCsO99YCKlzUNG4k8VIZ3 +KMqh9HneteY4sPBlcIx/AlTCv//YoT7ZzwIxAMSNlPzcU9LcnXgWHxUzI1NS41ox +XZ3Krr0TKUQNJ1uo52icEvdYPy5yAlejj6EULg== +-----END CERTIFICATE----- + +# Issuer: CN=thawte Primary Root CA - G3 O=thawte, Inc. OU=Certification Services Division/(c) 2008 thawte, Inc. - For authorized use only +# Subject: CN=thawte Primary Root CA - G3 O=thawte, Inc. OU=Certification Services Division/(c) 2008 thawte, Inc. - For authorized use only +# Label: "thawte Primary Root CA - G3" +# Serial: 127614157056681299805556476275995414779 +# MD5 Fingerprint: fb:1b:5d:43:8a:94:cd:44:c6:76:f2:43:4b:47:e7:31 +# SHA1 Fingerprint: f1:8b:53:8d:1b:e9:03:b6:a6:f0:56:43:5b:17:15:89:ca:f3:6b:f2 +# SHA256 Fingerprint: 4b:03:f4:58:07:ad:70:f2:1b:fc:2c:ae:71:c9:fd:e4:60:4c:06:4c:f5:ff:b6:86:ba:e5:db:aa:d7:fd:d3:4c +-----BEGIN CERTIFICATE----- +MIIEKjCCAxKgAwIBAgIQYAGXt0an6rS0mtZLL/eQ+zANBgkqhkiG9w0BAQsFADCB +rjELMAkGA1UEBhMCVVMxFTATBgNVBAoTDHRoYXd0ZSwgSW5jLjEoMCYGA1UECxMf +Q2VydGlmaWNhdGlvbiBTZXJ2aWNlcyBEaXZpc2lvbjE4MDYGA1UECxMvKGMpIDIw +MDggdGhhd3RlLCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxJDAiBgNV +BAMTG3RoYXd0ZSBQcmltYXJ5IFJvb3QgQ0EgLSBHMzAeFw0wODA0MDIwMDAwMDBa +Fw0zNzEyMDEyMzU5NTlaMIGuMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMdGhhd3Rl +LCBJbmMuMSgwJgYDVQQLEx9DZXJ0aWZpY2F0aW9uIFNlcnZpY2VzIERpdmlzaW9u +MTgwNgYDVQQLEy8oYykgMjAwOCB0aGF3dGUsIEluYy4gLSBGb3IgYXV0aG9yaXpl +ZCB1c2Ugb25seTEkMCIGA1UEAxMbdGhhd3RlIFByaW1hcnkgUm9vdCBDQSAtIEcz +MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAsr8nLPvb2FvdeHsbnndm +gcs+vHyu86YnmjSjaDFxODNi5PNxZnmxqWWjpYvVj2AtP0LMqmsywCPLLEHd5N/8 +YZzic7IilRFDGF/Eth9XbAoFWCLINkw6fKXRz4aviKdEAhN0cXMKQlkC+BsUa0Lf +b1+6a4KinVvnSr0eAXLbS3ToO39/fR8EtCab4LRarEc9VbjXsCZSKAExQGbY2SS9 +9irY7CFJXJv2eul/VTV+lmuNk5Mny5K76qxAwJ/C+IDPXfRa3M50hqY+bAtTyr2S +zhkGcuYMXDhpxwTWvGzOW/b3aJzcJRVIiKHpqfiYnODz1TEoYRFsZ5aNOZnLwkUk +OQIDAQABo0IwQDAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIBBjAdBgNV +HQ4EFgQUrWyqlGCc7eT/+j4KdCtjA/e2Wb8wDQYJKoZIhvcNAQELBQADggEBABpA +2JVlrAmSicY59BDlqQ5mU1143vokkbvnRFHfxhY0Cu9qRFHqKweKA3rD6z8KLFIW +oCtDuSWQP3CpMyVtRRooOyfPqsMpQhvfO0zAMzRbQYi/aytlryjvsvXDqmbOe1bu +t8jLZ8HJnBoYuMTDSQPxYA5QzUbF83d597YV4Djbxy8ooAw/dyZ02SUS2jHaGh7c +KUGRIjxpp7sC8rZcJwOJ9Abqm+RyguOhCcHpABnTPtRwa7pxpqpYrvS76Wy274fM +m7v/OeZWYdMKp8RcTGB7BXcmer/YB1IsYvdwY9k5vG8cwnncdimvzsUsZAReiDZu +MdRAGmI0Nj81Aa6sY6A= +-----END CERTIFICATE----- + +# Issuer: CN=GeoTrust Primary 
Certification Authority - G2 O=GeoTrust Inc. OU=(c) 2007 GeoTrust Inc. - For authorized use only +# Subject: CN=GeoTrust Primary Certification Authority - G2 O=GeoTrust Inc. OU=(c) 2007 GeoTrust Inc. - For authorized use only +# Label: "GeoTrust Primary Certification Authority - G2" +# Serial: 80682863203381065782177908751794619243 +# MD5 Fingerprint: 01:5e:d8:6b:bd:6f:3d:8e:a1:31:f8:12:e0:98:73:6a +# SHA1 Fingerprint: 8d:17:84:d5:37:f3:03:7d:ec:70:fe:57:8b:51:9a:99:e6:10:d7:b0 +# SHA256 Fingerprint: 5e:db:7a:c4:3b:82:a0:6a:87:61:e8:d7:be:49:79:eb:f2:61:1f:7d:d7:9b:f9:1c:1c:6b:56:6a:21:9e:d7:66 +-----BEGIN CERTIFICATE----- +MIICrjCCAjWgAwIBAgIQPLL0SAoA4v7rJDteYD7DazAKBggqhkjOPQQDAzCBmDEL +MAkGA1UEBhMCVVMxFjAUBgNVBAoTDUdlb1RydXN0IEluYy4xOTA3BgNVBAsTMChj +KSAyMDA3IEdlb1RydXN0IEluYy4gLSBGb3IgYXV0aG9yaXplZCB1c2Ugb25seTE2 +MDQGA1UEAxMtR2VvVHJ1c3QgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0 +eSAtIEcyMB4XDTA3MTEwNTAwMDAwMFoXDTM4MDExODIzNTk1OVowgZgxCzAJBgNV +BAYTAlVTMRYwFAYDVQQKEw1HZW9UcnVzdCBJbmMuMTkwNwYDVQQLEzAoYykgMjAw +NyBHZW9UcnVzdCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxNjA0BgNV +BAMTLUdlb1RydXN0IFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkgLSBH +MjB2MBAGByqGSM49AgEGBSuBBAAiA2IABBWx6P0DFUPlrOuHNxFi79KDNlJ9RVcL +So17VDs6bl8VAsBQps8lL33KSLjHUGMcKiEIfJo22Av+0SbFWDEwKCXzXV2juLal +tJLtbCyf691DiaI8S0iRHVDsJt/WYC69IaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAO +BgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFBVfNVdRVfslsq0DafwBo/q+EVXVMAoG +CCqGSM49BAMDA2cAMGQCMGSWWaboCd6LuvpaiIjwH5HTRqjySkwCY/tsXzjbLkGT +qQ7mndwxHLKgpxgceeHHNgIwOlavmnRs9vuD4DPTCF+hnMJbn0bWtsuRBmOiBucz +rD6ogRLQy7rQkgu2npaqBA+K +-----END CERTIFICATE----- + +# Issuer: CN=VeriSign Universal Root Certification Authority O=VeriSign, Inc. OU=VeriSign Trust Network/(c) 2008 VeriSign, Inc. - For authorized use only +# Subject: CN=VeriSign Universal Root Certification Authority O=VeriSign, Inc. OU=VeriSign Trust Network/(c) 2008 VeriSign, Inc. 
- For authorized use only +# Label: "VeriSign Universal Root Certification Authority" +# Serial: 85209574734084581917763752644031726877 +# MD5 Fingerprint: 8e:ad:b5:01:aa:4d:81:e4:8c:1d:d1:e1:14:00:95:19 +# SHA1 Fingerprint: 36:79:ca:35:66:87:72:30:4d:30:a5:fb:87:3b:0f:a7:7b:b7:0d:54 +# SHA256 Fingerprint: 23:99:56:11:27:a5:71:25:de:8c:ef:ea:61:0d:df:2f:a0:78:b5:c8:06:7f:4e:82:82:90:bf:b8:60:e8:4b:3c +-----BEGIN CERTIFICATE----- +MIIEuTCCA6GgAwIBAgIQQBrEZCGzEyEDDrvkEhrFHTANBgkqhkiG9w0BAQsFADCB +vTELMAkGA1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMR8wHQYDVQQL +ExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMTowOAYDVQQLEzEoYykgMjAwOCBWZXJp +U2lnbiwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5MTgwNgYDVQQDEy9W +ZXJpU2lnbiBVbml2ZXJzYWwgUm9vdCBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAe +Fw0wODA0MDIwMDAwMDBaFw0zNzEyMDEyMzU5NTlaMIG9MQswCQYDVQQGEwJVUzEX +MBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZlcmlTaWduIFRydXN0 +IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAyMDA4IFZlcmlTaWduLCBJbmMuIC0gRm9y +IGF1dGhvcml6ZWQgdXNlIG9ubHkxODA2BgNVBAMTL1ZlcmlTaWduIFVuaXZlcnNh +bCBSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIIBIjANBgkqhkiG9w0BAQEF +AAOCAQ8AMIIBCgKCAQEAx2E3XrEBNNti1xWb/1hajCMj1mCOkdeQmIN65lgZOIzF +9uVkhbSicfvtvbnazU0AtMgtc6XHaXGVHzk8skQHnOgO+k1KxCHfKWGPMiJhgsWH +H26MfF8WIFFE0XBPV+rjHOPMee5Y2A7Cs0WTwCznmhcrewA3ekEzeOEz4vMQGn+H +LL729fdC4uW/h2KJXwBL38Xd5HVEMkE6HnFuacsLdUYI0crSK5XQz/u5QGtkjFdN +/BMReYTtXlT2NJ8IAfMQJQYXStrxHXpma5hgZqTZ79IugvHw7wnqRMkVauIDbjPT +rJ9VAMf2CGqUuV/c4DPxhGD5WycRtPwW8rtWaoAljQIDAQABo4GyMIGvMA8GA1Ud +EwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgEGMG0GCCsGAQUFBwEMBGEwX6FdoFsw +WTBXMFUWCWltYWdlL2dpZjAhMB8wBwYFKw4DAhoEFI/l0xqGrI2Oa8PPgGrUSBgs +exkuMCUWI2h0dHA6Ly9sb2dvLnZlcmlzaWduLmNvbS92c2xvZ28uZ2lmMB0GA1Ud +DgQWBBS2d/ppSEefUxLVwuoHMnYH0ZcHGTANBgkqhkiG9w0BAQsFAAOCAQEASvj4 +sAPmLGd75JR3Y8xuTPl9Dg3cyLk1uXBPY/ok+myDjEedO2Pzmvl2MpWRsXe8rJq+ +seQxIcaBlVZaDrHC1LGmWazxY8u4TB1ZkErvkBYoH1quEPuBUDgMbMzxPcP1Y+Oz +4yHJJDnp/RVmRvQbEdBNc6N9Rvk97ahfYtTxP/jgdFcrGJ2BtMQo2pSXpXDrrB2+ +BxHw1dvd5Yzw1TKwg+ZX4o+/vqGqvz0dtdQ46tewXDpPaj+PwGZsY6rp2aQW9IHR +lRQOfc2VNNnSj3BzgXucfr2YYdhFh5iQxeuGMMY1v/D/w1WIg0vvBZIGcfK4mJO3 +7M2CYfE45k+XmCpajQ== +-----END CERTIFICATE----- + +# Issuer: CN=VeriSign Class 3 Public Primary Certification Authority - G4 O=VeriSign, Inc. OU=VeriSign Trust Network/(c) 2007 VeriSign, Inc. - For authorized use only +# Subject: CN=VeriSign Class 3 Public Primary Certification Authority - G4 O=VeriSign, Inc. OU=VeriSign Trust Network/(c) 2007 VeriSign, Inc. 
- For authorized use only +# Label: "VeriSign Class 3 Public Primary Certification Authority - G4" +# Serial: 63143484348153506665311985501458640051 +# MD5 Fingerprint: 3a:52:e1:e7:fd:6f:3a:e3:6f:f3:6f:99:1b:f9:22:41 +# SHA1 Fingerprint: 22:d5:d8:df:8f:02:31:d1:8d:f7:9d:b7:cf:8a:2d:64:c9:3f:6c:3a +# SHA256 Fingerprint: 69:dd:d7:ea:90:bb:57:c9:3e:13:5d:c8:5e:a6:fc:d5:48:0b:60:32:39:bd:c4:54:fc:75:8b:2a:26:cf:7f:79 +-----BEGIN CERTIFICATE----- +MIIDhDCCAwqgAwIBAgIQL4D+I4wOIg9IZxIokYesszAKBggqhkjOPQQDAzCByjEL +MAkGA1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMR8wHQYDVQQLExZW +ZXJpU2lnbiBUcnVzdCBOZXR3b3JrMTowOAYDVQQLEzEoYykgMjAwNyBWZXJpU2ln +biwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5MUUwQwYDVQQDEzxWZXJp +U2lnbiBDbGFzcyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9y +aXR5IC0gRzQwHhcNMDcxMTA1MDAwMDAwWhcNMzgwMTE4MjM1OTU5WjCByjELMAkG +A1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMR8wHQYDVQQLExZWZXJp +U2lnbiBUcnVzdCBOZXR3b3JrMTowOAYDVQQLEzEoYykgMjAwNyBWZXJpU2lnbiwg +SW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5MUUwQwYDVQQDEzxWZXJpU2ln +biBDbGFzcyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5 +IC0gRzQwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAASnVnp8Utpkmw4tXNherJI9/gHm +GUo9FANL+mAnINmDiWn6VMaaGF5VKmTeBvaNSjutEDxlPZCIBIngMGGzrl0Bp3ve +fLK+ymVhAIau2o970ImtTR1ZmkGxvEeA3J5iw/mjgbIwga8wDwYDVR0TAQH/BAUw +AwEB/zAOBgNVHQ8BAf8EBAMCAQYwbQYIKwYBBQUHAQwEYTBfoV2gWzBZMFcwVRYJ +aW1hZ2UvZ2lmMCEwHzAHBgUrDgMCGgQUj+XTGoasjY5rw8+AatRIGCx7GS4wJRYj +aHR0cDovL2xvZ28udmVyaXNpZ24uY29tL3ZzbG9nby5naWYwHQYDVR0OBBYEFLMW +kf3upm7ktS5Jj4d4gYDs5bG1MAoGCCqGSM49BAMDA2gAMGUCMGYhDBgmYFo4e1ZC +4Kf8NoRRkSAsdk1DPcQdhCPQrNZ8NQbOzWm9kA3bbEhCHQ6qQgIxAJw9SDkjOVga +FRJZap7v1VmyHVIsmXHNxynfGyphe3HR3vPA5Q06Sqotp9iGKt0uEA== +-----END CERTIFICATE----- + +# Issuer: CN=NetLock Arany (Class Gold) F\u0151tan\xfas\xedtv\xe1ny O=NetLock Kft. OU=Tan\xfas\xedtv\xe1nykiad\xf3k (Certification Services) +# Subject: CN=NetLock Arany (Class Gold) F\u0151tan\xfas\xedtv\xe1ny O=NetLock Kft. 
OU=Tan\xfas\xedtv\xe1nykiad\xf3k (Certification Services) +# Label: "NetLock Arany (Class Gold) F\u0151tan\xfas\xedtv\xe1ny" +# Serial: 80544274841616 +# MD5 Fingerprint: c5:a1:b7:ff:73:dd:d6:d7:34:32:18:df:fc:3c:ad:88 +# SHA1 Fingerprint: 06:08:3f:59:3f:15:a1:04:a0:69:a4:6b:a9:03:d0:06:b7:97:09:91 +# SHA256 Fingerprint: 6c:61:da:c3:a2:de:f0:31:50:6b:e0:36:d2:a6:fe:40:19:94:fb:d1:3d:f9:c8:d4:66:59:92:74:c4:46:ec:98 +-----BEGIN CERTIFICATE----- +MIIEFTCCAv2gAwIBAgIGSUEs5AAQMA0GCSqGSIb3DQEBCwUAMIGnMQswCQYDVQQG +EwJIVTERMA8GA1UEBwwIQnVkYXBlc3QxFTATBgNVBAoMDE5ldExvY2sgS2Z0LjE3 +MDUGA1UECwwuVGFuw7pzw610dsOhbnlraWFkw7NrIChDZXJ0aWZpY2F0aW9uIFNl +cnZpY2VzKTE1MDMGA1UEAwwsTmV0TG9jayBBcmFueSAoQ2xhc3MgR29sZCkgRsWR +dGFuw7pzw610dsOhbnkwHhcNMDgxMjExMTUwODIxWhcNMjgxMjA2MTUwODIxWjCB +pzELMAkGA1UEBhMCSFUxETAPBgNVBAcMCEJ1ZGFwZXN0MRUwEwYDVQQKDAxOZXRM +b2NrIEtmdC4xNzA1BgNVBAsMLlRhbsO6c8OtdHbDoW55a2lhZMOzayAoQ2VydGlm +aWNhdGlvbiBTZXJ2aWNlcykxNTAzBgNVBAMMLE5ldExvY2sgQXJhbnkgKENsYXNz +IEdvbGQpIEbFkXRhbsO6c8OtdHbDoW55MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8A +MIIBCgKCAQEAxCRec75LbRTDofTjl5Bu0jBFHjzuZ9lk4BqKf8owyoPjIMHj9DrT +lF8afFttvzBPhCf2nx9JvMaZCpDyD/V/Q4Q3Y1GLeqVw/HpYzY6b7cNGbIRwXdrz +AZAj/E4wqX7hJ2Pn7WQ8oLjJM2P+FpD/sLj916jAwJRDC7bVWaaeVtAkH3B5r9s5 +VA1lddkVQZQBr17s9o3x/61k/iCa11zr/qYfCGSji3ZVrR47KGAuhyXoqq8fxmRG +ILdwfzzeSNuWU7c5d+Qa4scWhHaXWy+7GRWF+GmF9ZmnqfI0p6m2pgP8b4Y9VHx2 +BJtr+UBdADTHLpl1neWIA6pN+APSQnbAGwIDAKiLo0UwQzASBgNVHRMBAf8ECDAG +AQH/AgEEMA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUzPpnk/C2uNClwB7zU/2M +U9+D15YwDQYJKoZIhvcNAQELBQADggEBAKt/7hwWqZw8UQCgwBEIBaeZ5m8BiFRh +bvG5GK1Krf6BQCOUL/t1fC8oS2IkgYIL9WHxHG64YTjrgfpioTtaYtOUZcTh5m2C ++C8lcLIhJsFyUR+MLMOEkMNaj7rP9KdlpeuY0fsFskZ1FSNqb4VjMIDw1Z4fKRzC +bLBQWV2QWzuoDTDPv31/zvGdg73JRm4gpvlhUbohL3u+pRVjodSVh/GeufOJ8z2F +uLjbvrW5KfnaNwUASZQDhETnv0Mxz3WLJdH0pmT1kvarBes96aULNmLazAZfNou2 +XjG4Kvte9nHfRCaexOYNkbQudZWAUWpLMKawYqGT8ZvYzsRjdT9ZR7E= +-----END CERTIFICATE----- + +# Issuer: CN=Staat der Nederlanden Root CA - G2 O=Staat der Nederlanden +# Subject: CN=Staat der Nederlanden Root CA - G2 O=Staat der Nederlanden +# Label: "Staat der Nederlanden Root CA - G2" +# Serial: 10000012 +# MD5 Fingerprint: 7c:a5:0f:f8:5b:9a:7d:6d:30:ae:54:5a:e3:42:a2:8a +# SHA1 Fingerprint: 59:af:82:79:91:86:c7:b4:75:07:cb:cf:03:57:46:eb:04:dd:b7:16 +# SHA256 Fingerprint: 66:8c:83:94:7d:a6:3b:72:4b:ec:e1:74:3c:31:a0:e6:ae:d0:db:8e:c5:b3:1b:e3:77:bb:78:4f:91:b6:71:6f +-----BEGIN CERTIFICATE----- +MIIFyjCCA7KgAwIBAgIEAJiWjDANBgkqhkiG9w0BAQsFADBaMQswCQYDVQQGEwJO +TDEeMBwGA1UECgwVU3RhYXQgZGVyIE5lZGVybGFuZGVuMSswKQYDVQQDDCJTdGFh +dCBkZXIgTmVkZXJsYW5kZW4gUm9vdCBDQSAtIEcyMB4XDTA4MDMyNjExMTgxN1oX +DTIwMDMyNTExMDMxMFowWjELMAkGA1UEBhMCTkwxHjAcBgNVBAoMFVN0YWF0IGRl +ciBOZWRlcmxhbmRlbjErMCkGA1UEAwwiU3RhYXQgZGVyIE5lZGVybGFuZGVuIFJv +b3QgQ0EgLSBHMjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAMVZ5291 +qj5LnLW4rJ4L5PnZyqtdj7U5EILXr1HgO+EASGrP2uEGQxGZqhQlEq0i6ABtQ8Sp +uOUfiUtnvWFI7/3S4GCI5bkYYCjDdyutsDeqN95kWSpGV+RLufg3fNU254DBtvPU +Z5uW6M7XxgpT0GtJlvOjCwV3SPcl5XCsMBQgJeN/dVrlSPhOewMHBPqCYYdu8DvE +pMfQ9XQ+pV0aCPKbJdL2rAQmPlU6Yiile7Iwr/g3wtG61jj99O9JMDeZJiFIhQGp +5Rbn3JBV3w/oOM2ZNyFPXfUib2rFEhZgF1XyZWampzCROME4HYYEhLoaJXhena/M +UGDWE4dS7WMfbWV9whUYdMrhfmQpjHLYFhN9C0lK8SgbIHRrxT3dsKpICT0ugpTN +GmXZK4iambwYfp/ufWZ8Pr2UuIHOzZgweMFvZ9C+X+Bo7d7iscksWXiSqt8rYGPy +5V6548r6f1CGPqI0GAwJaCgRHOThuVw+R7oyPxjMW4T182t0xHJ04eOLoEq9jWYv +6q012iDTiIJh8BIitrzQ1aTsr1SIJSQ8p22xcik/Plemf1WvbibG/ufMQFxRRIEK +eN5KzlW/HdXZt1bv8Hb/C3m1r737qWmRRpdogBQ2HbN/uymYNqUg+oJgYjOk7Na6 +B6duxc8UpufWkjTYgfX8HV2qXB72o007uPc5AgMBAAGjgZcwgZQwDwYDVR0TAQH/ 
+BAUwAwEB/zBSBgNVHSAESzBJMEcGBFUdIAAwPzA9BggrBgEFBQcCARYxaHR0cDov +L3d3dy5wa2lvdmVyaGVpZC5ubC9wb2xpY2llcy9yb290LXBvbGljeS1HMjAOBgNV +HQ8BAf8EBAMCAQYwHQYDVR0OBBYEFJFoMocVHYnitfGsNig0jQt8YojrMA0GCSqG +SIb3DQEBCwUAA4ICAQCoQUpnKpKBglBu4dfYszk78wIVCVBR7y29JHuIhjv5tLyS +CZa59sCrI2AGeYwRTlHSeYAz+51IvuxBQ4EffkdAHOV6CMqqi3WtFMTC6GY8ggen +5ieCWxjmD27ZUD6KQhgpxrRW/FYQoAUXvQwjf/ST7ZwaUb7dRUG/kSS0H4zpX897 +IZmflZ85OkYcbPnNe5yQzSipx6lVu6xiNGI1E0sUOlWDuYaNkqbG9AclVMwWVxJK +gnjIFNkXgiYtXSAfea7+1HAWFpWD2DU5/1JddRwWxRNVz0fMdWVSSt7wsKfkCpYL ++63C4iWEst3kvX5ZbJvw8NjnyvLplzh+ib7M+zkXYT9y2zqR2GUBGR2tUKRXCnxL +vJxxcypFURmFzI79R6d0lR2o0a9OF7FpJsKqeFdbxU2n5Z4FF5TKsl+gSRiNNOkm +bEgeqmiSBeGCc1qb3AdbCG19ndeNIdn8FCCqwkXfP+cAslHkwvgFuXkajDTznlvk +N1trSt8sV4pAWja63XVECDdCcAz+3F4hoKOKwJCcaNpQ5kUQR3i2TtJlycM33+FC +Y7BXN0Ute4qcvwXqZVUz9zkQxSgqIXobisQk+T8VyJoVIPVVYpbtbZNQvOSqeK3Z +ywplh6ZmwcSBo3c6WB4L7oOLnR7SUqTMHW+wmG2UMbX4cQrcufx9MmDm66+KAQ== +-----END CERTIFICATE----- + +# Issuer: CN=Hongkong Post Root CA 1 O=Hongkong Post +# Subject: CN=Hongkong Post Root CA 1 O=Hongkong Post +# Label: "Hongkong Post Root CA 1" +# Serial: 1000 +# MD5 Fingerprint: a8:0d:6f:39:78:b9:43:6d:77:42:6d:98:5a:cc:23:ca +# SHA1 Fingerprint: d6:da:a8:20:8d:09:d2:15:4d:24:b5:2f:cb:34:6e:b2:58:b2:8a:58 +# SHA256 Fingerprint: f9:e6:7d:33:6c:51:00:2a:c0:54:c6:32:02:2d:66:dd:a2:e7:e3:ff:f1:0a:d0:61:ed:31:d8:bb:b4:10:cf:b2 +-----BEGIN CERTIFICATE----- +MIIDMDCCAhigAwIBAgICA+gwDQYJKoZIhvcNAQEFBQAwRzELMAkGA1UEBhMCSEsx +FjAUBgNVBAoTDUhvbmdrb25nIFBvc3QxIDAeBgNVBAMTF0hvbmdrb25nIFBvc3Qg +Um9vdCBDQSAxMB4XDTAzMDUxNTA1MTMxNFoXDTIzMDUxNTA0NTIyOVowRzELMAkG +A1UEBhMCSEsxFjAUBgNVBAoTDUhvbmdrb25nIFBvc3QxIDAeBgNVBAMTF0hvbmdr +b25nIFBvc3QgUm9vdCBDQSAxMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC +AQEArP84tulmAknjorThkPlAj3n54r15/gK97iSSHSL22oVyaf7XPwnU3ZG1ApzQ +jVrhVcNQhrkpJsLj2aDxaQMoIIBFIi1WpztUlVYiWR8o3x8gPW2iNr4joLFutbEn +PzlTCeqrauh0ssJlXI6/fMN4hM2eFvz1Lk8gKgifd/PFHsSaUmYeSF7jEAaPIpjh +ZY4bXSNmO7ilMlHIhqqhqZ5/dpTCpmy3QfDVyAY45tQM4vM7TG1QjMSDJ8EThFk9 +nnV0ttgCXjqQesBCNnLsak3c78QA3xMYV18meMjWCnl3v/evt3a5pQuEF10Q6m/h +q5URX208o1xNg1vysxmKgIsLhwIDAQABoyYwJDASBgNVHRMBAf8ECDAGAQH/AgED +MA4GA1UdDwEB/wQEAwIBxjANBgkqhkiG9w0BAQUFAAOCAQEADkbVPK7ih9legYsC +mEEIjEy82tvuJxuC52pF7BaLT4Wg87JwvVqWuspube5Gi27nKi6Wsxkz67SfqLI3 +7piol7Yutmcn1KZJ/RyTZXaeQi/cImyaT/JaFTmxcdcrUehtHJjA2Sr0oYJ71clB +oiMBdDhViw+5LmeiIAQ32pwL0xch4I+XeTRvhEgCIDMb5jREn5Fw9IBehEPCKdJs +EhTkYY2sEJCehFC78JZvRZ+K88psT/oROhUVRsPNH4NbLUES7VBnQRM9IauUiqpO +fMGx+6fWtScvl6tu4B3i0RwsH0Ti/L6RoZz71ilTc4afU9hDDl3WY4JxHYB0yvbi +AmvZWg== +-----END CERTIFICATE----- + +# Issuer: CN=SecureSign RootCA11 O=Japan Certification Services, Inc. +# Subject: CN=SecureSign RootCA11 O=Japan Certification Services, Inc. 
+# Label: "SecureSign RootCA11" +# Serial: 1 +# MD5 Fingerprint: b7:52:74:e2:92:b4:80:93:f2:75:e4:cc:d7:f2:ea:26 +# SHA1 Fingerprint: 3b:c4:9f:48:f8:f3:73:a0:9c:1e:bd:f8:5b:b1:c3:65:c7:d8:11:b3 +# SHA256 Fingerprint: bf:0f:ee:fb:9e:3a:58:1a:d5:f9:e9:db:75:89:98:57:43:d2:61:08:5c:4d:31:4f:6f:5d:72:59:aa:42:16:12 +-----BEGIN CERTIFICATE----- +MIIDbTCCAlWgAwIBAgIBATANBgkqhkiG9w0BAQUFADBYMQswCQYDVQQGEwJKUDEr +MCkGA1UEChMiSmFwYW4gQ2VydGlmaWNhdGlvbiBTZXJ2aWNlcywgSW5jLjEcMBoG +A1UEAxMTU2VjdXJlU2lnbiBSb290Q0ExMTAeFw0wOTA0MDgwNDU2NDdaFw0yOTA0 +MDgwNDU2NDdaMFgxCzAJBgNVBAYTAkpQMSswKQYDVQQKEyJKYXBhbiBDZXJ0aWZp +Y2F0aW9uIFNlcnZpY2VzLCBJbmMuMRwwGgYDVQQDExNTZWN1cmVTaWduIFJvb3RD +QTExMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA/XeqpRyQBTvLTJsz +i1oURaTnkBbR31fSIRCkF/3frNYfp+TbfPfs37gD2pRY/V1yfIw/XwFndBWW4wI8 +h9uuywGOwvNmxoVF9ALGOrVisq/6nL+k5tSAMJjzDbaTj6nU2DbysPyKyiyhFTOV +MdrAG/LuYpmGYz+/3ZMqg6h2uRMft85OQoWPIucuGvKVCbIFtUROd6EgvanyTgp9 +UK31BQ1FT0Zx/Sg+U/sE2C3XZR1KG/rPO7AxmjVuyIsG0wCR8pQIZUyxNAYAeoni +8McDWc/V1uinMrPmmECGxc0nEovMe863ETxiYAcjPitAbpSACW22s293bzUIUPsC +h8U+iQIDAQABo0IwQDAdBgNVHQ4EFgQUW/hNT7KlhtQ60vFjmqC+CfZXt94wDgYD +VR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEFBQADggEB +AKChOBZmLqdWHyGcBvod7bkixTgm2E5P7KN/ed5GIaGHd48HCJqypMWvDzKYC3xm +KbabfSVSSUOrTC4rbnpwrxYO4wJs+0LmGJ1F2FXI6Dvd5+H0LgscNFxsWEr7jIhQ +X5Ucv+2rIrVls4W6ng+4reV6G4pQOh29Dbx7VFALuUKvVaAYga1lme++5Jy/xIWr +QbJUb9wlze144o4MjQlJ3WN7WmmWAiGovVJZ6X01y8hSyn+B/tlr0/cR7SXf+Of5 +pPpyl4RTDaXQMhhRdlkUbA/r7F+AjHVDg8OFmP9Mni0N5HeDk061lgeLKBObjBmN +QSdJQO7e5iNEOdyhIta6A/I= +-----END CERTIFICATE----- + +# Issuer: CN=Microsec e-Szigno Root CA 2009 O=Microsec Ltd. +# Subject: CN=Microsec e-Szigno Root CA 2009 O=Microsec Ltd. +# Label: "Microsec e-Szigno Root CA 2009" +# Serial: 14014712776195784473 +# MD5 Fingerprint: f8:49:f4:03:bc:44:2d:83:be:48:69:7d:29:64:fc:b1 +# SHA1 Fingerprint: 89:df:74:fe:5c:f4:0f:4a:80:f9:e3:37:7d:54:da:91:e1:01:31:8e +# SHA256 Fingerprint: 3c:5f:81:fe:a5:fa:b8:2c:64:bf:a2:ea:ec:af:cd:e8:e0:77:fc:86:20:a7:ca:e5:37:16:3d:f3:6e:db:f3:78 +-----BEGIN CERTIFICATE----- +MIIECjCCAvKgAwIBAgIJAMJ+QwRORz8ZMA0GCSqGSIb3DQEBCwUAMIGCMQswCQYD +VQQGEwJIVTERMA8GA1UEBwwIQnVkYXBlc3QxFjAUBgNVBAoMDU1pY3Jvc2VjIEx0 +ZC4xJzAlBgNVBAMMHk1pY3Jvc2VjIGUtU3ppZ25vIFJvb3QgQ0EgMjAwOTEfMB0G +CSqGSIb3DQEJARYQaW5mb0BlLXN6aWduby5odTAeFw0wOTA2MTYxMTMwMThaFw0y +OTEyMzAxMTMwMThaMIGCMQswCQYDVQQGEwJIVTERMA8GA1UEBwwIQnVkYXBlc3Qx +FjAUBgNVBAoMDU1pY3Jvc2VjIEx0ZC4xJzAlBgNVBAMMHk1pY3Jvc2VjIGUtU3pp +Z25vIFJvb3QgQ0EgMjAwOTEfMB0GCSqGSIb3DQEJARYQaW5mb0BlLXN6aWduby5o +dTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAOn4j/NjrdqG2KfgQvvP +kd6mJviZpWNwrZuuyjNAfW2WbqEORO7hE52UQlKavXWFdCyoDh2Tthi3jCyoz/tc +cbna7P7ofo/kLx2yqHWH2Leh5TvPmUpG0IMZfcChEhyVbUr02MelTTMuhTlAdX4U +fIASmFDHQWe4oIBhVKZsTh/gnQ4H6cm6M+f+wFUoLAKApxn1ntxVUwOXewdI/5n7 +N4okxFnMUBBjjqqpGrCEGob5X7uxUG6k0QrM1XF+H6cbfPVTbiJfyyvm1HxdrtbC +xkzlBQHZ7Vf8wSN5/PrIJIOV87VqUQHQd9bpEqH5GoP7ghu5sJf0dgYzQ0mg/wu1 ++rUCAwEAAaOBgDB+MA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgEGMB0G +A1UdDgQWBBTLD8bfQkPMPcu1SCOhGnqmKrs0aDAfBgNVHSMEGDAWgBTLD8bfQkPM +Pcu1SCOhGnqmKrs0aDAbBgNVHREEFDASgRBpbmZvQGUtc3ppZ25vLmh1MA0GCSqG +SIb3DQEBCwUAA4IBAQDJ0Q5eLtXMs3w+y/w9/w0olZMEyL/azXm4Q5DwpL7v8u8h +mLzU1F0G9u5C7DBsoKqpyvGvivo/C3NqPuouQH4frlRheesuCDfXI/OMn74dseGk +ddug4lQUsbocKaQY9hK6ohQU4zE1yED/t+AFdlfBHFny+L/k7SViXITwfn4fs775 +tyERzAMBVnCnEJIeGzSBHq2cGsMEPO0CYdYeBvNfOofyK/FFh+U9rNHHV4S9a67c +2Pm2G2JwCz02yULyMtd6YebS2z3PyKnJm9zbWETXbzivf3jTo60adbocwTZ8jx5t +HMN1Rq41Bab2XD0h7lbwyYIiLXpUq3DDfSJlgnCW +-----END CERTIFICATE----- + +# Issuer: 
CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R3 +# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R3 +# Label: "GlobalSign Root CA - R3" +# Serial: 4835703278459759426209954 +# MD5 Fingerprint: c5:df:b8:49:ca:05:13:55:ee:2d:ba:1a:c3:3e:b0:28 +# SHA1 Fingerprint: d6:9b:56:11:48:f0:1c:77:c5:45:78:c1:09:26:df:5b:85:69:76:ad +# SHA256 Fingerprint: cb:b5:22:d7:b7:f1:27:ad:6a:01:13:86:5b:df:1c:d4:10:2e:7d:07:59:af:63:5a:7c:f4:72:0d:c9:63:c5:3b +-----BEGIN CERTIFICATE----- +MIIDXzCCAkegAwIBAgILBAAAAAABIVhTCKIwDQYJKoZIhvcNAQELBQAwTDEgMB4G +A1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjMxEzARBgNVBAoTCkdsb2JhbFNp +Z24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMDkwMzE4MTAwMDAwWhcNMjkwMzE4 +MTAwMDAwWjBMMSAwHgYDVQQLExdHbG9iYWxTaWduIFJvb3QgQ0EgLSBSMzETMBEG +A1UEChMKR2xvYmFsU2lnbjETMBEGA1UEAxMKR2xvYmFsU2lnbjCCASIwDQYJKoZI +hvcNAQEBBQADggEPADCCAQoCggEBAMwldpB5BngiFvXAg7aEyiie/QV2EcWtiHL8 +RgJDx7KKnQRfJMsuS+FggkbhUqsMgUdwbN1k0ev1LKMPgj0MK66X17YUhhB5uzsT +gHeMCOFJ0mpiLx9e+pZo34knlTifBtc+ycsmWQ1z3rDI6SYOgxXG71uL0gRgykmm +KPZpO/bLyCiR5Z2KYVc3rHQU3HTgOu5yLy6c+9C7v/U9AOEGM+iCK65TpjoWc4zd +QQ4gOsC0p6Hpsk+QLjJg6VfLuQSSaGjlOCZgdbKfd/+RFO+uIEn8rUAVSNECMWEZ +XriX7613t2Saer9fwRPvm2L7DWzgVGkWqQPabumDk3F2xmmFghcCAwEAAaNCMEAw +DgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFI/wS3+o +LkUkrk1Q+mOai97i3Ru8MA0GCSqGSIb3DQEBCwUAA4IBAQBLQNvAUKr+yAzv95ZU +RUm7lgAJQayzE4aGKAczymvmdLm6AC2upArT9fHxD4q/c2dKg8dEe3jgr25sbwMp +jjM5RcOO5LlXbKr8EpbsU8Yt5CRsuZRj+9xTaGdWPoO4zzUhw8lo/s7awlOqzJCK +6fBdRoyV3XpYKBovHd7NADdBj+1EbddTKJd+82cEHhXXipa0095MJ6RMG3NzdvQX +mcIfeg7jLQitChws/zyrVQ4PkX4268NXSb7hLi18YIvDQVETI53O9zJrlAGomecs +Mx86OyXShkDOOyyGeMlhLxS67ttVb9+E7gUJTb0o2HLO02JQZR7rkpeDMdmztcpH +WD9f +-----END CERTIFICATE----- + +# Issuer: CN=Autoridad de Certificacion Firmaprofesional CIF A62634068 +# Subject: CN=Autoridad de Certificacion Firmaprofesional CIF A62634068 +# Label: "Autoridad de Certificacion Firmaprofesional CIF A62634068" +# Serial: 6047274297262753887 +# MD5 Fingerprint: 73:3a:74:7a:ec:bb:a3:96:a6:c2:e4:e2:c8:9b:c0:c3 +# SHA1 Fingerprint: ae:c5:fb:3f:c8:e1:bf:c4:e5:4f:03:07:5a:9a:e8:00:b7:f7:b6:fa +# SHA256 Fingerprint: 04:04:80:28:bf:1f:28:64:d4:8f:9a:d4:d8:32:94:36:6a:82:88:56:55:3f:3b:14:30:3f:90:14:7f:5d:40:ef +-----BEGIN CERTIFICATE----- +MIIGFDCCA/ygAwIBAgIIU+w77vuySF8wDQYJKoZIhvcNAQEFBQAwUTELMAkGA1UE +BhMCRVMxQjBABgNVBAMMOUF1dG9yaWRhZCBkZSBDZXJ0aWZpY2FjaW9uIEZpcm1h +cHJvZmVzaW9uYWwgQ0lGIEE2MjYzNDA2ODAeFw0wOTA1MjAwODM4MTVaFw0zMDEy +MzEwODM4MTVaMFExCzAJBgNVBAYTAkVTMUIwQAYDVQQDDDlBdXRvcmlkYWQgZGUg +Q2VydGlmaWNhY2lvbiBGaXJtYXByb2Zlc2lvbmFsIENJRiBBNjI2MzQwNjgwggIi +MA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDKlmuO6vj78aI14H9M2uDDUtd9 +thDIAl6zQyrET2qyyhxdKJp4ERppWVevtSBC5IsP5t9bpgOSL/UR5GLXMnE42QQM +cas9UX4PB99jBVzpv5RvwSmCwLTaUbDBPLutN0pcyvFLNg4kq7/DhHf9qFD0sefG +L9ItWY16Ck6WaVICqjaY7Pz6FIMMNx/Jkjd/14Et5cS54D40/mf0PmbR0/RAz15i +NA9wBj4gGFrO93IbJWyTdBSTo3OxDqqHECNZXyAFGUftaI6SEspd/NYrspI8IM/h +X68gvqB2f3bl7BqGYTM+53u0P6APjqK5am+5hyZvQWyIplD9amML9ZMWGxmPsu2b +m8mQ9QEM3xk9Dz44I8kvjwzRAv4bVdZO0I08r0+k8/6vKtMFnXkIoctXMbScyJCy +Z/QYFpM6/EfY0XiWMR+6KwxfXZmtY4laJCB22N/9q06mIqqdXuYnin1oKaPnirja +EbsXLZmdEyRG98Xi2J+Of8ePdG1asuhy9azuJBCtLxTa/y2aRnFHvkLfuwHb9H/T +KI8xWVvTyQKmtFLKbpf7Q8UIJm+K9Lv9nyiqDdVF8xM6HdjAeI9BZzwelGSuewvF +6NkBiDkal4ZkQdU7hwxu+g/GvUgUvzlN1J5Bto+WHWOWk9mVBngxaJ43BjuAiUVh +OSPHG0SjFeUc+JIwuwIDAQABo4HvMIHsMBIGA1UdEwEB/wQIMAYBAf8CAQEwDgYD +VR0PAQH/BAQDAgEGMB0GA1UdDgQWBBRlzeurNR4APn7VdMActHNHDhpkLzCBpgYD +VR0gBIGeMIGbMIGYBgRVHSAAMIGPMC8GCCsGAQUFBwIBFiNodHRwOi8vd3d3LmZp 
+cm1hcHJvZmVzaW9uYWwuY29tL2NwczBcBggrBgEFBQcCAjBQHk4AUABhAHMAZQBv +ACAAZABlACAAbABhACAAQgBvAG4AYQBuAG8AdgBhACAANAA3ACAAQgBhAHIAYwBl +AGwAbwBuAGEAIAAwADgAMAAxADcwDQYJKoZIhvcNAQEFBQADggIBABd9oPm03cXF +661LJLWhAqvdpYhKsg9VSytXjDvlMd3+xDLx51tkljYyGOylMnfX40S2wBEqgLk9 +am58m9Ot/MPWo+ZkKXzR4Tgegiv/J2Wv+xYVxC5xhOW1//qkR71kMrv2JYSiJ0L1 +ILDCExARzRAVukKQKtJE4ZYm6zFIEv0q2skGz3QeqUvVhyj5eTSSPi5E6PaPT481 +PyWzOdxjKpBrIF/EUhJOlywqrJ2X3kjyo2bbwtKDlaZmp54lD+kLM5FlClrD2VQS +3a/DTg4fJl4N3LON7NWBcN7STyQF82xO9UxJZo3R/9ILJUFI/lGExkKvgATP0H5k +SeTy36LssUzAKh3ntLFlosS88Zj0qnAHY7S42jtM+kAiMFsRpvAFDsYCA0irhpuF +3dvd6qJ2gHN99ZwExEWN57kci57q13XRcrHedUTnQn3iV2t93Jm8PYMo6oCTjcVM +ZcFwgbg4/EMxsvYDNEeyrPsiBsse3RdHHF9mudMaotoRsaS8I8nkvof/uZS2+F0g +StRf571oe2XyFR7SOqkt6dhrJKyXWERHrVkY8SFlcN7ONGCoQPHzPKTDKCOM/icz +Q0CgFzzr6juwcqajuUpLXhZI9LK8yIySxZ2frHI2vDSANGupi5LAuBft7HZT9SQB +jLMi6Et8Vcad+qMUu2WFbm5PEn4KPJ2V +-----END CERTIFICATE----- + +# Issuer: CN=Izenpe.com O=IZENPE S.A. +# Subject: CN=Izenpe.com O=IZENPE S.A. +# Label: "Izenpe.com" +# Serial: 917563065490389241595536686991402621 +# MD5 Fingerprint: a6:b0:cd:85:80:da:5c:50:34:a3:39:90:2f:55:67:73 +# SHA1 Fingerprint: 2f:78:3d:25:52:18:a7:4a:65:39:71:b5:2c:a2:9c:45:15:6f:e9:19 +# SHA256 Fingerprint: 25:30:cc:8e:98:32:15:02:ba:d9:6f:9b:1f:ba:1b:09:9e:2d:29:9e:0f:45:48:bb:91:4f:36:3b:c0:d4:53:1f +-----BEGIN CERTIFICATE----- +MIIF8TCCA9mgAwIBAgIQALC3WhZIX7/hy/WL1xnmfTANBgkqhkiG9w0BAQsFADA4 +MQswCQYDVQQGEwJFUzEUMBIGA1UECgwLSVpFTlBFIFMuQS4xEzARBgNVBAMMCkl6 +ZW5wZS5jb20wHhcNMDcxMjEzMTMwODI4WhcNMzcxMjEzMDgyNzI1WjA4MQswCQYD +VQQGEwJFUzEUMBIGA1UECgwLSVpFTlBFIFMuQS4xEzARBgNVBAMMCkl6ZW5wZS5j +b20wggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDJ03rKDx6sp4boFmVq +scIbRTJxldn+EFvMr+eleQGPicPK8lVx93e+d5TzcqQsRNiekpsUOqHnJJAKClaO +xdgmlOHZSOEtPtoKct2jmRXagaKH9HtuJneJWK3W6wyyQXpzbm3benhB6QiIEn6H +LmYRY2xU+zydcsC8Lv/Ct90NduM61/e0aL6i9eOBbsFGb12N4E3GVFWJGjMxCrFX +uaOKmMPsOzTFlUFpfnXCPCDFYbpRR6AgkJOhkEvzTnyFRVSa0QUmQbC1TR0zvsQD +yCV8wXDbO/QJLVQnSKwv4cSsPsjLkkxTOTcj7NMB+eAJRE1NZMDhDVqHIrytG6P+ +JrUV86f8hBnp7KGItERphIPzidF0BqnMC9bC3ieFUCbKF7jJeodWLBoBHmy+E60Q +rLUk9TiRodZL2vG70t5HtfG8gfZZa88ZU+mNFctKy6lvROUbQc/hhqfK0GqfvEyN +BjNaooXlkDWgYlwWTvDjovoDGrQscbNYLN57C9saD+veIR8GdwYDsMnvmfzAuU8L +hij+0rnq49qlw0dpEuDb8PYZi+17cNcC1u2HGCgsBCRMd+RIihrGO5rUD8r6ddIB +QFqNeb+Lz0vPqhbBleStTIo+F5HUsWLlguWABKQDfo2/2n+iD5dPDNMN+9fR5XJ+ +HMh3/1uaD7euBUbl8agW7EekFwIDAQABo4H2MIHzMIGwBgNVHREEgagwgaWBD2lu +Zm9AaXplbnBlLmNvbaSBkTCBjjFHMEUGA1UECgw+SVpFTlBFIFMuQS4gLSBDSUYg +QTAxMzM3MjYwLVJNZXJjLlZpdG9yaWEtR2FzdGVpeiBUMTA1NSBGNjIgUzgxQzBB +BgNVBAkMOkF2ZGEgZGVsIE1lZGl0ZXJyYW5lbyBFdG9yYmlkZWEgMTQgLSAwMTAx +MCBWaXRvcmlhLUdhc3RlaXowDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMC +AQYwHQYDVR0OBBYEFB0cZQ6o8iV7tJHP5LGx5r1VdGwFMA0GCSqGSIb3DQEBCwUA +A4ICAQB4pgwWSp9MiDrAyw6lFn2fuUhfGI8NYjb2zRlrrKvV9pF9rnHzP7MOeIWb +laQnIUdCSnxIOvVFfLMMjlF4rJUT3sb9fbgakEyrkgPH7UIBzg/YsfqikuFgba56 +awmqxinuaElnMIAkejEWOVt+8Rwu3WwJrfIxwYJOubv5vr8qhT/AQKM6WfxZSzwo +JNu0FXWuDYi6LnPAvViH5ULy617uHjAimcs30cQhbIHsvm0m5hzkQiCeR7Csg1lw +LDXWrzY0tM07+DKo7+N4ifuNRSzanLh+QBxh5z6ikixL8s36mLYp//Pye6kfLqCT +VyvehQP5aTfLnnhqBbTFMXiJ7HqnheG5ezzevh55hM6fcA5ZwjUukCox2eRFekGk +LhObNA5me0mrZJfQRsN5nXJQY6aYWwa9SG3YOYNw6DXwBdGqvOPbyALqfP2C2sJb +UjWumDqtujWTI6cfSN01RpiyEGjkpTHCClguGYEQyVB1/OpaFs4R1+7vUIgtYf8/ +QnMFlEPVjjxOAToZpR9GTnfQXeWBIiGH/pR9hNiTrdZoQ0iy2+tzJOeRf1SktoA+ +naM8THLCV8Sg1Mw4J87VBp6iSNnpn86CcDaTmjvfliHjWbcM2pE38P1ZWrOZyGls +QyYBNWNgVYkDOnXYukrZVP/u3oDYLdE41V4tC5h9Pmzb/CaIxw== +-----END CERTIFICATE----- + +# Issuer: CN=Chambers of Commerce Root - 2008 O=AC 
Camerfirma S.A. +# Subject: CN=Chambers of Commerce Root - 2008 O=AC Camerfirma S.A. +# Label: "Chambers of Commerce Root - 2008" +# Serial: 11806822484801597146 +# MD5 Fingerprint: 5e:80:9e:84:5a:0e:65:0b:17:02:f3:55:18:2a:3e:d7 +# SHA1 Fingerprint: 78:6a:74:ac:76:ab:14:7f:9c:6a:30:50:ba:9e:a8:7e:fe:9a:ce:3c +# SHA256 Fingerprint: 06:3e:4a:fa:c4:91:df:d3:32:f3:08:9b:85:42:e9:46:17:d8:93:d7:fe:94:4e:10:a7:93:7e:e2:9d:96:93:c0 +-----BEGIN CERTIFICATE----- +MIIHTzCCBTegAwIBAgIJAKPaQn6ksa7aMA0GCSqGSIb3DQEBBQUAMIGuMQswCQYD +VQQGEwJFVTFDMEEGA1UEBxM6TWFkcmlkIChzZWUgY3VycmVudCBhZGRyZXNzIGF0 +IHd3dy5jYW1lcmZpcm1hLmNvbS9hZGRyZXNzKTESMBAGA1UEBRMJQTgyNzQzMjg3 +MRswGQYDVQQKExJBQyBDYW1lcmZpcm1hIFMuQS4xKTAnBgNVBAMTIENoYW1iZXJz +IG9mIENvbW1lcmNlIFJvb3QgLSAyMDA4MB4XDTA4MDgwMTEyMjk1MFoXDTM4MDcz +MTEyMjk1MFowga4xCzAJBgNVBAYTAkVVMUMwQQYDVQQHEzpNYWRyaWQgKHNlZSBj +dXJyZW50IGFkZHJlc3MgYXQgd3d3LmNhbWVyZmlybWEuY29tL2FkZHJlc3MpMRIw +EAYDVQQFEwlBODI3NDMyODcxGzAZBgNVBAoTEkFDIENhbWVyZmlybWEgUy5BLjEp +MCcGA1UEAxMgQ2hhbWJlcnMgb2YgQ29tbWVyY2UgUm9vdCAtIDIwMDgwggIiMA0G +CSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCvAMtwNyuAWko6bHiUfaN/Gh/2NdW9 +28sNRHI+JrKQUrpjOyhYb6WzbZSm891kDFX29ufyIiKAXuFixrYp4YFs8r/lfTJq +VKAyGVn+H4vXPWCGhSRv4xGzdz4gljUha7MI2XAuZPeEklPWDrCQiorjh40G072Q +DuKZoRuGDtqaCrsLYVAGUvGef3bsyw/QHg3PmTA9HMRFEFis1tPo1+XqxQEHd9ZR +5gN/ikilTWh1uem8nk4ZcfUyS5xtYBkL+8ydddy/Js2Pk3g5eXNeJQ7KXOt3EgfL +ZEFHcpOrUMPrCXZkNNI5t3YRCQ12RcSprj1qr7V9ZS+UWBDsXHyvfuK2GNnQm05a +Sd+pZgvMPMZ4fKecHePOjlO+Bd5gD2vlGts/4+EhySnB8esHnFIbAURRPHsl18Tl +UlRdJQfKFiC4reRB7noI/plvg6aRArBsNlVq5331lubKgdaX8ZSD6e2wsWsSaR6s ++12pxZjptFtYer49okQ6Y1nUCyXeG0+95QGezdIp1Z8XGQpvvwyQ0wlf2eOKNcx5 +Wk0ZN5K3xMGtr/R5JJqyAQuxr1yW84Ay+1w9mPGgP0revq+ULtlVmhduYJ1jbLhj +ya6BXBg14JC7vjxPNyK5fuvPnnchpj04gftI2jE9K+OJ9dC1vX7gUMQSibMjmhAx +hduub+84Mxh2EQIDAQABo4IBbDCCAWgwEgYDVR0TAQH/BAgwBgEB/wIBDDAdBgNV +HQ4EFgQU+SSsD7K1+HnA+mCIG8TZTQKeFxkwgeMGA1UdIwSB2zCB2IAU+SSsD7K1 ++HnA+mCIG8TZTQKeFxmhgbSkgbEwga4xCzAJBgNVBAYTAkVVMUMwQQYDVQQHEzpN +YWRyaWQgKHNlZSBjdXJyZW50IGFkZHJlc3MgYXQgd3d3LmNhbWVyZmlybWEuY29t +L2FkZHJlc3MpMRIwEAYDVQQFEwlBODI3NDMyODcxGzAZBgNVBAoTEkFDIENhbWVy +ZmlybWEgUy5BLjEpMCcGA1UEAxMgQ2hhbWJlcnMgb2YgQ29tbWVyY2UgUm9vdCAt +IDIwMDiCCQCj2kJ+pLGu2jAOBgNVHQ8BAf8EBAMCAQYwPQYDVR0gBDYwNDAyBgRV +HSAAMCowKAYIKwYBBQUHAgEWHGh0dHA6Ly9wb2xpY3kuY2FtZXJmaXJtYS5jb20w +DQYJKoZIhvcNAQEFBQADggIBAJASryI1wqM58C7e6bXpeHxIvj99RZJe6dqxGfwW +PJ+0W2aeaufDuV2I6A+tzyMP3iU6XsxPpcG1Lawk0lgH3qLPaYRgM+gQDROpI9CF +5Y57pp49chNyM/WqfcZjHwj0/gF/JM8rLFQJ3uIrbZLGOU8W6jx+ekbURWpGqOt1 +glanq6B8aBMz9p0w8G8nOSQjKpD9kCk18pPfNKXG9/jvjA9iSnyu0/VU+I22mlaH +FoI6M6taIgj3grrqLuBHmrS1RaMFO9ncLkVAO+rcf+g769HsJtg1pDDFOqxXnrN2 +pSB7+R5KBWIBpih1YJeSDW4+TTdDDZIVnBgizVGZoCkaPF+KMjNbMMeJL0eYD6MD +xvbxrN8y8NmBGuScvfaAFPDRLLmF9dijscilIeUcE5fuDr3fKanvNFNb0+RqE4QG +tjICxFKuItLcsiFCGtpA8CnJ7AoMXOLQusxI0zcKzBIKinmwPQN/aUv0NCB9szTq +jktk9T79syNnFQ0EuPAtwQlRPLJsFfClI9eDdOTlLsn+mCdCxqvGnrDQWzilm1De +fhiYtUU79nm06PcaewaD+9CL2rvHvRirCG88gGtAPxkZumWK5r7VXNM21+9AUiRg +OGcEMeyP84LG3rlV8zsxkVrctQgVrXYlCg17LofiDKYGvCYQbTed7N14jHyAxfDZ +d0jQ +-----END CERTIFICATE----- + +# Issuer: CN=Global Chambersign Root - 2008 O=AC Camerfirma S.A. +# Subject: CN=Global Chambersign Root - 2008 O=AC Camerfirma S.A. 
+# Label: "Global Chambersign Root - 2008" +# Serial: 14541511773111788494 +# MD5 Fingerprint: 9e:80:ff:78:01:0c:2e:c1:36:bd:fe:96:90:6e:08:f3 +# SHA1 Fingerprint: 4a:bd:ee:ec:95:0d:35:9c:89:ae:c7:52:a1:2c:5b:29:f6:d6:aa:0c +# SHA256 Fingerprint: 13:63:35:43:93:34:a7:69:80:16:a0:d3:24:de:72:28:4e:07:9d:7b:52:20:bb:8f:bd:74:78:16:ee:be:ba:ca +-----BEGIN CERTIFICATE----- +MIIHSTCCBTGgAwIBAgIJAMnN0+nVfSPOMA0GCSqGSIb3DQEBBQUAMIGsMQswCQYD +VQQGEwJFVTFDMEEGA1UEBxM6TWFkcmlkIChzZWUgY3VycmVudCBhZGRyZXNzIGF0 +IHd3dy5jYW1lcmZpcm1hLmNvbS9hZGRyZXNzKTESMBAGA1UEBRMJQTgyNzQzMjg3 +MRswGQYDVQQKExJBQyBDYW1lcmZpcm1hIFMuQS4xJzAlBgNVBAMTHkdsb2JhbCBD +aGFtYmVyc2lnbiBSb290IC0gMjAwODAeFw0wODA4MDExMjMxNDBaFw0zODA3MzEx +MjMxNDBaMIGsMQswCQYDVQQGEwJFVTFDMEEGA1UEBxM6TWFkcmlkIChzZWUgY3Vy +cmVudCBhZGRyZXNzIGF0IHd3dy5jYW1lcmZpcm1hLmNvbS9hZGRyZXNzKTESMBAG +A1UEBRMJQTgyNzQzMjg3MRswGQYDVQQKExJBQyBDYW1lcmZpcm1hIFMuQS4xJzAl +BgNVBAMTHkdsb2JhbCBDaGFtYmVyc2lnbiBSb290IC0gMjAwODCCAiIwDQYJKoZI +hvcNAQEBBQADggIPADCCAgoCggIBAMDfVtPkOpt2RbQT2//BthmLN0EYlVJH6xed +KYiONWwGMi5HYvNJBL99RDaxccy9Wglz1dmFRP+RVyXfXjaOcNFccUMd2drvXNL7 +G706tcuto8xEpw2uIRU/uXpbknXYpBI4iRmKt4DS4jJvVpyR1ogQC7N0ZJJ0YPP2 +zxhPYLIj0Mc7zmFLmY/CDNBAspjcDahOo7kKrmCgrUVSY7pmvWjg+b4aqIG7HkF4 +ddPB/gBVsIdU6CeQNR1MM62X/JcumIS/LMmjv9GYERTtY/jKmIhYF5ntRQOXfjyG +HoiMvvKRhI9lNNgATH23MRdaKXoKGCQwoze1eqkBfSbW+Q6OWfH9GzO1KTsXO0G2 +Id3UwD2ln58fQ1DJu7xsepeY7s2MH/ucUa6LcL0nn3HAa6x9kGbo1106DbDVwo3V +yJ2dwW3Q0L9R5OP4wzg2rtandeavhENdk5IMagfeOx2YItaswTXbo6Al/3K1dh3e +beksZixShNBFks4c5eUzHdwHU1SjqoI7mjcv3N2gZOnm3b2u/GSFHTynyQbehP9r +6GsaPMWis0L7iwk+XwhSx2LE1AVxv8Rk5Pihg+g+EpuoHtQ2TS9x9o0o9oOpE9Jh +wZG7SMA0j0GMS0zbaRL/UJScIINZc+18ofLx/d33SdNDWKBWY8o9PeU1VlnpDsog +zCtLkykPAgMBAAGjggFqMIIBZjASBgNVHRMBAf8ECDAGAQH/AgEMMB0GA1UdDgQW +BBS5CcqcHtvTbDprru1U8VuTBjUuXjCB4QYDVR0jBIHZMIHWgBS5CcqcHtvTbDpr +ru1U8VuTBjUuXqGBsqSBrzCBrDELMAkGA1UEBhMCRVUxQzBBBgNVBAcTOk1hZHJp +ZCAoc2VlIGN1cnJlbnQgYWRkcmVzcyBhdCB3d3cuY2FtZXJmaXJtYS5jb20vYWRk +cmVzcykxEjAQBgNVBAUTCUE4Mjc0MzI4NzEbMBkGA1UEChMSQUMgQ2FtZXJmaXJt +YSBTLkEuMScwJQYDVQQDEx5HbG9iYWwgQ2hhbWJlcnNpZ24gUm9vdCAtIDIwMDiC +CQDJzdPp1X0jzjAOBgNVHQ8BAf8EBAMCAQYwPQYDVR0gBDYwNDAyBgRVHSAAMCow +KAYIKwYBBQUHAgEWHGh0dHA6Ly9wb2xpY3kuY2FtZXJmaXJtYS5jb20wDQYJKoZI +hvcNAQEFBQADggIBAICIf3DekijZBZRG/5BXqfEv3xoNa/p8DhxJJHkn2EaqbylZ +UohwEurdPfWbU1Rv4WCiqAm57OtZfMY18dwY6fFn5a+6ReAJ3spED8IXDneRRXoz +X1+WLGiLwUePmJs9wOzL9dWCkoQ10b42OFZyMVtHLaoXpGNR6woBrX/sdZ7LoR/x +fxKxueRkf2fWIyr0uDldmOghp+G9PUIadJpwr2hsUF1Jz//7Dl3mLEfXgTpZALVz +a2Mg9jFFCDkO9HB+QHBaP9BrQql0PSgvAm11cpUJjUhjxsYjV5KTXjXBjfkK9yyd +Yhz2rXzdpjEetrHHfoUm+qRqtdpjMNHvkzeyZi99Bffnt0uYlDXA2TopwZ2yUDMd +SqlapskD7+3056huirRXhOukP9DuqqqHW2Pok+JrqNS4cnhrG+055F3Lm6qH1U9O +AP7Zap88MQ8oAgF9mOinsKJknnn4SPIVqczmyETrP3iZ8ntxPjzxmKfFGBI/5rso +M0LpRQp8bfKGeS/Fghl9CYl8slR2iK7ewfPM4W7bMdaTrpmg7yVqc5iJWzouE4ge +v8CSlDQb4ye3ix5vQv/n6TebUB0tovkC7stYWDpxvGjjqsGvHCgfotwjZT+B6q6Z +09gwzxMNTxXJhLynSC34MCN32EZLeW32jO06f2ARePTpm67VVMB0gNELQp/B +-----END CERTIFICATE----- + +# Issuer: CN=Go Daddy Root Certificate Authority - G2 O=GoDaddy.com, Inc. +# Subject: CN=Go Daddy Root Certificate Authority - G2 O=GoDaddy.com, Inc. 
+# Label: "Go Daddy Root Certificate Authority - G2" +# Serial: 0 +# MD5 Fingerprint: 80:3a:bc:22:c1:e6:fb:8d:9b:3b:27:4a:32:1b:9a:01 +# SHA1 Fingerprint: 47:be:ab:c9:22:ea:e8:0e:78:78:34:62:a7:9f:45:c2:54:fd:e6:8b +# SHA256 Fingerprint: 45:14:0b:32:47:eb:9c:c8:c5:b4:f0:d7:b5:30:91:f7:32:92:08:9e:6e:5a:63:e2:74:9d:d3:ac:a9:19:8e:da +-----BEGIN CERTIFICATE----- +MIIDxTCCAq2gAwIBAgIBADANBgkqhkiG9w0BAQsFADCBgzELMAkGA1UEBhMCVVMx +EDAOBgNVBAgTB0FyaXpvbmExEzARBgNVBAcTClNjb3R0c2RhbGUxGjAYBgNVBAoT +EUdvRGFkZHkuY29tLCBJbmMuMTEwLwYDVQQDEyhHbyBEYWRkeSBSb290IENlcnRp +ZmljYXRlIEF1dGhvcml0eSAtIEcyMB4XDTA5MDkwMTAwMDAwMFoXDTM3MTIzMTIz +NTk1OVowgYMxCzAJBgNVBAYTAlVTMRAwDgYDVQQIEwdBcml6b25hMRMwEQYDVQQH +EwpTY290dHNkYWxlMRowGAYDVQQKExFHb0RhZGR5LmNvbSwgSW5jLjExMC8GA1UE +AxMoR28gRGFkZHkgUm9vdCBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkgLSBHMjCCASIw +DQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAL9xYgjx+lk09xvJGKP3gElY6SKD +E6bFIEMBO4Tx5oVJnyfq9oQbTqC023CYxzIBsQU+B07u9PpPL1kwIuerGVZr4oAH +/PMWdYA5UXvl+TW2dE6pjYIT5LY/qQOD+qK+ihVqf94Lw7YZFAXK6sOoBJQ7Rnwy +DfMAZiLIjWltNowRGLfTshxgtDj6AozO091GB94KPutdfMh8+7ArU6SSYmlRJQVh +GkSBjCypQ5Yj36w6gZoOKcUcqeldHraenjAKOc7xiID7S13MMuyFYkMlNAJWJwGR +tDtwKj9useiciAF9n9T521NtYJ2/LOdYq7hfRvzOxBsDPAnrSTFcaUaz4EcCAwEA +AaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYE +FDqahQcQZyi27/a9BUFuIMGU2g/eMA0GCSqGSIb3DQEBCwUAA4IBAQCZ21151fmX +WWcDYfF+OwYxdS2hII5PZYe096acvNjpL9DbWu7PdIxztDhC2gV7+AJ1uP2lsdeu +9tfeE8tTEH6KRtGX+rcuKxGrkLAngPnon1rpN5+r5N9ss4UXnT3ZJE95kTXWXwTr +gIOrmgIttRD02JDHBHNA7XIloKmf7J6raBKZV8aPEjoJpL1E/QYVN8Gb5DKj7Tjo +2GTzLH4U/ALqn83/B2gX2yKQOC16jdFU8WnjXzPKej17CuPKf1855eJ1usV2GDPO +LPAvTK33sefOT6jEm0pUBsV/fdUID+Ic/n4XuKxe9tQWskMJDE32p2u0mYRlynqI +4uJEvlz36hz1 +-----END CERTIFICATE----- + +# Issuer: CN=Starfield Root Certificate Authority - G2 O=Starfield Technologies, Inc. +# Subject: CN=Starfield Root Certificate Authority - G2 O=Starfield Technologies, Inc. 
+# Label: "Starfield Root Certificate Authority - G2" +# Serial: 0 +# MD5 Fingerprint: d6:39:81:c6:52:7e:96:69:fc:fc:ca:66:ed:05:f2:96 +# SHA1 Fingerprint: b5:1c:06:7c:ee:2b:0c:3d:f8:55:ab:2d:92:f4:fe:39:d4:e7:0f:0e +# SHA256 Fingerprint: 2c:e1:cb:0b:f9:d2:f9:e1:02:99:3f:be:21:51:52:c3:b2:dd:0c:ab:de:1c:68:e5:31:9b:83:91:54:db:b7:f5 +-----BEGIN CERTIFICATE----- +MIID3TCCAsWgAwIBAgIBADANBgkqhkiG9w0BAQsFADCBjzELMAkGA1UEBhMCVVMx +EDAOBgNVBAgTB0FyaXpvbmExEzARBgNVBAcTClNjb3R0c2RhbGUxJTAjBgNVBAoT +HFN0YXJmaWVsZCBUZWNobm9sb2dpZXMsIEluYy4xMjAwBgNVBAMTKVN0YXJmaWVs +ZCBSb290IENlcnRpZmljYXRlIEF1dGhvcml0eSAtIEcyMB4XDTA5MDkwMTAwMDAw +MFoXDTM3MTIzMTIzNTk1OVowgY8xCzAJBgNVBAYTAlVTMRAwDgYDVQQIEwdBcml6 +b25hMRMwEQYDVQQHEwpTY290dHNkYWxlMSUwIwYDVQQKExxTdGFyZmllbGQgVGVj +aG5vbG9naWVzLCBJbmMuMTIwMAYDVQQDEylTdGFyZmllbGQgUm9vdCBDZXJ0aWZp +Y2F0ZSBBdXRob3JpdHkgLSBHMjCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC +ggEBAL3twQP89o/8ArFvW59I2Z154qK3A2FWGMNHttfKPTUuiUP3oWmb3ooa/RMg +nLRJdzIpVv257IzdIvpy3Cdhl+72WoTsbhm5iSzchFvVdPtrX8WJpRBSiUZV9Lh1 +HOZ/5FSuS/hVclcCGfgXcVnrHigHdMWdSL5stPSksPNkN3mSwOxGXn/hbVNMYq/N +Hwtjuzqd+/x5AJhhdM8mgkBj87JyahkNmcrUDnXMN/uLicFZ8WJ/X7NfZTD4p7dN +dloedl40wOiWVpmKs/B/pM293DIxfJHP4F8R+GuqSVzRmZTRouNjWwl2tVZi4Ut0 +HZbUJtQIBFnQmA4O5t78w+wfkPECAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAO +BgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFHwMMh+n2TB/xH1oo2Kooc6rB1snMA0G +CSqGSIb3DQEBCwUAA4IBAQARWfolTwNvlJk7mh+ChTnUdgWUXuEok21iXQnCoKjU +sHU48TRqneSfioYmUeYs0cYtbpUgSpIB7LiKZ3sx4mcujJUDJi5DnUox9g61DLu3 +4jd/IroAow57UvtruzvE03lRTs2Q9GcHGcg8RnoNAX3FWOdt5oUwF5okxBDgBPfg +8n/Uqgr/Qh037ZTlZFkSIHc40zI+OIF1lnP6aI+xy84fxez6nH7PfrHxBy22/L/K +pL/QlwVKvOoYKAKQvVR4CSFx09F9HdkWsKlhPdAKACL8x3vLCWRFCztAgfd9fDL1 +mMpYjn0q7pBZc2T5NnReJaH1ZgUufzkVqSr7UIuOhWn0 +-----END CERTIFICATE----- + +# Issuer: CN=Starfield Services Root Certificate Authority - G2 O=Starfield Technologies, Inc. +# Subject: CN=Starfield Services Root Certificate Authority - G2 O=Starfield Technologies, Inc. 
+# Label: "Starfield Services Root Certificate Authority - G2" +# Serial: 0 +# MD5 Fingerprint: 17:35:74:af:7b:61:1c:eb:f4:f9:3c:e2:ee:40:f9:a2 +# SHA1 Fingerprint: 92:5a:8f:8d:2c:6d:04:e0:66:5f:59:6a:ff:22:d8:63:e8:25:6f:3f +# SHA256 Fingerprint: 56:8d:69:05:a2:c8:87:08:a4:b3:02:51:90:ed:cf:ed:b1:97:4a:60:6a:13:c6:e5:29:0f:cb:2a:e6:3e:da:b5 +-----BEGIN CERTIFICATE----- +MIID7zCCAtegAwIBAgIBADANBgkqhkiG9w0BAQsFADCBmDELMAkGA1UEBhMCVVMx +EDAOBgNVBAgTB0FyaXpvbmExEzARBgNVBAcTClNjb3R0c2RhbGUxJTAjBgNVBAoT +HFN0YXJmaWVsZCBUZWNobm9sb2dpZXMsIEluYy4xOzA5BgNVBAMTMlN0YXJmaWVs +ZCBTZXJ2aWNlcyBSb290IENlcnRpZmljYXRlIEF1dGhvcml0eSAtIEcyMB4XDTA5 +MDkwMTAwMDAwMFoXDTM3MTIzMTIzNTk1OVowgZgxCzAJBgNVBAYTAlVTMRAwDgYD +VQQIEwdBcml6b25hMRMwEQYDVQQHEwpTY290dHNkYWxlMSUwIwYDVQQKExxTdGFy +ZmllbGQgVGVjaG5vbG9naWVzLCBJbmMuMTswOQYDVQQDEzJTdGFyZmllbGQgU2Vy +dmljZXMgUm9vdCBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkgLSBHMjCCASIwDQYJKoZI +hvcNAQEBBQADggEPADCCAQoCggEBANUMOsQq+U7i9b4Zl1+OiFOxHz/Lz58gE20p +OsgPfTz3a3Y4Y9k2YKibXlwAgLIvWX/2h/klQ4bnaRtSmpDhcePYLQ1Ob/bISdm2 +8xpWriu2dBTrz/sm4xq6HZYuajtYlIlHVv8loJNwU4PahHQUw2eeBGg6345AWh1K +Ts9DkTvnVtYAcMtS7nt9rjrnvDH5RfbCYM8TWQIrgMw0R9+53pBlbQLPLJGmpufe +hRhJfGZOozptqbXuNC66DQO4M99H67FrjSXZm86B0UVGMpZwh94CDklDhbZsc7tk +6mFBrMnUVN+HL8cisibMn1lUaJ/8viovxFUcdUBgF4UCVTmLfwUCAwEAAaNCMEAw +DwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFJxfAN+q +AdcwKziIorhtSpzyEZGDMA0GCSqGSIb3DQEBCwUAA4IBAQBLNqaEd2ndOxmfZyMI +bw5hyf2E3F/YNoHN2BtBLZ9g3ccaaNnRbobhiCPPE95Dz+I0swSdHynVv/heyNXB +ve6SbzJ08pGCL72CQnqtKrcgfU28elUSwhXqvfdqlS5sdJ/PHLTyxQGjhdByPq1z +qwubdQxtRbeOlKyWN7Wg0I8VRw7j6IPdj/3vQQF3zCepYoUz8jcI73HPdwbeyBkd +iEDPfUYd/x7H4c7/I9vG+o1VTqkC50cRRj70/b17KSa7qWFiNyi2LSr2EIZkyXCn +0q23KXB56jzaYyWf/Wi3MOxw+3WKt21gZ7IeyLnp2KhvAotnDU0mV3HaIPzBSlCN +sSi6 +-----END CERTIFICATE----- + +# Issuer: CN=AffirmTrust Commercial O=AffirmTrust +# Subject: CN=AffirmTrust Commercial O=AffirmTrust +# Label: "AffirmTrust Commercial" +# Serial: 8608355977964138876 +# MD5 Fingerprint: 82:92:ba:5b:ef:cd:8a:6f:a6:3d:55:f9:84:f6:d6:b7 +# SHA1 Fingerprint: f9:b5:b6:32:45:5f:9c:be:ec:57:5f:80:dc:e9:6e:2c:c7:b2:78:b7 +# SHA256 Fingerprint: 03:76:ab:1d:54:c5:f9:80:3c:e4:b2:e2:01:a0:ee:7e:ef:7b:57:b6:36:e8:a9:3c:9b:8d:48:60:c9:6f:5f:a7 +-----BEGIN CERTIFICATE----- +MIIDTDCCAjSgAwIBAgIId3cGJyapsXwwDQYJKoZIhvcNAQELBQAwRDELMAkGA1UE +BhMCVVMxFDASBgNVBAoMC0FmZmlybVRydXN0MR8wHQYDVQQDDBZBZmZpcm1UcnVz +dCBDb21tZXJjaWFsMB4XDTEwMDEyOTE0MDYwNloXDTMwMTIzMTE0MDYwNlowRDEL +MAkGA1UEBhMCVVMxFDASBgNVBAoMC0FmZmlybVRydXN0MR8wHQYDVQQDDBZBZmZp +cm1UcnVzdCBDb21tZXJjaWFsMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC +AQEA9htPZwcroRX1BiLLHwGy43NFBkRJLLtJJRTWzsO3qyxPxkEylFf6EqdbDuKP +Hx6GGaeqtS25Xw2Kwq+FNXkyLbscYjfysVtKPcrNcV/pQr6U6Mje+SJIZMblq8Yr +ba0F8PrVC8+a5fBQpIs7R6UjW3p6+DM/uO+Zl+MgwdYoic+U+7lF7eNAFxHUdPAL +MeIrJmqbTFeurCA+ukV6BfO9m2kVrn1OIGPENXY6BwLJN/3HR+7o8XYdcxXyl6S1 +yHp52UKqK39c/s4mT6NmgTWvRLpUHhwwMmWd5jyTXlBOeuM61G7MGvv50jeuJCqr +VwMiKA1JdX+3KNp1v47j3A55MQIDAQABo0IwQDAdBgNVHQ4EFgQUnZPGU4teyq8/ +nx4P5ZmVvCT2lI8wDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAQYwDQYJ +KoZIhvcNAQELBQADggEBAFis9AQOzcAN/wr91LoWXym9e2iZWEnStB03TX8nfUYG +XUPGhi4+c7ImfU+TqbbEKpqrIZcUsd6M06uJFdhrJNTxFq7YpFzUf1GO7RgBsZNj +vbz4YYCanrHOQnDiqX0GJX0nof5v7LMeJNrjS1UaADs1tDvZ110w/YETifLCBivt +Z8SOyUOyXGsViQK8YvxO8rUzqrJv0wqiUOP2O+guRMLbZjipM1ZI8W0bM40NjD9g +N53Tym1+NH4Nn3J2ixufcv1SNUFFApYvHLKac0khsUlHRUe072o0EclNmsxZt9YC +nlpOZbWUrhvfKbAW8b8Angc6F2S1BLUjIZkKlTuXfO8= +-----END CERTIFICATE----- + +# Issuer: CN=AffirmTrust Networking O=AffirmTrust +# Subject: CN=AffirmTrust Networking 
O=AffirmTrust +# Label: "AffirmTrust Networking" +# Serial: 8957382827206547757 +# MD5 Fingerprint: 42:65:ca:be:01:9a:9a:4c:a9:8c:41:49:cd:c0:d5:7f +# SHA1 Fingerprint: 29:36:21:02:8b:20:ed:02:f5:66:c5:32:d1:d6:ed:90:9f:45:00:2f +# SHA256 Fingerprint: 0a:81:ec:5a:92:97:77:f1:45:90:4a:f3:8d:5d:50:9f:66:b5:e2:c5:8f:cd:b5:31:05:8b:0e:17:f3:f0:b4:1b +-----BEGIN CERTIFICATE----- +MIIDTDCCAjSgAwIBAgIIfE8EORzUmS0wDQYJKoZIhvcNAQEFBQAwRDELMAkGA1UE +BhMCVVMxFDASBgNVBAoMC0FmZmlybVRydXN0MR8wHQYDVQQDDBZBZmZpcm1UcnVz +dCBOZXR3b3JraW5nMB4XDTEwMDEyOTE0MDgyNFoXDTMwMTIzMTE0MDgyNFowRDEL +MAkGA1UEBhMCVVMxFDASBgNVBAoMC0FmZmlybVRydXN0MR8wHQYDVQQDDBZBZmZp +cm1UcnVzdCBOZXR3b3JraW5nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC +AQEAtITMMxcua5Rsa2FSoOujz3mUTOWUgJnLVWREZY9nZOIG41w3SfYvm4SEHi3y +YJ0wTsyEheIszx6e/jarM3c1RNg1lho9Nuh6DtjVR6FqaYvZ/Ls6rnla1fTWcbua +kCNrmreIdIcMHl+5ni36q1Mr3Lt2PpNMCAiMHqIjHNRqrSK6mQEubWXLviRmVSRL +QESxG9fhwoXA3hA/Pe24/PHxI1Pcv2WXb9n5QHGNfb2V1M6+oF4nI979ptAmDgAp +6zxG8D1gvz9Q0twmQVGeFDdCBKNwV6gbh+0t+nvujArjqWaJGctB+d1ENmHP4ndG +yH329JKBNv3bNPFyfvMMFr20FQIDAQABo0IwQDAdBgNVHQ4EFgQUBx/S55zawm6i +QLSwelAQUHTEyL0wDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAQYwDQYJ +KoZIhvcNAQEFBQADggEBAIlXshZ6qML91tmbmzTCnLQyFE2npN/svqe++EPbkTfO +tDIuUFUaNU52Q3Eg75N3ThVwLofDwR1t3Mu1J9QsVtFSUzpE0nPIxBsFZVpikpzu +QY0x2+c06lkh1QF612S4ZDnNye2v7UsDSKegmQGA3GWjNq5lWUhPgkvIZfFXHeVZ +Lgo/bNjR9eUJtGxUAArgFU2HdW23WJZa3W3SAKD0m0i+wzekujbgfIeFlxoVot4u +olu9rxj5kFDNcFn4J2dHy8egBzp90SxdbBk6ZrV9/ZFvgrG+CJPbFEfxojfHRZ48 +x3evZKiT3/Zpg4Jg8klCNO1aAFSFHBY2kgxc+qatv9s= +-----END CERTIFICATE----- + +# Issuer: CN=AffirmTrust Premium O=AffirmTrust +# Subject: CN=AffirmTrust Premium O=AffirmTrust +# Label: "AffirmTrust Premium" +# Serial: 7893706540734352110 +# MD5 Fingerprint: c4:5d:0e:48:b6:ac:28:30:4e:0a:bc:f9:38:16:87:57 +# SHA1 Fingerprint: d8:a6:33:2c:e0:03:6f:b1:85:f6:63:4f:7d:6a:06:65:26:32:28:27 +# SHA256 Fingerprint: 70:a7:3f:7f:37:6b:60:07:42:48:90:45:34:b1:14:82:d5:bf:0e:69:8e:cc:49:8d:f5:25:77:eb:f2:e9:3b:9a +-----BEGIN CERTIFICATE----- +MIIFRjCCAy6gAwIBAgIIbYwURrGmCu4wDQYJKoZIhvcNAQEMBQAwQTELMAkGA1UE +BhMCVVMxFDASBgNVBAoMC0FmZmlybVRydXN0MRwwGgYDVQQDDBNBZmZpcm1UcnVz +dCBQcmVtaXVtMB4XDTEwMDEyOTE0MTAzNloXDTQwMTIzMTE0MTAzNlowQTELMAkG +A1UEBhMCVVMxFDASBgNVBAoMC0FmZmlybVRydXN0MRwwGgYDVQQDDBNBZmZpcm1U +cnVzdCBQcmVtaXVtMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAxBLf +qV/+Qd3d9Z+K4/as4Tx4mrzY8H96oDMq3I0gW64tb+eT2TZwamjPjlGjhVtnBKAQ +JG9dKILBl1fYSCkTtuG+kU3fhQxTGJoeJKJPj/CihQvL9Cl/0qRY7iZNyaqoe5rZ ++jjeRFcV5fiMyNlI4g0WJx0eyIOFJbe6qlVBzAMiSy2RjYvmia9mx+n/K+k8rNrS +s8PhaJyJ+HoAVt70VZVs+7pk3WKL3wt3MutizCaam7uqYoNMtAZ6MMgpv+0GTZe5 +HMQxK9VfvFMSF5yZVylmd2EhMQcuJUmdGPLu8ytxjLW6OQdJd/zvLpKQBY0tL3d7 +70O/Nbua2Plzpyzy0FfuKE4mX4+QaAkvuPjcBukumj5Rp9EixAqnOEhss/n/fauG +V+O61oV4d7pD6kh/9ti+I20ev9E2bFhc8e6kGVQa9QPSdubhjL08s9NIS+LI+H+S +qHZGnEJlPqQewQcDWkYtuJfzt9WyVSHvutxMAJf7FJUnM7/oQ0dG0giZFmA7mn7S +5u046uwBHjxIVkkJx0w3AJ6IDsBz4W9m6XJHMD4Q5QsDyZpCAGzFlH5hxIrff4Ia +C1nEWTJ3s7xgaVY5/bQGeyzWZDbZvUjthB9+pSKPKrhC9IK31FOQeE4tGv2Bb0TX +OwF0lkLgAOIua+rF7nKsu7/+6qqo+Nz2snmKtmcCAwEAAaNCMEAwHQYDVR0OBBYE +FJ3AZ6YMItkm9UWrpmVSESfYRaxjMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/ +BAQDAgEGMA0GCSqGSIb3DQEBDAUAA4ICAQCzV00QYk465KzquByvMiPIs0laUZx2 +KI15qldGF9X1Uva3ROgIRL8YhNILgM3FEv0AVQVhh0HctSSePMTYyPtwni94loMg +Nt58D2kTiKV1NpgIpsbfrM7jWNa3Pt668+s0QNiigfV4Py/VpfzZotReBA4Xrf5B +8OWycvpEgjNC6C1Y91aMYj+6QrCcDFx+LmUmXFNPALJ4fqENmS2NuB2OosSw/WDQ +MKSOyARiqcTtNd56l+0OOF6SL5Nwpamcb6d9Ex1+xghIsV5n61EIJenmJWtSKZGc +0jlzCFfemQa0W50QBuHCAKi4HEoCChTQwUHK+4w1IX2COPKpVJEZNZOUbWo6xbLQ 
+u4mGk+ibyQ86p3q4ofB4Rvr8Ny/lioTz3/4E2aFooC8k4gmVBtWVyuEklut89pMF +u+1z6S3RdTnX5yTb2E5fQ4+e0BQ5v1VwSJlXMbSc7kqYA5YwH2AG7hsj/oFgIxpH +YoWlzBk0gG+zrBrjn/B7SK3VAdlntqlyk+otZrWyuOQ9PLLvTIzq6we/qzWaVYa8 +GKa1qF60g2xraUDTn9zxw2lrueFtCfTxqlB2Cnp9ehehVZZCmTEJ3WARjQUwfuaO +RtGdFNrHF+QFlozEJLUbzxQHskD4o55BhrwE0GuWyCqANP2/7waj3VjFhT0+j/6e +KeC2uAloGRwYQw== +-----END CERTIFICATE----- + +# Issuer: CN=AffirmTrust Premium ECC O=AffirmTrust +# Subject: CN=AffirmTrust Premium ECC O=AffirmTrust +# Label: "AffirmTrust Premium ECC" +# Serial: 8401224907861490260 +# MD5 Fingerprint: 64:b0:09:55:cf:b1:d5:99:e2:be:13:ab:a6:5d:ea:4d +# SHA1 Fingerprint: b8:23:6b:00:2f:1d:16:86:53:01:55:6c:11:a4:37:ca:eb:ff:c3:bb +# SHA256 Fingerprint: bd:71:fd:f6:da:97:e4:cf:62:d1:64:7a:dd:25:81:b0:7d:79:ad:f8:39:7e:b4:ec:ba:9c:5e:84:88:82:14:23 +-----BEGIN CERTIFICATE----- +MIIB/jCCAYWgAwIBAgIIdJclisc/elQwCgYIKoZIzj0EAwMwRTELMAkGA1UEBhMC +VVMxFDASBgNVBAoMC0FmZmlybVRydXN0MSAwHgYDVQQDDBdBZmZpcm1UcnVzdCBQ +cmVtaXVtIEVDQzAeFw0xMDAxMjkxNDIwMjRaFw00MDEyMzExNDIwMjRaMEUxCzAJ +BgNVBAYTAlVTMRQwEgYDVQQKDAtBZmZpcm1UcnVzdDEgMB4GA1UEAwwXQWZmaXJt +VHJ1c3QgUHJlbWl1bSBFQ0MwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAAQNMF4bFZ0D +0KF5Nbc6PJJ6yhUczWLznCZcBz3lVPqj1swS6vQUX+iOGasvLkjmrBhDeKzQN8O9 +ss0s5kfiGuZjuD0uL3jET9v0D6RoTFVya5UdThhClXjMNzyR4ptlKymjQjBAMB0G +A1UdDgQWBBSaryl6wBE1NSZRMADDav5A1a7WPDAPBgNVHRMBAf8EBTADAQH/MA4G +A1UdDwEB/wQEAwIBBjAKBggqhkjOPQQDAwNnADBkAjAXCfOHiFBar8jAQr9HX/Vs +aobgxCd05DhT1wV/GzTjxi+zygk8N53X57hG8f2h4nECMEJZh0PUUd+60wkyWs6I +flc9nF9Ca/UHLbXwgpP5WW+uZPpY5Yse42O+tYHNbwKMeQ== +-----END CERTIFICATE----- + +# Issuer: CN=Certum Trusted Network CA O=Unizeto Technologies S.A. OU=Certum Certification Authority +# Subject: CN=Certum Trusted Network CA O=Unizeto Technologies S.A. OU=Certum Certification Authority +# Label: "Certum Trusted Network CA" +# Serial: 279744 +# MD5 Fingerprint: d5:e9:81:40:c5:18:69:fc:46:2c:89:75:62:0f:aa:78 +# SHA1 Fingerprint: 07:e0:32:e0:20:b7:2c:3f:19:2f:06:28:a2:59:3a:19:a7:0f:06:9e +# SHA256 Fingerprint: 5c:58:46:8d:55:f5:8e:49:7e:74:39:82:d2:b5:00:10:b6:d1:65:37:4a:cf:83:a7:d4:a3:2d:b7:68:c4:40:8e +-----BEGIN CERTIFICATE----- +MIIDuzCCAqOgAwIBAgIDBETAMA0GCSqGSIb3DQEBBQUAMH4xCzAJBgNVBAYTAlBM +MSIwIAYDVQQKExlVbml6ZXRvIFRlY2hub2xvZ2llcyBTLkEuMScwJQYDVQQLEx5D +ZXJ0dW0gQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkxIjAgBgNVBAMTGUNlcnR1bSBU +cnVzdGVkIE5ldHdvcmsgQ0EwHhcNMDgxMDIyMTIwNzM3WhcNMjkxMjMxMTIwNzM3 +WjB+MQswCQYDVQQGEwJQTDEiMCAGA1UEChMZVW5pemV0byBUZWNobm9sb2dpZXMg +Uy5BLjEnMCUGA1UECxMeQ2VydHVtIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MSIw +IAYDVQQDExlDZXJ0dW0gVHJ1c3RlZCBOZXR3b3JrIENBMIIBIjANBgkqhkiG9w0B +AQEFAAOCAQ8AMIIBCgKCAQEA4/t9o3K6wvDJFIf1awFO4W5AB7ptJ11/91sts1rH +UV+rpDKmYYe2bg+G0jACl/jXaVehGDldamR5xgFZrDwxSjh80gTSSyjoIF87B6LM +TXPb865Px1bVWqeWifrzq2jUI4ZZJ88JJ7ysbnKDHDBy3+Ci6dLhdHUZvSqeexVU +BBvXQzmtVSjF4hq79MDkrjhJM8x2hZ85RdKknvISjFH4fOQtf/WsX+sWn7Et0brM +kUJ3TCXJkDhv2/DM+44el1k+1WBO5gUo7Ul5E0u6SNsv+XLTOcr+H9g0cvW0QM8x +AcPs3hEtF10fuFDRXhmnad4HMyjKUJX5p1TLVIZQRan5SQIDAQABo0IwQDAPBgNV +HRMBAf8EBTADAQH/MB0GA1UdDgQWBBQIds3LB/8k9sXN7buQvOKEN0Z19zAOBgNV +HQ8BAf8EBAMCAQYwDQYJKoZIhvcNAQEFBQADggEBAKaorSLOAT2mo/9i0Eidi15y +sHhE49wcrwn9I0j6vSrEuVUEtRCjjSfeC4Jj0O7eDDd5QVsisrCaQVymcODU0HfL +I9MA4GxWL+FpDQ3Zqr8hgVDZBqWo/5U30Kr+4rP1mS1FhIrlQgnXdAIv94nYmem8 +J9RHjboNRhx3zxSkHLmkMcScKHQDNP8zGSal6Q10tz6XxnboJ5ajZt3hrvJBW8qY +VoNzcOSGGtIxQbovvi0TWnZvTuhOgQ4/WwMioBK+ZlgRSssDxLQqKi2WF+A5VLxI +03YnnZotBqbJ7DnSq9ufmgsnAjUpsUCV5/nonFWIGUbWtzT1fs45mtk48VH3Tyw= +-----END CERTIFICATE----- + +# Issuer: CN=TWCA Root Certification Authority O=TAIWAN-CA 
OU=Root CA +# Subject: CN=TWCA Root Certification Authority O=TAIWAN-CA OU=Root CA +# Label: "TWCA Root Certification Authority" +# Serial: 1 +# MD5 Fingerprint: aa:08:8f:f6:f9:7b:b7:f2:b1:a7:1e:9b:ea:ea:bd:79 +# SHA1 Fingerprint: cf:9e:87:6d:d3:eb:fc:42:26:97:a3:b5:a3:7a:a0:76:a9:06:23:48 +# SHA256 Fingerprint: bf:d8:8f:e1:10:1c:41:ae:3e:80:1b:f8:be:56:35:0e:e9:ba:d1:a6:b9:bd:51:5e:dc:5c:6d:5b:87:11:ac:44 +-----BEGIN CERTIFICATE----- +MIIDezCCAmOgAwIBAgIBATANBgkqhkiG9w0BAQUFADBfMQswCQYDVQQGEwJUVzES +MBAGA1UECgwJVEFJV0FOLUNBMRAwDgYDVQQLDAdSb290IENBMSowKAYDVQQDDCFU +V0NBIFJvb3QgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkwHhcNMDgwODI4MDcyNDMz +WhcNMzAxMjMxMTU1OTU5WjBfMQswCQYDVQQGEwJUVzESMBAGA1UECgwJVEFJV0FO +LUNBMRAwDgYDVQQLDAdSb290IENBMSowKAYDVQQDDCFUV0NBIFJvb3QgQ2VydGlm +aWNhdGlvbiBBdXRob3JpdHkwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIB +AQCwfnK4pAOU5qfeCTiRShFAh6d8WWQUe7UREN3+v9XAu1bihSX0NXIP+FPQQeFE +AcK0HMMxQhZHhTMidrIKbw/lJVBPhYa+v5guEGcevhEFhgWQxFnQfHgQsIBct+HH +K3XLfJ+utdGdIzdjp9xCoi2SBBtQwXu4PhvJVgSLL1KbralW6cH/ralYhzC2gfeX +RfwZVzsrb+RH9JlF/h3x+JejiB03HFyP4HYlmlD4oFT/RJB2I9IyxsOrBr/8+7/z +rX2SYgJbKdM1o5OaQ2RgXbL6Mv87BK9NQGr5x+PvI/1ry+UPizgN7gr8/g+YnzAx +3WxSZfmLgb4i4RxYA7qRG4kHAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV +HRMBAf8EBTADAQH/MB0GA1UdDgQWBBRqOFsmjd6LWvJPelSDGRjjCDWmujANBgkq +hkiG9w0BAQUFAAOCAQEAPNV3PdrfibqHDAhUaiBQkr6wQT25JmSDCi/oQMCXKCeC +MErJk/9q56YAf4lCmtYR5VPOL8zy2gXE/uJQxDqGfczafhAJO5I1KlOy/usrBdls +XebQ79NqZp4VKIV66IIArB6nCWlWQtNoURi+VJq/REG6Sb4gumlc7rh3zc5sH62D +lhh9DrUUOYTxKOkto557HnpyWoOzeW/vtPzQCqVYT0bf+215WfKEIlKuD8z7fDvn +aspHYcN6+NOSBB+4IIThNlQWx0DeO4pz3N/GCUzf7Nr/1FNCocnyYh0igzyXxfkZ +YiesZSLX0zzG5Y6yU8xJzrww/nsOM5D77dIUkR8Hrw== +-----END CERTIFICATE----- + +# Issuer: O=SECOM Trust Systems CO.,LTD. OU=Security Communication RootCA2 +# Subject: O=SECOM Trust Systems CO.,LTD. 
OU=Security Communication RootCA2 +# Label: "Security Communication RootCA2" +# Serial: 0 +# MD5 Fingerprint: 6c:39:7d:a4:0e:55:59:b2:3f:d6:41:b1:12:50:de:43 +# SHA1 Fingerprint: 5f:3b:8c:f2:f8:10:b3:7d:78:b4:ce:ec:19:19:c3:73:34:b9:c7:74 +# SHA256 Fingerprint: 51:3b:2c:ec:b8:10:d4:cd:e5:dd:85:39:1a:df:c6:c2:dd:60:d8:7b:b7:36:d2:b5:21:48:4a:a4:7a:0e:be:f6 +-----BEGIN CERTIFICATE----- +MIIDdzCCAl+gAwIBAgIBADANBgkqhkiG9w0BAQsFADBdMQswCQYDVQQGEwJKUDEl +MCMGA1UEChMcU0VDT00gVHJ1c3QgU3lzdGVtcyBDTy4sTFRELjEnMCUGA1UECxMe +U2VjdXJpdHkgQ29tbXVuaWNhdGlvbiBSb290Q0EyMB4XDTA5MDUyOTA1MDAzOVoX +DTI5MDUyOTA1MDAzOVowXTELMAkGA1UEBhMCSlAxJTAjBgNVBAoTHFNFQ09NIFRy +dXN0IFN5c3RlbXMgQ08uLExURC4xJzAlBgNVBAsTHlNlY3VyaXR5IENvbW11bmlj +YXRpb24gUm9vdENBMjCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBANAV +OVKxUrO6xVmCxF1SrjpDZYBLx/KWvNs2l9amZIyoXvDjChz335c9S672XewhtUGr +zbl+dp+++T42NKA7wfYxEUV0kz1XgMX5iZnK5atq1LXaQZAQwdbWQonCv/Q4EpVM +VAX3NuRFg3sUZdbcDE3R3n4MqzvEFb46VqZab3ZpUql6ucjrappdUtAtCms1FgkQ +hNBqyjoGADdH5H5XTz+L62e4iKrFvlNVspHEfbmwhRkGeC7bYRr6hfVKkaHnFtWO +ojnflLhwHyg/i/xAXmODPIMqGplrz95Zajv8bxbXH/1KEOtOghY6rCcMU/Gt1SSw +awNQwS08Ft1ENCcadfsCAwEAAaNCMEAwHQYDVR0OBBYEFAqFqXdlBZh8QIH4D5cs +OPEK7DzPMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3 +DQEBCwUAA4IBAQBMOqNErLlFsceTfsgLCkLfZOoc7llsCLqJX2rKSpWeeo8HxdpF +coJxDjrSzG+ntKEju/Ykn8sX/oymzsLS28yN/HH8AynBbF0zX2S2ZTuJbxh2ePXc +okgfGT+Ok+vx+hfuzU7jBBJV1uXk3fs+BXziHV7Gp7yXT2g69ekuCkO2r1dcYmh8 +t/2jioSgrGK+KwmHNPBqAbubKVY8/gA3zyNs8U6qtnRGEmyR7jTV7JqR50S+kDFy +1UkC9gLl9B/rfNmWVan/7Ir5mUf/NVoCqgTLiluHcSmRvaS0eg29mvVXIwAHIRc/ +SjnRBUkLp7Y3gaVdjKozXoEofKd9J+sAro03 +-----END CERTIFICATE----- + +# Issuer: CN=Hellenic Academic and Research Institutions RootCA 2011 O=Hellenic Academic and Research Institutions Cert. Authority +# Subject: CN=Hellenic Academic and Research Institutions RootCA 2011 O=Hellenic Academic and Research Institutions Cert. 
Authority +# Label: "Hellenic Academic and Research Institutions RootCA 2011" +# Serial: 0 +# MD5 Fingerprint: 73:9f:4c:4b:73:5b:79:e9:fa:ba:1c:ef:6e:cb:d5:c9 +# SHA1 Fingerprint: fe:45:65:9b:79:03:5b:98:a1:61:b5:51:2e:ac:da:58:09:48:22:4d +# SHA256 Fingerprint: bc:10:4f:15:a4:8b:e7:09:dc:a5:42:a7:e1:d4:b9:df:6f:05:45:27:e8:02:ea:a9:2d:59:54:44:25:8a:fe:71 +-----BEGIN CERTIFICATE----- +MIIEMTCCAxmgAwIBAgIBADANBgkqhkiG9w0BAQUFADCBlTELMAkGA1UEBhMCR1Ix +RDBCBgNVBAoTO0hlbGxlbmljIEFjYWRlbWljIGFuZCBSZXNlYXJjaCBJbnN0aXR1 +dGlvbnMgQ2VydC4gQXV0aG9yaXR5MUAwPgYDVQQDEzdIZWxsZW5pYyBBY2FkZW1p +YyBhbmQgUmVzZWFyY2ggSW5zdGl0dXRpb25zIFJvb3RDQSAyMDExMB4XDTExMTIw +NjEzNDk1MloXDTMxMTIwMTEzNDk1MlowgZUxCzAJBgNVBAYTAkdSMUQwQgYDVQQK +EztIZWxsZW5pYyBBY2FkZW1pYyBhbmQgUmVzZWFyY2ggSW5zdGl0dXRpb25zIENl +cnQuIEF1dGhvcml0eTFAMD4GA1UEAxM3SGVsbGVuaWMgQWNhZGVtaWMgYW5kIFJl +c2VhcmNoIEluc3RpdHV0aW9ucyBSb290Q0EgMjAxMTCCASIwDQYJKoZIhvcNAQEB +BQADggEPADCCAQoCggEBAKlTAOMupvaO+mDYLZU++CwqVE7NuYRhlFhPjz2L5EPz +dYmNUeTDN9KKiE15HrcS3UN4SoqS5tdI1Q+kOilENbgH9mgdVc04UfCMJDGFr4PJ +fel3r+0ae50X+bOdOFAPplp5kYCvN66m0zH7tSYJnTxa71HFK9+WXesyHgLacEns +bgzImjeN9/E2YEsmLIKe0HjzDQ9jpFEw4fkrJxIH2Oq9GGKYsFk3fb7u8yBRQlqD +75O6aRXxYp2fmTmCobd0LovUxQt7L/DICto9eQqakxylKHJzkUOap9FNhYS5qXSP +FEDH3N6sQWRstBmbAmNtJGSPRLIl6s5ddAxjMlyNh+UCAwEAAaOBiTCBhjAPBgNV +HRMBAf8EBTADAQH/MAsGA1UdDwQEAwIBBjAdBgNVHQ4EFgQUppFC/RNhSiOeCKQp +5dgTBCPuQSUwRwYDVR0eBEAwPqA8MAWCAy5ncjAFggMuZXUwBoIELmVkdTAGggQu +b3JnMAWBAy5ncjAFgQMuZXUwBoEELmVkdTAGgQQub3JnMA0GCSqGSIb3DQEBBQUA +A4IBAQAf73lB4XtuP7KMhjdCSk4cNx6NZrokgclPEg8hwAOXhiVtXdMiKahsog2p +6z0GW5k6x8zDmjR/qw7IThzh+uTczQ2+vyT+bOdrwg3IBp5OjWEopmr95fZi6hg8 +TqBTnbI6nOulnJEWtk2C4AwFSKls9cz4y51JtPACpf1wA+2KIaWuE4ZJwzNzvoc7 +dIsXRSZMFpGD/md9zU1jZ/rzAxKWeAaNsWftjj++n08C9bMJL/NMh98qy5V8Acys +Nnq/onN694/BtZqhFLKPM58N7yLcZnuEvUUXBj08yrl3NI/K6s8/MT7jiOOASSXI +l7WdmplNsDz4SgCbZN2fOUvRJ9e4 +-----END CERTIFICATE----- + +# Issuer: CN=Actalis Authentication Root CA O=Actalis S.p.A./03358520967 +# Subject: CN=Actalis Authentication Root CA O=Actalis S.p.A./03358520967 +# Label: "Actalis Authentication Root CA" +# Serial: 6271844772424770508 +# MD5 Fingerprint: 69:c1:0d:4f:07:a3:1b:c3:fe:56:3d:04:bc:11:f6:a6 +# SHA1 Fingerprint: f3:73:b3:87:06:5a:28:84:8a:f2:f3:4a:ce:19:2b:dd:c7:8e:9c:ac +# SHA256 Fingerprint: 55:92:60:84:ec:96:3a:64:b9:6e:2a:be:01:ce:0b:a8:6a:64:fb:fe:bc:c7:aa:b5:af:c1:55:b3:7f:d7:60:66 +-----BEGIN CERTIFICATE----- +MIIFuzCCA6OgAwIBAgIIVwoRl0LE48wwDQYJKoZIhvcNAQELBQAwazELMAkGA1UE +BhMCSVQxDjAMBgNVBAcMBU1pbGFuMSMwIQYDVQQKDBpBY3RhbGlzIFMucC5BLi8w +MzM1ODUyMDk2NzEnMCUGA1UEAwweQWN0YWxpcyBBdXRoZW50aWNhdGlvbiBSb290 +IENBMB4XDTExMDkyMjExMjIwMloXDTMwMDkyMjExMjIwMlowazELMAkGA1UEBhMC +SVQxDjAMBgNVBAcMBU1pbGFuMSMwIQYDVQQKDBpBY3RhbGlzIFMucC5BLi8wMzM1 +ODUyMDk2NzEnMCUGA1UEAwweQWN0YWxpcyBBdXRoZW50aWNhdGlvbiBSb290IENB +MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAp8bEpSmkLO/lGMWwUKNv +UTufClrJwkg4CsIcoBh/kbWHuUA/3R1oHwiD1S0eiKD4j1aPbZkCkpAW1V8IbInX +4ay8IMKx4INRimlNAJZaby/ARH6jDuSRzVju3PvHHkVH3Se5CAGfpiEd9UEtL0z9 +KK3giq0itFZljoZUj5NDKd45RnijMCO6zfB9E1fAXdKDa0hMxKufgFpbOr3JpyI/ +gCczWw63igxdBzcIy2zSekciRDXFzMwujt0q7bd9Zg1fYVEiVRvjRuPjPdA1Yprb +rxTIW6HMiRvhMCb8oJsfgadHHwTrozmSBp+Z07/T6k9QnBn+locePGX2oxgkg4YQ +51Q+qDp2JE+BIcXjDwL4k5RHILv+1A7TaLndxHqEguNTVHnd25zS8gebLra8Pu2F +be8lEfKXGkJh90qX6IuxEAf6ZYGyojnP9zz/GPvG8VqLWeICrHuS0E4UT1lF9gxe +KF+w6D9Fz8+vm2/7hNN3WpVvrJSEnu68wEqPSpP4RCHiMUVhUE4Q2OM1fEwZtN4F +v6MGn8i1zeQf1xcGDXqVdFUNaBr8EBtiZJ1t4JWgw5QHVw0U5r0F+7if5t+L4sbn +fpb2U8WANFAoWPASUHEXMLrmeGO89LKtmyuy/uE5jF66CyCU3nuDuP/jVo23Eek7 
+jPKxwV2dpAtMK9myGPW1n0sCAwEAAaNjMGEwHQYDVR0OBBYEFFLYiDrIn3hm7Ynz +ezhwlMkCAjbQMA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0jBBgwFoAUUtiIOsifeGbt +ifN7OHCUyQICNtAwDgYDVR0PAQH/BAQDAgEGMA0GCSqGSIb3DQEBCwUAA4ICAQAL +e3KHwGCmSUyIWOYdiPcUZEim2FgKDk8TNd81HdTtBjHIgT5q1d07GjLukD0R0i70 +jsNjLiNmsGe+b7bAEzlgqqI0JZN1Ut6nna0Oh4lScWoWPBkdg/iaKWW+9D+a2fDz +WochcYBNy+A4mz+7+uAwTc+G02UQGRjRlwKxK3JCaKygvU5a2hi/a5iB0P2avl4V +SM0RFbnAKVy06Ij3Pjaut2L9HmLecHgQHEhb2rykOLpn7VU+Xlff1ANATIGk0k9j +pwlCCRT8AKnCgHNPLsBA2RF7SOp6AsDT6ygBJlh0wcBzIm2Tlf05fbsq4/aC4yyX +X04fkZT6/iyj2HYauE2yOE+b+h1IYHkm4vP9qdCa6HCPSXrW5b0KDtst842/6+Ok +fcvHlXHo2qN8xcL4dJIEG4aspCJTQLas/kx2z/uUMsA1n3Y/buWQbqCmJqK4LL7R +K4X9p2jIugErsWx0Hbhzlefut8cl8ABMALJ+tguLHPPAUJ4lueAI3jZm/zel0btU +ZCzJJ7VLkn5l/9Mt4blOvH+kQSGQQXemOR/qnuOf0GZvBeyqdn6/axag67XH/JJU +LysRJyU3eExRarDzzFhdFPFqSBX/wge2sY0PjlxQRrM9vwGYT7JZVEc+NHt4bVaT +LnPqZih4zR0Uv6CPLy64Lo7yFIrM6bV8+2ydDKXhlg== +-----END CERTIFICATE----- + +# Issuer: O=Trustis Limited OU=Trustis FPS Root CA +# Subject: O=Trustis Limited OU=Trustis FPS Root CA +# Label: "Trustis FPS Root CA" +# Serial: 36053640375399034304724988975563710553 +# MD5 Fingerprint: 30:c9:e7:1e:6b:e6:14:eb:65:b2:16:69:20:31:67:4d +# SHA1 Fingerprint: 3b:c0:38:0b:33:c3:f6:a6:0c:86:15:22:93:d9:df:f5:4b:81:c0:04 +# SHA256 Fingerprint: c1:b4:82:99:ab:a5:20:8f:e9:63:0a:ce:55:ca:68:a0:3e:da:5a:51:9c:88:02:a0:d3:a6:73:be:8f:8e:55:7d +-----BEGIN CERTIFICATE----- +MIIDZzCCAk+gAwIBAgIQGx+ttiD5JNM2a/fH8YygWTANBgkqhkiG9w0BAQUFADBF +MQswCQYDVQQGEwJHQjEYMBYGA1UEChMPVHJ1c3RpcyBMaW1pdGVkMRwwGgYDVQQL +ExNUcnVzdGlzIEZQUyBSb290IENBMB4XDTAzMTIyMzEyMTQwNloXDTI0MDEyMTEx +MzY1NFowRTELMAkGA1UEBhMCR0IxGDAWBgNVBAoTD1RydXN0aXMgTGltaXRlZDEc +MBoGA1UECxMTVHJ1c3RpcyBGUFMgUm9vdCBDQTCCASIwDQYJKoZIhvcNAQEBBQAD +ggEPADCCAQoCggEBAMVQe547NdDfxIzNjpvto8A2mfRC6qc+gIMPpqdZh8mQRUN+ +AOqGeSoDvT03mYlmt+WKVoaTnGhLaASMk5MCPjDSNzoiYYkchU59j9WvezX2fihH +iTHcDnlkH5nSW7r+f2C/revnPDgpai/lkQtV/+xvWNUtyd5MZnGPDNcE2gfmHhjj +vSkCqPoc4Vu5g6hBSLwacY3nYuUtsuvffM/bq1rKMfFMIvMFE/eC+XN5DL7XSxzA +0RU8k0Fk0ea+IxciAIleH2ulrG6nS4zto3Lmr2NNL4XSFDWaLk6M6jKYKIahkQlB +OrTh4/L68MkKokHdqeMDx4gVOxzUGpTXn2RZEm0CAwEAAaNTMFEwDwYDVR0TAQH/ +BAUwAwEB/zAfBgNVHSMEGDAWgBS6+nEleYtXQSUhhgtx67JkDoshZzAdBgNVHQ4E +FgQUuvpxJXmLV0ElIYYLceuyZA6LIWcwDQYJKoZIhvcNAQEFBQADggEBAH5Y//01 +GX2cGE+esCu8jowU/yyg2kdbw++BLa8F6nRIW/M+TgfHbcWzk88iNVy2P3UnXwmW +zaD+vkAMXBJV+JOCyinpXj9WV4s4NvdFGkwozZ5BuO1WTISkQMi4sKUraXAEasP4 +1BIy+Q7DsdwyhEQsb8tGD+pmQQ9P8Vilpg0ND2HepZ5dfWWhPBfnqFVO76DH7cZE +f1T1o+CP8HxVIo8ptoGj4W1OLBuAZ+ytIJ8MYmHVl/9D7S3B2l0pKoU/rGXuhg8F +jZBf3+6f9L/uHfuY5H+QK4R4EA5sSVPvFVtlRkpdr7r7OnIdzfYliB6XzCGcKQEN +ZetX2fNXlrtIzYE= +-----END CERTIFICATE----- + +# Issuer: CN=Buypass Class 2 Root CA O=Buypass AS-983163327 +# Subject: CN=Buypass Class 2 Root CA O=Buypass AS-983163327 +# Label: "Buypass Class 2 Root CA" +# Serial: 2 +# MD5 Fingerprint: 46:a7:d2:fe:45:fb:64:5a:a8:59:90:9b:78:44:9b:29 +# SHA1 Fingerprint: 49:0a:75:74:de:87:0a:47:fe:58:ee:f6:c7:6b:eb:c6:0b:12:40:99 +# SHA256 Fingerprint: 9a:11:40:25:19:7c:5b:b9:5d:94:e6:3d:55:cd:43:79:08:47:b6:46:b2:3c:df:11:ad:a4:a0:0e:ff:15:fb:48 +-----BEGIN CERTIFICATE----- +MIIFWTCCA0GgAwIBAgIBAjANBgkqhkiG9w0BAQsFADBOMQswCQYDVQQGEwJOTzEd +MBsGA1UECgwUQnV5cGFzcyBBUy05ODMxNjMzMjcxIDAeBgNVBAMMF0J1eXBhc3Mg +Q2xhc3MgMiBSb290IENBMB4XDTEwMTAyNjA4MzgwM1oXDTQwMTAyNjA4MzgwM1ow +TjELMAkGA1UEBhMCTk8xHTAbBgNVBAoMFEJ1eXBhc3MgQVMtOTgzMTYzMzI3MSAw +HgYDVQQDDBdCdXlwYXNzIENsYXNzIDIgUm9vdCBDQTCCAiIwDQYJKoZIhvcNAQEB +BQADggIPADCCAgoCggIBANfHXvfBB9R3+0Mh9PT1aeTuMgHbo4Yf5FkNuud1g1Lr 
+6hxhFUi7HQfKjK6w3Jad6sNgkoaCKHOcVgb/S2TwDCo3SbXlzwx87vFKu3MwZfPV +L4O2fuPn9Z6rYPnT8Z2SdIrkHJasW4DptfQxh6NR/Md+oW+OU3fUl8FVM5I+GC91 +1K2GScuVr1QGbNgGE41b/+EmGVnAJLqBcXmQRFBoJJRfuLMR8SlBYaNByyM21cHx +MlAQTn/0hpPshNOOvEu/XAFOBz3cFIqUCqTqc/sLUegTBxj6DvEr0VQVfTzh97QZ +QmdiXnfgolXsttlpF9U6r0TtSsWe5HonfOV116rLJeffawrbD02TTqigzXsu8lkB +arcNuAeBfos4GzjmCleZPe4h6KP1DBbdi+w0jpwqHAAVF41og9JwnxgIzRFo1clr +Us3ERo/ctfPYV3Me6ZQ5BL/T3jjetFPsaRyifsSP5BtwrfKi+fv3FmRmaZ9JUaLi +FRhnBkp/1Wy1TbMz4GHrXb7pmA8y1x1LPC5aAVKRCfLf6o3YBkBjqhHk/sM3nhRS +P/TizPJhk9H9Z2vXUq6/aKtAQ6BXNVN48FP4YUIHZMbXb5tMOA1jrGKvNouicwoN +9SG9dKpN6nIDSdvHXx1iY8f93ZHsM+71bbRuMGjeyNYmsHVee7QHIJihdjK4TWxP +AgMBAAGjQjBAMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFMmAd+BikoL1Rpzz +uvdMw964o605MA4GA1UdDwEB/wQEAwIBBjANBgkqhkiG9w0BAQsFAAOCAgEAU18h +9bqwOlI5LJKwbADJ784g7wbylp7ppHR/ehb8t/W2+xUbP6umwHJdELFx7rxP462s +A20ucS6vxOOto70MEae0/0qyexAQH6dXQbLArvQsWdZHEIjzIVEpMMpghq9Gqx3t +OluwlN5E40EIosHsHdb9T7bWR9AUC8rmyrV7d35BH16Dx7aMOZawP5aBQW9gkOLo ++fsicdl9sz1Gv7SEr5AcD48Saq/v7h56rgJKihcrdv6sVIkkLE8/trKnToyokZf7 +KcZ7XC25y2a2t6hbElGFtQl+Ynhw/qlqYLYdDnkM/crqJIByw5c/8nerQyIKx+u2 +DISCLIBrQYoIwOula9+ZEsuK1V6ADJHgJgg2SMX6OBE1/yWDLfJ6v9r9jv6ly0Us +H8SIU653DtmadsWOLB2jutXsMq7Aqqz30XpN69QH4kj3Io6wpJ9qzo6ysmD0oyLQ +I+uUWnpp3Q+/QFesa1lQ2aOZ4W7+jQF5JyMV3pKdewlNWudLSDBaGOYKbeaP4NK7 +5t98biGCwWg5TbSYWGZizEqQXsP6JwSxeRV0mcy+rSDeJmAc61ZRpqPq5KM/p/9h +3PFaTWwyI0PurKju7koSCTxdccK+efrCh2gdC/1cacwG0Jp9VJkqyTkaGa9LKkPz +Y11aWOIv4x3kqdbQCtCev9eBCfHJxyYNrJgWVqA= +-----END CERTIFICATE----- + +# Issuer: CN=Buypass Class 3 Root CA O=Buypass AS-983163327 +# Subject: CN=Buypass Class 3 Root CA O=Buypass AS-983163327 +# Label: "Buypass Class 3 Root CA" +# Serial: 2 +# MD5 Fingerprint: 3d:3b:18:9e:2c:64:5a:e8:d5:88:ce:0e:f9:37:c2:ec +# SHA1 Fingerprint: da:fa:f7:fa:66:84:ec:06:8f:14:50:bd:c7:c2:81:a5:bc:a9:64:57 +# SHA256 Fingerprint: ed:f7:eb:bc:a2:7a:2a:38:4d:38:7b:7d:40:10:c6:66:e2:ed:b4:84:3e:4c:29:b4:ae:1d:5b:93:32:e6:b2:4d +-----BEGIN CERTIFICATE----- +MIIFWTCCA0GgAwIBAgIBAjANBgkqhkiG9w0BAQsFADBOMQswCQYDVQQGEwJOTzEd +MBsGA1UECgwUQnV5cGFzcyBBUy05ODMxNjMzMjcxIDAeBgNVBAMMF0J1eXBhc3Mg +Q2xhc3MgMyBSb290IENBMB4XDTEwMTAyNjA4Mjg1OFoXDTQwMTAyNjA4Mjg1OFow +TjELMAkGA1UEBhMCTk8xHTAbBgNVBAoMFEJ1eXBhc3MgQVMtOTgzMTYzMzI3MSAw +HgYDVQQDDBdCdXlwYXNzIENsYXNzIDMgUm9vdCBDQTCCAiIwDQYJKoZIhvcNAQEB +BQADggIPADCCAgoCggIBAKXaCpUWUOOV8l6ddjEGMnqb8RB2uACatVI2zSRHsJ8Y +ZLya9vrVediQYkwiL944PdbgqOkcLNt4EemOaFEVcsfzM4fkoF0LXOBXByow9c3E +N3coTRiR5r/VUv1xLXA+58bEiuPwKAv0dpihi4dVsjoT/Lc+JzeOIuOoTyrvYLs9 +tznDDgFHmV0ST9tD+leh7fmdvhFHJlsTmKtdFoqwNxxXnUX/iJY2v7vKB3tvh2PX +0DJq1l1sDPGzbjniazEuOQAnFN44wOwZZoYS6J1yFhNkUsepNxz9gjDthBgd9K5c +/3ATAOux9TN6S9ZV+AWNS2mw9bMoNlwUxFFzTWsL8TQH2xc519woe2v1n/MuwU8X +KhDzzMro6/1rqy6any2CbgTUUgGTLT2G/H783+9CHaZr77kgxve9oKeV/afmiSTY +zIw0bOIjL9kSGiG5VZFvC5F5GQytQIgLcOJ60g7YaEi7ghM5EFjp2CoHxhLbWNvS +O1UQRwUVZ2J+GGOmRj8JDlQyXr8NYnon74Do29lLBlo3WiXQCBJ31G8JUJc9yB3D +34xFMFbG02SrZvPAXpacw8Tvw3xrizp5f7NJzz3iiZ+gMEuFuZyUJHmPfWupRWgP +K9Dx2hzLabjKSWJtyNBjYt1gD1iqj6G8BaVmos8bdrKEZLFMOVLAMLrwjEsCsLa3 +AgMBAAGjQjBAMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFEe4zf/lb+74suwv +Tg75JbCOPGvDMA4GA1UdDwEB/wQEAwIBBjANBgkqhkiG9w0BAQsFAAOCAgEAACAj +QTUEkMJAYmDv4jVM1z+s4jSQuKFvdvoWFqRINyzpkMLyPPgKn9iB5btb2iUspKdV +cSQy9sgL8rxq+JOssgfCX5/bzMiKqr5qb+FJEMwx14C7u8jYog5kV+qi9cKpMRXS +IGrs/CIBKM+GuIAeqcwRpTzyFrNHnfzSgCHEy9BHcEGhyoMZCCxt8l13nIoUE9Q2 +HJLw5QY33KbmkJs4j1xrG0aGQ0JfPgEHU1RdZX33inOhmlRaHylDFCfChQ+1iHsa +O5S3HWCntZznKWlXWpuTekMwGwPXYshApqr8ZORK15FTAaggiG6cX0S5y2CBNOxv 
+033aSF/rtJC8LakcC6wc1aJoIIAE1vyxjy+7SjENSoYc6+I2KSb12tjE8nVhz36u +dmNKekBlk4f4HoCMhuWG1o8O/FMsYOgWYRqiPkN7zTlgVGr18okmAWiDSKIz6MkE +kbIRNBE+6tBDGR8Dk5AM/1E9V/RBbuHLoL7ryWPNbczk+DaqaJ3tvV2XcEQNtg41 +3OEMXbugUZTLfhbrES+jkkXITHHZvMmZUldGL1DPvTVp9D0VzgalLA8+9oG6lLvD +u79leNKGef9JOxqDDPDeeOzI8k1MGt6CKfjBWtrt7uYnXuhF0J0cUahoq0Tj0Itq +4/g7u9xN12TyUb7mqqta6THuBrxzvxNiCp/HuZc= +-----END CERTIFICATE----- + +# Issuer: CN=T-TeleSec GlobalRoot Class 3 O=T-Systems Enterprise Services GmbH OU=T-Systems Trust Center +# Subject: CN=T-TeleSec GlobalRoot Class 3 O=T-Systems Enterprise Services GmbH OU=T-Systems Trust Center +# Label: "T-TeleSec GlobalRoot Class 3" +# Serial: 1 +# MD5 Fingerprint: ca:fb:40:a8:4e:39:92:8a:1d:fe:8e:2f:c4:27:ea:ef +# SHA1 Fingerprint: 55:a6:72:3e:cb:f2:ec:cd:c3:23:74:70:19:9d:2a:be:11:e3:81:d1 +# SHA256 Fingerprint: fd:73:da:d3:1c:64:4f:f1:b4:3b:ef:0c:cd:da:96:71:0b:9c:d9:87:5e:ca:7e:31:70:7a:f3:e9:6d:52:2b:bd +-----BEGIN CERTIFICATE----- +MIIDwzCCAqugAwIBAgIBATANBgkqhkiG9w0BAQsFADCBgjELMAkGA1UEBhMCREUx +KzApBgNVBAoMIlQtU3lzdGVtcyBFbnRlcnByaXNlIFNlcnZpY2VzIEdtYkgxHzAd +BgNVBAsMFlQtU3lzdGVtcyBUcnVzdCBDZW50ZXIxJTAjBgNVBAMMHFQtVGVsZVNl +YyBHbG9iYWxSb290IENsYXNzIDMwHhcNMDgxMDAxMTAyOTU2WhcNMzMxMDAxMjM1 +OTU5WjCBgjELMAkGA1UEBhMCREUxKzApBgNVBAoMIlQtU3lzdGVtcyBFbnRlcnBy +aXNlIFNlcnZpY2VzIEdtYkgxHzAdBgNVBAsMFlQtU3lzdGVtcyBUcnVzdCBDZW50 +ZXIxJTAjBgNVBAMMHFQtVGVsZVNlYyBHbG9iYWxSb290IENsYXNzIDMwggEiMA0G +CSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC9dZPwYiJvJK7genasfb3ZJNW4t/zN +8ELg63iIVl6bmlQdTQyK9tPPcPRStdiTBONGhnFBSivwKixVA9ZIw+A5OO3yXDw/ +RLyTPWGrTs0NvvAgJ1gORH8EGoel15YUNpDQSXuhdfsaa3Ox+M6pCSzyU9XDFES4 +hqX2iys52qMzVNn6chr3IhUciJFrf2blw2qAsCTz34ZFiP0Zf3WHHx+xGwpzJFu5 +ZeAsVMhg02YXP+HMVDNzkQI6pn97djmiH5a2OK61yJN0HZ65tOVgnS9W0eDrXltM +EnAMbEQgqxHY9Bn20pxSN+f6tsIxO0rUFJmtxxr1XV/6B7h8DR/Wgx6zAgMBAAGj +QjBAMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgEGMB0GA1UdDgQWBBS1 +A/d2O2GCahKqGFPrAyGUv/7OyjANBgkqhkiG9w0BAQsFAAOCAQEAVj3vlNW92nOy +WL6ukK2YJ5f+AbGwUgC4TeQbIXQbfsDuXmkqJa9c1h3a0nnJ85cp4IaH3gRZD/FZ +1GSFS5mvJQQeyUapl96Cshtwn5z2r3Ex3XsFpSzTucpH9sry9uetuUg/vBa3wW30 +6gmv7PO15wWeph6KU1HWk4HMdJP2udqmJQV0eVp+QD6CSyYRMG7hP0HHRwA11fXT +91Q+gT3aSWqas+8QPebrb9HIIkfLzM8BMZLZGOMivgkeGj5asuRrDFR6fUNOuIml +e9eiPZaGzPImNC1qkp2aGtAw4l1OBLBfiyB+d8E9lYLRRpo7PHi4b6HQDWSieB4p +TpPDpFQUWw== +-----END CERTIFICATE----- + +# Issuer: CN=EE Certification Centre Root CA O=AS Sertifitseerimiskeskus +# Subject: CN=EE Certification Centre Root CA O=AS Sertifitseerimiskeskus +# Label: "EE Certification Centre Root CA" +# Serial: 112324828676200291871926431888494945866 +# MD5 Fingerprint: 43:5e:88:d4:7d:1a:4a:7e:fd:84:2e:52:eb:01:d4:6f +# SHA1 Fingerprint: c9:a8:b9:e7:55:80:5e:58:e3:53:77:a7:25:eb:af:c3:7b:27:cc:d7 +# SHA256 Fingerprint: 3e:84:ba:43:42:90:85:16:e7:75:73:c0:99:2f:09:79:ca:08:4e:46:85:68:1f:f1:95:cc:ba:8a:22:9b:8a:76 +-----BEGIN CERTIFICATE----- +MIIEAzCCAuugAwIBAgIQVID5oHPtPwBMyonY43HmSjANBgkqhkiG9w0BAQUFADB1 +MQswCQYDVQQGEwJFRTEiMCAGA1UECgwZQVMgU2VydGlmaXRzZWVyaW1pc2tlc2t1 +czEoMCYGA1UEAwwfRUUgQ2VydGlmaWNhdGlvbiBDZW50cmUgUm9vdCBDQTEYMBYG +CSqGSIb3DQEJARYJcGtpQHNrLmVlMCIYDzIwMTAxMDMwMTAxMDMwWhgPMjAzMDEy +MTcyMzU5NTlaMHUxCzAJBgNVBAYTAkVFMSIwIAYDVQQKDBlBUyBTZXJ0aWZpdHNl +ZXJpbWlza2Vza3VzMSgwJgYDVQQDDB9FRSBDZXJ0aWZpY2F0aW9uIENlbnRyZSBS +b290IENBMRgwFgYJKoZIhvcNAQkBFglwa2lAc2suZWUwggEiMA0GCSqGSIb3DQEB +AQUAA4IBDwAwggEKAoIBAQDIIMDs4MVLqwd4lfNE7vsLDP90jmG7sWLqI9iroWUy +euuOF0+W2Ap7kaJjbMeMTC55v6kF/GlclY1i+blw7cNRfdCT5mzrMEvhvH2/UpvO +bntl8jixwKIy72KyaOBhU8E2lf/slLo2rpwcpzIP5Xy0xm90/XsY6KxX7QYgSzIw 
+WFv9zajmofxwvI6Sc9uXp3whrj3B9UiHbCe9nyV0gVWw93X2PaRka9ZP585ArQ/d +MtO8ihJTmMmJ+xAdTX7Nfh9WDSFwhfYggx/2uh8Ej+p3iDXE/+pOoYtNP2MbRMNE +1CV2yreN1x5KZmTNXMWcg+HCCIia7E6j8T4cLNlsHaFLAgMBAAGjgYowgYcwDwYD +VR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFBLyWj7qVhy/ +zQas8fElyalL1BSZMEUGA1UdJQQ+MDwGCCsGAQUFBwMCBggrBgEFBQcDAQYIKwYB +BQUHAwMGCCsGAQUFBwMEBggrBgEFBQcDCAYIKwYBBQUHAwkwDQYJKoZIhvcNAQEF +BQADggEBAHv25MANqhlHt01Xo/6tu7Fq1Q+e2+RjxY6hUFaTlrg4wCQiZrxTFGGV +v9DHKpY5P30osxBAIWrEr7BSdxjhlthWXePdNl4dp1BUoMUq5KqMlIpPnTX/dqQG +E5Gion0ARD9V04I8GtVbvFZMIi5GQ4okQC3zErg7cBqklrkar4dBGmoYDQZPxz5u +uSlNDUmJEYcyW+ZLBMjkXOZ0c5RdFpgTlf7727FE5TpwrDdr5rMzcijJs1eg9gIW +iAYLtqZLICjU3j2LrTcFU3T+bsy8QxdxXvnFzBqpYe73dgzzcvRyrc9yAjYHR8/v +GVCJYMzpJJUPwssd8m92kMfMdcGWxZ0= +-----END CERTIFICATE----- + +# Issuer: CN=D-TRUST Root Class 3 CA 2 2009 O=D-Trust GmbH +# Subject: CN=D-TRUST Root Class 3 CA 2 2009 O=D-Trust GmbH +# Label: "D-TRUST Root Class 3 CA 2 2009" +# Serial: 623603 +# MD5 Fingerprint: cd:e0:25:69:8d:47:ac:9c:89:35:90:f7:fd:51:3d:2f +# SHA1 Fingerprint: 58:e8:ab:b0:36:15:33:fb:80:f7:9b:1b:6d:29:d3:ff:8d:5f:00:f0 +# SHA256 Fingerprint: 49:e7:a4:42:ac:f0:ea:62:87:05:00:54:b5:25:64:b6:50:e4:f4:9e:42:e3:48:d6:aa:38:e0:39:e9:57:b1:c1 +-----BEGIN CERTIFICATE----- +MIIEMzCCAxugAwIBAgIDCYPzMA0GCSqGSIb3DQEBCwUAME0xCzAJBgNVBAYTAkRF +MRUwEwYDVQQKDAxELVRydXN0IEdtYkgxJzAlBgNVBAMMHkQtVFJVU1QgUm9vdCBD +bGFzcyAzIENBIDIgMjAwOTAeFw0wOTExMDUwODM1NThaFw0yOTExMDUwODM1NTha +ME0xCzAJBgNVBAYTAkRFMRUwEwYDVQQKDAxELVRydXN0IEdtYkgxJzAlBgNVBAMM +HkQtVFJVU1QgUm9vdCBDbGFzcyAzIENBIDIgMjAwOTCCASIwDQYJKoZIhvcNAQEB +BQADggEPADCCAQoCggEBANOySs96R+91myP6Oi/WUEWJNTrGa9v+2wBoqOADER03 +UAifTUpolDWzU9GUY6cgVq/eUXjsKj3zSEhQPgrfRlWLJ23DEE0NkVJD2IfgXU42 +tSHKXzlABF9bfsyjxiupQB7ZNoTWSPOSHjRGICTBpFGOShrvUD9pXRl/RcPHAY9R +ySPocq60vFYJfxLLHLGvKZAKyVXMD9O0Gu1HNVpK7ZxzBCHQqr0ME7UAyiZsxGsM +lFqVlNpQmvH/pStmMaTJOKDfHR+4CS7zp+hnUquVH+BGPtikw8paxTGA6Eian5Rp +/hnd2HN8gcqW3o7tszIFZYQ05ub9VxC1X3a/L7AQDcUCAwEAAaOCARowggEWMA8G +A1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFP3aFMSfMN4hvR5COfyrYyNJ4PGEMA4G +A1UdDwEB/wQEAwIBBjCB0wYDVR0fBIHLMIHIMIGAoH6gfIZ6bGRhcDovL2RpcmVj +dG9yeS5kLXRydXN0Lm5ldC9DTj1ELVRSVVNUJTIwUm9vdCUyMENsYXNzJTIwMyUy +MENBJTIwMiUyMDIwMDksTz1ELVRydXN0JTIwR21iSCxDPURFP2NlcnRpZmljYXRl +cmV2b2NhdGlvbmxpc3QwQ6BBoD+GPWh0dHA6Ly93d3cuZC10cnVzdC5uZXQvY3Js +L2QtdHJ1c3Rfcm9vdF9jbGFzc18zX2NhXzJfMjAwOS5jcmwwDQYJKoZIhvcNAQEL +BQADggEBAH+X2zDI36ScfSF6gHDOFBJpiBSVYEQBrLLpME+bUMJm2H6NMLVwMeni +acfzcNsgFYbQDfC+rAF1hM5+n02/t2A7nPPKHeJeaNijnZflQGDSNiH+0LS4F9p0 +o3/U37CYAqxva2ssJSRyoWXuJVrl5jLn8t+rSfrzkGkj2wTZ51xY/GXUl77M/C4K +zCUqNQT4YJEVdT1B/yMfGchs64JTBKbkTCJNjYy6zltz7GRUUG3RnFX7acM2w4y8 +PIWmawomDeCTmGCufsYkl4phX5GOZpIJhzbNi5stPvZR1FDUWSi9g/LMKHtThm3Y +Johw1+qRzT65ysCQblrGXnRl11z+o+I= +-----END CERTIFICATE----- + +# Issuer: CN=D-TRUST Root Class 3 CA 2 EV 2009 O=D-Trust GmbH +# Subject: CN=D-TRUST Root Class 3 CA 2 EV 2009 O=D-Trust GmbH +# Label: "D-TRUST Root Class 3 CA 2 EV 2009" +# Serial: 623604 +# MD5 Fingerprint: aa:c6:43:2c:5e:2d:cd:c4:34:c0:50:4f:11:02:4f:b6 +# SHA1 Fingerprint: 96:c9:1b:0b:95:b4:10:98:42:fa:d0:d8:22:79:fe:60:fa:b9:16:83 +# SHA256 Fingerprint: ee:c5:49:6b:98:8c:e9:86:25:b9:34:09:2e:ec:29:08:be:d0:b0:f3:16:c2:d4:73:0c:84:ea:f1:f3:d3:48:81 +-----BEGIN CERTIFICATE----- +MIIEQzCCAyugAwIBAgIDCYP0MA0GCSqGSIb3DQEBCwUAMFAxCzAJBgNVBAYTAkRF +MRUwEwYDVQQKDAxELVRydXN0IEdtYkgxKjAoBgNVBAMMIUQtVFJVU1QgUm9vdCBD +bGFzcyAzIENBIDIgRVYgMjAwOTAeFw0wOTExMDUwODUwNDZaFw0yOTExMDUwODUw +NDZaMFAxCzAJBgNVBAYTAkRFMRUwEwYDVQQKDAxELVRydXN0IEdtYkgxKjAoBgNV 
+BAMMIUQtVFJVU1QgUm9vdCBDbGFzcyAzIENBIDIgRVYgMjAwOTCCASIwDQYJKoZI +hvcNAQEBBQADggEPADCCAQoCggEBAJnxhDRwui+3MKCOvXwEz75ivJn9gpfSegpn +ljgJ9hBOlSJzmY3aFS3nBfwZcyK3jpgAvDw9rKFs+9Z5JUut8Mxk2og+KbgPCdM0 +3TP1YtHhzRnp7hhPTFiu4h7WDFsVWtg6uMQYZB7jM7K1iXdODL/ZlGsTl28So/6Z +qQTMFexgaDbtCHu39b+T7WYxg4zGcTSHThfqr4uRjRxWQa4iN1438h3Z0S0NL2lR +p75mpoo6Kr3HGrHhFPC+Oh25z1uxav60sUYgovseO3Dvk5h9jHOW8sXvhXCtKSb8 +HgQ+HKDYD8tSg2J87otTlZCpV6LqYQXY+U3EJ/pure3511H3a6UCAwEAAaOCASQw +ggEgMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFNOUikxiEyoZLsyvcop9Ntea +HNxnMA4GA1UdDwEB/wQEAwIBBjCB3QYDVR0fBIHVMIHSMIGHoIGEoIGBhn9sZGFw +Oi8vZGlyZWN0b3J5LmQtdHJ1c3QubmV0L0NOPUQtVFJVU1QlMjBSb290JTIwQ2xh +c3MlMjAzJTIwQ0ElMjAyJTIwRVYlMjAyMDA5LE89RC1UcnVzdCUyMEdtYkgsQz1E +RT9jZXJ0aWZpY2F0ZXJldm9jYXRpb25saXN0MEagRKBChkBodHRwOi8vd3d3LmQt +dHJ1c3QubmV0L2NybC9kLXRydXN0X3Jvb3RfY2xhc3NfM19jYV8yX2V2XzIwMDku +Y3JsMA0GCSqGSIb3DQEBCwUAA4IBAQA07XtaPKSUiO8aEXUHL7P+PPoeUSbrh/Yp +3uDx1MYkCenBz1UbtDDZzhr+BlGmFaQt77JLvyAoJUnRpjZ3NOhk31KxEcdzes05 +nsKtjHEh8lprr988TlWvsoRlFIm5d8sqMb7Po23Pb0iUMkZv53GMoKaEGTcH8gNF +CSuGdXzfX2lXANtu2KZyIktQ1HWYVt+3GP9DQ1CuekR78HlR10M9p9OB0/DJT7na +xpeG0ILD5EJt/rDiZE4OJudANCa1CInXCGNjOCd1HjPqbqjdn5lPdE2BiYBL3ZqX +KVwvvoFBuYz/6n1gBp7N1z3TLqMVvKjmJuVvw9y4AyHqnxbxLFS1 +-----END CERTIFICATE----- + +# Issuer: CN=CA Disig Root R2 O=Disig a.s. +# Subject: CN=CA Disig Root R2 O=Disig a.s. +# Label: "CA Disig Root R2" +# Serial: 10572350602393338211 +# MD5 Fingerprint: 26:01:fb:d8:27:a7:17:9a:45:54:38:1a:43:01:3b:03 +# SHA1 Fingerprint: b5:61:eb:ea:a4:de:e4:25:4b:69:1a:98:a5:57:47:c2:34:c7:d9:71 +# SHA256 Fingerprint: e2:3d:4a:03:6d:7b:70:e9:f5:95:b1:42:20:79:d2:b9:1e:df:bb:1f:b6:51:a0:63:3e:aa:8a:9d:c5:f8:07:03 +-----BEGIN CERTIFICATE----- +MIIFaTCCA1GgAwIBAgIJAJK4iNuwisFjMA0GCSqGSIb3DQEBCwUAMFIxCzAJBgNV +BAYTAlNLMRMwEQYDVQQHEwpCcmF0aXNsYXZhMRMwEQYDVQQKEwpEaXNpZyBhLnMu +MRkwFwYDVQQDExBDQSBEaXNpZyBSb290IFIyMB4XDTEyMDcxOTA5MTUzMFoXDTQy +MDcxOTA5MTUzMFowUjELMAkGA1UEBhMCU0sxEzARBgNVBAcTCkJyYXRpc2xhdmEx +EzARBgNVBAoTCkRpc2lnIGEucy4xGTAXBgNVBAMTEENBIERpc2lnIFJvb3QgUjIw +ggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCio8QACdaFXS1tFPbCw3Oe +NcJxVX6B+6tGUODBfEl45qt5WDza/3wcn9iXAng+a0EE6UG9vgMsRfYvZNSrXaNH +PWSb6WiaxswbP7q+sos0Ai6YVRn8jG+qX9pMzk0DIaPY0jSTVpbLTAwAFjxfGs3I +x2ymrdMxp7zo5eFm1tL7A7RBZckQrg4FY8aAamkw/dLukO8NJ9+flXP04SXabBbe +QTg06ov80egEFGEtQX6sx3dOy1FU+16SGBsEWmjGycT6txOgmLcRK7fWV8x8nhfR +yyX+hk4kLlYMeE2eARKmK6cBZW58Yh2EhN/qwGu1pSqVg8NTEQxzHQuyRpDRQjrO +QG6Vrf/GlK1ul4SOfW+eioANSW1z4nuSHsPzwfPrLgVv2RvPN3YEyLRa5Beny912 +H9AZdugsBbPWnDTYltxhh5EF5EQIM8HauQhl1K6yNg3ruji6DOWbnuuNZt2Zz9aJ +QfYEkoopKW1rOhzndX0CcQ7zwOe9yxndnWCywmZgtrEE7snmhrmaZkCo5xHtgUUD +i/ZnWejBBhG93c+AAk9lQHhcR1DIm+YfgXvkRKhbhZri3lrVx/k6RGZL5DJUfORs +nLMOPReisjQS1n6yqEm70XooQL6iFh/f5DcfEXP7kAplQ6INfPgGAVUzfbANuPT1 +rqVCV3w2EYx7XsQDnYx5nQIDAQABo0IwQDAPBgNVHRMBAf8EBTADAQH/MA4GA1Ud +DwEB/wQEAwIBBjAdBgNVHQ4EFgQUtZn4r7CU9eMg1gqtzk5WpC5uQu0wDQYJKoZI +hvcNAQELBQADggIBACYGXnDnZTPIgm7ZnBc6G3pmsgH2eDtpXi/q/075KMOYKmFM +tCQSin1tERT3nLXK5ryeJ45MGcipvXrA1zYObYVybqjGom32+nNjf7xueQgcnYqf +GopTpti72TVVsRHFqQOzVju5hJMiXn7B9hJSi+osZ7z+Nkz1uM/Rs0mSO9MpDpkb +lvdhuDvEK7Z4bLQjb/D907JedR+Zlais9trhxTF7+9FGs9K8Z7RiVLoJ92Owk6Ka ++elSLotgEqv89WBW7xBci8QaQtyDW2QOy7W81k/BfDxujRNt+3vrMNDcTa/F1bal +TFtxyegxvug4BkihGuLq0t4SOVga/4AOgnXmt8kHbA7v/zjxmHHEt38OFdAlab0i +nSvtBfZGR6ztwPDUO+Ls7pZbkBNOHlY667DvlruWIxG68kOGdGSVyCh13x01utI3 +gzhTODY7z2zp+WsO0PsE6E9312UBeIYMej4hYvF/Y3EMyZ9E26gnonW+boE+18Dr +G5gPcFw0sorMwIUY6256s/daoQe/qUKS82Ail+QUoQebTnbAjn39pCXHR+3/H3Os 
+zMOl6W8KjptlwlCFtaOgUxLMVYdh84GuEEZhvUQhuMI9dM9+JDX6HAcOmz0iyu8x +L4ysEr3vQCj8KWefshNPZiTEUxnpHikV7+ZtsH8tZ/3zbBt1RqPlShfppNcL +-----END CERTIFICATE----- + +# Issuer: CN=ACCVRAIZ1 O=ACCV OU=PKIACCV +# Subject: CN=ACCVRAIZ1 O=ACCV OU=PKIACCV +# Label: "ACCVRAIZ1" +# Serial: 6828503384748696800 +# MD5 Fingerprint: d0:a0:5a:ee:05:b6:09:94:21:a1:7d:f1:b2:29:82:02 +# SHA1 Fingerprint: 93:05:7a:88:15:c6:4f:ce:88:2f:fa:91:16:52:28:78:bc:53:64:17 +# SHA256 Fingerprint: 9a:6e:c0:12:e1:a7:da:9d:be:34:19:4d:47:8a:d7:c0:db:18:22:fb:07:1d:f1:29:81:49:6e:d1:04:38:41:13 +-----BEGIN CERTIFICATE----- +MIIH0zCCBbugAwIBAgIIXsO3pkN/pOAwDQYJKoZIhvcNAQEFBQAwQjESMBAGA1UE +AwwJQUNDVlJBSVoxMRAwDgYDVQQLDAdQS0lBQ0NWMQ0wCwYDVQQKDARBQ0NWMQsw +CQYDVQQGEwJFUzAeFw0xMTA1MDUwOTM3MzdaFw0zMDEyMzEwOTM3MzdaMEIxEjAQ +BgNVBAMMCUFDQ1ZSQUlaMTEQMA4GA1UECwwHUEtJQUNDVjENMAsGA1UECgwEQUND +VjELMAkGA1UEBhMCRVMwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCb +qau/YUqXry+XZpp0X9DZlv3P4uRm7x8fRzPCRKPfmt4ftVTdFXxpNRFvu8gMjmoY +HtiP2Ra8EEg2XPBjs5BaXCQ316PWywlxufEBcoSwfdtNgM3802/J+Nq2DoLSRYWo +G2ioPej0RGy9ocLLA76MPhMAhN9KSMDjIgro6TenGEyxCQ0jVn8ETdkXhBilyNpA +lHPrzg5XPAOBOp0KoVdDaaxXbXmQeOW1tDvYvEyNKKGno6e6Ak4l0Squ7a4DIrhr +IA8wKFSVf+DuzgpmndFALW4ir50awQUZ0m/A8p/4e7MCQvtQqR0tkw8jq8bBD5L/ +0KIV9VMJcRz/RROE5iZe+OCIHAr8Fraocwa48GOEAqDGWuzndN9wrqODJerWx5eH +k6fGioozl2A3ED6XPm4pFdahD9GILBKfb6qkxkLrQaLjlUPTAYVtjrs78yM2x/47 +4KElB0iryYl0/wiPgL/AlmXz7uxLaL2diMMxs0Dx6M/2OLuc5NF/1OVYm3z61PMO +m3WR5LpSLhl+0fXNWhn8ugb2+1KoS5kE3fj5tItQo05iifCHJPqDQsGH+tUtKSpa +cXpkatcnYGMN285J9Y0fkIkyF/hzQ7jSWpOGYdbhdQrqeWZ2iE9x6wQl1gpaepPl +uUsXQA+xtrn13k/c4LOsOxFwYIRKQ26ZIMApcQrAZQIDAQABo4ICyzCCAscwfQYI +KwYBBQUHAQEEcTBvMEwGCCsGAQUFBzAChkBodHRwOi8vd3d3LmFjY3YuZXMvZmls +ZWFkbWluL0FyY2hpdm9zL2NlcnRpZmljYWRvcy9yYWl6YWNjdjEuY3J0MB8GCCsG +AQUFBzABhhNodHRwOi8vb2NzcC5hY2N2LmVzMB0GA1UdDgQWBBTSh7Tj3zcnk1X2 +VuqB5TbMjB4/vTAPBgNVHRMBAf8EBTADAQH/MB8GA1UdIwQYMBaAFNKHtOPfNyeT +VfZW6oHlNsyMHj+9MIIBcwYDVR0gBIIBajCCAWYwggFiBgRVHSAAMIIBWDCCASIG +CCsGAQUFBwICMIIBFB6CARAAQQB1AHQAbwByAGkAZABhAGQAIABkAGUAIABDAGUA +cgB0AGkAZgBpAGMAYQBjAGkA8wBuACAAUgBhAO0AegAgAGQAZQAgAGwAYQAgAEEA +QwBDAFYAIAAoAEEAZwBlAG4AYwBpAGEAIABkAGUAIABUAGUAYwBuAG8AbABvAGcA +7QBhACAAeQAgAEMAZQByAHQAaQBmAGkAYwBhAGMAaQDzAG4AIABFAGwAZQBjAHQA +cgDzAG4AaQBjAGEALAAgAEMASQBGACAAUQA0ADYAMAAxADEANQA2AEUAKQAuACAA +QwBQAFMAIABlAG4AIABoAHQAdABwADoALwAvAHcAdwB3AC4AYQBjAGMAdgAuAGUA +czAwBggrBgEFBQcCARYkaHR0cDovL3d3dy5hY2N2LmVzL2xlZ2lzbGFjaW9uX2Mu +aHRtMFUGA1UdHwROMEwwSqBIoEaGRGh0dHA6Ly93d3cuYWNjdi5lcy9maWxlYWRt +aW4vQXJjaGl2b3MvY2VydGlmaWNhZG9zL3JhaXphY2N2MV9kZXIuY3JsMA4GA1Ud +DwEB/wQEAwIBBjAXBgNVHREEEDAOgQxhY2N2QGFjY3YuZXMwDQYJKoZIhvcNAQEF +BQADggIBAJcxAp/n/UNnSEQU5CmH7UwoZtCPNdpNYbdKl02125DgBS4OxnnQ8pdp +D70ER9m+27Up2pvZrqmZ1dM8MJP1jaGo/AaNRPTKFpV8M9xii6g3+CfYCS0b78gU +JyCpZET/LtZ1qmxNYEAZSUNUY9rizLpm5U9EelvZaoErQNV/+QEnWCzI7UiRfD+m +AM/EKXMRNt6GGT6d7hmKG9Ww7Y49nCrADdg9ZuM8Db3VlFzi4qc1GwQA9j9ajepD +vV+JHanBsMyZ4k0ACtrJJ1vnE5Bc5PUzolVt3OAJTS+xJlsndQAJxGJ3KQhfnlms +tn6tn1QwIgPBHnFk/vk4CpYY3QIUrCPLBhwepH2NDd4nQeit2hW3sCPdK6jT2iWH +7ehVRE2I9DZ+hJp4rPcOVkkO1jMl1oRQQmwgEh0q1b688nCBpHBgvgW1m54ERL5h +I6zppSSMEYCUWqKiuUnSwdzRp+0xESyeGabu4VXhwOrPDYTkF7eifKXeVSUG7szA +h1xA2syVP1XgNce4hL60Xc16gwFy7ofmXx2utYXGJt/mwZrpHgJHnyqobalbz+xF +d3+YJ5oyXSrjhO7FmGYvliAd3djDJ9ew+f7Zfc3Qn48LFFhRny+Lwzgt3uiP1o2H +pPVWQxaZLPSkVrQ0uGE3ycJYgBugl6H8WY3pEfbRD0tVNEYqi4Y7 +-----END CERTIFICATE----- + +# Issuer: CN=TWCA Global Root CA O=TAIWAN-CA OU=Root CA +# Subject: CN=TWCA Global Root CA O=TAIWAN-CA OU=Root CA +# Label: "TWCA Global Root CA" +# Serial: 3262 +# MD5 
Fingerprint: f9:03:7e:cf:e6:9e:3c:73:7a:2a:90:07:69:ff:2b:96 +# SHA1 Fingerprint: 9c:bb:48:53:f6:a4:f6:d3:52:a4:e8:32:52:55:60:13:f5:ad:af:65 +# SHA256 Fingerprint: 59:76:90:07:f7:68:5d:0f:cd:50:87:2f:9f:95:d5:75:5a:5b:2b:45:7d:81:f3:69:2b:61:0a:98:67:2f:0e:1b +-----BEGIN CERTIFICATE----- +MIIFQTCCAymgAwIBAgICDL4wDQYJKoZIhvcNAQELBQAwUTELMAkGA1UEBhMCVFcx +EjAQBgNVBAoTCVRBSVdBTi1DQTEQMA4GA1UECxMHUm9vdCBDQTEcMBoGA1UEAxMT +VFdDQSBHbG9iYWwgUm9vdCBDQTAeFw0xMjA2MjcwNjI4MzNaFw0zMDEyMzExNTU5 +NTlaMFExCzAJBgNVBAYTAlRXMRIwEAYDVQQKEwlUQUlXQU4tQ0ExEDAOBgNVBAsT +B1Jvb3QgQ0ExHDAaBgNVBAMTE1RXQ0EgR2xvYmFsIFJvb3QgQ0EwggIiMA0GCSqG +SIb3DQEBAQUAA4ICDwAwggIKAoICAQCwBdvI64zEbooh745NnHEKH1Jw7W2CnJfF +10xORUnLQEK1EjRsGcJ0pDFfhQKX7EMzClPSnIyOt7h52yvVavKOZsTuKwEHktSz +0ALfUPZVr2YOy+BHYC8rMjk1Ujoog/h7FsYYuGLWRyWRzvAZEk2tY/XTP3VfKfCh +MBwqoJimFb3u/Rk28OKRQ4/6ytYQJ0lM793B8YVwm8rqqFpD/G2Gb3PpN0Wp8DbH +zIh1HrtsBv+baz4X7GGqcXzGHaL3SekVtTzWoWH1EfcFbx39Eb7QMAfCKbAJTibc +46KokWofwpFFiFzlmLhxpRUZyXx1EcxwdE8tmx2RRP1WKKD+u4ZqyPpcC1jcxkt2 +yKsi2XMPpfRaAok/T54igu6idFMqPVMnaR1sjjIsZAAmY2E2TqNGtz99sy2sbZCi +laLOz9qC5wc0GZbpuCGqKX6mOL6OKUohZnkfs8O1CWfe1tQHRvMq2uYiN2DLgbYP +oA/pyJV/v1WRBXrPPRXAb94JlAGD1zQbzECl8LibZ9WYkTunhHiVJqRaCPgrdLQA +BDzfuBSO6N+pjWxnkjMdwLfS7JLIvgm/LCkFbwJrnu+8vyq8W8BQj0FwcYeyTbcE +qYSjMq+u7msXi7Kx/mzhkIyIqJdIzshNy/MGz19qCkKxHh53L46g5pIOBvwFItIm +4TFRfTLcDwIDAQABoyMwITAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB +/zANBgkqhkiG9w0BAQsFAAOCAgEAXzSBdu+WHdXltdkCY4QWwa6gcFGn90xHNcgL +1yg9iXHZqjNB6hQbbCEAwGxCGX6faVsgQt+i0trEfJdLjbDorMjupWkEmQqSpqsn +LhpNgb+E1HAerUf+/UqdM+DyucRFCCEK2mlpc3INvjT+lIutwx4116KD7+U4x6WF +H6vPNOw/KP4M8VeGTslV9xzU2KV9Bnpv1d8Q34FOIWWxtuEXeZVFBs5fzNxGiWNo +RI2T9GRwoD2dKAXDOXC4Ynsg/eTb6QihuJ49CcdP+yz4k3ZB3lLg4VfSnQO8d57+ +nile98FRYB/e2guyLXW3Q0iT5/Z5xoRdgFlglPx4mI88k1HtQJAH32RjJMtOcQWh +15QaiDLxInQirqWm2BJpTGCjAu4r7NRjkgtevi92a6O2JryPA9gK8kxkRr05YuWW +6zRjESjMlfGt7+/cgFhI6Uu46mWs6fyAtbXIRfmswZ/ZuepiiI7E8UuDEq3mi4TW +nsLrgxifarsbJGAzcMzs9zLzXNl5fe+epP7JI8Mk7hWSsT2RTyaGvWZzJBPqpK5j +wa19hAM8EHiGG3njxPPyBJUgriOCxLM6AGK/5jYk4Ve6xx6QddVfP5VhK8E7zeWz +aGHQRiapIVJpLesux+t3zqY6tQMzT3bR51xUAV3LePTJDL/PEo4XLSNolOer/qmy +KwbQBM0= +-----END CERTIFICATE----- + +# Issuer: CN=TeliaSonera Root CA v1 O=TeliaSonera +# Subject: CN=TeliaSonera Root CA v1 O=TeliaSonera +# Label: "TeliaSonera Root CA v1" +# Serial: 199041966741090107964904287217786801558 +# MD5 Fingerprint: 37:41:49:1b:18:56:9a:26:f5:ad:c2:66:fb:40:a5:4c +# SHA1 Fingerprint: 43:13:bb:96:f1:d5:86:9b:c1:4e:6a:92:f6:cf:f6:34:69:87:82:37 +# SHA256 Fingerprint: dd:69:36:fe:21:f8:f0:77:c1:23:a1:a5:21:c1:22:24:f7:22:55:b7:3e:03:a7:26:06:93:e8:a2:4b:0f:a3:89 +-----BEGIN CERTIFICATE----- +MIIFODCCAyCgAwIBAgIRAJW+FqD3LkbxezmCcvqLzZYwDQYJKoZIhvcNAQEFBQAw +NzEUMBIGA1UECgwLVGVsaWFTb25lcmExHzAdBgNVBAMMFlRlbGlhU29uZXJhIFJv +b3QgQ0EgdjEwHhcNMDcxMDE4MTIwMDUwWhcNMzIxMDE4MTIwMDUwWjA3MRQwEgYD +VQQKDAtUZWxpYVNvbmVyYTEfMB0GA1UEAwwWVGVsaWFTb25lcmEgUm9vdCBDQSB2 +MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAMK+6yfwIaPzaSZVfp3F +VRaRXP3vIb9TgHot0pGMYzHw7CTww6XScnwQbfQ3t+XmfHnqjLWCi65ItqwA3GV1 +7CpNX8GH9SBlK4GoRz6JI5UwFpB/6FcHSOcZrr9FZ7E3GwYq/t75rH2D+1665I+X +Z75Ljo1kB1c4VWk0Nj0TSO9P4tNmHqTPGrdeNjPUtAa9GAH9d4RQAEX1jF3oI7x+ +/jXh7VB7qTCNGdMJjmhnXb88lxhTuylixcpecsHHltTbLaC0H2kD7OriUPEMPPCs +81Mt8Bz17Ww5OXOAFshSsCPN4D7c3TxHoLs1iuKYaIu+5b9y7tL6pe0S7fyYGKkm +dtwoSxAgHNN/Fnct7W+A90m7UwW7XWjH1Mh1Fj+JWov3F0fUTPHSiXk+TT2YqGHe +Oh7S+F4D4MHJHIzTjU3TlTazN19jY5szFPAtJmtTfImMMsJu7D0hADnJoWjiUIMu +sDor8zagrC/kb2HCUQk5PotTubtn2txTuXZZNp1D5SDgPTJghSJRt8czu90VL6R4 
+pgd7gUY2BIbdeTXHlSw7sKMXNeVzH7RcWe/a6hBle3rQf5+ztCo3O3CLm1u5K7fs +slESl1MpWtTwEhDcTwK7EpIvYtQ/aUN8Ddb8WHUBiJ1YFkveupD/RwGJBmr2X7KQ +arMCpgKIv7NHfirZ1fpoeDVNAgMBAAGjPzA9MA8GA1UdEwEB/wQFMAMBAf8wCwYD +VR0PBAQDAgEGMB0GA1UdDgQWBBTwj1k4ALP1j5qWDNXr+nuqF+gTEjANBgkqhkiG +9w0BAQUFAAOCAgEAvuRcYk4k9AwI//DTDGjkk0kiP0Qnb7tt3oNmzqjMDfz1mgbl +dxSR651Be5kqhOX//CHBXfDkH1e3damhXwIm/9fH907eT/j3HEbAek9ALCI18Bmx +0GtnLLCo4MBANzX2hFxc469CeP6nyQ1Q6g2EdvZR74NTxnr/DlZJLo961gzmJ1Tj +TQpgcmLNkQfWpb/ImWvtxBnmq0wROMVvMeJuScg/doAmAyYp4Db29iBT4xdwNBed +Y2gea+zDTYa4EzAvXUYNR0PVG6pZDrlcjQZIrXSHX8f8MVRBE+LHIQ6e4B4N4cB7 +Q4WQxYpYxmUKeFfyxiMPAdkgS94P+5KFdSpcc41teyWRyu5FrgZLAMzTsVlQ2jqI +OylDRl6XK1TOU2+NSueW+r9xDkKLfP0ooNBIytrEgUy7onOTJsjrDNYmiLbAJM+7 +vVvrdX3pCI6GMyx5dwlppYn8s3CQh3aP0yK7Qs69cwsgJirQmz1wHiRszYd2qReW +t88NkvuOGKmYSdGe/mBEciG5Ge3C9THxOUiIkCR1VBatzvT4aRRkOfujuLpwQMcn +HL/EVlP6Y2XQ8xwOFvVrhlhNGNTkDY6lnVuR3HYkUD/GKvvZt5y11ubQ2egZixVx +SK236thZiNSQvxaz2emsWWFUyBy6ysHK4bkgTI86k4mloMy/0/Z1pHWWbVY= +-----END CERTIFICATE----- + +# Issuer: CN=E-Tugra Certification Authority O=E-Tu\u011fra EBG Bili\u015fim Teknolojileri ve Hizmetleri A.\u015e. OU=E-Tugra Sertifikasyon Merkezi +# Subject: CN=E-Tugra Certification Authority O=E-Tu\u011fra EBG Bili\u015fim Teknolojileri ve Hizmetleri A.\u015e. OU=E-Tugra Sertifikasyon Merkezi +# Label: "E-Tugra Certification Authority" +# Serial: 7667447206703254355 +# MD5 Fingerprint: b8:a1:03:63:b0:bd:21:71:70:8a:6f:13:3a:bb:79:49 +# SHA1 Fingerprint: 51:c6:e7:08:49:06:6e:f3:92:d4:5c:a0:0d:6d:a3:62:8f:c3:52:39 +# SHA256 Fingerprint: b0:bf:d5:2b:b0:d7:d9:bd:92:bf:5d:4d:c1:3d:a2:55:c0:2c:54:2f:37:83:65:ea:89:39:11:f5:5e:55:f2:3c +-----BEGIN CERTIFICATE----- +MIIGSzCCBDOgAwIBAgIIamg+nFGby1MwDQYJKoZIhvcNAQELBQAwgbIxCzAJBgNV +BAYTAlRSMQ8wDQYDVQQHDAZBbmthcmExQDA+BgNVBAoMN0UtVHXEn3JhIEVCRyBC +aWxpxZ9pbSBUZWtub2xvamlsZXJpIHZlIEhpem1ldGxlcmkgQS7Fni4xJjAkBgNV +BAsMHUUtVHVncmEgU2VydGlmaWthc3lvbiBNZXJrZXppMSgwJgYDVQQDDB9FLVR1 +Z3JhIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTEzMDMwNTEyMDk0OFoXDTIz +MDMwMzEyMDk0OFowgbIxCzAJBgNVBAYTAlRSMQ8wDQYDVQQHDAZBbmthcmExQDA+ +BgNVBAoMN0UtVHXEn3JhIEVCRyBCaWxpxZ9pbSBUZWtub2xvamlsZXJpIHZlIEhp +em1ldGxlcmkgQS7Fni4xJjAkBgNVBAsMHUUtVHVncmEgU2VydGlmaWthc3lvbiBN +ZXJrZXppMSgwJgYDVQQDDB9FLVR1Z3JhIENlcnRpZmljYXRpb24gQXV0aG9yaXR5 +MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA4vU/kwVRHoViVF56C/UY +B4Oufq9899SKa6VjQzm5S/fDxmSJPZQuVIBSOTkHS0vdhQd2h8y/L5VMzH2nPbxH +D5hw+IyFHnSOkm0bQNGZDbt1bsipa5rAhDGvykPL6ys06I+XawGb1Q5KCKpbknSF +Q9OArqGIW66z6l7LFpp3RMih9lRozt6Plyu6W0ACDGQXwLWTzeHxE2bODHnv0ZEo +q1+gElIwcxmOj+GMB6LDu0rw6h8VqO4lzKRG+Bsi77MOQ7osJLjFLFzUHPhdZL3D +k14opz8n8Y4e0ypQBaNV2cvnOVPAmJ6MVGKLJrD3fY185MaeZkJVgkfnsliNZvcH +fC425lAcP9tDJMW/hkd5s3kc91r0E+xs+D/iWR+V7kI+ua2oMoVJl0b+SzGPWsut +dEcf6ZG33ygEIqDUD13ieU/qbIWGvaimzuT6w+Gzrt48Ue7LE3wBf4QOXVGUnhMM +ti6lTPk5cDZvlsouDERVxcr6XQKj39ZkjFqzAQqptQpHF//vkUAqjqFGOjGY5RH8 +zLtJVor8udBhmm9lbObDyz51Sf6Pp+KJxWfXnUYTTjF2OySznhFlhqt/7x3U+Lzn +rFpct1pHXFXOVbQicVtbC/DP3KBhZOqp12gKY6fgDT+gr9Oq0n7vUaDmUStVkhUX +U8u3Zg5mTPj5dUyQ5xJwx0UCAwEAAaNjMGEwHQYDVR0OBBYEFC7j27JJ0JxUeVz6 +Jyr+zE7S6E5UMA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0jBBgwFoAULuPbsknQnFR5 +XPonKv7MTtLoTlQwDgYDVR0PAQH/BAQDAgEGMA0GCSqGSIb3DQEBCwUAA4ICAQAF +Nzr0TbdF4kV1JI+2d1LoHNgQk2Xz8lkGpD4eKexd0dCrfOAKkEh47U6YA5n+KGCR +HTAduGN8qOY1tfrTYXbm1gdLymmasoR6d5NFFxWfJNCYExL/u6Au/U5Mh/jOXKqY +GwXgAEZKgoClM4so3O0409/lPun++1ndYYRP0lSWE2ETPo+Aab6TR7U1Q9Jauz1c +77NCR807VRMGsAnb/WP2OogKmW9+4c4bU2pEZiNRCHu8W1Ki/QY3OEBhj0qWuJA3 ++GbHeJAAFS6LrVE1Uweoa2iu+U48BybNCAVwzDk/dr2l02cmAYamU9JgO3xDf1WK 
+vJUawSg5TB9D0pH0clmKuVb8P7Sd2nCcdlqMQ1DujjByTd//SffGqWfZbawCEeI6 +FiWnWAjLb1NBnEg4R2gz0dfHj9R0IdTDBZB6/86WiLEVKV0jq9BgoRJP3vQXzTLl +yb/IQ639Lo7xr+L0mPoSHyDYwKcMhcWQ9DstliaxLL5Mq+ux0orJ23gTDx4JnW2P +AJ8C2sH6H3p6CcRK5ogql5+Ji/03X186zjhZhkuvcQu02PJwT58yE+Owp1fl2tpD +y4Q08ijE6m30Ku/Ba3ba+367hTzSU8JNvnHhRdH9I2cNE3X7z2VnIp2usAnRCf8d +NL/+I5c30jn6PQ0GC7TbO6Orb1wdtn7os4I07QZcJA== +-----END CERTIFICATE----- + +# Issuer: CN=T-TeleSec GlobalRoot Class 2 O=T-Systems Enterprise Services GmbH OU=T-Systems Trust Center +# Subject: CN=T-TeleSec GlobalRoot Class 2 O=T-Systems Enterprise Services GmbH OU=T-Systems Trust Center +# Label: "T-TeleSec GlobalRoot Class 2" +# Serial: 1 +# MD5 Fingerprint: 2b:9b:9e:e4:7b:6c:1f:00:72:1a:cc:c1:77:79:df:6a +# SHA1 Fingerprint: 59:0d:2d:7d:88:4f:40:2e:61:7e:a5:62:32:17:65:cf:17:d8:94:e9 +# SHA256 Fingerprint: 91:e2:f5:78:8d:58:10:eb:a7:ba:58:73:7d:e1:54:8a:8e:ca:cd:01:45:98:bc:0b:14:3e:04:1b:17:05:25:52 +-----BEGIN CERTIFICATE----- +MIIDwzCCAqugAwIBAgIBATANBgkqhkiG9w0BAQsFADCBgjELMAkGA1UEBhMCREUx +KzApBgNVBAoMIlQtU3lzdGVtcyBFbnRlcnByaXNlIFNlcnZpY2VzIEdtYkgxHzAd +BgNVBAsMFlQtU3lzdGVtcyBUcnVzdCBDZW50ZXIxJTAjBgNVBAMMHFQtVGVsZVNl +YyBHbG9iYWxSb290IENsYXNzIDIwHhcNMDgxMDAxMTA0MDE0WhcNMzMxMDAxMjM1 +OTU5WjCBgjELMAkGA1UEBhMCREUxKzApBgNVBAoMIlQtU3lzdGVtcyBFbnRlcnBy +aXNlIFNlcnZpY2VzIEdtYkgxHzAdBgNVBAsMFlQtU3lzdGVtcyBUcnVzdCBDZW50 +ZXIxJTAjBgNVBAMMHFQtVGVsZVNlYyBHbG9iYWxSb290IENsYXNzIDIwggEiMA0G +CSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCqX9obX+hzkeXaXPSi5kfl82hVYAUd +AqSzm1nzHoqvNK38DcLZSBnuaY/JIPwhqgcZ7bBcrGXHX+0CfHt8LRvWurmAwhiC +FoT6ZrAIxlQjgeTNuUk/9k9uN0goOA/FvudocP05l03Sx5iRUKrERLMjfTlH6VJi +1hKTXrcxlkIF+3anHqP1wvzpesVsqXFP6st4vGCvx9702cu+fjOlbpSD8DT6Iavq +jnKgP6TeMFvvhk1qlVtDRKgQFRzlAVfFmPHmBiiRqiDFt1MmUUOyCxGVWOHAD3bZ +wI18gfNycJ5v/hqO2V81xrJvNHy+SE/iWjnX2J14np+GPgNeGYtEotXHAgMBAAGj +QjBAMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgEGMB0GA1UdDgQWBBS/ +WSA2AHmgoCJrjNXyYdK4LMuCSjANBgkqhkiG9w0BAQsFAAOCAQEAMQOiYQsfdOhy +NsZt+U2e+iKo4YFWz827n+qrkRk4r6p8FU3ztqONpfSO9kSpp+ghla0+AGIWiPAC +uvxhI+YzmzB6azZie60EI4RYZeLbK4rnJVM3YlNfvNoBYimipidx5joifsFvHZVw +IEoHNN/q/xWA5brXethbdXwFeilHfkCoMRN3zUA7tFFHei4R40cR3p1m0IvVVGb6 +g1XqfMIpiRvpb7PO4gWEyS8+eIVibslfwXhjdFjASBgMmTnrpMwatXlajRWc2BQN +9noHV8cigwUtPJslJj0Ys6lDfMjIq2SPDqO/nBudMNva0Bkuqjzx+zOAduTNrRlP +BSeOE6Fuwg== +-----END CERTIFICATE----- + +# Issuer: CN=Atos TrustedRoot 2011 O=Atos +# Subject: CN=Atos TrustedRoot 2011 O=Atos +# Label: "Atos TrustedRoot 2011" +# Serial: 6643877497813316402 +# MD5 Fingerprint: ae:b9:c4:32:4b:ac:7f:5d:66:cc:77:94:bb:2a:77:56 +# SHA1 Fingerprint: 2b:b1:f5:3e:55:0c:1d:c5:f1:d4:e6:b7:6a:46:4b:55:06:02:ac:21 +# SHA256 Fingerprint: f3:56:be:a2:44:b7:a9:1e:b3:5d:53:ca:9a:d7:86:4a:ce:01:8e:2d:35:d5:f8:f9:6d:df:68:a6:f4:1a:a4:74 +-----BEGIN CERTIFICATE----- +MIIDdzCCAl+gAwIBAgIIXDPLYixfszIwDQYJKoZIhvcNAQELBQAwPDEeMBwGA1UE +AwwVQXRvcyBUcnVzdGVkUm9vdCAyMDExMQ0wCwYDVQQKDARBdG9zMQswCQYDVQQG +EwJERTAeFw0xMTA3MDcxNDU4MzBaFw0zMDEyMzEyMzU5NTlaMDwxHjAcBgNVBAMM +FUF0b3MgVHJ1c3RlZFJvb3QgMjAxMTENMAsGA1UECgwEQXRvczELMAkGA1UEBhMC +REUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCVhTuXbyo7LjvPpvMp +Nb7PGKw+qtn4TaA+Gke5vJrf8v7MPkfoepbCJI419KkM/IL9bcFyYie96mvr54rM +VD6QUM+A1JX76LWC1BTFtqlVJVfbsVD2sGBkWXppzwO3bw2+yj5vdHLqqjAqc2K+ +SZFhyBH+DgMq92og3AIVDV4VavzjgsG1xZ1kCWyjWZgHJ8cblithdHFsQ/H3NYkQ +4J7sVaE3IqKHBAUsR320HLliKWYoyrfhk/WklAOZuXCFteZI6o1Q/NnezG8HDt0L +cp2AMBYHlT8oDv3FdU9T1nSatCQujgKRz3bFmx5VdJx4IbHwLfELn8LVlhgf8FQi +eowHAgMBAAGjfTB7MB0GA1UdDgQWBBSnpQaxLKYJYO7Rl+lwrrw7GWzbITAPBgNV 
+HRMBAf8EBTADAQH/MB8GA1UdIwQYMBaAFKelBrEspglg7tGX6XCuvDsZbNshMBgG +A1UdIAQRMA8wDQYLKwYBBAGwLQMEAQEwDgYDVR0PAQH/BAQDAgGGMA0GCSqGSIb3 +DQEBCwUAA4IBAQAmdzTblEiGKkGdLD4GkGDEjKwLVLgfuXvTBznk+j57sj1O7Z8j +vZfza1zv7v1Apt+hk6EKhqzvINB5Ab149xnYJDE0BAGmuhWawyfc2E8PzBhj/5kP +DpFrdRbhIfzYJsdHt6bPWHJxfrrhTZVHO8mvbaG0weyJ9rQPOLXiZNwlz6bb65pc +maHFCN795trV1lpFDMS3wrUU77QR/w4VtfX128a961qn8FYiqTxlVMYVqL2Gns2D +lmh6cYGJ4Qvh6hEbaAjMaZ7snkGeRDImeuKHCnE96+RapNLbxc3G3mB/ufNPRJLv +KrcYPqcZ2Qt9sTdBQrC6YB3y/gkRsPCHe6ed +-----END CERTIFICATE----- + +# Issuer: CN=QuoVadis Root CA 1 G3 O=QuoVadis Limited +# Subject: CN=QuoVadis Root CA 1 G3 O=QuoVadis Limited +# Label: "QuoVadis Root CA 1 G3" +# Serial: 687049649626669250736271037606554624078720034195 +# MD5 Fingerprint: a4:bc:5b:3f:fe:37:9a:fa:64:f0:e2:fa:05:3d:0b:ab +# SHA1 Fingerprint: 1b:8e:ea:57:96:29:1a:c9:39:ea:b8:0a:81:1a:73:73:c0:93:79:67 +# SHA256 Fingerprint: 8a:86:6f:d1:b2:76:b5:7e:57:8e:92:1c:65:82:8a:2b:ed:58:e9:f2:f2:88:05:41:34:b7:f1:f4:bf:c9:cc:74 +-----BEGIN CERTIFICATE----- +MIIFYDCCA0igAwIBAgIUeFhfLq0sGUvjNwc1NBMotZbUZZMwDQYJKoZIhvcNAQEL +BQAwSDELMAkGA1UEBhMCQk0xGTAXBgNVBAoTEFF1b1ZhZGlzIExpbWl0ZWQxHjAc +BgNVBAMTFVF1b1ZhZGlzIFJvb3QgQ0EgMSBHMzAeFw0xMjAxMTIxNzI3NDRaFw00 +MjAxMTIxNzI3NDRaMEgxCzAJBgNVBAYTAkJNMRkwFwYDVQQKExBRdW9WYWRpcyBM +aW1pdGVkMR4wHAYDVQQDExVRdW9WYWRpcyBSb290IENBIDEgRzMwggIiMA0GCSqG +SIb3DQEBAQUAA4ICDwAwggIKAoICAQCgvlAQjunybEC0BJyFuTHK3C3kEakEPBtV +wedYMB0ktMPvhd6MLOHBPd+C5k+tR4ds7FtJwUrVu4/sh6x/gpqG7D0DmVIB0jWe +rNrwU8lmPNSsAgHaJNM7qAJGr6Qc4/hzWHa39g6QDbXwz8z6+cZM5cOGMAqNF341 +68Xfuw6cwI2H44g4hWf6Pser4BOcBRiYz5P1sZK0/CPTz9XEJ0ngnjybCKOLXSoh +4Pw5qlPafX7PGglTvF0FBM+hSo+LdoINofjSxxR3W5A2B4GbPgb6Ul5jxaYA/qXp +UhtStZI5cgMJYr2wYBZupt0lwgNm3fME0UDiTouG9G/lg6AnhF4EwfWQvTA9xO+o +abw4m6SkltFi2mnAAZauy8RRNOoMqv8hjlmPSlzkYZqn0ukqeI1RPToV7qJZjqlc +3sX5kCLliEVx3ZGZbHqfPT2YfF72vhZooF6uCyP8Wg+qInYtyaEQHeTTRCOQiJ/G +KubX9ZqzWB4vMIkIG1SitZgj7Ah3HJVdYdHLiZxfokqRmu8hqkkWCKi9YSgxyXSt +hfbZxbGL0eUQMk1fiyA6PEkfM4VZDdvLCXVDaXP7a3F98N/ETH3Goy7IlXnLc6KO +Tk0k+17kBL5yG6YnLUlamXrXXAkgt3+UuU/xDRxeiEIbEbfnkduebPRq34wGmAOt +zCjvpUfzUwIDAQABo0IwQDAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIB +BjAdBgNVHQ4EFgQUo5fW816iEOGrRZ88F2Q87gFwnMwwDQYJKoZIhvcNAQELBQAD +ggIBABj6W3X8PnrHX3fHyt/PX8MSxEBd1DKquGrX1RUVRpgjpeaQWxiZTOOtQqOC +MTaIzen7xASWSIsBx40Bz1szBpZGZnQdT+3Btrm0DWHMY37XLneMlhwqI2hrhVd2 +cDMT/uFPpiN3GPoajOi9ZcnPP/TJF9zrx7zABC4tRi9pZsMbj/7sPtPKlL92CiUN +qXsCHKnQO18LwIE6PWThv6ctTr1NxNgpxiIY0MWscgKCP6o6ojoilzHdCGPDdRS5 +YCgtW2jgFqlmgiNR9etT2DGbe+m3nUvriBbP+V04ikkwj+3x6xn0dxoxGE1nVGwv +b2X52z3sIexe9PSLymBlVNFxZPT5pqOBMzYzcfCkeF9OrYMh3jRJjehZrJ3ydlo2 +8hP0r+AJx2EqbPfgna67hkooby7utHnNkDPDs3b69fBsnQGQ+p6Q9pxyz0fawx/k +NSBT8lTR32GDpgLiJTjehTItXnOQUl1CxM49S+H5GYQd1aJQzEH7QRTDvdbJWqNj +ZgKAvQU6O0ec7AAmTPWIUb+oI38YB7AL7YsmoWTTYUrrXJ/es69nA7Mf3W1daWhp +q1467HxpvMc7hU6eFbm0FU/DlXpY18ls6Wy58yljXrQs8C097Vpl4KlbQMJImYFt +nh8GKjwStIsPm6Ik8KaN1nrgS7ZklmOVhMJKzRwuJIczYOXD +-----END CERTIFICATE----- + +# Issuer: CN=QuoVadis Root CA 2 G3 O=QuoVadis Limited +# Subject: CN=QuoVadis Root CA 2 G3 O=QuoVadis Limited +# Label: "QuoVadis Root CA 2 G3" +# Serial: 390156079458959257446133169266079962026824725800 +# MD5 Fingerprint: af:0c:86:6e:bf:40:2d:7f:0b:3e:12:50:ba:12:3d:06 +# SHA1 Fingerprint: 09:3c:61:f3:8b:8b:dc:7d:55:df:75:38:02:05:00:e1:25:f5:c8:36 +# SHA256 Fingerprint: 8f:e4:fb:0a:f9:3a:4d:0d:67:db:0b:eb:b2:3e:37:c7:1b:f3:25:dc:bc:dd:24:0e:a0:4d:af:58:b4:7e:18:40 +-----BEGIN CERTIFICATE----- +MIIFYDCCA0igAwIBAgIURFc0JFuBiZs18s64KztbpybwdSgwDQYJKoZIhvcNAQEL 
+BQAwSDELMAkGA1UEBhMCQk0xGTAXBgNVBAoTEFF1b1ZhZGlzIExpbWl0ZWQxHjAc +BgNVBAMTFVF1b1ZhZGlzIFJvb3QgQ0EgMiBHMzAeFw0xMjAxMTIxODU5MzJaFw00 +MjAxMTIxODU5MzJaMEgxCzAJBgNVBAYTAkJNMRkwFwYDVQQKExBRdW9WYWRpcyBM +aW1pdGVkMR4wHAYDVQQDExVRdW9WYWRpcyBSb290IENBIDIgRzMwggIiMA0GCSqG +SIb3DQEBAQUAA4ICDwAwggIKAoICAQChriWyARjcV4g/Ruv5r+LrI3HimtFhZiFf +qq8nUeVuGxbULX1QsFN3vXg6YOJkApt8hpvWGo6t/x8Vf9WVHhLL5hSEBMHfNrMW +n4rjyduYNM7YMxcoRvynyfDStNVNCXJJ+fKH46nafaF9a7I6JaltUkSs+L5u+9ym +c5GQYaYDFCDy54ejiK2toIz/pgslUiXnFgHVy7g1gQyjO/Dh4fxaXc6AcW34Sas+ +O7q414AB+6XrW7PFXmAqMaCvN+ggOp+oMiwMzAkd056OXbxMmO7FGmh77FOm6RQ1 +o9/NgJ8MSPsc9PG/Srj61YxxSscfrf5BmrODXfKEVu+lV0POKa2Mq1W/xPtbAd0j +IaFYAI7D0GoT7RPjEiuA3GfmlbLNHiJuKvhB1PLKFAeNilUSxmn1uIZoL1NesNKq +IcGY5jDjZ1XHm26sGahVpkUG0CM62+tlXSoREfA7T8pt9DTEceT/AFr2XK4jYIVz +8eQQsSWu1ZK7E8EM4DnatDlXtas1qnIhO4M15zHfeiFuuDIIfR0ykRVKYnLP43eh +vNURG3YBZwjgQQvD6xVu+KQZ2aKrr+InUlYrAoosFCT5v0ICvybIxo/gbjh9Uy3l +7ZizlWNof/k19N+IxWA1ksB8aRxhlRbQ694Lrz4EEEVlWFA4r0jyWbYW8jwNkALG +cC4BrTwV1wIDAQABo0IwQDAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIB +BjAdBgNVHQ4EFgQU7edvdlq/YOxJW8ald7tyFnGbxD0wDQYJKoZIhvcNAQELBQAD +ggIBAJHfgD9DCX5xwvfrs4iP4VGyvD11+ShdyLyZm3tdquXK4Qr36LLTn91nMX66 +AarHakE7kNQIXLJgapDwyM4DYvmL7ftuKtwGTTwpD4kWilhMSA/ohGHqPHKmd+RC +roijQ1h5fq7KpVMNqT1wvSAZYaRsOPxDMuHBR//47PERIjKWnML2W2mWeyAMQ0Ga +W/ZZGYjeVYg3UQt4XAoeo0L9x52ID8DyeAIkVJOviYeIyUqAHerQbj5hLja7NQ4n +lv1mNDthcnPxFlxHBlRJAHpYErAK74X9sbgzdWqTHBLmYF5vHX/JHyPLhGGfHoJE ++V+tYlUkmlKY7VHnoX6XOuYvHxHaU4AshZ6rNRDbIl9qxV6XU/IyAgkwo1jwDQHV +csaxfGl7w/U2Rcxhbl5MlMVerugOXou/983g7aEOGzPuVBj+D77vfoRrQ+NwmNtd +dbINWQeFFSM51vHfqSYP1kjHs6Yi9TM3WpVHn3u6GBVv/9YUZINJ0gpnIdsPNWNg +KCLjsZWDzYWm3S8P52dSbrsvhXz1SnPnxT7AvSESBT/8twNJAlvIJebiVDj1eYeM +HVOyToV7BjjHLPj4sHKNJeV3UvQDHEimUF+IIDBu8oJDqz2XhOdT+yHBTw8imoa4 +WSr2Rz0ZiC3oheGe7IUIarFsNMkd7EgrO3jtZsSOeWmD3n+M +-----END CERTIFICATE----- + +# Issuer: CN=QuoVadis Root CA 3 G3 O=QuoVadis Limited +# Subject: CN=QuoVadis Root CA 3 G3 O=QuoVadis Limited +# Label: "QuoVadis Root CA 3 G3" +# Serial: 268090761170461462463995952157327242137089239581 +# MD5 Fingerprint: df:7d:b9:ad:54:6f:68:a1:df:89:57:03:97:43:b0:d7 +# SHA1 Fingerprint: 48:12:bd:92:3c:a8:c4:39:06:e7:30:6d:27:96:e6:a4:cf:22:2e:7d +# SHA256 Fingerprint: 88:ef:81:de:20:2e:b0:18:45:2e:43:f8:64:72:5c:ea:5f:bd:1f:c2:d9:d2:05:73:07:09:c5:d8:b8:69:0f:46 +-----BEGIN CERTIFICATE----- +MIIFYDCCA0igAwIBAgIULvWbAiin23r/1aOp7r0DoM8Sah0wDQYJKoZIhvcNAQEL +BQAwSDELMAkGA1UEBhMCQk0xGTAXBgNVBAoTEFF1b1ZhZGlzIExpbWl0ZWQxHjAc +BgNVBAMTFVF1b1ZhZGlzIFJvb3QgQ0EgMyBHMzAeFw0xMjAxMTIyMDI2MzJaFw00 +MjAxMTIyMDI2MzJaMEgxCzAJBgNVBAYTAkJNMRkwFwYDVQQKExBRdW9WYWRpcyBM +aW1pdGVkMR4wHAYDVQQDExVRdW9WYWRpcyBSb290IENBIDMgRzMwggIiMA0GCSqG +SIb3DQEBAQUAA4ICDwAwggIKAoICAQCzyw4QZ47qFJenMioKVjZ/aEzHs286IxSR +/xl/pcqs7rN2nXrpixurazHb+gtTTK/FpRp5PIpM/6zfJd5O2YIyC0TeytuMrKNu +FoM7pmRLMon7FhY4futD4tN0SsJiCnMK3UmzV9KwCoWdcTzeo8vAMvMBOSBDGzXR +U7Ox7sWTaYI+FrUoRqHe6okJ7UO4BUaKhvVZR74bbwEhELn9qdIoyhA5CcoTNs+c +ra1AdHkrAj80//ogaX3T7mH1urPnMNA3I4ZyYUUpSFlob3emLoG+B01vr87ERROR +FHAGjx+f+IdpsQ7vw4kZ6+ocYfx6bIrc1gMLnia6Et3UVDmrJqMz6nWB2i3ND0/k +A9HvFZcba5DFApCTZgIhsUfei5pKgLlVj7WiL8DWM2fafsSntARE60f75li59wzw +eyuxwHApw0BiLTtIadwjPEjrewl5qW3aqDCYz4ByA4imW0aucnl8CAMhZa634Ryl +sSqiMd5mBPfAdOhx3v89WcyWJhKLhZVXGqtrdQtEPREoPHtht+KPZ0/l7DxMYIBp +VzgeAVuNVejH38DMdyM0SXV89pgR6y3e7UEuFAUCf+D+IOs15xGsIs5XPd7JMG0Q +A4XN8f+MFrXBsj6IbGB/kE+V9/YtrQE5BwT6dYB9v0lQ7e/JxHwc64B+27bQ3RP+ +ydOc17KXqQIDAQABo0IwQDAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIB +BjAdBgNVHQ4EFgQUxhfQvKjqAkPyGwaZXSuQILnXnOQwDQYJKoZIhvcNAQELBQAD 
+ggIBADRh2Va1EodVTd2jNTFGu6QHcrxfYWLopfsLN7E8trP6KZ1/AvWkyaiTt3px +KGmPc+FSkNrVvjrlt3ZqVoAh313m6Tqe5T72omnHKgqwGEfcIHB9UqM+WXzBusnI +FUBhynLWcKzSt/Ac5IYp8M7vaGPQtSCKFWGafoaYtMnCdvvMujAWzKNhxnQT5Wvv +oxXqA/4Ti2Tk08HS6IT7SdEQTXlm66r99I0xHnAUrdzeZxNMgRVhvLfZkXdxGYFg +u/BYpbWcC/ePIlUnwEsBbTuZDdQdm2NnL9DuDcpmvJRPpq3t/O5jrFc/ZSXPsoaP +0Aj/uHYUbt7lJ+yreLVTubY/6CD50qi+YUbKh4yE8/nxoGibIh6BJpsQBJFxwAYf +3KDTuVan45gtf4Od34wrnDKOMpTwATwiKp9Dwi7DmDkHOHv8XgBCH/MyJnmDhPbl +8MFREsALHgQjDFSlTC9JxUrRtm5gDWv8a4uFJGS3iQ6rJUdbPM9+Sb3H6QrG2vd+ +DhcI00iX0HGS8A85PjRqHH3Y8iKuu2n0M7SmSFXRDw4m6Oy2Cy2nhTXN/VnIn9HN +PlopNLk9hM6xZdRZkZFWdSHBd575euFgndOtBBj0fOtek49TSiIp+EgrPk2GrFt/ +ywaZWWDYWGWVjUTR939+J399roD1B0y2PpxxVJkES/1Y+Zj0 +-----END CERTIFICATE----- + +# Issuer: CN=DigiCert Assured ID Root G2 O=DigiCert Inc OU=www.digicert.com +# Subject: CN=DigiCert Assured ID Root G2 O=DigiCert Inc OU=www.digicert.com +# Label: "DigiCert Assured ID Root G2" +# Serial: 15385348160840213938643033620894905419 +# MD5 Fingerprint: 92:38:b9:f8:63:24:82:65:2c:57:33:e6:fe:81:8f:9d +# SHA1 Fingerprint: a1:4b:48:d9:43:ee:0a:0e:40:90:4f:3c:e0:a4:c0:91:93:51:5d:3f +# SHA256 Fingerprint: 7d:05:eb:b6:82:33:9f:8c:94:51:ee:09:4e:eb:fe:fa:79:53:a1:14:ed:b2:f4:49:49:45:2f:ab:7d:2f:c1:85 +-----BEGIN CERTIFICATE----- +MIIDljCCAn6gAwIBAgIQC5McOtY5Z+pnI7/Dr5r0SzANBgkqhkiG9w0BAQsFADBl +MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3 +d3cuZGlnaWNlcnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJv +b3QgRzIwHhcNMTMwODAxMTIwMDAwWhcNMzgwMTE1MTIwMDAwWjBlMQswCQYDVQQG +EwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNl +cnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJvb3QgRzIwggEi +MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDZ5ygvUj82ckmIkzTz+GoeMVSA +n61UQbVH35ao1K+ALbkKz3X9iaV9JPrjIgwrvJUXCzO/GU1BBpAAvQxNEP4Htecc +biJVMWWXvdMX0h5i89vqbFCMP4QMls+3ywPgym2hFEwbid3tALBSfK+RbLE4E9Hp +EgjAALAcKxHad3A2m67OeYfcgnDmCXRwVWmvo2ifv922ebPynXApVfSr/5Vh88lA +bx3RvpO704gqu52/clpWcTs/1PPRCv4o76Pu2ZmvA9OPYLfykqGxvYmJHzDNw6Yu +YjOuFgJ3RFrngQo8p0Quebg/BLxcoIfhG69Rjs3sLPr4/m3wOnyqi+RnlTGNAgMB +AAGjQjBAMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgGGMB0GA1UdDgQW +BBTOw0q5mVXyuNtgv6l+vVa1lzan1jANBgkqhkiG9w0BAQsFAAOCAQEAyqVVjOPI +QW5pJ6d1Ee88hjZv0p3GeDgdaZaikmkuOGybfQTUiaWxMTeKySHMq2zNixya1r9I +0jJmwYrA8y8678Dj1JGG0VDjA9tzd29KOVPt3ibHtX2vK0LRdWLjSisCx1BL4Gni +lmwORGYQRI+tBev4eaymG+g3NJ1TyWGqolKvSnAWhsI6yLETcDbYz+70CjTVW0z9 +B5yiutkBclzzTcHdDrEcDcRjvq30FPuJ7KJBDkzMyFdA0G4Dqs0MjomZmWzwPDCv +ON9vvKO+KSAnq3T/EyJ43pdSVR6DtVQgA+6uwE9W3jfMw3+qBCe703e4YtsXfJwo +IhNzbM8m9Yop5w== +-----END CERTIFICATE----- + +# Issuer: CN=DigiCert Assured ID Root G3 O=DigiCert Inc OU=www.digicert.com +# Subject: CN=DigiCert Assured ID Root G3 O=DigiCert Inc OU=www.digicert.com +# Label: "DigiCert Assured ID Root G3" +# Serial: 15459312981008553731928384953135426796 +# MD5 Fingerprint: 7c:7f:65:31:0c:81:df:8d:ba:3e:99:e2:5c:ad:6e:fb +# SHA1 Fingerprint: f5:17:a2:4f:9a:48:c6:c9:f8:a2:00:26:9f:dc:0f:48:2c:ab:30:89 +# SHA256 Fingerprint: 7e:37:cb:8b:4c:47:09:0c:ab:36:55:1b:a6:f4:5d:b8:40:68:0f:ba:16:6a:95:2d:b1:00:71:7f:43:05:3f:c2 +-----BEGIN CERTIFICATE----- +MIICRjCCAc2gAwIBAgIQC6Fa+h3foLVJRK/NJKBs7DAKBggqhkjOPQQDAzBlMQsw +CQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cu +ZGlnaWNlcnQuY29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJvb3Qg +RzMwHhcNMTMwODAxMTIwMDAwWhcNMzgwMTE1MTIwMDAwWjBlMQswCQYDVQQGEwJV +UzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNlcnQu +Y29tMSQwIgYDVQQDExtEaWdpQ2VydCBBc3N1cmVkIElEIFJvb3QgRzMwdjAQBgcq 
+hkjOPQIBBgUrgQQAIgNiAAQZ57ysRGXtzbg/WPuNsVepRC0FFfLvC/8QdJ+1YlJf +Zn4f5dwbRXkLzMZTCp2NXQLZqVneAlr2lSoOjThKiknGvMYDOAdfVdp+CW7if17Q +RSAPWXYQ1qAk8C3eNvJsKTmjQjBAMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/ +BAQDAgGGMB0GA1UdDgQWBBTL0L2p4ZgFUaFNN6KDec6NHSrkhDAKBggqhkjOPQQD +AwNnADBkAjAlpIFFAmsSS3V0T8gj43DydXLefInwz5FyYZ5eEJJZVrmDxxDnOOlY +JjZ91eQ0hjkCMHw2U/Aw5WJjOpnitqM7mzT6HtoQknFekROn3aRukswy1vUhZscv +6pZjamVFkpUBtA== +-----END CERTIFICATE----- + +# Issuer: CN=DigiCert Global Root G2 O=DigiCert Inc OU=www.digicert.com +# Subject: CN=DigiCert Global Root G2 O=DigiCert Inc OU=www.digicert.com +# Label: "DigiCert Global Root G2" +# Serial: 4293743540046975378534879503202253541 +# MD5 Fingerprint: e4:a6:8a:c8:54:ac:52:42:46:0a:fd:72:48:1b:2a:44 +# SHA1 Fingerprint: df:3c:24:f9:bf:d6:66:76:1b:26:80:73:fe:06:d1:cc:8d:4f:82:a4 +# SHA256 Fingerprint: cb:3c:cb:b7:60:31:e5:e0:13:8f:8d:d3:9a:23:f9:de:47:ff:c3:5e:43:c1:14:4c:ea:27:d4:6a:5a:b1:cb:5f +-----BEGIN CERTIFICATE----- +MIIDjjCCAnagAwIBAgIQAzrx5qcRqaC7KGSxHQn65TANBgkqhkiG9w0BAQsFADBh +MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3 +d3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBH +MjAeFw0xMzA4MDExMjAwMDBaFw0zODAxMTUxMjAwMDBaMGExCzAJBgNVBAYTAlVT +MRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5j +b20xIDAeBgNVBAMTF0RpZ2lDZXJ0IEdsb2JhbCBSb290IEcyMIIBIjANBgkqhkiG +9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuzfNNNx7a8myaJCtSnX/RrohCgiN9RlUyfuI +2/Ou8jqJkTx65qsGGmvPrC3oXgkkRLpimn7Wo6h+4FR1IAWsULecYxpsMNzaHxmx +1x7e/dfgy5SDN67sH0NO3Xss0r0upS/kqbitOtSZpLYl6ZtrAGCSYP9PIUkY92eQ +q2EGnI/yuum06ZIya7XzV+hdG82MHauVBJVJ8zUtluNJbd134/tJS7SsVQepj5Wz +tCO7TG1F8PapspUwtP1MVYwnSlcUfIKdzXOS0xZKBgyMUNGPHgm+F6HmIcr9g+UQ +vIOlCsRnKPZzFBQ9RnbDhxSJITRNrw9FDKZJobq7nMWxM4MphQIDAQABo0IwQDAP +BgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIBhjAdBgNVHQ4EFgQUTiJUIBiV +5uNu5g/6+rkS7QYXjzkwDQYJKoZIhvcNAQELBQADggEBAGBnKJRvDkhj6zHd6mcY +1Yl9PMWLSn/pvtsrF9+wX3N3KjITOYFnQoQj8kVnNeyIv/iPsGEMNKSuIEyExtv4 +NeF22d+mQrvHRAiGfzZ0JFrabA0UWTW98kndth/Jsw1HKj2ZL7tcu7XUIOGZX1NG +Fdtom/DzMNU+MeKNhJ7jitralj41E6Vf8PlwUHBHQRFXGU7Aj64GxJUTFy8bJZ91 +8rGOmaFvE7FBcf6IKshPECBV1/MUReXgRPTqh5Uykw7+U0b6LJ3/iyK5S9kJRaTe +pLiaWN0bfVKfjllDiIGknibVb63dDcY3fe0Dkhvld1927jyNxF1WW6LZZm6zNTfl +MrY= +-----END CERTIFICATE----- + +# Issuer: CN=DigiCert Global Root G3 O=DigiCert Inc OU=www.digicert.com +# Subject: CN=DigiCert Global Root G3 O=DigiCert Inc OU=www.digicert.com +# Label: "DigiCert Global Root G3" +# Serial: 7089244469030293291760083333884364146 +# MD5 Fingerprint: f5:5d:a4:50:a5:fb:28:7e:1e:0f:0d:cc:96:57:56:ca +# SHA1 Fingerprint: 7e:04:de:89:6a:3e:66:6d:00:e6:87:d3:3f:fa:d9:3b:e8:3d:34:9e +# SHA256 Fingerprint: 31:ad:66:48:f8:10:41:38:c7:38:f3:9e:a4:32:01:33:39:3e:3a:18:cc:02:29:6e:f9:7c:2a:c9:ef:67:31:d0 +-----BEGIN CERTIFICATE----- +MIICPzCCAcWgAwIBAgIQBVVWvPJepDU1w6QP1atFcjAKBggqhkjOPQQDAzBhMQsw +CQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cu +ZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBHMzAe +Fw0xMzA4MDExMjAwMDBaFw0zODAxMTUxMjAwMDBaMGExCzAJBgNVBAYTAlVTMRUw +EwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20x +IDAeBgNVBAMTF0RpZ2lDZXJ0IEdsb2JhbCBSb290IEczMHYwEAYHKoZIzj0CAQYF +K4EEACIDYgAE3afZu4q4C/sLfyHS8L6+c/MzXRq8NOrexpu80JX28MzQC7phW1FG +fp4tn+6OYwwX7Adw9c+ELkCDnOg/QW07rdOkFFk2eJ0DQ+4QE2xy3q6Ip6FrtUPO +Z9wj/wMco+I+o0IwQDAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIBhjAd +BgNVHQ4EFgQUs9tIpPmhxdiuNkHMEWNpYim8S8YwCgYIKoZIzj0EAwMDaAAwZQIx +AK288mw/EkrRLTnDCgmXc/SINoyIJ7vmiI1Qhadj+Z4y3maTD/HMsQmP3Wyr+mt/ 
+oAIwOWZbwmSNuJ5Q3KjVSaLtx9zRSX8XAbjIho9OjIgrqJqpisXRAL34VOKa5Vt8 +sycX +-----END CERTIFICATE----- + +# Issuer: CN=DigiCert Trusted Root G4 O=DigiCert Inc OU=www.digicert.com +# Subject: CN=DigiCert Trusted Root G4 O=DigiCert Inc OU=www.digicert.com +# Label: "DigiCert Trusted Root G4" +# Serial: 7451500558977370777930084869016614236 +# MD5 Fingerprint: 78:f2:fc:aa:60:1f:2f:b4:eb:c9:37:ba:53:2e:75:49 +# SHA1 Fingerprint: dd:fb:16:cd:49:31:c9:73:a2:03:7d:3f:c8:3a:4d:7d:77:5d:05:e4 +# SHA256 Fingerprint: 55:2f:7b:dc:f1:a7:af:9e:6c:e6:72:01:7f:4f:12:ab:f7:72:40:c7:8e:76:1a:c2:03:d1:d9:d2:0a:c8:99:88 +-----BEGIN CERTIFICATE----- +MIIFkDCCA3igAwIBAgIQBZsbV56OITLiOQe9p3d1XDANBgkqhkiG9w0BAQwFADBi +MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3 +d3cuZGlnaWNlcnQuY29tMSEwHwYDVQQDExhEaWdpQ2VydCBUcnVzdGVkIFJvb3Qg +RzQwHhcNMTMwODAxMTIwMDAwWhcNMzgwMTE1MTIwMDAwWjBiMQswCQYDVQQGEwJV +UzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNlcnQu +Y29tMSEwHwYDVQQDExhEaWdpQ2VydCBUcnVzdGVkIFJvb3QgRzQwggIiMA0GCSqG +SIb3DQEBAQUAA4ICDwAwggIKAoICAQC/5pBzaN675F1KPDAiMGkz7MKnJS7JIT3y +ithZwuEppz1Yq3aaza57G4QNxDAf8xukOBbrVsaXbR2rsnnyyhHS5F/WBTxSD1If +xp4VpX6+n6lXFllVcq9ok3DCsrp1mWpzMpTREEQQLt+C8weE5nQ7bXHiLQwb7iDV +ySAdYyktzuxeTsiT+CFhmzTrBcZe7FsavOvJz82sNEBfsXpm7nfISKhmV1efVFiO +DCu3T6cw2Vbuyntd463JT17lNecxy9qTXtyOj4DatpGYQJB5w3jHtrHEtWoYOAMQ +jdjUN6QuBX2I9YI+EJFwq1WCQTLX2wRzKm6RAXwhTNS8rhsDdV14Ztk6MUSaM0C/ +CNdaSaTC5qmgZ92kJ7yhTzm1EVgX9yRcRo9k98FpiHaYdj1ZXUJ2h4mXaXpI8OCi +EhtmmnTK3kse5w5jrubU75KSOp493ADkRSWJtppEGSt+wJS00mFt6zPZxd9LBADM +fRyVw4/3IbKyEbe7f/LVjHAsQWCqsWMYRJUadmJ+9oCw++hkpjPRiQfhvbfmQ6QY +uKZ3AeEPlAwhHbJUKSWJbOUOUlFHdL4mrLZBdd56rF+NP8m800ERElvlEFDrMcXK +chYiCd98THU/Y+whX8QgUWtvsauGi0/C1kVfnSD8oR7FwI+isX4KJpn15GkvmB0t +9dmpsh3lGwIDAQABo0IwQDAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIB +hjAdBgNVHQ4EFgQU7NfjgtJxXWRM3y5nP+e6mK4cD08wDQYJKoZIhvcNAQEMBQAD +ggIBALth2X2pbL4XxJEbw6GiAI3jZGgPVs93rnD5/ZpKmbnJeFwMDF/k5hQpVgs2 +SV1EY+CtnJYYZhsjDT156W1r1lT40jzBQ0CuHVD1UvyQO7uYmWlrx8GnqGikJ9yd ++SeuMIW59mdNOj6PWTkiU0TryF0Dyu1Qen1iIQqAyHNm0aAFYF/opbSnr6j3bTWc +fFqK1qI4mfN4i/RN0iAL3gTujJtHgXINwBQy7zBZLq7gcfJW5GqXb5JQbZaNaHqa +sjYUegbyJLkJEVDXCLG4iXqEI2FCKeWjzaIgQdfRnGTZ6iahixTXTBmyUEFxPT9N +cCOGDErcgdLMMpSEDQgJlxxPwO5rIHQw0uA5NBCFIRUBCOhVMt5xSdkoF1BN5r5N +0XWs0Mr7QbhDparTwwVETyw2m+L64kW4I1NsBm9nVX9GtUw/bihaeSbSpKhil9Ie +4u1Ki7wb/UdKDd9nZn6yW0HQO+T0O/QEY+nvwlQAUaCKKsnOeMzV6ocEGLPOr0mI +r/OSmbaz5mEP0oUA51Aa5BuVnRmhuZyxm7EAHu/QD09CbMkKvO5D+jpxpchNJqU1 +/YldvIViHTLSoCtU7ZpXwdv6EM8Zt4tKG48BtieVU+i2iW1bvGjUI+iLUaJW+fCm +gKDWHrO8Dw9TdSmq6hN35N6MgSGtBxBHEa2HPQfRdbzP82Z+ +-----END CERTIFICATE----- + +# Issuer: CN=COMODO RSA Certification Authority O=COMODO CA Limited +# Subject: CN=COMODO RSA Certification Authority O=COMODO CA Limited +# Label: "COMODO RSA Certification Authority" +# Serial: 101909084537582093308941363524873193117 +# MD5 Fingerprint: 1b:31:b0:71:40:36:cc:14:36:91:ad:c4:3e:fd:ec:18 +# SHA1 Fingerprint: af:e5:d2:44:a8:d1:19:42:30:ff:47:9f:e2:f8:97:bb:cd:7a:8c:b4 +# SHA256 Fingerprint: 52:f0:e1:c4:e5:8e:c6:29:29:1b:60:31:7f:07:46:71:b8:5d:7e:a8:0d:5b:07:27:34:63:53:4b:32:b4:02:34 +-----BEGIN CERTIFICATE----- +MIIF2DCCA8CgAwIBAgIQTKr5yttjb+Af907YWwOGnTANBgkqhkiG9w0BAQwFADCB +hTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G +A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxKzApBgNV +BAMTIkNPTU9ETyBSU0EgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkwHhcNMTAwMTE5 +MDAwMDAwWhcNMzgwMTE4MjM1OTU5WjCBhTELMAkGA1UEBhMCR0IxGzAZBgNVBAgT 
+EkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4GA1UEBxMHU2FsZm9yZDEaMBgGA1UEChMR +Q09NT0RPIENBIExpbWl0ZWQxKzApBgNVBAMTIkNPTU9ETyBSU0EgQ2VydGlmaWNh +dGlvbiBBdXRob3JpdHkwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCR +6FSS0gpWsawNJN3Fz0RndJkrN6N9I3AAcbxT38T6KhKPS38QVr2fcHK3YX/JSw8X +pz3jsARh7v8Rl8f0hj4K+j5c+ZPmNHrZFGvnnLOFoIJ6dq9xkNfs/Q36nGz637CC +9BR++b7Epi9Pf5l/tfxnQ3K9DADWietrLNPtj5gcFKt+5eNu/Nio5JIk2kNrYrhV +/erBvGy2i/MOjZrkm2xpmfh4SDBF1a3hDTxFYPwyllEnvGfDyi62a+pGx8cgoLEf +Zd5ICLqkTqnyg0Y3hOvozIFIQ2dOciqbXL1MGyiKXCJ7tKuY2e7gUYPDCUZObT6Z ++pUX2nwzV0E8jVHtC7ZcryxjGt9XyD+86V3Em69FmeKjWiS0uqlWPc9vqv9JWL7w +qP/0uK3pN/u6uPQLOvnoQ0IeidiEyxPx2bvhiWC4jChWrBQdnArncevPDt09qZah +SL0896+1DSJMwBGB7FY79tOi4lu3sgQiUpWAk2nojkxl8ZEDLXB0AuqLZxUpaVIC +u9ffUGpVRr+goyhhf3DQw6KqLCGqR84onAZFdr+CGCe01a60y1Dma/RMhnEw6abf +Fobg2P9A3fvQQoh/ozM6LlweQRGBY84YcWsr7KaKtzFcOmpH4MN5WdYgGq/yapiq +crxXStJLnbsQ/LBMQeXtHT1eKJ2czL+zUdqnR+WEUwIDAQABo0IwQDAdBgNVHQ4E +FgQUu69+Aj36pvE8hI6t7jiY7NkyMtQwDgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB +/wQFMAMBAf8wDQYJKoZIhvcNAQEMBQADggIBAArx1UaEt65Ru2yyTUEUAJNMnMvl +wFTPoCWOAvn9sKIN9SCYPBMtrFaisNZ+EZLpLrqeLppysb0ZRGxhNaKatBYSaVqM +4dc+pBroLwP0rmEdEBsqpIt6xf4FpuHA1sj+nq6PK7o9mfjYcwlYRm6mnPTXJ9OV +2jeDchzTc+CiR5kDOF3VSXkAKRzH7JsgHAckaVd4sjn8OoSgtZx8jb8uk2Intzna +FxiuvTwJaP+EmzzV1gsD41eeFPfR60/IvYcjt7ZJQ3mFXLrrkguhxuhoqEwWsRqZ +CuhTLJK7oQkYdQxlqHvLI7cawiiFwxv/0Cti76R7CZGYZ4wUAc1oBmpjIXUDgIiK +boHGhfKppC3n9KUkEEeDys30jXlYsQab5xoq2Z0B15R97QNKyvDb6KkBPvVWmcke +jkk9u+UJueBPSZI9FoJAzMxZxuY67RIuaTxslbH9qh17f4a+Hg4yRvv7E491f0yL +S0Zj/gA0QHDBw7mh3aZw4gSzQbzpgJHqZJx64SIDqZxubw5lT2yHh17zbqD5daWb +QOhTsiedSrnAdyGN/4fy3ryM7xfft0kL0fJuMAsaDk527RH89elWsn2/x20Kk4yl +0MC2Hb46TpSi125sC8KKfPog88Tk5c0NqMuRkrF8hey1FGlmDoLnzc7ILaZRfyHB +NVOFBkpdn627G190 +-----END CERTIFICATE----- + +# Issuer: CN=USERTrust RSA Certification Authority O=The USERTRUST Network +# Subject: CN=USERTrust RSA Certification Authority O=The USERTRUST Network +# Label: "USERTrust RSA Certification Authority" +# Serial: 2645093764781058787591871645665788717 +# MD5 Fingerprint: 1b:fe:69:d1:91:b7:19:33:a3:72:a8:0f:e1:55:e5:b5 +# SHA1 Fingerprint: 2b:8f:1b:57:33:0d:bb:a2:d0:7a:6c:51:f7:0e:e9:0d:da:b9:ad:8e +# SHA256 Fingerprint: e7:93:c9:b0:2f:d8:aa:13:e2:1c:31:22:8a:cc:b0:81:19:64:3b:74:9c:89:89:64:b1:74:6d:46:c3:d4:cb:d2 +-----BEGIN CERTIFICATE----- +MIIF3jCCA8agAwIBAgIQAf1tMPyjylGoG7xkDjUDLTANBgkqhkiG9w0BAQwFADCB +iDELMAkGA1UEBhMCVVMxEzARBgNVBAgTCk5ldyBKZXJzZXkxFDASBgNVBAcTC0pl +cnNleSBDaXR5MR4wHAYDVQQKExVUaGUgVVNFUlRSVVNUIE5ldHdvcmsxLjAsBgNV +BAMTJVVTRVJUcnVzdCBSU0EgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkwHhcNMTAw +MjAxMDAwMDAwWhcNMzgwMTE4MjM1OTU5WjCBiDELMAkGA1UEBhMCVVMxEzARBgNV +BAgTCk5ldyBKZXJzZXkxFDASBgNVBAcTC0plcnNleSBDaXR5MR4wHAYDVQQKExVU +aGUgVVNFUlRSVVNUIE5ldHdvcmsxLjAsBgNVBAMTJVVTRVJUcnVzdCBSU0EgQ2Vy +dGlmaWNhdGlvbiBBdXRob3JpdHkwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIK +AoICAQCAEmUXNg7D2wiz0KxXDXbtzSfTTK1Qg2HiqiBNCS1kCdzOiZ/MPans9s/B +3PHTsdZ7NygRK0faOca8Ohm0X6a9fZ2jY0K2dvKpOyuR+OJv0OwWIJAJPuLodMkY +tJHUYmTbf6MG8YgYapAiPLz+E/CHFHv25B+O1ORRxhFnRghRy4YUVD+8M/5+bJz/ +Fp0YvVGONaanZshyZ9shZrHUm3gDwFA66Mzw3LyeTP6vBZY1H1dat//O+T23LLb2 +VN3I5xI6Ta5MirdcmrS3ID3KfyI0rn47aGYBROcBTkZTmzNg95S+UzeQc0PzMsNT +79uq/nROacdrjGCT3sTHDN/hMq7MkztReJVni+49Vv4M0GkPGw/zJSZrM233bkf6 +c0Plfg6lZrEpfDKEY1WJxA3Bk1QwGROs0303p+tdOmw1XNtB1xLaqUkL39iAigmT +Yo61Zs8liM2EuLE/pDkP2QKe6xJMlXzzawWpXhaDzLhn4ugTncxbgtNMs+1b/97l +c6wjOy0AvzVVdAlJ2ElYGn+SNuZRkg7zJn0cTRe8yexDJtC/QV9AqURE9JnnV4ee +UB9XVKg+/XRjL7FQZQnmWEIuQxpMtPAlR1n6BB6T1CZGSlCBst6+eLf8ZxXhyVeE 
+Hg9j1uliutZfVS7qXMYoCAQlObgOK6nyTJccBz8NUvXt7y+CDwIDAQABo0IwQDAd +BgNVHQ4EFgQUU3m/WqorSs9UgOHYm8Cd8rIDZsswDgYDVR0PAQH/BAQDAgEGMA8G +A1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEMBQADggIBAFzUfA3P9wF9QZllDHPF +Up/L+M+ZBn8b2kMVn54CVVeWFPFSPCeHlCjtHzoBN6J2/FNQwISbxmtOuowhT6KO +VWKR82kV2LyI48SqC/3vqOlLVSoGIG1VeCkZ7l8wXEskEVX/JJpuXior7gtNn3/3 +ATiUFJVDBwn7YKnuHKsSjKCaXqeYalltiz8I+8jRRa8YFWSQEg9zKC7F4iRO/Fjs +8PRF/iKz6y+O0tlFYQXBl2+odnKPi4w2r78NBc5xjeambx9spnFixdjQg3IM8WcR +iQycE0xyNN+81XHfqnHd4blsjDwSXWXavVcStkNr/+XeTWYRUc+ZruwXtuhxkYze +Sf7dNXGiFSeUHM9h4ya7b6NnJSFd5t0dCy5oGzuCr+yDZ4XUmFF0sbmZgIn/f3gZ +XHlKYC6SQK5MNyosycdiyA5d9zZbyuAlJQG03RoHnHcAP9Dc1ew91Pq7P8yF1m9/ +qS3fuQL39ZeatTXaw2ewh0qpKJ4jjv9cJ2vhsE/zB+4ALtRZh8tSQZXq9EfX7mRB +VXyNWQKV3WKdwrnuWih0hKWbt5DHDAff9Yk2dDLWKMGwsAvgnEzDHNb842m1R0aB +L6KCq9NjRHDEjf8tM7qtj3u1cIiuPhnPQCjY/MiQu12ZIvVS5ljFH4gxQ+6IHdfG +jjxDah2nGN59PRbxYvnKkKj9 +-----END CERTIFICATE----- + +# Issuer: CN=USERTrust ECC Certification Authority O=The USERTRUST Network +# Subject: CN=USERTrust ECC Certification Authority O=The USERTRUST Network +# Label: "USERTrust ECC Certification Authority" +# Serial: 123013823720199481456569720443997572134 +# MD5 Fingerprint: fa:68:bc:d9:b5:7f:ad:fd:c9:1d:06:83:28:cc:24:c1 +# SHA1 Fingerprint: d1:cb:ca:5d:b2:d5:2a:7f:69:3b:67:4d:e5:f0:5a:1d:0c:95:7d:f0 +# SHA256 Fingerprint: 4f:f4:60:d5:4b:9c:86:da:bf:bc:fc:57:12:e0:40:0d:2b:ed:3f:bc:4d:4f:bd:aa:86:e0:6a:dc:d2:a9:ad:7a +-----BEGIN CERTIFICATE----- +MIICjzCCAhWgAwIBAgIQXIuZxVqUxdJxVt7NiYDMJjAKBggqhkjOPQQDAzCBiDEL +MAkGA1UEBhMCVVMxEzARBgNVBAgTCk5ldyBKZXJzZXkxFDASBgNVBAcTC0plcnNl +eSBDaXR5MR4wHAYDVQQKExVUaGUgVVNFUlRSVVNUIE5ldHdvcmsxLjAsBgNVBAMT +JVVTRVJUcnVzdCBFQ0MgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkwHhcNMTAwMjAx +MDAwMDAwWhcNMzgwMTE4MjM1OTU5WjCBiDELMAkGA1UEBhMCVVMxEzARBgNVBAgT +Ck5ldyBKZXJzZXkxFDASBgNVBAcTC0plcnNleSBDaXR5MR4wHAYDVQQKExVUaGUg +VVNFUlRSVVNUIE5ldHdvcmsxLjAsBgNVBAMTJVVTRVJUcnVzdCBFQ0MgQ2VydGlm +aWNhdGlvbiBBdXRob3JpdHkwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAAQarFRaqflo +I+d61SRvU8Za2EurxtW20eZzca7dnNYMYf3boIkDuAUU7FfO7l0/4iGzzvfUinng +o4N+LZfQYcTxmdwlkWOrfzCjtHDix6EznPO/LlxTsV+zfTJ/ijTjeXmjQjBAMB0G +A1UdDgQWBBQ64QmG1M8ZwpZ2dEl23OA1xmNjmjAOBgNVHQ8BAf8EBAMCAQYwDwYD +VR0TAQH/BAUwAwEB/zAKBggqhkjOPQQDAwNoADBlAjA2Z6EWCNzklwBBHU6+4WMB +zzuqQhFkoJ2UOQIReVx7Hfpkue4WQrO/isIJxOzksU0CMQDpKmFHjFJKS04YcPbW +RNZu9YO6bVi9JNlWSOrvxKJGgYhqOkbRqZtNyWHa0V1Xahg= +-----END CERTIFICATE----- + +# Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R4 +# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R4 +# Label: "GlobalSign ECC Root CA - R4" +# Serial: 14367148294922964480859022125800977897474 +# MD5 Fingerprint: 20:f0:27:68:d1:7e:a0:9d:0e:e6:2a:ca:df:5c:89:8e +# SHA1 Fingerprint: 69:69:56:2e:40:80:f4:24:a1:e7:19:9f:14:ba:f3:ee:58:ab:6a:bb +# SHA256 Fingerprint: be:c9:49:11:c2:95:56:76:db:6c:0a:55:09:86:d7:6e:3b:a0:05:66:7c:44:2c:97:62:b4:fb:b7:73:de:22:8c +-----BEGIN CERTIFICATE----- +MIIB4TCCAYegAwIBAgIRKjikHJYKBN5CsiilC+g0mAIwCgYIKoZIzj0EAwIwUDEk +MCIGA1UECxMbR2xvYmFsU2lnbiBFQ0MgUm9vdCBDQSAtIFI0MRMwEQYDVQQKEwpH +bG9iYWxTaWduMRMwEQYDVQQDEwpHbG9iYWxTaWduMB4XDTEyMTExMzAwMDAwMFoX +DTM4MDExOTAzMTQwN1owUDEkMCIGA1UECxMbR2xvYmFsU2lnbiBFQ0MgUm9vdCBD +QSAtIFI0MRMwEQYDVQQKEwpHbG9iYWxTaWduMRMwEQYDVQQDEwpHbG9iYWxTaWdu +MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEuMZ5049sJQ6fLjkZHAOkrprlOQcJ +FspjsbmG+IpXwVfOQvpzofdlQv8ewQCybnMO/8ch5RikqtlxP6jUuc6MHaNCMEAw +DgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFFSwe61F +uOJAf/sKbvu+M8k8o4TVMAoGCCqGSM49BAMCA0gAMEUCIQDckqGgE6bPA7DmxCGX 
+kPoUVy0D7O48027KqGx2vKLeuwIgJ6iFJzWbVsaj8kfSt24bAgAXqmemFZHe+pTs +ewv4n4Q= +-----END CERTIFICATE----- + +# Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R5 +# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R5 +# Label: "GlobalSign ECC Root CA - R5" +# Serial: 32785792099990507226680698011560947931244 +# MD5 Fingerprint: 9f:ad:3b:1c:02:1e:8a:ba:17:74:38:81:0c:a2:bc:08 +# SHA1 Fingerprint: 1f:24:c6:30:cd:a4:18:ef:20:69:ff:ad:4f:dd:5f:46:3a:1b:69:aa +# SHA256 Fingerprint: 17:9f:bc:14:8a:3d:d0:0f:d2:4e:a1:34:58:cc:43:bf:a7:f5:9c:81:82:d7:83:a5:13:f6:eb:ec:10:0c:89:24 +-----BEGIN CERTIFICATE----- +MIICHjCCAaSgAwIBAgIRYFlJ4CYuu1X5CneKcflK2GwwCgYIKoZIzj0EAwMwUDEk +MCIGA1UECxMbR2xvYmFsU2lnbiBFQ0MgUm9vdCBDQSAtIFI1MRMwEQYDVQQKEwpH +bG9iYWxTaWduMRMwEQYDVQQDEwpHbG9iYWxTaWduMB4XDTEyMTExMzAwMDAwMFoX +DTM4MDExOTAzMTQwN1owUDEkMCIGA1UECxMbR2xvYmFsU2lnbiBFQ0MgUm9vdCBD +QSAtIFI1MRMwEQYDVQQKEwpHbG9iYWxTaWduMRMwEQYDVQQDEwpHbG9iYWxTaWdu +MHYwEAYHKoZIzj0CAQYFK4EEACIDYgAER0UOlvt9Xb/pOdEh+J8LttV7HpI6SFkc +8GIxLcB6KP4ap1yztsyX50XUWPrRd21DosCHZTQKH3rd6zwzocWdTaRvQZU4f8ke +hOvRnkmSh5SHDDqFSmafnVmTTZdhBoZKo0IwQDAOBgNVHQ8BAf8EBAMCAQYwDwYD +VR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUPeYpSJvqB8ohREom3m7e0oPQn1kwCgYI +KoZIzj0EAwMDaAAwZQIxAOVpEslu28YxuglB4Zf4+/2a4n0Sye18ZNPLBSWLVtmg +515dTguDnFt2KaAJJiFqYgIwcdK1j1zqO+F4CYWodZI7yFz9SO8NdCKoCOJuxUnO +xwy8p2Fp8fc74SrL+SvzZpA3 +-----END CERTIFICATE----- + +# Issuer: CN=Staat der Nederlanden Root CA - G3 O=Staat der Nederlanden +# Subject: CN=Staat der Nederlanden Root CA - G3 O=Staat der Nederlanden +# Label: "Staat der Nederlanden Root CA - G3" +# Serial: 10003001 +# MD5 Fingerprint: 0b:46:67:07:db:10:2f:19:8c:35:50:60:d1:0b:f4:37 +# SHA1 Fingerprint: d8:eb:6b:41:51:92:59:e0:f3:e7:85:00:c0:3d:b6:88:97:c9:ee:fc +# SHA256 Fingerprint: 3c:4f:b0:b9:5a:b8:b3:00:32:f4:32:b8:6f:53:5f:e1:72:c1:85:d0:fd:39:86:58:37:cf:36:18:7f:a6:f4:28 +-----BEGIN CERTIFICATE----- +MIIFdDCCA1ygAwIBAgIEAJiiOTANBgkqhkiG9w0BAQsFADBaMQswCQYDVQQGEwJO +TDEeMBwGA1UECgwVU3RhYXQgZGVyIE5lZGVybGFuZGVuMSswKQYDVQQDDCJTdGFh +dCBkZXIgTmVkZXJsYW5kZW4gUm9vdCBDQSAtIEczMB4XDTEzMTExNDExMjg0MloX +DTI4MTExMzIzMDAwMFowWjELMAkGA1UEBhMCTkwxHjAcBgNVBAoMFVN0YWF0IGRl +ciBOZWRlcmxhbmRlbjErMCkGA1UEAwwiU3RhYXQgZGVyIE5lZGVybGFuZGVuIFJv +b3QgQ0EgLSBHMzCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAL4yolQP +cPssXFnrbMSkUeiFKrPMSjTysF/zDsccPVMeiAho2G89rcKezIJnByeHaHE6n3WW +IkYFsO2tx1ueKt6c/DrGlaf1F2cY5y9JCAxcz+bMNO14+1Cx3Gsy8KL+tjzk7FqX +xz8ecAgwoNzFs21v0IJyEavSgWhZghe3eJJg+szeP4TrjTgzkApyI/o1zCZxMdFy +KJLZWyNtZrVtB0LrpjPOktvA9mxjeM3KTj215VKb8b475lRgsGYeCasH/lSJEULR +9yS6YHgamPfJEf0WwTUaVHXvQ9Plrk7O53vDxk5hUUurmkVLoR9BvUhTFXFkC4az +5S6+zqQbwSmEorXLCCN2QyIkHxcE1G6cxvx/K2Ya7Irl1s9N9WMJtxU51nus6+N8 +6U78dULI7ViVDAZCopz35HCz33JvWjdAidiFpNfxC95DGdRKWCyMijmev4SH8RY7 +Ngzp07TKbBlBUgmhHbBqv4LvcFEhMtwFdozL92TkA1CvjJFnq8Xy7ljY3r735zHP +bMk7ccHViLVlvMDoFxcHErVc0qsgk7TmgoNwNsXNo42ti+yjwUOH5kPiNL6VizXt +BznaqB16nzaeErAMZRKQFWDZJkBE41ZgpRDUajz9QdwOWke275dhdU/Z/seyHdTt +XUmzqWrLZoQT1Vyg3N9udwbRcXXIV2+vD3dbAgMBAAGjQjBAMA8GA1UdEwEB/wQF +MAMBAf8wDgYDVR0PAQH/BAQDAgEGMB0GA1UdDgQWBBRUrfrHkleuyjWcLhL75Lpd +INyUVzANBgkqhkiG9w0BAQsFAAOCAgEAMJmdBTLIXg47mAE6iqTnB/d6+Oea31BD +U5cqPco8R5gu4RV78ZLzYdqQJRZlwJ9UXQ4DO1t3ApyEtg2YXzTdO2PCwyiBwpwp +LiniyMMB8jPqKqrMCQj3ZWfGzd/TtiunvczRDnBfuCPRy5FOCvTIeuXZYzbB1N/8 +Ipf3YF3qKS9Ysr1YvY2WTxB1v0h7PVGHoTx0IsL8B3+A3MSs/mrBcDCw6Y5p4ixp +gZQJut3+TcCDjJRYwEYgr5wfAvg1VUkvRtTA8KCWAg8zxXHzniN9lLf9OtMJgwYh +/WA9rjLA0u6NpvDntIJ8CsxwyXmA+P5M9zWEGYox+wrZ13+b8KKaa8MFSu1BYBQw 
+0aoRQm7TIwIEC8Zl3d1Sd9qBa7Ko+gE4uZbqKmxnl4mUnrzhVNXkanjvSr0rmj1A +fsbAddJu+2gw7OyLnflJNZoaLNmzlTnVHpL3prllL+U9bTpITAjc5CgSKL59NVzq +4BZ+Extq1z7XnvwtdbLBFNUjA9tbbws+eC8N3jONFrdI54OagQ97wUNNVQQXOEpR +1VmiiXTTn74eS9fGbbeIJG9gkaSChVtWQbzQRKtqE77RLFi3EjNYsjdj3BP1lB0/ +QFH1T/U67cjF68IeHRaVesd+QnGTbksVtzDfqu1XhUisHWrdOWnk4Xl4vs4Fv6EM +94B7IWcnMFk= +-----END CERTIFICATE----- + +# Issuer: CN=Staat der Nederlanden EV Root CA O=Staat der Nederlanden +# Subject: CN=Staat der Nederlanden EV Root CA O=Staat der Nederlanden +# Label: "Staat der Nederlanden EV Root CA" +# Serial: 10000013 +# MD5 Fingerprint: fc:06:af:7b:e8:1a:f1:9a:b4:e8:d2:70:1f:c0:f5:ba +# SHA1 Fingerprint: 76:e2:7e:c1:4f:db:82:c1:c0:a6:75:b5:05:be:3d:29:b4:ed:db:bb +# SHA256 Fingerprint: 4d:24:91:41:4c:fe:95:67:46:ec:4c:ef:a6:cf:6f:72:e2:8a:13:29:43:2f:9d:8a:90:7a:c4:cb:5d:ad:c1:5a +-----BEGIN CERTIFICATE----- +MIIFcDCCA1igAwIBAgIEAJiWjTANBgkqhkiG9w0BAQsFADBYMQswCQYDVQQGEwJO +TDEeMBwGA1UECgwVU3RhYXQgZGVyIE5lZGVybGFuZGVuMSkwJwYDVQQDDCBTdGFh +dCBkZXIgTmVkZXJsYW5kZW4gRVYgUm9vdCBDQTAeFw0xMDEyMDgxMTE5MjlaFw0y +MjEyMDgxMTEwMjhaMFgxCzAJBgNVBAYTAk5MMR4wHAYDVQQKDBVTdGFhdCBkZXIg +TmVkZXJsYW5kZW4xKTAnBgNVBAMMIFN0YWF0IGRlciBOZWRlcmxhbmRlbiBFViBS +b290IENBMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA48d+ifkkSzrS +M4M1LGns3Amk41GoJSt5uAg94JG6hIXGhaTK5skuU6TJJB79VWZxXSzFYGgEt9nC +UiY4iKTWO0Cmws0/zZiTs1QUWJZV1VD+hq2kY39ch/aO5ieSZxeSAgMs3NZmdO3d +Z//BYY1jTw+bbRcwJu+r0h8QoPnFfxZpgQNH7R5ojXKhTbImxrpsX23Wr9GxE46p +rfNeaXUmGD5BKyF/7otdBwadQ8QpCiv8Kj6GyzyDOvnJDdrFmeK8eEEzduG/L13l +pJhQDBXd4Pqcfzho0LKmeqfRMb1+ilgnQ7O6M5HTp5gVXJrm0w912fxBmJc+qiXb +j5IusHsMX/FjqTf5m3VpTCgmJdrV8hJwRVXj33NeN/UhbJCONVrJ0yPr08C+eKxC +KFhmpUZtcALXEPlLVPxdhkqHz3/KRawRWrUgUY0viEeXOcDPusBCAUCZSCELa6fS +/ZbV0b5GnUngC6agIk440ME8MLxwjyx1zNDFjFE7PZQIZCZhfbnDZY8UnCHQqv0X +cgOPvZuM5l5Tnrmd74K74bzickFbIZTTRTeU0d8JOV3nI6qaHcptqAqGhYqCvkIH +1vI4gnPah1vlPNOePqc7nvQDs/nxfRN0Av+7oeX6AHkcpmZBiFxgV6YuCcS6/ZrP +px9Aw7vMWgpVSzs4dlG4Y4uElBbmVvMCAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB +/zAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFP6rAJCYniT8qcwaivsnuL8wbqg7 +MA0GCSqGSIb3DQEBCwUAA4ICAQDPdyxuVr5Os7aEAJSrR8kN0nbHhp8dB9O2tLsI +eK9p0gtJ3jPFrK3CiAJ9Brc1AsFgyb/E6JTe1NOpEyVa/m6irn0F3H3zbPB+po3u +2dfOWBfoqSmuc0iH55vKbimhZF8ZE/euBhD/UcabTVUlT5OZEAFTdfETzsemQUHS +v4ilf0X8rLiltTMMgsT7B/Zq5SWEXwbKwYY5EdtYzXc7LMJMD16a4/CrPmEbUCTC +wPTxGfARKbalGAKb12NMcIxHowNDXLldRqANb/9Zjr7dn3LDWyvfjFvO5QxGbJKy +CqNMVEIYFRIYvdr8unRu/8G2oGTYqV9Vrp9canaW2HNnh/tNf1zuacpzEPuKqf2e +vTY4SUmH9A4U8OmHuD+nT3pajnnUk+S7aFKErGzp85hwVXIy+TSrK0m1zSBi5Dp6 +Z2Orltxtrpfs/J92VoguZs9btsmksNcFuuEnL5O7Jiqik7Ab846+HUCjuTaPPoIa +Gl6I6lD4WeKDRikL40Rc4ZW2aZCaFG+XroHPaO+Zmr615+F/+PoTRxZMzG0IQOeL +eG9QgkRQP2YGiqtDhFZKDyAthg710tvSeopLzaXoTvFeJiUBWSOgftL2fiFX1ye8 +FVdMpEbB4IMeDExNH08GGeL5qPQ6gqGyeUN51q1veieQA6TqJIc/2b3Z6fJfUEkc +7uzXLg== +-----END CERTIFICATE----- + +# Issuer: CN=IdenTrust Commercial Root CA 1 O=IdenTrust +# Subject: CN=IdenTrust Commercial Root CA 1 O=IdenTrust +# Label: "IdenTrust Commercial Root CA 1" +# Serial: 13298821034946342390520003877796839426 +# MD5 Fingerprint: b3:3e:77:73:75:ee:a0:d3:e3:7e:49:63:49:59:bb:c7 +# SHA1 Fingerprint: df:71:7e:aa:4a:d9:4e:c9:55:84:99:60:2d:48:de:5f:bc:f0:3a:25 +# SHA256 Fingerprint: 5d:56:49:9b:e4:d2:e0:8b:cf:ca:d0:8a:3e:38:72:3d:50:50:3b:de:70:69:48:e4:2f:55:60:30:19:e5:28:ae +-----BEGIN CERTIFICATE----- +MIIFYDCCA0igAwIBAgIQCgFCgAAAAUUjyES1AAAAAjANBgkqhkiG9w0BAQsFADBK +MQswCQYDVQQGEwJVUzESMBAGA1UEChMJSWRlblRydXN0MScwJQYDVQQDEx5JZGVu +VHJ1c3QgQ29tbWVyY2lhbCBSb290IENBIDEwHhcNMTQwMTE2MTgxMjIzWhcNMzQw 
+MTE2MTgxMjIzWjBKMQswCQYDVQQGEwJVUzESMBAGA1UEChMJSWRlblRydXN0MScw +JQYDVQQDEx5JZGVuVHJ1c3QgQ29tbWVyY2lhbCBSb290IENBIDEwggIiMA0GCSqG +SIb3DQEBAQUAA4ICDwAwggIKAoICAQCnUBneP5k91DNG8W9RYYKyqU+PZ4ldhNlT +3Qwo2dfw/66VQ3KZ+bVdfIrBQuExUHTRgQ18zZshq0PirK1ehm7zCYofWjK9ouuU ++ehcCuz/mNKvcbO0U59Oh++SvL3sTzIwiEsXXlfEU8L2ApeN2WIrvyQfYo3fw7gp +S0l4PJNgiCL8mdo2yMKi1CxUAGc1bnO/AljwpN3lsKImesrgNqUZFvX9t++uP0D1 +bVoE/c40yiTcdCMbXTMTEl3EASX2MN0CXZ/g1Ue9tOsbobtJSdifWwLziuQkkORi +T0/Br4sOdBeo0XKIanoBScy0RnnGF7HamB4HWfp1IYVl3ZBWzvurpWCdxJ35UrCL +vYf5jysjCiN2O/cz4ckA82n5S6LgTrx+kzmEB/dEcH7+B1rlsazRGMzyNeVJSQjK +Vsk9+w8YfYs7wRPCTY/JTw436R+hDmrfYi7LNQZReSzIJTj0+kuniVyc0uMNOYZK +dHzVWYfCP04MXFL0PfdSgvHqo6z9STQaKPNBiDoT7uje/5kdX7rL6B7yuVBgwDHT +c+XvvqDtMwt0viAgxGds8AgDelWAf0ZOlqf0Hj7h9tgJ4TNkK2PXMl6f+cB7D3hv +l7yTmvmcEpB4eoCHFddydJxVdHixuuFucAS6T6C6aMN7/zHwcz09lCqxC0EOoP5N +iGVreTO01wIDAQABo0IwQDAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB +/zAdBgNVHQ4EFgQU7UQZwNPwBovupHu+QucmVMiONnYwDQYJKoZIhvcNAQELBQAD +ggIBAA2ukDL2pkt8RHYZYR4nKM1eVO8lvOMIkPkp165oCOGUAFjvLi5+U1KMtlwH +6oi6mYtQlNeCgN9hCQCTrQ0U5s7B8jeUeLBfnLOic7iPBZM4zY0+sLj7wM+x8uwt +LRvM7Kqas6pgghstO8OEPVeKlh6cdbjTMM1gCIOQ045U8U1mwF10A0Cj7oV+wh93 +nAbowacYXVKV7cndJZ5t+qntozo00Fl72u1Q8zW/7esUTTHHYPTa8Yec4kjixsU3 ++wYQ+nVZZjFHKdp2mhzpgq7vmrlR94gjmmmVYjzlVYA211QC//G5Xc7UI2/YRYRK +W2XviQzdFKcgyxilJbQN+QHwotL0AMh0jqEqSI5l2xPE4iUXfeu+h1sXIFRRk0pT +AwvsXcoz7WL9RccvW9xYoIA55vrX/hMUpu09lEpCdNTDd1lzzY9GvlU47/rokTLq +l1gEIt44w8y8bckzOmoKaT+gyOpyj4xjhiO9bTyWnpXgSUyqorkqG5w2gXjtw+hG +4iZZRHUe2XWJUc0QhJ1hYMtd+ZciTY6Y5uN/9lu7rs3KSoFrXgvzUeF0K+l+J6fZ +mUlO+KWA2yUPHGNiiskzZ2s8EIPGrd6ozRaOjfAHN3Gf8qv8QfXBi+wAN10J5U6A +7/qxXDgGpRtK4dw4LTzcqx+QGtVKnO7RcGzM7vRX+Bi6hG6H +-----END CERTIFICATE----- + +# Issuer: CN=IdenTrust Public Sector Root CA 1 O=IdenTrust +# Subject: CN=IdenTrust Public Sector Root CA 1 O=IdenTrust +# Label: "IdenTrust Public Sector Root CA 1" +# Serial: 13298821034946342390521976156843933698 +# MD5 Fingerprint: 37:06:a5:b0:fc:89:9d:ba:f4:6b:8c:1a:64:cd:d5:ba +# SHA1 Fingerprint: ba:29:41:60:77:98:3f:f4:f3:ef:f2:31:05:3b:2e:ea:6d:4d:45:fd +# SHA256 Fingerprint: 30:d0:89:5a:9a:44:8a:26:20:91:63:55:22:d1:f5:20:10:b5:86:7a:ca:e1:2c:78:ef:95:8f:d4:f4:38:9f:2f +-----BEGIN CERTIFICATE----- +MIIFZjCCA06gAwIBAgIQCgFCgAAAAUUjz0Z8AAAAAjANBgkqhkiG9w0BAQsFADBN +MQswCQYDVQQGEwJVUzESMBAGA1UEChMJSWRlblRydXN0MSowKAYDVQQDEyFJZGVu +VHJ1c3QgUHVibGljIFNlY3RvciBSb290IENBIDEwHhcNMTQwMTE2MTc1MzMyWhcN +MzQwMTE2MTc1MzMyWjBNMQswCQYDVQQGEwJVUzESMBAGA1UEChMJSWRlblRydXN0 +MSowKAYDVQQDEyFJZGVuVHJ1c3QgUHVibGljIFNlY3RvciBSb290IENBIDEwggIi +MA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQC2IpT8pEiv6EdrCvsnduTyP4o7 +ekosMSqMjbCpwzFrqHd2hCa2rIFCDQjrVVi7evi8ZX3yoG2LqEfpYnYeEe4IFNGy +RBb06tD6Hi9e28tzQa68ALBKK0CyrOE7S8ItneShm+waOh7wCLPQ5CQ1B5+ctMlS +bdsHyo+1W/CD80/HLaXIrcuVIKQxKFdYWuSNG5qrng0M8gozOSI5Cpcu81N3uURF +/YTLNiCBWS2ab21ISGHKTN9T0a9SvESfqy9rg3LvdYDaBjMbXcjaY8ZNzaxmMc3R +3j6HEDbhuaR672BQssvKplbgN6+rNBM5Jeg5ZuSYeqoSmJxZZoY+rfGwyj4GD3vw +EUs3oERte8uojHH01bWRNszwFcYr3lEXsZdMUD2xlVl8BX0tIdUAvwFnol57plzy +9yLxkA2T26pEUWbMfXYD62qoKjgZl3YNa4ph+bz27nb9cCvdKTz4Ch5bQhyLVi9V +GxyhLrXHFub4qjySjmm2AcG1hp2JDws4lFTo6tyePSW8Uybt1as5qsVATFSrsrTZ +2fjXctscvG29ZV/viDUqZi/u9rNl8DONfJhBaUYPQxxp+pu10GFqzcpL2UyQRqsV +WaFHVCkugyhfHMKiq3IXAAaOReyL4jM9f9oZRORicsPfIsbyVtTdX5Vy7W1f90gD +W/3FKqD2cyOEEBsB5wIDAQABo0IwQDAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/ +BAUwAwEB/zAdBgNVHQ4EFgQU43HgntinQtnbcZFrlJPrw6PRFKMwDQYJKoZIhvcN +AQELBQADggIBAEf63QqwEZE4rU1d9+UOl1QZgkiHVIyqZJnYWv6IAcVYpZmxI1Qj 
+t2odIFflAWJBF9MJ23XLblSQdf4an4EKwt3X9wnQW3IV5B4Jaj0z8yGa5hV+rVHV +DRDtfULAj+7AmgjVQdZcDiFpboBhDhXAuM/FSRJSzL46zNQuOAXeNf0fb7iAaJg9 +TaDKQGXSc3z1i9kKlT/YPyNtGtEqJBnZhbMX73huqVjRI9PHE+1yJX9dsXNw0H8G +lwmEKYBhHfpe/3OsoOOJuBxxFcbeMX8S3OFtm6/n6J91eEyrRjuazr8FGF1NFTwW +mhlQBJqymm9li1JfPFgEKCXAZmExfrngdbkaqIHWchezxQMxNRF4eKLg6TCMf4Df +WN88uieW4oA0beOY02QnrEh+KHdcxiVhJfiFDGX6xDIvpZgF5PgLZxYWxoK4Mhn5 ++bl53B/N66+rDt0b20XkeucC4pVd/GnwU2lhlXV5C15V5jgclKlZM57IcXR5f1GJ +tshquDDIajjDbp7hNxbqBWJMWxJH7ae0s1hWx0nzfxJoCTFx8G34Tkf71oXuxVhA +GaQdp/lLQzfcaFpPz+vCZHTetBXZ9FRUGi8c15dxVJCO2SCdUyt/q4/i6jC8UDfv +8Ue1fXwsBOxonbRJRBD0ckscZOf85muQ3Wl9af0AVqW3rLatt8o+Ae+c +-----END CERTIFICATE----- + +# Issuer: CN=Entrust Root Certification Authority - G2 O=Entrust, Inc. OU=See www.entrust.net/legal-terms/(c) 2009 Entrust, Inc. - for authorized use only +# Subject: CN=Entrust Root Certification Authority - G2 O=Entrust, Inc. OU=See www.entrust.net/legal-terms/(c) 2009 Entrust, Inc. - for authorized use only +# Label: "Entrust Root Certification Authority - G2" +# Serial: 1246989352 +# MD5 Fingerprint: 4b:e2:c9:91:96:65:0c:f4:0e:5a:93:92:a0:0a:fe:b2 +# SHA1 Fingerprint: 8c:f4:27:fd:79:0c:3a:d1:66:06:8d:e8:1e:57:ef:bb:93:22:72:d4 +# SHA256 Fingerprint: 43:df:57:74:b0:3e:7f:ef:5f:e4:0d:93:1a:7b:ed:f1:bb:2e:6b:42:73:8c:4e:6d:38:41:10:3d:3a:a7:f3:39 +-----BEGIN CERTIFICATE----- +MIIEPjCCAyagAwIBAgIESlOMKDANBgkqhkiG9w0BAQsFADCBvjELMAkGA1UEBhMC +VVMxFjAUBgNVBAoTDUVudHJ1c3QsIEluYy4xKDAmBgNVBAsTH1NlZSB3d3cuZW50 +cnVzdC5uZXQvbGVnYWwtdGVybXMxOTA3BgNVBAsTMChjKSAyMDA5IEVudHJ1c3Qs +IEluYy4gLSBmb3IgYXV0aG9yaXplZCB1c2Ugb25seTEyMDAGA1UEAxMpRW50cnVz +dCBSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5IC0gRzIwHhcNMDkwNzA3MTcy +NTU0WhcNMzAxMjA3MTc1NTU0WjCBvjELMAkGA1UEBhMCVVMxFjAUBgNVBAoTDUVu +dHJ1c3QsIEluYy4xKDAmBgNVBAsTH1NlZSB3d3cuZW50cnVzdC5uZXQvbGVnYWwt +dGVybXMxOTA3BgNVBAsTMChjKSAyMDA5IEVudHJ1c3QsIEluYy4gLSBmb3IgYXV0 +aG9yaXplZCB1c2Ugb25seTEyMDAGA1UEAxMpRW50cnVzdCBSb290IENlcnRpZmlj +YXRpb24gQXV0aG9yaXR5IC0gRzIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK +AoIBAQC6hLZy254Ma+KZ6TABp3bqMriVQRrJ2mFOWHLP/vaCeb9zYQYKpSfYs1/T +RU4cctZOMvJyig/3gxnQaoCAAEUesMfnmr8SVycco2gvCoe9amsOXmXzHHfV1IWN +cCG0szLni6LVhjkCsbjSR87kyUnEO6fe+1R9V77w6G7CebI6C1XiUJgWMhNcL3hW +wcKUs/Ja5CeanyTXxuzQmyWC48zCxEXFjJd6BmsqEZ+pCm5IO2/b1BEZQvePB7/1 +U1+cPvQXLOZprE4yTGJ36rfo5bs0vBmLrpxR57d+tVOxMyLlbc9wPBr64ptntoP0 +jaWvYkxN4FisZDQSA/i2jZRjJKRxAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAP +BgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBRqciZ60B7vfec7aVHUbI2fkBJmqzAN +BgkqhkiG9w0BAQsFAAOCAQEAeZ8dlsa2eT8ijYfThwMEYGprmi5ZiXMRrEPR9RP/ +jTkrwPK9T3CMqS/qF8QLVJ7UG5aYMzyorWKiAHarWWluBh1+xLlEjZivEtRh2woZ +Rkfz6/djwUAFQKXSt/S1mja/qYh2iARVBCuch38aNzx+LaUa2NSJXsq9rD1s2G2v +1fN2D807iDginWyTmsQ9v4IbZT+mD12q/OWyFcq1rca8PdCE6OoGcrBNOTJ4vz4R +nAuknZoh8/CbCzB428Hch0P+vGOaysXCHMnHjf87ElgI5rY97HosTvuDls4MPGmH +VHOkc8KT/1EQrBVUAdj8BbGJoX90g5pJ19xOe4pIb4tF9g== +-----END CERTIFICATE----- + +# Issuer: CN=Entrust Root Certification Authority - EC1 O=Entrust, Inc. OU=See www.entrust.net/legal-terms/(c) 2012 Entrust, Inc. - for authorized use only +# Subject: CN=Entrust Root Certification Authority - EC1 O=Entrust, Inc. OU=See www.entrust.net/legal-terms/(c) 2012 Entrust, Inc. 
- for authorized use only +# Label: "Entrust Root Certification Authority - EC1" +# Serial: 51543124481930649114116133369 +# MD5 Fingerprint: b6:7e:1d:f0:58:c5:49:6c:24:3b:3d:ed:98:18:ed:bc +# SHA1 Fingerprint: 20:d8:06:40:df:9b:25:f5:12:25:3a:11:ea:f7:59:8a:eb:14:b5:47 +# SHA256 Fingerprint: 02:ed:0e:b2:8c:14:da:45:16:5c:56:67:91:70:0d:64:51:d7:fb:56:f0:b2:ab:1d:3b:8e:b0:70:e5:6e:df:f5 +-----BEGIN CERTIFICATE----- +MIIC+TCCAoCgAwIBAgINAKaLeSkAAAAAUNCR+TAKBggqhkjOPQQDAzCBvzELMAkG +A1UEBhMCVVMxFjAUBgNVBAoTDUVudHJ1c3QsIEluYy4xKDAmBgNVBAsTH1NlZSB3 +d3cuZW50cnVzdC5uZXQvbGVnYWwtdGVybXMxOTA3BgNVBAsTMChjKSAyMDEyIEVu +dHJ1c3QsIEluYy4gLSBmb3IgYXV0aG9yaXplZCB1c2Ugb25seTEzMDEGA1UEAxMq +RW50cnVzdCBSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5IC0gRUMxMB4XDTEy +MTIxODE1MjUzNloXDTM3MTIxODE1NTUzNlowgb8xCzAJBgNVBAYTAlVTMRYwFAYD +VQQKEw1FbnRydXN0LCBJbmMuMSgwJgYDVQQLEx9TZWUgd3d3LmVudHJ1c3QubmV0 +L2xlZ2FsLXRlcm1zMTkwNwYDVQQLEzAoYykgMjAxMiBFbnRydXN0LCBJbmMuIC0g +Zm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxMzAxBgNVBAMTKkVudHJ1c3QgUm9vdCBD +ZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEVDMTB2MBAGByqGSM49AgEGBSuBBAAi +A2IABIQTydC6bUF74mzQ61VfZgIaJPRbiWlH47jCffHyAsWfoPZb1YsGGYZPUxBt +ByQnoaD41UcZYUx9ypMn6nQM72+WCf5j7HBdNq1nd67JnXxVRDqiY1Ef9eNi1KlH +Bz7MIKNCMEAwDgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0O +BBYEFLdj5xrdjekIplWDpOBqUEFlEUJJMAoGCCqGSM49BAMDA2cAMGQCMGF52OVC +R98crlOZF7ZvHH3hvxGU0QOIdeSNiaSKd0bebWHvAvX7td/M/k7//qnmpwIwW5nX +hTcGtXsI/esni0qU+eH6p44mCOh8kmhtc9hvJqwhAriZtyZBWyVgrtBIGu4G +-----END CERTIFICATE----- + +# Issuer: CN=CFCA EV ROOT O=China Financial Certification Authority +# Subject: CN=CFCA EV ROOT O=China Financial Certification Authority +# Label: "CFCA EV ROOT" +# Serial: 407555286 +# MD5 Fingerprint: 74:e1:b6:ed:26:7a:7a:44:30:33:94:ab:7b:27:81:30 +# SHA1 Fingerprint: e2:b8:29:4b:55:84:ab:6b:58:c2:90:46:6c:ac:3f:b8:39:8f:84:83 +# SHA256 Fingerprint: 5c:c3:d7:8e:4e:1d:5e:45:54:7a:04:e6:87:3e:64:f9:0c:f9:53:6d:1c:cc:2e:f8:00:f3:55:c4:c5:fd:70:fd +-----BEGIN CERTIFICATE----- +MIIFjTCCA3WgAwIBAgIEGErM1jANBgkqhkiG9w0BAQsFADBWMQswCQYDVQQGEwJD +TjEwMC4GA1UECgwnQ2hpbmEgRmluYW5jaWFsIENlcnRpZmljYXRpb24gQXV0aG9y +aXR5MRUwEwYDVQQDDAxDRkNBIEVWIFJPT1QwHhcNMTIwODA4MDMwNzAxWhcNMjkx +MjMxMDMwNzAxWjBWMQswCQYDVQQGEwJDTjEwMC4GA1UECgwnQ2hpbmEgRmluYW5j +aWFsIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MRUwEwYDVQQDDAxDRkNBIEVWIFJP +T1QwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDXXWvNED8fBVnVBU03 +sQ7smCuOFR36k0sXgiFxEFLXUWRwFsJVaU2OFW2fvwwbwuCjZ9YMrM8irq93VCpL +TIpTUnrD7i7es3ElweldPe6hL6P3KjzJIx1qqx2hp/Hz7KDVRM8Vz3IvHWOX6Jn5 +/ZOkVIBMUtRSqy5J35DNuF++P96hyk0g1CXohClTt7GIH//62pCfCqktQT+x8Rgp +7hZZLDRJGqgG16iI0gNyejLi6mhNbiyWZXvKWfry4t3uMCz7zEasxGPrb382KzRz +EpR/38wmnvFyXVBlWY9ps4deMm/DGIq1lY+wejfeWkU7xzbh72fROdOXW3NiGUgt +hxwG+3SYIElz8AXSG7Ggo7cbcNOIabla1jj0Ytwli3i/+Oh+uFzJlU9fpy25IGvP +a931DfSCt/SyZi4QKPaXWnuWFo8BGS1sbn85WAZkgwGDg8NNkt0yxoekN+kWzqot +aK8KgWU6cMGbrU1tVMoqLUuFG7OA5nBFDWteNfB/O7ic5ARwiRIlk9oKmSJgamNg +TnYGmE69g60dWIolhdLHZR4tjsbftsbhf4oEIRUpdPA+nJCdDC7xij5aqgwJHsfV +PKPtl8MeNPo4+QgO48BdK4PRVmrJtqhUUy54Mmc9gn900PvhtgVguXDbjgv5E1hv +cWAQUhC5wUEJ73IfZzF4/5YFjQIDAQABo2MwYTAfBgNVHSMEGDAWgBTj/i39KNAL +tbq2osS/BqoFjJP7LzAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIBBjAd +BgNVHQ4EFgQU4/4t/SjQC7W6tqLEvwaqBYyT+y8wDQYJKoZIhvcNAQELBQADggIB +ACXGumvrh8vegjmWPfBEp2uEcwPenStPuiB/vHiyz5ewG5zz13ku9Ui20vsXiObT +ej/tUxPQ4i9qecsAIyjmHjdXNYmEwnZPNDatZ8POQQaIxffu2Bq41gt/UP+TqhdL +jOztUmCypAbqTuv0axn96/Ua4CUqmtzHQTb3yHQFhDmVOdYLO6Qn+gjYXB74BGBS +ESgoA//vU2YApUo0FmZ8/Qmkrp5nGm9BC2sGE5uPhnEFtC+NiWYzKXZUmhH4J/qy 
+P5Hgzg0b8zAarb8iXRvTvyUFTeGSGn+ZnzxEk8rUQElsgIfXBDrDMlI1Dlb4pd19 +xIsNER9Tyx6yF7Zod1rg1MvIB671Oi6ON7fQAUtDKXeMOZePglr4UeWJoBjnaH9d +Ci77o0cOPaYjesYBx4/IXr9tgFa+iiS6M+qf4TIRnvHST4D2G0CvOJ4RUHlzEhLN +5mydLIhyPDCBBpEi6lmt2hkuIsKNuYyH4Ga8cyNfIWRjgEj1oDwYPZTISEEdQLpe +/v5WOaHIz16eGWRGENoXkbcFgKyLmZJ956LYBws2J+dIeWCKw9cTXPhyQN9Ky8+Z +AAoACxGV2lZFA4gKn2fQ1XmxqI1AbQ3CekD6819kR5LLU7m7Wc5P/dAVUwHY3+vZ +5nbv0CO7O6l5s9UCKc2Jo5YPSjXnTkLAdc0Hz+Ys63su +-----END CERTIFICATE----- + +# Issuer: CN=OISTE WISeKey Global Root GB CA O=WISeKey OU=OISTE Foundation Endorsed +# Subject: CN=OISTE WISeKey Global Root GB CA O=WISeKey OU=OISTE Foundation Endorsed +# Label: "OISTE WISeKey Global Root GB CA" +# Serial: 157768595616588414422159278966750757568 +# MD5 Fingerprint: a4:eb:b9:61:28:2e:b7:2f:98:b0:35:26:90:99:51:1d +# SHA1 Fingerprint: 0f:f9:40:76:18:d3:d7:6a:4b:98:f0:a8:35:9e:0c:fd:27:ac:cc:ed +# SHA256 Fingerprint: 6b:9c:08:e8:6e:b0:f7:67:cf:ad:65:cd:98:b6:21:49:e5:49:4a:67:f5:84:5e:7b:d1:ed:01:9f:27:b8:6b:d6 +-----BEGIN CERTIFICATE----- +MIIDtTCCAp2gAwIBAgIQdrEgUnTwhYdGs/gjGvbCwDANBgkqhkiG9w0BAQsFADBt +MQswCQYDVQQGEwJDSDEQMA4GA1UEChMHV0lTZUtleTEiMCAGA1UECxMZT0lTVEUg +Rm91bmRhdGlvbiBFbmRvcnNlZDEoMCYGA1UEAxMfT0lTVEUgV0lTZUtleSBHbG9i +YWwgUm9vdCBHQiBDQTAeFw0xNDEyMDExNTAwMzJaFw0zOTEyMDExNTEwMzFaMG0x +CzAJBgNVBAYTAkNIMRAwDgYDVQQKEwdXSVNlS2V5MSIwIAYDVQQLExlPSVNURSBG +b3VuZGF0aW9uIEVuZG9yc2VkMSgwJgYDVQQDEx9PSVNURSBXSVNlS2V5IEdsb2Jh +bCBSb290IEdCIENBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA2Be3 +HEokKtaXscriHvt9OO+Y9bI5mE4nuBFde9IllIiCFSZqGzG7qFshISvYD06fWvGx +WuR51jIjK+FTzJlFXHtPrby/h0oLS5daqPZI7H17Dc0hBt+eFf1Biki3IPShehtX +1F1Q/7pn2COZH8g/497/b1t3sWtuuMlk9+HKQUYOKXHQuSP8yYFfTvdv37+ErXNk +u7dCjmn21HYdfp2nuFeKUWdy19SouJVUQHMD9ur06/4oQnc/nSMbsrY9gBQHTC5P +99UKFg29ZkM3fiNDecNAhvVMKdqOmq0NpQSHiB6F4+lT1ZvIiwNjeOvgGUpuuy9r +M2RYk61pv48b74JIxwIDAQABo1EwTzALBgNVHQ8EBAMCAYYwDwYDVR0TAQH/BAUw +AwEB/zAdBgNVHQ4EFgQUNQ/INmNe4qPs+TtmFc5RUuORmj0wEAYJKwYBBAGCNxUB +BAMCAQAwDQYJKoZIhvcNAQELBQADggEBAEBM+4eymYGQfp3FsLAmzYh7KzKNbrgh +cViXfa43FK8+5/ea4n32cZiZBKpDdHij40lhPnOMTZTg+XHEthYOU3gf1qKHLwI5 +gSk8rxWYITD+KJAAjNHhy/peyP34EEY7onhCkRd0VQreUGdNZtGn//3ZwLWoo4rO +ZvUPQ82nK1d7Y0Zqqi5S2PTt4W2tKZB4SLrhI6qjiey1q5bAtEuiHZeeevJuQHHf +aPFlTc58Bd9TZaml8LGXBHAVRgOY1NK/VLSgWH1Sb9pWJmLU2NuJMW8c8CLC02Ic +Nc1MaRVUGpCY3useX8p3x8uOPUNpnJpY0CQ73xtAln41rYHHTnG6iBM= +-----END CERTIFICATE----- + +# Issuer: CN=SZAFIR ROOT CA2 O=Krajowa Izba Rozliczeniowa S.A. +# Subject: CN=SZAFIR ROOT CA2 O=Krajowa Izba Rozliczeniowa S.A. 
+# Label: "SZAFIR ROOT CA2" +# Serial: 357043034767186914217277344587386743377558296292 +# MD5 Fingerprint: 11:64:c1:89:b0:24:b1:8c:b1:07:7e:89:9e:51:9e:99 +# SHA1 Fingerprint: e2:52:fa:95:3f:ed:db:24:60:bd:6e:28:f3:9c:cc:cf:5e:b3:3f:de +# SHA256 Fingerprint: a1:33:9d:33:28:1a:0b:56:e5:57:d3:d3:2b:1c:e7:f9:36:7e:b0:94:bd:5f:a7:2a:7e:50:04:c8:de:d7:ca:fe +-----BEGIN CERTIFICATE----- +MIIDcjCCAlqgAwIBAgIUPopdB+xV0jLVt+O2XwHrLdzk1uQwDQYJKoZIhvcNAQEL +BQAwUTELMAkGA1UEBhMCUEwxKDAmBgNVBAoMH0tyYWpvd2EgSXpiYSBSb3psaWN6 +ZW5pb3dhIFMuQS4xGDAWBgNVBAMMD1NaQUZJUiBST09UIENBMjAeFw0xNTEwMTkw +NzQzMzBaFw0zNTEwMTkwNzQzMzBaMFExCzAJBgNVBAYTAlBMMSgwJgYDVQQKDB9L +cmFqb3dhIEl6YmEgUm96bGljemVuaW93YSBTLkEuMRgwFgYDVQQDDA9TWkFGSVIg +Uk9PVCBDQTIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC3vD5QqEvN +QLXOYeeWyrSh2gwisPq1e3YAd4wLz32ohswmUeQgPYUM1ljj5/QqGJ3a0a4m7utT +3PSQ1hNKDJA8w/Ta0o4NkjrcsbH/ON7Dui1fgLkCvUqdGw+0w8LBZwPd3BucPbOw +3gAeqDRHu5rr/gsUvTaE2g0gv/pby6kWIK05YO4vdbbnl5z5Pv1+TW9NL++IDWr6 +3fE9biCloBK0TXC5ztdyO4mTp4CEHCdJckm1/zuVnsHMyAHs6A6KCpbns6aH5db5 +BSsNl0BwPLqsdVqc1U2dAgrSS5tmS0YHF2Wtn2yIANwiieDhZNRnvDF5YTy7ykHN +XGoAyDw4jlivAgMBAAGjQjBAMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQD +AgEGMB0GA1UdDgQWBBQuFqlKGLXLzPVvUPMjX/hd56zwyDANBgkqhkiG9w0BAQsF +AAOCAQEAtXP4A9xZWx126aMqe5Aosk3AM0+qmrHUuOQn/6mWmc5G4G18TKI4pAZw +8PRBEew/R40/cof5O/2kbytTAOD/OblqBw7rHRz2onKQy4I9EYKL0rufKq8h5mOG +nXkZ7/e7DDWQw4rtTw/1zBLZpD67oPwglV9PJi8RI4NOdQcPv5vRtB3pEAT+ymCP +oky4rc/hkA/NrgrHXXu3UNLUYfrVFdvXn4dRVOul4+vJhaAlIDf7js4MNIThPIGy +d05DpYhfhmehPea0XGG2Ptv+tyjFogeutcrKjSoS75ftwjCkySp6+/NNIxuZMzSg +LvWpCz/UXeHPhJ/iGcJfitYgHuNztw== +-----END CERTIFICATE----- + +# Issuer: CN=Certum Trusted Network CA 2 O=Unizeto Technologies S.A. OU=Certum Certification Authority +# Subject: CN=Certum Trusted Network CA 2 O=Unizeto Technologies S.A. 
OU=Certum Certification Authority +# Label: "Certum Trusted Network CA 2" +# Serial: 44979900017204383099463764357512596969 +# MD5 Fingerprint: 6d:46:9e:d9:25:6d:08:23:5b:5e:74:7d:1e:27:db:f2 +# SHA1 Fingerprint: d3:dd:48:3e:2b:bf:4c:05:e8:af:10:f5:fa:76:26:cf:d3:dc:30:92 +# SHA256 Fingerprint: b6:76:f2:ed:da:e8:77:5c:d3:6c:b0:f6:3c:d1:d4:60:39:61:f4:9e:62:65:ba:01:3a:2f:03:07:b6:d0:b8:04 +-----BEGIN CERTIFICATE----- +MIIF0jCCA7qgAwIBAgIQIdbQSk8lD8kyN/yqXhKN6TANBgkqhkiG9w0BAQ0FADCB +gDELMAkGA1UEBhMCUEwxIjAgBgNVBAoTGVVuaXpldG8gVGVjaG5vbG9naWVzIFMu +QS4xJzAlBgNVBAsTHkNlcnR1bSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTEkMCIG +A1UEAxMbQ2VydHVtIFRydXN0ZWQgTmV0d29yayBDQSAyMCIYDzIwMTExMDA2MDgz +OTU2WhgPMjA0NjEwMDYwODM5NTZaMIGAMQswCQYDVQQGEwJQTDEiMCAGA1UEChMZ +VW5pemV0byBUZWNobm9sb2dpZXMgUy5BLjEnMCUGA1UECxMeQ2VydHVtIENlcnRp +ZmljYXRpb24gQXV0aG9yaXR5MSQwIgYDVQQDExtDZXJ0dW0gVHJ1c3RlZCBOZXR3 +b3JrIENBIDIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQC9+Xj45tWA +DGSdhhuWZGc/IjoedQF97/tcZ4zJzFxrqZHmuULlIEub2pt7uZld2ZuAS9eEQCsn +0+i6MLs+CRqnSZXvK0AkwpfHp+6bJe+oCgCXhVqqndwpyeI1B+twTUrWwbNWuKFB +OJvR+zF/j+Bf4bE/D44WSWDXBo0Y+aomEKsq09DRZ40bRr5HMNUuctHFY9rnY3lE +fktjJImGLjQ/KUxSiyqnwOKRKIm5wFv5HdnnJ63/mgKXwcZQkpsCLL2puTRZCr+E +Sv/f/rOf69me4Jgj7KZrdxYq28ytOxykh9xGc14ZYmhFV+SQgkK7QtbwYeDBoz1m +o130GO6IyY0XRSmZMnUCMe4pJshrAua1YkV/NxVaI2iJ1D7eTiew8EAMvE0Xy02i +sx7QBlrd9pPPV3WZ9fqGGmd4s7+W/jTcvedSVuWz5XV710GRBdxdaeOVDUO5/IOW +OZV7bIBaTxNyxtd9KXpEulKkKtVBRgkg/iKgtlswjbyJDNXXcPiHUv3a76xRLgez +Tv7QCdpw75j6VuZt27VXS9zlLCUVyJ4ueE742pyehizKV/Ma5ciSixqClnrDvFAS +adgOWkaLOusm+iPJtrCBvkIApPjW/jAux9JG9uWOdf3yzLnQh1vMBhBgu4M1t15n +3kfsmUjxpKEV/q2MYo45VU85FrmxY53/twIDAQABo0IwQDAPBgNVHRMBAf8EBTAD +AQH/MB0GA1UdDgQWBBS2oVQ5AsOgP46KvPrU+Bym0ToO/TAOBgNVHQ8BAf8EBAMC +AQYwDQYJKoZIhvcNAQENBQADggIBAHGlDs7k6b8/ONWJWsQCYftMxRQXLYtPU2sQ +F/xlhMcQSZDe28cmk4gmb3DWAl45oPePq5a1pRNcgRRtDoGCERuKTsZPpd1iHkTf +CVn0W3cLN+mLIMb4Ck4uWBzrM9DPhmDJ2vuAL55MYIR4PSFk1vtBHxgP58l1cb29 +XN40hz5BsA72udY/CROWFC/emh1auVbONTqwX3BNXuMp8SMoclm2q8KMZiYcdywm +djWLKKdpoPk79SPdhRB0yZADVpHnr7pH1BKXESLjokmUbOe3lEu6LaTaM4tMpkT/ +WjzGHWTYtTHkpjx6qFcL2+1hGsvxznN3Y6SHb0xRONbkX8eftoEq5IVIeVheO/jb +AoJnwTnbw3RLPTYe+SmTiGhbqEQZIfCn6IENLOiTNrQ3ssqwGyZ6miUfmpqAnksq +P/ujmv5zMnHCnsZy4YpoJ/HkD7TETKVhk/iXEAcqMCWpuchxuO9ozC1+9eB+D4Ko +b7a6bINDd82Kkhehnlt4Fj1F4jNy3eFmypnTycUm/Q1oBEauttmbjL4ZvrHG8hnj +XALKLNhvSgfZyTXaQHXyxKcZb55CEJh15pWLYLztxRLXis7VmFxWlgPF7ncGNf/P +5O4/E2Hu29othfDNrp2yGAlFw5Khchf8R7agCyzxxN5DaAhqXzvwdmP7zAYspsbi +DrW5viSP +-----END CERTIFICATE----- + +# Issuer: CN=Hellenic Academic and Research Institutions RootCA 2015 O=Hellenic Academic and Research Institutions Cert. Authority +# Subject: CN=Hellenic Academic and Research Institutions RootCA 2015 O=Hellenic Academic and Research Institutions Cert. 
Authority +# Label: "Hellenic Academic and Research Institutions RootCA 2015" +# Serial: 0 +# MD5 Fingerprint: ca:ff:e2:db:03:d9:cb:4b:e9:0f:ad:84:fd:7b:18:ce +# SHA1 Fingerprint: 01:0c:06:95:a6:98:19:14:ff:bf:5f:c6:b0:b6:95:ea:29:e9:12:a6 +# SHA256 Fingerprint: a0:40:92:9a:02:ce:53:b4:ac:f4:f2:ff:c6:98:1c:e4:49:6f:75:5e:6d:45:fe:0b:2a:69:2b:cd:52:52:3f:36 +-----BEGIN CERTIFICATE----- +MIIGCzCCA/OgAwIBAgIBADANBgkqhkiG9w0BAQsFADCBpjELMAkGA1UEBhMCR1Ix +DzANBgNVBAcTBkF0aGVuczFEMEIGA1UEChM7SGVsbGVuaWMgQWNhZGVtaWMgYW5k +IFJlc2VhcmNoIEluc3RpdHV0aW9ucyBDZXJ0LiBBdXRob3JpdHkxQDA+BgNVBAMT +N0hlbGxlbmljIEFjYWRlbWljIGFuZCBSZXNlYXJjaCBJbnN0aXR1dGlvbnMgUm9v +dENBIDIwMTUwHhcNMTUwNzA3MTAxMTIxWhcNNDAwNjMwMTAxMTIxWjCBpjELMAkG +A1UEBhMCR1IxDzANBgNVBAcTBkF0aGVuczFEMEIGA1UEChM7SGVsbGVuaWMgQWNh +ZGVtaWMgYW5kIFJlc2VhcmNoIEluc3RpdHV0aW9ucyBDZXJ0LiBBdXRob3JpdHkx +QDA+BgNVBAMTN0hlbGxlbmljIEFjYWRlbWljIGFuZCBSZXNlYXJjaCBJbnN0aXR1 +dGlvbnMgUm9vdENBIDIwMTUwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoIC +AQDC+Kk/G4n8PDwEXT2QNrCROnk8ZlrvbTkBSRq0t89/TSNTt5AA4xMqKKYx8ZEA +4yjsriFBzh/a/X0SWwGDD7mwX5nh8hKDgE0GPt+sr+ehiGsxr/CL0BgzuNtFajT0 +AoAkKAoCFZVedioNmToUW/bLy1O8E00BiDeUJRtCvCLYjqOWXjrZMts+6PAQZe10 +4S+nfK8nNLspfZu2zwnI5dMK/IhlZXQK3HMcXM1AsRzUtoSMTFDPaI6oWa7CJ06C +ojXdFPQf/7J31Ycvqm59JCfnxssm5uX+Zwdj2EUN3TpZZTlYepKZcj2chF6IIbjV +9Cz82XBST3i4vTwri5WY9bPRaM8gFH5MXF/ni+X1NYEZN9cRCLdmvtNKzoNXADrD +gfgXy5I2XdGj2HUb4Ysn6npIQf1FGQatJ5lOwXBH3bWfgVMS5bGMSF0xQxfjjMZ6 +Y5ZLKTBOhE5iGV48zpeQpX8B653g+IuJ3SWYPZK2fu/Z8VFRfS0myGlZYeCsargq +NhEEelC9MoS+L9xy1dcdFkfkR2YgP/SWxa+OAXqlD3pk9Q0Yh9muiNX6hME6wGko +LfINaFGq46V3xqSQDqE3izEjR8EJCOtu93ib14L8hCCZSRm2Ekax+0VVFqmjZayc +Bw/qa9wfLgZy7IaIEuQt218FL+TwA9MmM+eAws1CoRc0CwIDAQABo0IwQDAPBgNV +HRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUcRVnyMjJvXVd +ctA4GGqd83EkVAswDQYJKoZIhvcNAQELBQADggIBAHW7bVRLqhBYRjTyYtcWNl0I +XtVsyIe9tC5G8jH4fOpCtZMWVdyhDBKg2mF+D1hYc2Ryx+hFjtyp8iY/xnmMsVMI +M4GwVhO+5lFc2JsKT0ucVlMC6U/2DWDqTUJV6HwbISHTGzrMd/K4kPFox/la/vot +9L/J9UUbzjgQKjeKeaO04wlshYaT/4mWJ3iBj2fjRnRUjtkNaeJK9E10A/+yd+2V +Z5fkscWrv2oj6NSU4kQoYsRL4vDY4ilrGnB+JGGTe08DMiUNRSQrlrRGar9KC/ea +j8GsGsVn82800vpzY4zvFrCopEYq+OsS7HK07/grfoxSwIuEVPkvPuNVqNxmsdnh +X9izjFk0WaSrT2y7HxjbdavYy5LNlDhhDgcGH0tGEPEVvo2FXDtKK4F5D7Rpn0lQ +l033DlZdwJVqwjbDG2jJ9SrcR5q+ss7FJej6A7na+RZukYT1HCjI/CbM1xyQVqdf +bzoEvM14iQuODy+jqk+iGxI9FghAD/FGTNeqewjBCvVtJ94Cj8rDtSvK6evIIVM4 +pcw72Hc3MKJP2W/R8kCtQXoXxdZKNYm3QdV8hn9VTYNKpXMgwDqvkPGaJI7ZjnHK +e7iG2rKPmT4dEw0SEe7Uq/DpFXYC5ODfqiAeW2GFZECpkJcNrVPSWh2HagCXZWK0 +vm9qp/UsQu0yrbYhnr68 +-----END CERTIFICATE----- + +# Issuer: CN=Hellenic Academic and Research Institutions ECC RootCA 2015 O=Hellenic Academic and Research Institutions Cert. Authority +# Subject: CN=Hellenic Academic and Research Institutions ECC RootCA 2015 O=Hellenic Academic and Research Institutions Cert. 
Authority +# Label: "Hellenic Academic and Research Institutions ECC RootCA 2015" +# Serial: 0 +# MD5 Fingerprint: 81:e5:b4:17:eb:c2:f5:e1:4b:0d:41:7b:49:92:fe:ef +# SHA1 Fingerprint: 9f:f1:71:8d:92:d5:9a:f3:7d:74:97:b4:bc:6f:84:68:0b:ba:b6:66 +# SHA256 Fingerprint: 44:b5:45:aa:8a:25:e6:5a:73:ca:15:dc:27:fc:36:d2:4c:1c:b9:95:3a:06:65:39:b1:15:82:dc:48:7b:48:33 +-----BEGIN CERTIFICATE----- +MIICwzCCAkqgAwIBAgIBADAKBggqhkjOPQQDAjCBqjELMAkGA1UEBhMCR1IxDzAN +BgNVBAcTBkF0aGVuczFEMEIGA1UEChM7SGVsbGVuaWMgQWNhZGVtaWMgYW5kIFJl +c2VhcmNoIEluc3RpdHV0aW9ucyBDZXJ0LiBBdXRob3JpdHkxRDBCBgNVBAMTO0hl +bGxlbmljIEFjYWRlbWljIGFuZCBSZXNlYXJjaCBJbnN0aXR1dGlvbnMgRUNDIFJv +b3RDQSAyMDE1MB4XDTE1MDcwNzEwMzcxMloXDTQwMDYzMDEwMzcxMlowgaoxCzAJ +BgNVBAYTAkdSMQ8wDQYDVQQHEwZBdGhlbnMxRDBCBgNVBAoTO0hlbGxlbmljIEFj +YWRlbWljIGFuZCBSZXNlYXJjaCBJbnN0aXR1dGlvbnMgQ2VydC4gQXV0aG9yaXR5 +MUQwQgYDVQQDEztIZWxsZW5pYyBBY2FkZW1pYyBhbmQgUmVzZWFyY2ggSW5zdGl0 +dXRpb25zIEVDQyBSb290Q0EgMjAxNTB2MBAGByqGSM49AgEGBSuBBAAiA2IABJKg +QehLgoRc4vgxEZmGZE4JJS+dQS8KrjVPdJWyUWRrjWvmP3CV8AVER6ZyOFB2lQJa +jq4onvktTpnvLEhvTCUp6NFxW98dwXU3tNf6e3pCnGoKVlp8aQuqgAkkbH7BRqNC +MEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFLQi +C4KZJAEOnLvkDv2/+5cgk5kqMAoGCCqGSM49BAMCA2cAMGQCMGfOFmI4oqxiRaep +lSTAGiecMjvAwNW6qef4BENThe5SId6d9SWDPp5YSy/XZxMOIQIwBeF1Ad5o7Sof +TUwJCA3sS61kFyjndc5FZXIhF8siQQ6ME5g4mlRtm8rifOoCWCKR +-----END CERTIFICATE----- + +# Issuer: CN=ISRG Root X1 O=Internet Security Research Group +# Subject: CN=ISRG Root X1 O=Internet Security Research Group +# Label: "ISRG Root X1" +# Serial: 172886928669790476064670243504169061120 +# MD5 Fingerprint: 0c:d2:f9:e0:da:17:73:e9:ed:86:4d:a5:e3:70:e7:4e +# SHA1 Fingerprint: ca:bd:2a:79:a1:07:6a:31:f2:1d:25:36:35:cb:03:9d:43:29:a5:e8 +# SHA256 Fingerprint: 96:bc:ec:06:26:49:76:f3:74:60:77:9a:cf:28:c5:a7:cf:e8:a3:c0:aa:e1:1a:8f:fc:ee:05:c0:bd:df:08:c6 +-----BEGIN CERTIFICATE----- +MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw +TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh +cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4 +WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu +ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY +MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc +h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+ +0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U +A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW +T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH +B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC +B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv +KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn +OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn +jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw +qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI +rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV +HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq +hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL +ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ +3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK +NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5 +ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur +TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC +jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc 
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq +4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA +mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d +emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc= +-----END CERTIFICATE----- + +# Issuer: O=FNMT-RCM OU=AC RAIZ FNMT-RCM +# Subject: O=FNMT-RCM OU=AC RAIZ FNMT-RCM +# Label: "AC RAIZ FNMT-RCM" +# Serial: 485876308206448804701554682760554759 +# MD5 Fingerprint: e2:09:04:b4:d3:bd:d1:a0:14:fd:1a:d2:47:c4:57:1d +# SHA1 Fingerprint: ec:50:35:07:b2:15:c4:95:62:19:e2:a8:9a:5b:42:99:2c:4c:2c:20 +# SHA256 Fingerprint: eb:c5:57:0c:29:01:8c:4d:67:b1:aa:12:7b:af:12:f7:03:b4:61:1e:bc:17:b7:da:b5:57:38:94:17:9b:93:fa +-----BEGIN CERTIFICATE----- +MIIFgzCCA2ugAwIBAgIPXZONMGc2yAYdGsdUhGkHMA0GCSqGSIb3DQEBCwUAMDsx +CzAJBgNVBAYTAkVTMREwDwYDVQQKDAhGTk1ULVJDTTEZMBcGA1UECwwQQUMgUkFJ +WiBGTk1ULVJDTTAeFw0wODEwMjkxNTU5NTZaFw0zMDAxMDEwMDAwMDBaMDsxCzAJ +BgNVBAYTAkVTMREwDwYDVQQKDAhGTk1ULVJDTTEZMBcGA1UECwwQQUMgUkFJWiBG +Tk1ULVJDTTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALpxgHpMhm5/ +yBNtwMZ9HACXjywMI7sQmkCpGreHiPibVmr75nuOi5KOpyVdWRHbNi63URcfqQgf +BBckWKo3Shjf5TnUV/3XwSyRAZHiItQDwFj8d0fsjz50Q7qsNI1NOHZnjrDIbzAz +WHFctPVrbtQBULgTfmxKo0nRIBnuvMApGGWn3v7v3QqQIecaZ5JCEJhfTzC8PhxF +tBDXaEAUwED653cXeuYLj2VbPNmaUtu1vZ5Gzz3rkQUCwJaydkxNEJY7kvqcfw+Z +374jNUUeAlz+taibmSXaXvMiwzn15Cou08YfxGyqxRxqAQVKL9LFwag0Jl1mpdIC +IfkYtwb1TplvqKtMUejPUBjFd8g5CSxJkjKZqLsXF3mwWsXmo8RZZUc1g16p6DUL +mbvkzSDGm0oGObVo/CK67lWMK07q87Hj/LaZmtVC+nFNCM+HHmpxffnTtOmlcYF7 +wk5HlqX2doWjKI/pgG6BU6VtX7hI+cL5NqYuSf+4lsKMB7ObiFj86xsc3i1w4peS +MKGJ47xVqCfWS+2QrYv6YyVZLag13cqXM7zlzced0ezvXg5KkAYmY6252TUtB7p2 +ZSysV4999AeU14ECll2jB0nVetBX+RvnU0Z1qrB5QstocQjpYL05ac70r8NWQMet +UqIJ5G+GR4of6ygnXYMgrwTJbFaai0b1AgMBAAGjgYMwgYAwDwYDVR0TAQH/BAUw +AwEB/zAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFPd9xf3E6Jobd2Sn9R2gzL+H +YJptMD4GA1UdIAQ3MDUwMwYEVR0gADArMCkGCCsGAQUFBwIBFh1odHRwOi8vd3d3 +LmNlcnQuZm5tdC5lcy9kcGNzLzANBgkqhkiG9w0BAQsFAAOCAgEAB5BK3/MjTvDD +nFFlm5wioooMhfNzKWtN/gHiqQxjAb8EZ6WdmF/9ARP67Jpi6Yb+tmLSbkyU+8B1 +RXxlDPiyN8+sD8+Nb/kZ94/sHvJwnvDKuO+3/3Y3dlv2bojzr2IyIpMNOmqOFGYM +LVN0V2Ue1bLdI4E7pWYjJ2cJj+F3qkPNZVEI7VFY/uY5+ctHhKQV8Xa7pO6kO8Rf +77IzlhEYt8llvhjho6Tc+hj507wTmzl6NLrTQfv6MooqtyuGC2mDOL7Nii4LcK2N +JpLuHvUBKwrZ1pebbuCoGRw6IYsMHkCtA+fdZn71uSANA+iW+YJF1DngoABd15jm +fZ5nc8OaKveri6E6FO80vFIOiZiaBECEHX5FaZNXzuvO+FB8TxxuBEOb+dY7Ixjp +6o7RTUaN8Tvkasq6+yO3m/qZASlaWFot4/nUbQ4mrcFuNLwy+AwF+mWj2zs3gyLp +1txyM/1d8iC9djwj2ij3+RvrWWTV3F9yfiD8zYm1kGdNYno/Tq0dwzn+evQoFt9B +9kiABdcPUXmsEKvU7ANm5mqwujGSQkBqvjrTcuFqN1W8rB2Vt2lh8kORdOag0wok +RqEIr9baRRmW1FMdW4R58MD3R++Lj8UGrp1MYp3/RgT408m2ECVAdf4WqslKYIYv +uu8wd+RU4riEmViAqhOLUTpPSPaLtrM= +-----END CERTIFICATE----- + +# Issuer: CN=Amazon Root CA 1 O=Amazon +# Subject: CN=Amazon Root CA 1 O=Amazon +# Label: "Amazon Root CA 1" +# Serial: 143266978916655856878034712317230054538369994 +# MD5 Fingerprint: 43:c6:bf:ae:ec:fe:ad:2f:18:c6:88:68:30:fc:c8:e6 +# SHA1 Fingerprint: 8d:a7:f9:65:ec:5e:fc:37:91:0f:1c:6e:59:fd:c1:cc:6a:6e:de:16 +# SHA256 Fingerprint: 8e:cd:e6:88:4f:3d:87:b1:12:5b:a3:1a:c3:fc:b1:3d:70:16:de:7f:57:cc:90:4f:e1:cb:97:c6:ae:98:19:6e +-----BEGIN CERTIFICATE----- +MIIDQTCCAimgAwIBAgITBmyfz5m/jAo54vB4ikPmljZbyjANBgkqhkiG9w0BAQsF +ADA5MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6 +b24gUm9vdCBDQSAxMB4XDTE1MDUyNjAwMDAwMFoXDTM4MDExNzAwMDAwMFowOTEL +MAkGA1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJv +b3QgQ0EgMTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALJ4gHHKeNXj 
+ca9HgFB0fW7Y14h29Jlo91ghYPl0hAEvrAIthtOgQ3pOsqTQNroBvo3bSMgHFzZM +9O6II8c+6zf1tRn4SWiw3te5djgdYZ6k/oI2peVKVuRF4fn9tBb6dNqcmzU5L/qw +IFAGbHrQgLKm+a/sRxmPUDgH3KKHOVj4utWp+UhnMJbulHheb4mjUcAwhmahRWa6 +VOujw5H5SNz/0egwLX0tdHA114gk957EWW67c4cX8jJGKLhD+rcdqsq08p8kDi1L +93FcXmn/6pUCyziKrlA4b9v7LWIbxcceVOF34GfID5yHI9Y/QCB/IIDEgEw+OyQm +jgSubJrIqg0CAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMC +AYYwHQYDVR0OBBYEFIQYzIU07LwMlJQuCFmcx7IQTgoIMA0GCSqGSIb3DQEBCwUA +A4IBAQCY8jdaQZChGsV2USggNiMOruYou6r4lK5IpDB/G/wkjUu0yKGX9rbxenDI +U5PMCCjjmCXPI6T53iHTfIUJrU6adTrCC2qJeHZERxhlbI1Bjjt/msv0tadQ1wUs +N+gDS63pYaACbvXy8MWy7Vu33PqUXHeeE6V/Uq2V8viTO96LXFvKWlJbYK8U90vv +o/ufQJVtMVT8QtPHRh8jrdkPSHCa2XV4cdFyQzR1bldZwgJcJmApzyMZFo6IQ6XU +5MsI+yMRQ+hDKXJioaldXgjUkK642M4UwtBV8ob2xJNDd2ZhwLnoQdeXeGADbkpy +rqXRfboQnoZsG4q5WTP468SQvvG5 +-----END CERTIFICATE----- + +# Issuer: CN=Amazon Root CA 2 O=Amazon +# Subject: CN=Amazon Root CA 2 O=Amazon +# Label: "Amazon Root CA 2" +# Serial: 143266982885963551818349160658925006970653239 +# MD5 Fingerprint: c8:e5:8d:ce:a8:42:e2:7a:c0:2a:5c:7c:9e:26:bf:66 +# SHA1 Fingerprint: 5a:8c:ef:45:d7:a6:98:59:76:7a:8c:8b:44:96:b5:78:cf:47:4b:1a +# SHA256 Fingerprint: 1b:a5:b2:aa:8c:65:40:1a:82:96:01:18:f8:0b:ec:4f:62:30:4d:83:ce:c4:71:3a:19:c3:9c:01:1e:a4:6d:b4 +-----BEGIN CERTIFICATE----- +MIIFQTCCAymgAwIBAgITBmyf0pY1hp8KD+WGePhbJruKNzANBgkqhkiG9w0BAQwF +ADA5MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6 +b24gUm9vdCBDQSAyMB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTEL +MAkGA1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJv +b3QgQ0EgMjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK2Wny2cSkxK +gXlRmeyKy2tgURO8TW0G/LAIjd0ZEGrHJgw12MBvIITplLGbhQPDW9tK6Mj4kHbZ +W0/jTOgGNk3Mmqw9DJArktQGGWCsN0R5hYGCrVo34A3MnaZMUnbqQ523BNFQ9lXg +1dKmSYXpN+nKfq5clU1Imj+uIFptiJXZNLhSGkOQsL9sBbm2eLfq0OQ6PBJTYv9K +8nu+NQWpEjTj82R0Yiw9AElaKP4yRLuH3WUnAnE72kr3H9rN9yFVkE8P7K6C4Z9r +2UXTu/Bfh+08LDmG2j/e7HJV63mjrdvdfLC6HM783k81ds8P+HgfajZRRidhW+me +z/CiVX18JYpvL7TFz4QuK/0NURBs+18bvBt+xa47mAExkv8LV/SasrlX6avvDXbR +8O70zoan4G7ptGmh32n2M8ZpLpcTnqWHsFcQgTfJU7O7f/aS0ZzQGPSSbtqDT6Zj +mUyl+17vIWR6IF9sZIUVyzfpYgwLKhbcAS4y2j5L9Z469hdAlO+ekQiG+r5jqFoz +7Mt0Q5X5bGlSNscpb/xVA1wf+5+9R+vnSUeVC06JIglJ4PVhHvG/LopyboBZ/1c6 ++XUyo05f7O0oYtlNc/LMgRdg7c3r3NunysV+Ar3yVAhU/bQtCSwXVEqY0VThUWcI +0u1ufm8/0i2BWSlmy5A5lREedCf+3euvAgMBAAGjQjBAMA8GA1UdEwEB/wQFMAMB +Af8wDgYDVR0PAQH/BAQDAgGGMB0GA1UdDgQWBBSwDPBMMPQFWAJI/TPlUq9LhONm +UjANBgkqhkiG9w0BAQwFAAOCAgEAqqiAjw54o+Ci1M3m9Zh6O+oAA7CXDpO8Wqj2 +LIxyh6mx/H9z/WNxeKWHWc8w4Q0QshNabYL1auaAn6AFC2jkR2vHat+2/XcycuUY ++gn0oJMsXdKMdYV2ZZAMA3m3MSNjrXiDCYZohMr/+c8mmpJ5581LxedhpxfL86kS +k5Nrp+gvU5LEYFiwzAJRGFuFjWJZY7attN6a+yb3ACfAXVU3dJnJUH/jWS5E4ywl +7uxMMne0nxrpS10gxdr9HIcWxkPo1LsmmkVwXqkLN1PiRnsn/eBG8om3zEK2yygm +btmlyTrIQRNg91CMFa6ybRoVGld45pIq2WWQgj9sAq+uEjonljYE1x2igGOpm/Hl +urR8FLBOybEfdF849lHqm/osohHUqS0nGkWxr7JOcQ3AWEbWaQbLU8uz/mtBzUF+ +fUwPfHJ5elnNXkoOrJupmHN5fLT0zLm4BwyydFy4x2+IoZCn9Kr5v2c69BoVYh63 +n749sSmvZ6ES8lgQGVMDMBu4Gon2nL2XA46jCfMdiyHxtN/kHNGfZQIG6lzWE7OE +76KlXIx3KadowGuuQNKotOrN8I1LOJwZmhsoVLiJkO/KdYE+HvJkJMcYr07/R54H +9jVlpNMKVv/1F2Rs76giJUmTtt8AF9pYfl3uxRuw0dFfIRDH+fO6AgonB8Xx1sfT +4PsJYGw= +-----END CERTIFICATE----- + +# Issuer: CN=Amazon Root CA 3 O=Amazon +# Subject: CN=Amazon Root CA 3 O=Amazon +# Label: "Amazon Root CA 3" +# Serial: 143266986699090766294700635381230934788665930 +# MD5 Fingerprint: a0:d4:ef:0b:f7:b5:d8:49:95:2a:ec:f5:c4:fc:81:87 +# SHA1 Fingerprint: 0d:44:dd:8c:3c:8c:1a:1a:58:75:64:81:e9:0f:2e:2a:ff:b3:d2:6e +# SHA256 Fingerprint: 
18:ce:6c:fe:7b:f1:4e:60:b2:e3:47:b8:df:e8:68:cb:31:d0:2e:bb:3a:da:27:15:69:f5:03:43:b4:6d:b3:a4 +-----BEGIN CERTIFICATE----- +MIIBtjCCAVugAwIBAgITBmyf1XSXNmY/Owua2eiedgPySjAKBggqhkjOPQQDAjA5 +MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6b24g +Um9vdCBDQSAzMB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTELMAkG +A1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJvb3Qg +Q0EgMzBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABCmXp8ZBf8ANm+gBG1bG8lKl +ui2yEujSLtf6ycXYqm0fc4E7O5hrOXwzpcVOho6AF2hiRVd9RFgdszflZwjrZt6j +QjBAMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgGGMB0GA1UdDgQWBBSr +ttvXBp43rDCGB5Fwx5zEGbF4wDAKBggqhkjOPQQDAgNJADBGAiEA4IWSoxe3jfkr +BqWTrBqYaGFy+uGh0PsceGCmQ5nFuMQCIQCcAu/xlJyzlvnrxir4tiz+OpAUFteM +YyRIHN8wfdVoOw== +-----END CERTIFICATE----- + +# Issuer: CN=Amazon Root CA 4 O=Amazon +# Subject: CN=Amazon Root CA 4 O=Amazon +# Label: "Amazon Root CA 4" +# Serial: 143266989758080763974105200630763877849284878 +# MD5 Fingerprint: 89:bc:27:d5:eb:17:8d:06:6a:69:d5:fd:89:47:b4:cd +# SHA1 Fingerprint: f6:10:84:07:d6:f8:bb:67:98:0c:c2:e2:44:c2:eb:ae:1c:ef:63:be +# SHA256 Fingerprint: e3:5d:28:41:9e:d0:20:25:cf:a6:90:38:cd:62:39:62:45:8d:a5:c6:95:fb:de:a3:c2:2b:0b:fb:25:89:70:92 +-----BEGIN CERTIFICATE----- +MIIB8jCCAXigAwIBAgITBmyf18G7EEwpQ+Vxe3ssyBrBDjAKBggqhkjOPQQDAzA5 +MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6b24g +Um9vdCBDQSA0MB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTELMAkG +A1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJvb3Qg +Q0EgNDB2MBAGByqGSM49AgEGBSuBBAAiA2IABNKrijdPo1MN/sGKe0uoe0ZLY7Bi +9i0b2whxIdIA6GO9mif78DluXeo9pcmBqqNbIJhFXRbb/egQbeOc4OO9X4Ri83Bk +M6DLJC9wuoihKqB1+IGuYgbEgds5bimwHvouXKNCMEAwDwYDVR0TAQH/BAUwAwEB +/zAOBgNVHQ8BAf8EBAMCAYYwHQYDVR0OBBYEFNPsxzplbszh2naaVvuc84ZtV+WB +MAoGCCqGSM49BAMDA2gAMGUCMDqLIfG9fhGt0O9Yli/W651+kI0rz2ZVwyzjKKlw +CkcO8DdZEv8tmZQoTipPNU0zWgIxAOp1AE47xDqUEpHJWEadIRNyp4iciuRMStuW +1KyLa2tJElMzrdfkviT8tQp21KW8EA== +-----END CERTIFICATE----- + +# Issuer: CN=LuxTrust Global Root 2 O=LuxTrust S.A. +# Subject: CN=LuxTrust Global Root 2 O=LuxTrust S.A. 
+# Label: "LuxTrust Global Root 2" +# Serial: 59914338225734147123941058376788110305822489521 +# MD5 Fingerprint: b2:e1:09:00:61:af:f7:f1:91:6f:c4:ad:8d:5e:3b:7c +# SHA1 Fingerprint: 1e:0e:56:19:0a:d1:8b:25:98:b2:04:44:ff:66:8a:04:17:99:5f:3f +# SHA256 Fingerprint: 54:45:5f:71:29:c2:0b:14:47:c4:18:f9:97:16:8f:24:c5:8f:c5:02:3b:f5:da:5b:e2:eb:6e:1d:d8:90:2e:d5 +-----BEGIN CERTIFICATE----- +MIIFwzCCA6ugAwIBAgIUCn6m30tEntpqJIWe5rgV0xZ/u7EwDQYJKoZIhvcNAQEL +BQAwRjELMAkGA1UEBhMCTFUxFjAUBgNVBAoMDUx1eFRydXN0IFMuQS4xHzAdBgNV +BAMMFkx1eFRydXN0IEdsb2JhbCBSb290IDIwHhcNMTUwMzA1MTMyMTU3WhcNMzUw +MzA1MTMyMTU3WjBGMQswCQYDVQQGEwJMVTEWMBQGA1UECgwNTHV4VHJ1c3QgUy5B +LjEfMB0GA1UEAwwWTHV4VHJ1c3QgR2xvYmFsIFJvb3QgMjCCAiIwDQYJKoZIhvcN +AQEBBQADggIPADCCAgoCggIBANeFl78RmOnwYoNMPIf5U2o3C/IPPIfOb9wmKb3F +ibrJgz337spbxm1Jc7TJRqMbNBM/wYlFV/TZsfs2ZUv7COJIcRHIbjuend+JZTem +hfY7RBi2xjcwYkSSl2l9QjAk5A0MiWtj3sXh306pFGxT4GHO9hcvHTy95iJMHZP1 +EMShduxq3sVs35a0VkBCwGKSMKEtFZSg0iAGCW5qbeXrt77U8PEVfIvmTroTzEsn +Xpk8F12PgX8zPU/TPxvsXD/wPEx1bvKm1Z3aLQdjAsZy6ZS8TEmVT4hSyNvoaYL4 +zDRbIvCGp4m9SAptZoFtyMhk+wHh9OHe2Z7d21vUKpkmFRseTJIpgp7VkoGSQXAZ +96Tlk0u8d2cx3Rz9MXANF5kM+Qw5GSoXtTBxVdUPrljhPS80m8+f9niFwpN6cj5m +j5wWEWCPnolvZ77gR1o7DJpni89Gxq44o/KnvObWhWszJHAiS8sIm7vI+AIpHb4g +DEa/a4ebsypmQjVGbKq6rfmYe+lQVRQxv7HaLe2ArWgk+2mr2HETMOZns4dA/Yl+ +8kPREd8vZS9kzl8UubG/Mb2HeFpZZYiq/FkySIbWTLkpS5XTdvN3JW1CHDiDTf2j +X5t/Lax5Gw5CMZdjpPuKadUiDTSQMC6otOBttpSsvItO13D8xTiOZCXhTTmQzsmH +hFhxAgMBAAGjgagwgaUwDwYDVR0TAQH/BAUwAwEB/zBCBgNVHSAEOzA5MDcGByuB +KwEBAQowLDAqBggrBgEFBQcCARYeaHR0cHM6Ly9yZXBvc2l0b3J5Lmx1eHRydXN0 +Lmx1MA4GA1UdDwEB/wQEAwIBBjAfBgNVHSMEGDAWgBT/GCh2+UgFLKGu8SsbK7JT ++Et8szAdBgNVHQ4EFgQU/xgodvlIBSyhrvErGyuyU/hLfLMwDQYJKoZIhvcNAQEL +BQADggIBAGoZFO1uecEsh9QNcH7X9njJCwROxLHOk3D+sFTAMs2ZMGQXvw/l4jP9 +BzZAcg4atmpZ1gDlaCDdLnINH2pkMSCEfUmmWjfrRcmF9dTHF5kH5ptV5AzoqbTO +jFu1EVzPig4N1qx3gf4ynCSecs5U89BvolbW7MM3LGVYvlcAGvI1+ut7MV3CwRI9 +loGIlonBWVx65n9wNOeD4rHh4bhY79SV5GCc8JaXcozrhAIuZY+kt9J/Z93I055c +qqmkoCUUBpvsT34tC38ddfEz2O3OuHVtPlu5mB0xDVbYQw8wkbIEa91WvpWAVWe+ +2M2D2RjuLg+GLZKecBPs3lHJQ3gCpU3I+V/EkVhGFndadKpAvAefMLmx9xIX3eP/ +JEAdemrRTxgKqpAd60Ae36EeRJIQmvKN4dFLRp7oRUKX6kWZ8+xm1QL68qZKJKre +zrnK+T+Tb/mjuuqlPpmt/f97mfVl7vBZKGfXkJWkE4SphMHozs51k2MavDzq1WQf +LSoSOcbDWjLtR5EWDrw4wVDej8oqkDQc7kGUnF4ZLvhFSZl0kbAEb+MEWrGrKqv+ +x9CWttrhSmQGbmBNvUJO/3jaJMobtNeWOWyu8Q6qp31IiyBMz2TWuJdGsE7RKlY6 +oJO9r4Ak4Ap+58rVyuiFVdw2KuGUaJPHZnJED4AhMmwlxyOAgwrr +-----END CERTIFICATE----- + +# Issuer: CN=TUBITAK Kamu SM SSL Kok Sertifikasi - Surum 1 O=Turkiye Bilimsel ve Teknolojik Arastirma Kurumu - TUBITAK OU=Kamu Sertifikasyon Merkezi - Kamu SM +# Subject: CN=TUBITAK Kamu SM SSL Kok Sertifikasi - Surum 1 O=Turkiye Bilimsel ve Teknolojik Arastirma Kurumu - TUBITAK OU=Kamu Sertifikasyon Merkezi - Kamu SM +# Label: "TUBITAK Kamu SM SSL Kok Sertifikasi - Surum 1" +# Serial: 1 +# MD5 Fingerprint: dc:00:81:dc:69:2f:3e:2f:b0:3b:f6:3d:5a:91:8e:49 +# SHA1 Fingerprint: 31:43:64:9b:ec:ce:27:ec:ed:3a:3f:0b:8f:0d:e4:e8:91:dd:ee:ca +# SHA256 Fingerprint: 46:ed:c3:68:90:46:d5:3a:45:3f:b3:10:4a:b8:0d:ca:ec:65:8b:26:60:ea:16:29:dd:7e:86:79:90:64:87:16 +-----BEGIN CERTIFICATE----- +MIIEYzCCA0ugAwIBAgIBATANBgkqhkiG9w0BAQsFADCB0jELMAkGA1UEBhMCVFIx +GDAWBgNVBAcTD0dlYnplIC0gS29jYWVsaTFCMEAGA1UEChM5VHVya2l5ZSBCaWxp +bXNlbCB2ZSBUZWtub2xvamlrIEFyYXN0aXJtYSBLdXJ1bXUgLSBUVUJJVEFLMS0w +KwYDVQQLEyRLYW11IFNlcnRpZmlrYXN5b24gTWVya2V6aSAtIEthbXUgU00xNjA0 +BgNVBAMTLVRVQklUQUsgS2FtdSBTTSBTU0wgS29rIFNlcnRpZmlrYXNpIC0gU3Vy +dW0gMTAeFw0xMzExMjUwODI1NTVaFw00MzEwMjUwODI1NTVaMIHSMQswCQYDVQQG 
+EwJUUjEYMBYGA1UEBxMPR2ViemUgLSBLb2NhZWxpMUIwQAYDVQQKEzlUdXJraXll +IEJpbGltc2VsIHZlIFRla25vbG9qaWsgQXJhc3Rpcm1hIEt1cnVtdSAtIFRVQklU +QUsxLTArBgNVBAsTJEthbXUgU2VydGlmaWthc3lvbiBNZXJrZXppIC0gS2FtdSBT +TTE2MDQGA1UEAxMtVFVCSVRBSyBLYW11IFNNIFNTTCBLb2sgU2VydGlmaWthc2kg +LSBTdXJ1bSAxMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAr3UwM6q7 +a9OZLBI3hNmNe5eA027n/5tQlT6QlVZC1xl8JoSNkvoBHToP4mQ4t4y86Ij5iySr +LqP1N+RAjhgleYN1Hzv/bKjFxlb4tO2KRKOrbEz8HdDc72i9z+SqzvBV96I01INr +N3wcwv61A+xXzry0tcXtAA9TNypN9E8Mg/uGz8v+jE69h/mniyFXnHrfA2eJLJ2X +YacQuFWQfw4tJzh03+f92k4S400VIgLI4OD8D62K18lUUMw7D8oWgITQUVbDjlZ/ +iSIzL+aFCr2lqBs23tPcLG07xxO9WSMs5uWk99gL7eqQQESolbuT1dCANLZGeA4f +AJNG4e7p+exPFwIDAQABo0IwQDAdBgNVHQ4EFgQUZT/HiobGPN08VFw1+DrtUgxH +V8gwDgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEL +BQADggEBACo/4fEyjq7hmFxLXs9rHmoJ0iKpEsdeV31zVmSAhHqT5Am5EM2fKifh +AHe+SMg1qIGf5LgsyX8OsNJLN13qudULXjS99HMpw+0mFZx+CFOKWI3QSyjfwbPf +IPP54+M638yclNhOT8NrF7f3cuitZjO1JVOr4PhMqZ398g26rrnZqsZr+ZO7rqu4 +lzwDGrpDxpa5RXI4s6ehlj2Re37AIVNMh+3yC1SVUZPVIqUNivGTDj5UDrDYyU7c +8jEyVupk+eq1nRZmQnLzf9OxMUP8pI4X8W0jq5Rm+K37DwhuJi1/FwcJsoz7UMCf +lo3Ptv0AnVoUmr8CRPXBwp8iXqIPoeM= +-----END CERTIFICATE----- + +# Issuer: CN=GDCA TrustAUTH R5 ROOT O=GUANG DONG CERTIFICATE AUTHORITY CO.,LTD. +# Subject: CN=GDCA TrustAUTH R5 ROOT O=GUANG DONG CERTIFICATE AUTHORITY CO.,LTD. +# Label: "GDCA TrustAUTH R5 ROOT" +# Serial: 9009899650740120186 +# MD5 Fingerprint: 63:cc:d9:3d:34:35:5c:6f:53:a3:e2:08:70:48:1f:b4 +# SHA1 Fingerprint: 0f:36:38:5b:81:1a:25:c3:9b:31:4e:83:ca:e9:34:66:70:cc:74:b4 +# SHA256 Fingerprint: bf:ff:8f:d0:44:33:48:7d:6a:8a:a6:0c:1a:29:76:7a:9f:c2:bb:b0:5e:42:0f:71:3a:13:b9:92:89:1d:38:93 +-----BEGIN CERTIFICATE----- +MIIFiDCCA3CgAwIBAgIIfQmX/vBH6nowDQYJKoZIhvcNAQELBQAwYjELMAkGA1UE +BhMCQ04xMjAwBgNVBAoMKUdVQU5HIERPTkcgQ0VSVElGSUNBVEUgQVVUSE9SSVRZ +IENPLixMVEQuMR8wHQYDVQQDDBZHRENBIFRydXN0QVVUSCBSNSBST09UMB4XDTE0 +MTEyNjA1MTMxNVoXDTQwMTIzMTE1NTk1OVowYjELMAkGA1UEBhMCQ04xMjAwBgNV +BAoMKUdVQU5HIERPTkcgQ0VSVElGSUNBVEUgQVVUSE9SSVRZIENPLixMVEQuMR8w +HQYDVQQDDBZHRENBIFRydXN0QVVUSCBSNSBST09UMIICIjANBgkqhkiG9w0BAQEF +AAOCAg8AMIICCgKCAgEA2aMW8Mh0dHeb7zMNOwZ+Vfy1YI92hhJCfVZmPoiC7XJj +Dp6L3TQsAlFRwxn9WVSEyfFrs0yw6ehGXTjGoqcuEVe6ghWinI9tsJlKCvLriXBj +TnnEt1u9ol2x8kECK62pOqPseQrsXzrj/e+APK00mxqriCZ7VqKChh/rNYmDf1+u +KU49tm7srsHwJ5uu4/Ts765/94Y9cnrrpftZTqfrlYwiOXnhLQiPzLyRuEH3FMEj +qcOtmkVEs7LXLM3GKeJQEK5cy4KOFxg2fZfmiJqwTTQJ9Cy5WmYqsBebnh52nUpm +MUHfP/vFBu8btn4aRjb3ZGM74zkYI+dndRTVdVeSN72+ahsmUPI2JgaQxXABZG12 +ZuGR224HwGGALrIuL4xwp9E7PLOR5G62xDtw8mySlwnNR30YwPO7ng/Wi64HtloP +zgsMR6flPri9fcebNaBhlzpBdRfMK5Z3KpIhHtmVdiBnaM8Nvd/WHwlqmuLMc3Gk +L30SgLdTMEZeS1SZD2fJpcjyIMGC7J0R38IC+xo70e0gmu9lZJIQDSri3nDxGGeC +jGHeuLzRL5z7D9Ar7Rt2ueQ5Vfj4oR24qoAATILnsn8JuLwwoC8N9VKejveSswoA +HQBUlwbgsQfZxw9cZX08bVlX5O2ljelAU58VS6Bx9hoh49pwBiFYFIeFd3mqgnkC +AwEAAaNCMEAwHQYDVR0OBBYEFOLJQJ9NzuiaoXzPDj9lxSmIahlRMA8GA1UdEwEB +/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgGGMA0GCSqGSIb3DQEBCwUAA4ICAQDRSVfg +p8xoWLoBDysZzY2wYUWsEe1jUGn4H3++Fo/9nesLqjJHdtJnJO29fDMylyrHBYZm +DRd9FBUb1Ov9H5r2XpdptxolpAqzkT9fNqyL7FeoPueBihhXOYV0GkLH6VsTX4/5 +COmSdI31R9KrO9b7eGZONn356ZLpBN79SWP8bfsUcZNnL0dKt7n/HipzcEYwv1ry +L3ml4Y0M2fmyYzeMN2WFcGpcWwlyua1jPLHd+PwyvzeG5LuOmCd+uh8W4XAR8gPf +JWIyJyYYMoSf/wA6E7qaTfRPuBRwIrHKK5DOKcFw9C+df/KQHtZa37dG/OaG+svg +IHZ6uqbL9XzeYqWxi+7egmaKTjowHz+Ay60nugxe19CxVsp3cbK1daFQqUBDF8Io +2c9Si1vIY9RCPqAzekYu9wogRlR+ak8x8YF+QnQ4ZXMn7sZ8uI7XpTrXmKGcjBBV +09tL7ECQ8s1uV9JiDnxXk7Gnbc2dg7sq5+W2O3FYrf3RRbxake5TFW/TRQl1brqQ 
+XR4EzzffHqhmsYzmIGrv/EhOdJhCrylvLmrH+33RZjEizIYAfmaDDEL0vTSSwxrq +T8p+ck0LcIymSLumoRT2+1hEmRSuqguTaaApJUqlyyvdimYHFngVV3Eb7PVHhPOe +MTd61X8kreS8/f3MboPoDKi3QWwH3b08hpcv0g== +-----END CERTIFICATE----- + +# Issuer: CN=TrustCor RootCert CA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority +# Subject: CN=TrustCor RootCert CA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority +# Label: "TrustCor RootCert CA-1" +# Serial: 15752444095811006489 +# MD5 Fingerprint: 6e:85:f1:dc:1a:00:d3:22:d5:b2:b2:ac:6b:37:05:45 +# SHA1 Fingerprint: ff:bd:cd:e7:82:c8:43:5e:3c:6f:26:86:5c:ca:a8:3a:45:5b:c3:0a +# SHA256 Fingerprint: d4:0e:9c:86:cd:8f:e4:68:c1:77:69:59:f4:9e:a7:74:fa:54:86:84:b6:c4:06:f3:90:92:61:f4:dc:e2:57:5c +-----BEGIN CERTIFICATE----- +MIIEMDCCAxigAwIBAgIJANqb7HHzA7AZMA0GCSqGSIb3DQEBCwUAMIGkMQswCQYD +VQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEgQ2l0eTEk +MCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYDVQQLDB5U +cnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxHzAdBgNVBAMMFlRydXN0Q29y +IFJvb3RDZXJ0IENBLTEwHhcNMTYwMjA0MTIzMjE2WhcNMjkxMjMxMTcyMzE2WjCB +pDELMAkGA1UEBhMCUEExDzANBgNVBAgMBlBhbmFtYTEUMBIGA1UEBwwLUGFuYW1h +IENpdHkxJDAiBgNVBAoMG1RydXN0Q29yIFN5c3RlbXMgUy4gZGUgUi5MLjEnMCUG +A1UECwweVHJ1c3RDb3IgQ2VydGlmaWNhdGUgQXV0aG9yaXR5MR8wHQYDVQQDDBZU +cnVzdENvciBSb290Q2VydCBDQS0xMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB +CgKCAQEAv463leLCJhJrMxnHQFgKq1mqjQCj/IDHUHuO1CAmujIS2CNUSSUQIpid +RtLByZ5OGy4sDjjzGiVoHKZaBeYei0i/mJZ0PmnK6bV4pQa81QBeCQryJ3pS/C3V +seq0iWEk8xoT26nPUu0MJLq5nux+AHT6k61sKZKuUbS701e/s/OojZz0JEsq1pme +9J7+wH5COucLlVPat2gOkEz7cD+PSiyU8ybdY2mplNgQTsVHCJCZGxdNuWxu72CV +EY4hgLW9oHPY0LJ3xEXqWib7ZnZ2+AYfYW0PVcWDtxBWcgYHpfOxGgMFZA6dWorW +hnAbJN7+KIor0Gqw/Hqi3LJ5DotlDwIDAQABo2MwYTAdBgNVHQ4EFgQU7mtJPHo/ +DeOxCbeKyKsZn3MzUOcwHwYDVR0jBBgwFoAU7mtJPHo/DeOxCbeKyKsZn3MzUOcw +DwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYwDQYJKoZIhvcNAQELBQAD +ggEBACUY1JGPE+6PHh0RU9otRCkZoB5rMZ5NDp6tPVxBb5UrJKF5mDo4Nvu7Zp5I +/5CQ7z3UuJu0h3U/IJvOcs+hVcFNZKIZBqEHMwwLKeXx6quj7LUKdJDHfXLy11yf +ke+Ri7fc7Waiz45mO7yfOgLgJ90WmMCV1Aqk5IGadZQ1nJBfiDcGrVmVCrDRZ9MZ +yonnMlo2HD6CqFqTvsbQZJG2z9m2GM/bftJlo6bEjhcxwft+dtvTheNYsnd6djts +L1Ac59v2Z3kf9YKVmgenFK+P3CghZwnS1k1aHBkcjndcw5QkPTJrS37UeJSDvjdN +zl/HHk484IkzlQsPpTLWPFp5LBk= +-----END CERTIFICATE----- + +# Issuer: CN=TrustCor RootCert CA-2 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority +# Subject: CN=TrustCor RootCert CA-2 O=TrustCor Systems S. de R.L. 
OU=TrustCor Certificate Authority +# Label: "TrustCor RootCert CA-2" +# Serial: 2711694510199101698 +# MD5 Fingerprint: a2:e1:f8:18:0b:ba:45:d5:c7:41:2a:bb:37:52:45:64 +# SHA1 Fingerprint: b8:be:6d:cb:56:f1:55:b9:63:d4:12:ca:4e:06:34:c7:94:b2:1c:c0 +# SHA256 Fingerprint: 07:53:e9:40:37:8c:1b:d5:e3:83:6e:39:5d:ae:a5:cb:83:9e:50:46:f1:bd:0e:ae:19:51:cf:10:fe:c7:c9:65 +-----BEGIN CERTIFICATE----- +MIIGLzCCBBegAwIBAgIIJaHfyjPLWQIwDQYJKoZIhvcNAQELBQAwgaQxCzAJBgNV +BAYTAlBBMQ8wDQYDVQQIDAZQYW5hbWExFDASBgNVBAcMC1BhbmFtYSBDaXR5MSQw +IgYDVQQKDBtUcnVzdENvciBTeXN0ZW1zIFMuIGRlIFIuTC4xJzAlBgNVBAsMHlRy +dXN0Q29yIENlcnRpZmljYXRlIEF1dGhvcml0eTEfMB0GA1UEAwwWVHJ1c3RDb3Ig +Um9vdENlcnQgQ0EtMjAeFw0xNjAyMDQxMjMyMjNaFw0zNDEyMzExNzI2MzlaMIGk +MQswCQYDVQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEg +Q2l0eTEkMCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYD +VQQLDB5UcnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxHzAdBgNVBAMMFlRy +dXN0Q29yIFJvb3RDZXJ0IENBLTIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIK +AoICAQCnIG7CKqJiJJWQdsg4foDSq8GbZQWU9MEKENUCrO2fk8eHyLAnK0IMPQo+ +QVqedd2NyuCb7GgypGmSaIwLgQ5WoD4a3SwlFIIvl9NkRvRUqdw6VC0xK5mC8tkq +1+9xALgxpL56JAfDQiDyitSSBBtlVkxs1Pu2YVpHI7TYabS3OtB0PAx1oYxOdqHp +2yqlO/rOsP9+aij9JxzIsekp8VduZLTQwRVtDr4uDkbIXvRR/u8OYzo7cbrPb1nK +DOObXUm4TOJXsZiKQlecdu/vvdFoqNL0Cbt3Nb4lggjEFixEIFapRBF37120Hape +az6LMvYHL1cEksr1/p3C6eizjkxLAjHZ5DxIgif3GIJ2SDpxsROhOdUuxTTCHWKF +3wP+TfSvPd9cW436cOGlfifHhi5qjxLGhF5DUVCcGZt45vz27Ud+ez1m7xMTiF88 +oWP7+ayHNZ/zgp6kPwqcMWmLmaSISo5uZk3vFsQPeSghYA2FFn3XVDjxklb9tTNM +g9zXEJ9L/cb4Qr26fHMC4P99zVvh1Kxhe1fVSntb1IVYJ12/+CtgrKAmrhQhJ8Z3 +mjOAPF5GP/fDsaOGM8boXg25NSyqRsGFAnWAoOsk+xWq5Gd/bnc/9ASKL3x74xdh +8N0JqSDIvgmk0H5Ew7IwSjiqqewYmgeCK9u4nBit2uBGF6zPXQIDAQABo2MwYTAd +BgNVHQ4EFgQU2f4hQG6UnrybPZx9mCAZ5YwwYrIwHwYDVR0jBBgwFoAU2f4hQG6U +nrybPZx9mCAZ5YwwYrIwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYw +DQYJKoZIhvcNAQELBQADggIBAJ5Fngw7tu/hOsh80QA9z+LqBrWyOrsGS2h60COX +dKcs8AjYeVrXWoSK2BKaG9l9XE1wxaX5q+WjiYndAfrs3fnpkpfbsEZC89NiqpX+ +MWcUaViQCqoL7jcjx1BRtPV+nuN79+TMQjItSQzL/0kMmx40/W5ulop5A7Zv2wnL +/V9lFDfhOPXzYRZY5LVtDQsEGz9QLX+zx3oaFoBg+Iof6Rsqxvm6ARppv9JYx1RX +CI/hOWB3S6xZhBqI8d3LT3jX5+EzLfzuQfogsL7L9ziUwOHQhQ+77Sxzq+3+knYa +ZH9bDTMJBzN7Bj8RpFxwPIXAz+OQqIN3+tvmxYxoZxBnpVIt8MSZj3+/0WvitUfW +2dCFmU2Umw9Lje4AWkcdEQOsQRivh7dvDDqPys/cA8GiCcjl/YBeyGBCARsaU1q7 +N6a3vLqE6R5sGtRk2tRD/pOLS/IseRYQ1JMLiI+h2IYURpFHmygk71dSTlxCnKr3 +Sewn6EAes6aJInKc9Q0ztFijMDvd1GpUk74aTfOTlPf8hAs/hCBcNANExdqtvArB +As8e5ZTZ845b2EzwnexhF7sUMlQMAimTHpKG9n/v55IFDlndmQguLvqcAFLTxWYp +5KeXRKQOKIETNcX2b2TmQcTVL8w0RSXPQQCWPUouwpaYT05KnJe32x+SMsj/D1Fu +1uwJ +-----END CERTIFICATE----- + +# Issuer: CN=TrustCor ECA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority +# Subject: CN=TrustCor ECA-1 O=TrustCor Systems S. de R.L. 
OU=TrustCor Certificate Authority +# Label: "TrustCor ECA-1" +# Serial: 9548242946988625984 +# MD5 Fingerprint: 27:92:23:1d:0a:f5:40:7c:e9:e6:6b:9d:d8:f5:e7:6c +# SHA1 Fingerprint: 58:d1:df:95:95:67:6b:63:c0:f0:5b:1c:17:4d:8b:84:0b:c8:78:bd +# SHA256 Fingerprint: 5a:88:5d:b1:9c:01:d9:12:c5:75:93:88:93:8c:af:bb:df:03:1a:b2:d4:8e:91:ee:15:58:9b:42:97:1d:03:9c +-----BEGIN CERTIFICATE----- +MIIEIDCCAwigAwIBAgIJAISCLF8cYtBAMA0GCSqGSIb3DQEBCwUAMIGcMQswCQYD +VQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEgQ2l0eTEk +MCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYDVQQLDB5U +cnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxFzAVBgNVBAMMDlRydXN0Q29y +IEVDQS0xMB4XDTE2MDIwNDEyMzIzM1oXDTI5MTIzMTE3MjgwN1owgZwxCzAJBgNV +BAYTAlBBMQ8wDQYDVQQIDAZQYW5hbWExFDASBgNVBAcMC1BhbmFtYSBDaXR5MSQw +IgYDVQQKDBtUcnVzdENvciBTeXN0ZW1zIFMuIGRlIFIuTC4xJzAlBgNVBAsMHlRy +dXN0Q29yIENlcnRpZmljYXRlIEF1dGhvcml0eTEXMBUGA1UEAwwOVHJ1c3RDb3Ig +RUNBLTEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDPj+ARtZ+odnbb +3w9U73NjKYKtR8aja+3+XzP4Q1HpGjORMRegdMTUpwHmspI+ap3tDvl0mEDTPwOA +BoJA6LHip1GnHYMma6ve+heRK9jGrB6xnhkB1Zem6g23xFUfJ3zSCNV2HykVh0A5 +3ThFEXXQmqc04L/NyFIduUd+Dbi7xgz2c1cWWn5DkR9VOsZtRASqnKmcp0yJF4Ou +owReUoCLHhIlERnXDH19MURB6tuvsBzvgdAsxZohmz3tQjtQJvLsznFhBmIhVE5/ +wZ0+fyCMgMsq2JdiyIMzkX2woloPV+g7zPIlstR8L+xNxqE6FXrntl019fZISjZF +ZtS6mFjBAgMBAAGjYzBhMB0GA1UdDgQWBBREnkj1zG1I1KBLf/5ZJC+Dl5mahjAf +BgNVHSMEGDAWgBREnkj1zG1I1KBLf/5ZJC+Dl5mahjAPBgNVHRMBAf8EBTADAQH/ +MA4GA1UdDwEB/wQEAwIBhjANBgkqhkiG9w0BAQsFAAOCAQEABT41XBVwm8nHc2Fv +civUwo/yQ10CzsSUuZQRg2dd4mdsdXa/uwyqNsatR5Nj3B5+1t4u/ukZMjgDfxT2 +AHMsWbEhBuH7rBiVDKP/mZb3Kyeb1STMHd3BOuCYRLDE5D53sXOpZCz2HAF8P11F +hcCF5yWPldwX8zyfGm6wyuMdKulMY/okYWLW2n62HGz1Ah3UKt1VkOsqEUc8Ll50 +soIipX1TH0XsJ5F95yIW6MBoNtjG8U+ARDL54dHRHareqKucBK+tIA5kmE2la8BI +WJZpTdwHjFGTot+fDz2LYLSCjaoITmJF4PkL0uDgPFveXHEnJcLmA4GLEFPjx1Wi +tJ/X5g== +-----END CERTIFICATE----- + +# Issuer: CN=SSL.com Root Certification Authority RSA O=SSL Corporation +# Subject: CN=SSL.com Root Certification Authority RSA O=SSL Corporation +# Label: "SSL.com Root Certification Authority RSA" +# Serial: 8875640296558310041 +# MD5 Fingerprint: 86:69:12:c0:70:f1:ec:ac:ac:c2:d5:bc:a5:5b:a1:29 +# SHA1 Fingerprint: b7:ab:33:08:d1:ea:44:77:ba:14:80:12:5a:6f:bd:a9:36:49:0c:bb +# SHA256 Fingerprint: 85:66:6a:56:2e:e0:be:5c:e9:25:c1:d8:89:0a:6f:76:a8:7e:c1:6d:4d:7d:5f:29:ea:74:19:cf:20:12:3b:69 +-----BEGIN CERTIFICATE----- +MIIF3TCCA8WgAwIBAgIIeyyb0xaAMpkwDQYJKoZIhvcNAQELBQAwfDELMAkGA1UE +BhMCVVMxDjAMBgNVBAgMBVRleGFzMRAwDgYDVQQHDAdIb3VzdG9uMRgwFgYDVQQK +DA9TU0wgQ29ycG9yYXRpb24xMTAvBgNVBAMMKFNTTC5jb20gUm9vdCBDZXJ0aWZp +Y2F0aW9uIEF1dGhvcml0eSBSU0EwHhcNMTYwMjEyMTczOTM5WhcNNDEwMjEyMTcz +OTM5WjB8MQswCQYDVQQGEwJVUzEOMAwGA1UECAwFVGV4YXMxEDAOBgNVBAcMB0hv +dXN0b24xGDAWBgNVBAoMD1NTTCBDb3Jwb3JhdGlvbjExMC8GA1UEAwwoU1NMLmNv +bSBSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5IFJTQTCCAiIwDQYJKoZIhvcN +AQEBBQADggIPADCCAgoCggIBAPkP3aMrfcvQKv7sZ4Wm5y4bunfh4/WvpOz6Sl2R +xFdHaxh3a3by/ZPkPQ/CFp4LZsNWlJ4Xg4XOVu/yFv0AYvUiCVToZRdOQbngT0aX +qhvIuG5iXmmxX9sqAn78bMrzQdjt0Oj8P2FI7bADFB0QDksZ4LtO7IZl/zbzXmcC +C52GVWH9ejjt/uIZALdvoVBidXQ8oPrIJZK0bnoix/geoeOy3ZExqysdBP+lSgQ3 +6YWkMyv94tZVNHwZpEpox7Ko07fKoZOI68GXvIz5HdkihCR0xwQ9aqkpk8zruFvh +/l8lqjRYyMEjVJ0bmBHDOJx+PYZspQ9AhnwC9FwCTyjLrnGfDzrIM/4RJTXq/LrF +YD3ZfBjVsqnTdXgDciLKOsMf7yzlLqn6niy2UUb9rwPW6mBo6oUWNmuF6R7As93E +JNyAKoFBbZQ+yODJgUEAnl6/f8UImKIYLEJAs/lvOCdLToD0PYFH4Ih86hzOtXVc +US4cK38acijnALXRdMbX5J+tB5O2UzU1/Dfkw/ZdFr4hc96SCvigY2q8lpJqPvi8 +ZVWb3vUNiSYE/CUapiVpy8JtynziWV+XrOvvLsi81xtZPCvM8hnIk2snYxnP/Okm 
++Mpxm3+T/jRnhE6Z6/yzeAkzcLpmpnbtG3PrGqUNxCITIJRWCk4sbE6x/c+cCbqi +M+2HAgMBAAGjYzBhMB0GA1UdDgQWBBTdBAkHovV6fVJTEpKV7jiAJQ2mWTAPBgNV +HRMBAf8EBTADAQH/MB8GA1UdIwQYMBaAFN0ECQei9Xp9UlMSkpXuOIAlDaZZMA4G +A1UdDwEB/wQEAwIBhjANBgkqhkiG9w0BAQsFAAOCAgEAIBgRlCn7Jp0cHh5wYfGV +cpNxJK1ok1iOMq8bs3AD/CUrdIWQPXhq9LmLpZc7tRiRux6n+UBbkflVma8eEdBc +Hadm47GUBwwyOabqG7B52B2ccETjit3E+ZUfijhDPwGFpUenPUayvOUiaPd7nNgs +PgohyC0zrL/FgZkxdMF1ccW+sfAjRfSda/wZY52jvATGGAslu1OJD7OAUN5F7kR/ +q5R4ZJjT9ijdh9hwZXT7DrkT66cPYakylszeu+1jTBi7qUD3oFRuIIhxdRjqerQ0 +cuAjJ3dctpDqhiVAq+8zD8ufgr6iIPv2tS0a5sKFsXQP+8hlAqRSAUfdSSLBv9jr +a6x+3uxjMxW3IwiPxg+NQVrdjsW5j+VFP3jbutIbQLH+cU0/4IGiul607BXgk90I +H37hVZkLId6Tngr75qNJvTYw/ud3sqB1l7UtgYgXZSD32pAAn8lSzDLKNXz1PQ/Y +K9f1JmzJBjSWFupwWRoyeXkLtoh/D1JIPb9s2KJELtFOt3JY04kTlf5Eq/jXixtu +nLwsoFvVagCvXzfh1foQC5ichucmj87w7G6KVwuA406ywKBjYZC6VWg3dGq2ktuf +oYYitmUnDuy2n0Jg5GfCtdpBC8TTi2EbvPofkSvXRAdeuims2cXp71NIWuuA8ShY +Ic2wBlX7Jz9TkHCpBB5XJ7k= +-----END CERTIFICATE----- + +# Issuer: CN=SSL.com Root Certification Authority ECC O=SSL Corporation +# Subject: CN=SSL.com Root Certification Authority ECC O=SSL Corporation +# Label: "SSL.com Root Certification Authority ECC" +# Serial: 8495723813297216424 +# MD5 Fingerprint: 2e:da:e4:39:7f:9c:8f:37:d1:70:9f:26:17:51:3a:8e +# SHA1 Fingerprint: c3:19:7c:39:24:e6:54:af:1b:c4:ab:20:95:7a:e2:c3:0e:13:02:6a +# SHA256 Fingerprint: 34:17:bb:06:cc:60:07:da:1b:96:1c:92:0b:8a:b4:ce:3f:ad:82:0e:4a:a3:0b:9a:cb:c4:a7:4e:bd:ce:bc:65 +-----BEGIN CERTIFICATE----- +MIICjTCCAhSgAwIBAgIIdebfy8FoW6gwCgYIKoZIzj0EAwIwfDELMAkGA1UEBhMC +VVMxDjAMBgNVBAgMBVRleGFzMRAwDgYDVQQHDAdIb3VzdG9uMRgwFgYDVQQKDA9T +U0wgQ29ycG9yYXRpb24xMTAvBgNVBAMMKFNTTC5jb20gUm9vdCBDZXJ0aWZpY2F0 +aW9uIEF1dGhvcml0eSBFQ0MwHhcNMTYwMjEyMTgxNDAzWhcNNDEwMjEyMTgxNDAz +WjB8MQswCQYDVQQGEwJVUzEOMAwGA1UECAwFVGV4YXMxEDAOBgNVBAcMB0hvdXN0 +b24xGDAWBgNVBAoMD1NTTCBDb3Jwb3JhdGlvbjExMC8GA1UEAwwoU1NMLmNvbSBS +b290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5IEVDQzB2MBAGByqGSM49AgEGBSuB +BAAiA2IABEVuqVDEpiM2nl8ojRfLliJkP9x6jh3MCLOicSS6jkm5BBtHllirLZXI +7Z4INcgn64mMU1jrYor+8FsPazFSY0E7ic3s7LaNGdM0B9y7xgZ/wkWV7Mt/qCPg +CemB+vNH06NjMGEwHQYDVR0OBBYEFILRhXMw5zUE044CkvvlpNHEIejNMA8GA1Ud +EwEB/wQFMAMBAf8wHwYDVR0jBBgwFoAUgtGFczDnNQTTjgKS++Wk0cQh6M0wDgYD +VR0PAQH/BAQDAgGGMAoGCCqGSM49BAMCA2cAMGQCMG/n61kRpGDPYbCWe+0F+S8T +kdzt5fxQaxFGRrMcIQBiu77D5+jNB5n5DQtdcj7EqgIwH7y6C+IwJPt8bYBVCpk+ +gA0z5Wajs6O7pdWLjwkspl1+4vAHCGht0nxpbl/f5Wpl +-----END CERTIFICATE----- + +# Issuer: CN=SSL.com EV Root Certification Authority RSA R2 O=SSL Corporation +# Subject: CN=SSL.com EV Root Certification Authority RSA R2 O=SSL Corporation +# Label: "SSL.com EV Root Certification Authority RSA R2" +# Serial: 6248227494352943350 +# MD5 Fingerprint: e1:1e:31:58:1a:ae:54:53:02:f6:17:6a:11:7b:4d:95 +# SHA1 Fingerprint: 74:3a:f0:52:9b:d0:32:a0:f4:4a:83:cd:d4:ba:a9:7b:7c:2e:c4:9a +# SHA256 Fingerprint: 2e:7b:f1:6c:c2:24:85:a7:bb:e2:aa:86:96:75:07:61:b0:ae:39:be:3b:2f:e9:d0:cc:6d:4e:f7:34:91:42:5c +-----BEGIN CERTIFICATE----- +MIIF6zCCA9OgAwIBAgIIVrYpzTS8ePYwDQYJKoZIhvcNAQELBQAwgYIxCzAJBgNV +BAYTAlVTMQ4wDAYDVQQIDAVUZXhhczEQMA4GA1UEBwwHSG91c3RvbjEYMBYGA1UE +CgwPU1NMIENvcnBvcmF0aW9uMTcwNQYDVQQDDC5TU0wuY29tIEVWIFJvb3QgQ2Vy +dGlmaWNhdGlvbiBBdXRob3JpdHkgUlNBIFIyMB4XDTE3MDUzMTE4MTQzN1oXDTQy +MDUzMDE4MTQzN1owgYIxCzAJBgNVBAYTAlVTMQ4wDAYDVQQIDAVUZXhhczEQMA4G +A1UEBwwHSG91c3RvbjEYMBYGA1UECgwPU1NMIENvcnBvcmF0aW9uMTcwNQYDVQQD +DC5TU0wuY29tIEVWIFJvb3QgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkgUlNBIFIy +MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAjzZlQOHWTcDXtOlG2mvq 
+M0fNTPl9fb69LT3w23jhhqXZuglXaO1XPqDQCEGD5yhBJB/jchXQARr7XnAjssuf +OePPxU7Gkm0mxnu7s9onnQqG6YE3Bf7wcXHswxzpY6IXFJ3vG2fThVUCAtZJycxa +4bH3bzKfydQ7iEGonL3Lq9ttewkfokxykNorCPzPPFTOZw+oz12WGQvE43LrrdF9 +HSfvkusQv1vrO6/PgN3B0pYEW3p+pKk8OHakYo6gOV7qd89dAFmPZiw+B6KjBSYR +aZfqhbcPlgtLyEDhULouisv3D5oi53+aNxPN8k0TayHRwMwi8qFG9kRpnMphNQcA +b9ZhCBHqurj26bNg5U257J8UZslXWNvNh2n4ioYSA0e/ZhN2rHd9NCSFg83XqpyQ +Gp8hLH94t2S42Oim9HizVcuE0jLEeK6jj2HdzghTreyI/BXkmg3mnxp3zkyPuBQV +PWKchjgGAGYS5Fl2WlPAApiiECtoRHuOec4zSnaqW4EWG7WK2NAAe15itAnWhmMO +pgWVSbooi4iTsjQc2KRVbrcc0N6ZVTsj9CLg+SlmJuwgUHfbSguPvuUCYHBBXtSu +UDkiFCbLsjtzdFVHB3mBOagwE0TlBIqulhMlQg+5U8Sb/M3kHN48+qvWBkofZ6aY +MBzdLNvcGJVXZsb/XItW9XcCAwEAAaNjMGEwDwYDVR0TAQH/BAUwAwEB/zAfBgNV +HSMEGDAWgBT5YLvU49U09rj1BoAlp3PbRmmonjAdBgNVHQ4EFgQU+WC71OPVNPa4 +9QaAJadz20ZpqJ4wDgYDVR0PAQH/BAQDAgGGMA0GCSqGSIb3DQEBCwUAA4ICAQBW +s47LCp1Jjr+kxJG7ZhcFUZh1++VQLHqe8RT6q9OKPv+RKY9ji9i0qVQBDb6Thi/5 +Sm3HXvVX+cpVHBK+Rw82xd9qt9t1wkclf7nxY/hoLVUE0fKNsKTPvDxeH3jnpaAg +cLAExbf3cqfeIg29MyVGjGSSJuM+LmOW2puMPfgYCdcDzH2GguDKBAdRUNf/ktUM +79qGn5nX67evaOI5JpS6aLe/g9Pqemc9YmeuJeVy6OLk7K4S9ksrPJ/psEDzOFSz +/bdoyNrGj1E8svuR3Bznm53htw1yj+KkxKl4+esUrMZDBcJlOSgYAsOCsp0FvmXt +ll9ldDz7CTUue5wT/RsPXcdtgTpWD8w74a8CLyKsRspGPKAcTNZEtF4uXBVmCeEm +Kf7GUmG6sXP/wwyc5WxqlD8UykAWlYTzWamsX0xhk23RO8yilQwipmdnRC652dKK +QbNmC1r7fSOl8hqw/96bg5Qu0T/fkreRrwU7ZcegbLHNYhLDkBvjJc40vG93drEQ +w/cFGsDWr3RiSBd3kmmQYRzelYB0VI8YHMPzA9C/pEN1hlMYegouCRw2n5H9gooi +S9EOUCXdywMMF8mDAAhONU2Ki+3wApRmLER/y5UnlhetCTCstnEXbosX9hwJ1C07 +mKVx01QT2WDz9UtmT/rx7iASjbSsV7FFY6GsdqnC+w== +-----END CERTIFICATE----- + +# Issuer: CN=SSL.com EV Root Certification Authority ECC O=SSL Corporation +# Subject: CN=SSL.com EV Root Certification Authority ECC O=SSL Corporation +# Label: "SSL.com EV Root Certification Authority ECC" +# Serial: 3182246526754555285 +# MD5 Fingerprint: 59:53:22:65:83:42:01:54:c0:ce:42:b9:5a:7c:f2:90 +# SHA1 Fingerprint: 4c:dd:51:a3:d1:f5:20:32:14:b0:c6:c5:32:23:03:91:c7:46:42:6d +# SHA256 Fingerprint: 22:a2:c1:f7:bd:ed:70:4c:c1:e7:01:b5:f4:08:c3:10:88:0f:e9:56:b5:de:2a:4a:44:f9:9c:87:3a:25:a7:c8 +-----BEGIN CERTIFICATE----- +MIIClDCCAhqgAwIBAgIILCmcWxbtBZUwCgYIKoZIzj0EAwIwfzELMAkGA1UEBhMC +VVMxDjAMBgNVBAgMBVRleGFzMRAwDgYDVQQHDAdIb3VzdG9uMRgwFgYDVQQKDA9T +U0wgQ29ycG9yYXRpb24xNDAyBgNVBAMMK1NTTC5jb20gRVYgUm9vdCBDZXJ0aWZp +Y2F0aW9uIEF1dGhvcml0eSBFQ0MwHhcNMTYwMjEyMTgxNTIzWhcNNDEwMjEyMTgx +NTIzWjB/MQswCQYDVQQGEwJVUzEOMAwGA1UECAwFVGV4YXMxEDAOBgNVBAcMB0hv +dXN0b24xGDAWBgNVBAoMD1NTTCBDb3Jwb3JhdGlvbjE0MDIGA1UEAwwrU1NMLmNv +bSBFViBSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5IEVDQzB2MBAGByqGSM49 +AgEGBSuBBAAiA2IABKoSR5CYG/vvw0AHgyBO8TCCogbR8pKGYfL2IWjKAMTH6kMA +VIbc/R/fALhBYlzccBYy3h+Z1MzFB8gIH2EWB1E9fVwHU+M1OIzfzZ/ZLg1Kthku +WnBaBu2+8KGwytAJKaNjMGEwHQYDVR0OBBYEFFvKXuXe0oGqzagtZFG22XKbl+ZP +MA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0jBBgwFoAUW8pe5d7SgarNqC1kUbbZcpuX +5k8wDgYDVR0PAQH/BAQDAgGGMAoGCCqGSM49BAMCA2gAMGUCMQCK5kCJN+vp1RPZ +ytRrJPOwPYdGWBrssd9v+1a6cGvHOMzosYxPD/fxZ3YOg9AeUY8CMD32IygmTMZg +h5Mmm7I1HrrW9zzRHM76JTymGoEVW/MSD2zuZYrJh6j5B+BimoxcSg== +-----END CERTIFICATE----- + +# Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R6 +# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R6 +# Label: "GlobalSign Root CA - R6" +# Serial: 1417766617973444989252670301619537 +# MD5 Fingerprint: 4f:dd:07:e4:d4:22:64:39:1e:0c:37:42:ea:d1:c6:ae +# SHA1 Fingerprint: 80:94:64:0e:b5:a7:a1:ca:11:9c:1f:dd:d5:9f:81:02:63:a7:fb:d1 +# SHA256 Fingerprint: 
2c:ab:ea:fe:37:d0:6c:a2:2a:ba:73:91:c0:03:3d:25:98:29:52:c4:53:64:73:49:76:3a:3a:b5:ad:6c:cf:69 +-----BEGIN CERTIFICATE----- +MIIFgzCCA2ugAwIBAgIORea7A4Mzw4VlSOb/RVEwDQYJKoZIhvcNAQEMBQAwTDEg +MB4GA1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjYxEzARBgNVBAoTCkdsb2Jh +bFNpZ24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMTQxMjEwMDAwMDAwWhcNMzQx +MjEwMDAwMDAwWjBMMSAwHgYDVQQLExdHbG9iYWxTaWduIFJvb3QgQ0EgLSBSNjET +MBEGA1UEChMKR2xvYmFsU2lnbjETMBEGA1UEAxMKR2xvYmFsU2lnbjCCAiIwDQYJ +KoZIhvcNAQEBBQADggIPADCCAgoCggIBAJUH6HPKZvnsFMp7PPcNCPG0RQssgrRI +xutbPK6DuEGSMxSkb3/pKszGsIhrxbaJ0cay/xTOURQh7ErdG1rG1ofuTToVBu1k +ZguSgMpE3nOUTvOniX9PeGMIyBJQbUJmL025eShNUhqKGoC3GYEOfsSKvGRMIRxD +aNc9PIrFsmbVkJq3MQbFvuJtMgamHvm566qjuL++gmNQ0PAYid/kD3n16qIfKtJw +LnvnvJO7bVPiSHyMEAc4/2ayd2F+4OqMPKq0pPbzlUoSB239jLKJz9CgYXfIWHSw +1CM69106yqLbnQneXUQtkPGBzVeS+n68UARjNN9rkxi+azayOeSsJDa38O+2HBNX +k7besvjihbdzorg1qkXy4J02oW9UivFyVm4uiMVRQkQVlO6jxTiWm05OWgtH8wY2 +SXcwvHE35absIQh1/OZhFj931dmRl4QKbNQCTXTAFO39OfuD8l4UoQSwC+n+7o/h +bguyCLNhZglqsQY6ZZZZwPA1/cnaKI0aEYdwgQqomnUdnjqGBQCe24DWJfncBZ4n +WUx2OVvq+aWh2IMP0f/fMBH5hc8zSPXKbWQULHpYT9NLCEnFlWQaYw55PfWzjMpY +rZxCRXluDocZXFSxZba/jJvcE+kNb7gu3GduyYsRtYQUigAZcIN5kZeR1Bonvzce +MgfYFGM8KEyvAgMBAAGjYzBhMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTAD +AQH/MB0GA1UdDgQWBBSubAWjkxPioufi1xzWx/B/yGdToDAfBgNVHSMEGDAWgBSu +bAWjkxPioufi1xzWx/B/yGdToDANBgkqhkiG9w0BAQwFAAOCAgEAgyXt6NH9lVLN +nsAEoJFp5lzQhN7craJP6Ed41mWYqVuoPId8AorRbrcWc+ZfwFSY1XS+wc3iEZGt +Ixg93eFyRJa0lV7Ae46ZeBZDE1ZXs6KzO7V33EByrKPrmzU+sQghoefEQzd5Mr61 +55wsTLxDKZmOMNOsIeDjHfrYBzN2VAAiKrlNIC5waNrlU/yDXNOd8v9EDERm8tLj +vUYAGm0CuiVdjaExUd1URhxN25mW7xocBFymFe944Hn+Xds+qkxV/ZoVqW/hpvvf +cDDpw+5CRu3CkwWJ+n1jez/QcYF8AOiYrg54NMMl+68KnyBr3TsTjxKM4kEaSHpz +oHdpx7Zcf4LIHv5YGygrqGytXm3ABdJ7t+uA/iU3/gKbaKxCXcPu9czc8FB10jZp +nOZ7BN9uBmm23goJSFmH63sUYHpkqmlD75HHTOwY3WzvUy2MmeFe8nI+z1TIvWfs +pA9MRf/TuTAjB0yPEL+GltmZWrSZVxykzLsViVO6LAUP5MSeGbEYNNVMnbrt9x+v +JJUEeKgDu+6B5dpffItKoZB0JaezPkvILFa9x8jvOOJckvB595yEunQtYQEgfn7R +8k8HWV+LLUNS60YMlOH1Zkd5d9VUWx+tJDfLRVpOoERIyNiwmcUVhAn21klJwGW4 +5hpxbqCo8YLoRT5s1gLXCmeDBVrJpBA= +-----END CERTIFICATE----- + +# Issuer: CN=OISTE WISeKey Global Root GC CA O=WISeKey OU=OISTE Foundation Endorsed +# Subject: CN=OISTE WISeKey Global Root GC CA O=WISeKey OU=OISTE Foundation Endorsed +# Label: "OISTE WISeKey Global Root GC CA" +# Serial: 44084345621038548146064804565436152554 +# MD5 Fingerprint: a9:d6:b9:2d:2f:93:64:f8:a5:69:ca:91:e9:68:07:23 +# SHA1 Fingerprint: e0:11:84:5e:34:de:be:88:81:b9:9c:f6:16:26:d1:96:1f:c3:b9:31 +# SHA256 Fingerprint: 85:60:f9:1c:36:24:da:ba:95:70:b5:fe:a0:db:e3:6f:f1:1a:83:23:be:94:86:85:4f:b3:f3:4a:55:71:19:8d +-----BEGIN CERTIFICATE----- +MIICaTCCAe+gAwIBAgIQISpWDK7aDKtARb8roi066jAKBggqhkjOPQQDAzBtMQsw +CQYDVQQGEwJDSDEQMA4GA1UEChMHV0lTZUtleTEiMCAGA1UECxMZT0lTVEUgRm91 +bmRhdGlvbiBFbmRvcnNlZDEoMCYGA1UEAxMfT0lTVEUgV0lTZUtleSBHbG9iYWwg +Um9vdCBHQyBDQTAeFw0xNzA1MDkwOTQ4MzRaFw00MjA1MDkwOTU4MzNaMG0xCzAJ +BgNVBAYTAkNIMRAwDgYDVQQKEwdXSVNlS2V5MSIwIAYDVQQLExlPSVNURSBGb3Vu +ZGF0aW9uIEVuZG9yc2VkMSgwJgYDVQQDEx9PSVNURSBXSVNlS2V5IEdsb2JhbCBS +b290IEdDIENBMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAETOlQwMYPchi82PG6s4ni +eUqjFqdrVCTbUf/q9Akkwwsin8tqJ4KBDdLArzHkdIJuyiXZjHWd8dvQmqJLIX4W +p2OQ0jnUsYd4XxiWD1AbNTcPasbc2RNNpI6QN+a9WzGRo1QwUjAOBgNVHQ8BAf8E +BAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUSIcUrOPDnpBgOtfKie7T +rYy0UGYwEAYJKwYBBAGCNxUBBAMCAQAwCgYIKoZIzj0EAwMDaAAwZQIwJsdpW9zV +57LnyAyMjMPdeYwbY9XJUpROTYJKcx6ygISpJcBMWm1JKWB4E+J+SOtkAjEA2zQg +Mgj/mkkCtojeFK9dbJlxjRo/i9fgojaGHAeCOnZT/cKi7e97sIBPWA9LUzm9 +-----END CERTIFICATE----- + +# 
Issuer: CN=GTS Root R1 O=Google Trust Services LLC +# Subject: CN=GTS Root R1 O=Google Trust Services LLC +# Label: "GTS Root R1" +# Serial: 146587175971765017618439757810265552097 +# MD5 Fingerprint: 82:1a:ef:d4:d2:4a:f2:9f:e2:3d:97:06:14:70:72:85 +# SHA1 Fingerprint: e1:c9:50:e6:ef:22:f8:4c:56:45:72:8b:92:20:60:d7:d5:a7:a3:e8 +# SHA256 Fingerprint: 2a:57:54:71:e3:13:40:bc:21:58:1c:bd:2c:f1:3e:15:84:63:20:3e:ce:94:bc:f9:d3:cc:19:6b:f0:9a:54:72 +-----BEGIN CERTIFICATE----- +MIIFWjCCA0KgAwIBAgIQbkepxUtHDA3sM9CJuRz04TANBgkqhkiG9w0BAQwFADBH +MQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExM +QzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIy +MDAwMDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNl +cnZpY2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwggIiMA0GCSqGSIb3DQEB +AQUAA4ICDwAwggIKAoICAQC2EQKLHuOhd5s73L+UPreVp0A8of2C+X0yBoJx9vaM +f/vo27xqLpeXo4xL+Sv2sfnOhB2x+cWX3u+58qPpvBKJXqeqUqv4IyfLpLGcY9vX +mX7wCl7raKb0xlpHDU0QM+NOsROjyBhsS+z8CZDfnWQpJSMHobTSPS5g4M/SCYe7 +zUjwTcLCeoiKu7rPWRnWr4+wB7CeMfGCwcDfLqZtbBkOtdh+JhpFAz2weaSUKK0P +fyblqAj+lug8aJRT7oM6iCsVlgmy4HqMLnXWnOunVmSPlk9orj2XwoSPwLxAwAtc +vfaHszVsrBhQf4TgTM2S0yDpM7xSma8ytSmzJSq0SPly4cpk9+aCEI3oncKKiPo4 +Zor8Y/kB+Xj9e1x3+naH+uzfsQ55lVe0vSbv1gHR6xYKu44LtcXFilWr06zqkUsp +zBmkMiVOKvFlRNACzqrOSbTqn3yDsEB750Orp2yjj32JgfpMpf/VjsPOS+C12LOO +Rc92wO1AK/1TD7Cn1TsNsYqiA94xrcx36m97PtbfkSIS5r762DL8EGMUUXLeXdYW +k70paDPvOmbsB4om3xPXV2V4J95eSRQAogB/mqghtqmxlbCluQ0WEdrHbEg8QOB+ +DVrNVjzRlwW5y0vtOUucxD/SVRNuJLDWcfr0wbrM7Rv1/oFB2ACYPTrIrnqYNxgF +lQIDAQABo0IwQDAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNV +HQ4EFgQU5K8rJnEaK0gnhS9SZizv8IkTcT4wDQYJKoZIhvcNAQEMBQADggIBADiW +Cu49tJYeX++dnAsznyvgyv3SjgofQXSlfKqE1OXyHuY3UjKcC9FhHb8owbZEKTV1 +d5iyfNm9dKyKaOOpMQkpAWBz40d8U6iQSifvS9efk+eCNs6aaAyC58/UEBZvXw6Z +XPYfcX3v73svfuo21pdwCxXu11xWajOl40k4DLh9+42FpLFZXvRq4d2h9mREruZR +gyFmxhE+885H7pwoHyXa/6xmld01D1zvICxi/ZG6qcz8WpyTgYMpl0p8WnK0OdC3 +d8t5/Wk6kjftbjhlRn7pYL15iJdfOBL07q9bgsiG1eGZbYwE8na6SfZu6W0eX6Dv +J4J2QPim01hcDyxC2kLGe4g0x8HYRZvBPsVhHdljUEn2NIVq4BjFbkerQUIpm/Zg +DdIx02OYI5NaAIFItO/Nis3Jz5nu2Z6qNuFoS3FJFDYoOj0dzpqPJeaAcWErtXvM ++SUWgeExX6GjfhaknBZqlxi9dnKlC54dNuYvoS++cJEPqOba+MSSQGwlfnuzCdyy +F62ARPBopY+Udf90WuioAnwMCeKpSwughQtiue+hMZL77/ZRBIls6Kl0obsXs7X9 +SQ98POyDGCBDTtWTurQ0sR8WNh8M5mQ5Fkzc4P4dyKliPUDqysU0ArSuiYgzNdws +E3PYJ/HQcu51OyLemGhmW/HGY0dVHLqlCFF1pkgl +-----END CERTIFICATE----- + +# Issuer: CN=GTS Root R2 O=Google Trust Services LLC +# Subject: CN=GTS Root R2 O=Google Trust Services LLC +# Label: "GTS Root R2" +# Serial: 146587176055767053814479386953112547951 +# MD5 Fingerprint: 44:ed:9a:0e:a4:09:3b:00:f2:ae:4c:a3:c6:61:b0:8b +# SHA1 Fingerprint: d2:73:96:2a:2a:5e:39:9f:73:3f:e1:c7:1e:64:3f:03:38:34:fc:4d +# SHA256 Fingerprint: c4:5d:7b:b0:8e:6d:67:e6:2e:42:35:11:0b:56:4e:5f:78:fd:92:ef:05:8c:84:0a:ea:4e:64:55:d7:58:5c:60 +-----BEGIN CERTIFICATE----- +MIIFWjCCA0KgAwIBAgIQbkepxlqz5yDFMJo/aFLybzANBgkqhkiG9w0BAQwFADBH +MQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExM +QzEUMBIGA1UEAxMLR1RTIFJvb3QgUjIwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIy +MDAwMDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNl +cnZpY2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjIwggIiMA0GCSqGSIb3DQEB +AQUAA4ICDwAwggIKAoICAQDO3v2m++zsFDQ8BwZabFn3GTXd98GdVarTzTukk3Lv +CvptnfbwhYBboUhSnznFt+4orO/LdmgUud+tAWyZH8QiHZ/+cnfgLFuv5AS/T3Kg +GjSY6Dlo7JUle3ah5mm5hRm9iYz+re026nO8/4Piy33B0s5Ks40FnotJk9/BW9Bu +XvAuMC6C/Pq8tBcKSOWIm8Wba96wyrQD8Nr0kLhlZPdcTK3ofmZemde4wj7I0BOd +re7kRXuJVfeKH2JShBKzwkCX44ofR5GmdFrS+LFjKBC4swm4VndAoiaYecb+3yXu 
+PuWgf9RhD1FLPD+M2uFwdNjCaKH5wQzpoeJ/u1U8dgbuak7MkogwTZq9TwtImoS1 +mKPV+3PBV2HdKFZ1E66HjucMUQkQdYhMvI35ezzUIkgfKtzra7tEscszcTJGr61K +8YzodDqs5xoic4DSMPclQsciOzsSrZYuxsN2B6ogtzVJV+mSSeh2FnIxZyuWfoqj +x5RWIr9qS34BIbIjMt/kmkRtWVtd9QCgHJvGeJeNkP+byKq0rxFROV7Z+2et1VsR +nTKaG73VululycslaVNVJ1zgyjbLiGH7HrfQy+4W+9OmTN6SpdTi3/UGVN4unUu0 +kzCqgc7dGtxRcw1PcOnlthYhGXmy5okLdWTK1au8CcEYof/UVKGFPP0UJAOyh9Ok +twIDAQABo0IwQDAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNV +HQ4EFgQUu//KjiOfT5nK2+JopqUVJxce2Q4wDQYJKoZIhvcNAQEMBQADggIBALZp +8KZ3/p7uC4Gt4cCpx/k1HUCCq+YEtN/L9x0Pg/B+E02NjO7jMyLDOfxA325BS0JT +vhaI8dI4XsRomRyYUpOM52jtG2pzegVATX9lO9ZY8c6DR2Dj/5epnGB3GFW1fgiT +z9D2PGcDFWEJ+YF59exTpJ/JjwGLc8R3dtyDovUMSRqodt6Sm2T4syzFJ9MHwAiA +pJiS4wGWAqoC7o87xdFtCjMwc3i5T1QWvwsHoaRc5svJXISPD+AVdyx+Jn7axEvb +pxZ3B7DNdehyQtaVhJ2Gg/LkkM0JR9SLA3DaWsYDQvTtN6LwG1BUSw7YhN4ZKJmB +R64JGz9I0cNv4rBgF/XuIwKl2gBbbZCr7qLpGzvpx0QnRY5rn/WkhLx3+WuXrD5R +RaIRpsyF7gpo8j5QOHokYh4XIDdtak23CZvJ/KRY9bb7nE4Yu5UC56GtmwfuNmsk +0jmGwZODUNKBRqhfYlcsu2xkiAhu7xNUX90txGdj08+JN7+dIPT7eoOboB6BAFDC +5AwiWVIQ7UNWhwD4FFKnHYuTjKJNRn8nxnGbJN7k2oaLDX5rIMHAnuFl2GqjpuiF +izoHCBy69Y9Vmhh1fuXsgWbRIXOhNUQLgD1bnF5vKheW0YMjiGZt5obicDIvUiLn +yOd/xCxgXS/Dr55FBcOEArf9LAhST4Ldo/DUhgkC +-----END CERTIFICATE----- + +# Issuer: CN=GTS Root R3 O=Google Trust Services LLC +# Subject: CN=GTS Root R3 O=Google Trust Services LLC +# Label: "GTS Root R3" +# Serial: 146587176140553309517047991083707763997 +# MD5 Fingerprint: 1a:79:5b:6b:04:52:9c:5d:c7:74:33:1b:25:9a:f9:25 +# SHA1 Fingerprint: 30:d4:24:6f:07:ff:db:91:89:8a:0b:e9:49:66:11:eb:8c:5e:46:e5 +# SHA256 Fingerprint: 15:d5:b8:77:46:19:ea:7d:54:ce:1c:a6:d0:b0:c4:03:e0:37:a9:17:f1:31:e8:a0:4e:1e:6b:7a:71:ba:bc:e5 +-----BEGIN CERTIFICATE----- +MIICDDCCAZGgAwIBAgIQbkepx2ypcyRAiQ8DVd2NHTAKBggqhkjOPQQDAzBHMQsw +CQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEU +MBIGA1UEAxMLR1RTIFJvb3QgUjMwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIyMDAw +MDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZp +Y2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjMwdjAQBgcqhkjOPQIBBgUrgQQA +IgNiAAQfTzOHMymKoYTey8chWEGJ6ladK0uFxh1MJ7x/JlFyb+Kf1qPKzEUURout +736GjOyxfi//qXGdGIRFBEFVbivqJn+7kAHjSxm65FSWRQmx1WyRRK2EE46ajA2A +DDL24CejQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MB0GA1Ud +DgQWBBTB8Sa6oC2uhYHP0/EqEr24Cmf9vDAKBggqhkjOPQQDAwNpADBmAjEAgFuk +fCPAlaUs3L6JbyO5o91lAFJekazInXJ0glMLfalAvWhgxeG4VDvBNhcl2MG9AjEA +njWSdIUlUfUk7GRSJFClH9voy8l27OyCbvWFGFPouOOaKaqW04MjyaR7YbPMAuhd +-----END CERTIFICATE----- + +# Issuer: CN=GTS Root R4 O=Google Trust Services LLC +# Subject: CN=GTS Root R4 O=Google Trust Services LLC +# Label: "GTS Root R4" +# Serial: 146587176229350439916519468929765261721 +# MD5 Fingerprint: 5d:b6:6a:c4:60:17:24:6a:1a:99:a8:4b:ee:5e:b4:26 +# SHA1 Fingerprint: 2a:1d:60:27:d9:4a:b1:0a:1c:4d:91:5c:cd:33:a0:cb:3e:2d:54:cb +# SHA256 Fingerprint: 71:cc:a5:39:1f:9e:79:4b:04:80:25:30:b3:63:e1:21:da:8a:30:43:bb:26:66:2f:ea:4d:ca:7f:c9:51:a4:bd +-----BEGIN CERTIFICATE----- +MIICCjCCAZGgAwIBAgIQbkepyIuUtui7OyrYorLBmTAKBggqhkjOPQQDAzBHMQsw +CQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEU +MBIGA1UEAxMLR1RTIFJvb3QgUjQwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIyMDAw +MDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZp +Y2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjQwdjAQBgcqhkjOPQIBBgUrgQQA +IgNiAATzdHOnaItgrkO4NcWBMHtLSZ37wWHO5t5GvWvVYRg1rkDdc/eJkTBa6zzu +hXyiQHY7qca4R9gq55KRanPpsXI5nymfopjTX15YhmUPoYRlBtHci8nHc8iMai/l +xKvRHYqjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MB0GA1Ud 
+DgQWBBSATNbrdP9JNqPV2Py1PsVq8JQdjDAKBggqhkjOPQQDAwNnADBkAjBqUFJ0 +CMRw3J5QdCHojXohw0+WbhXRIjVhLfoIN+4Zba3bssx9BzT1YBkstTTZbyACMANx +sbqjYAuG7ZoIapVon+Kz4ZNkfF6Tpt95LY2F45TPI11xzPKwTdb+mciUqXWi4w== +-----END CERTIFICATE----- + +# Issuer: CN=UCA Global G2 Root O=UniTrust +# Subject: CN=UCA Global G2 Root O=UniTrust +# Label: "UCA Global G2 Root" +# Serial: 124779693093741543919145257850076631279 +# MD5 Fingerprint: 80:fe:f0:c4:4a:f0:5c:62:32:9f:1c:ba:78:a9:50:f8 +# SHA1 Fingerprint: 28:f9:78:16:19:7a:ff:18:25:18:aa:44:fe:c1:a0:ce:5c:b6:4c:8a +# SHA256 Fingerprint: 9b:ea:11:c9:76:fe:01:47:64:c1:be:56:a6:f9:14:b5:a5:60:31:7a:bd:99:88:39:33:82:e5:16:1a:a0:49:3c +-----BEGIN CERTIFICATE----- +MIIFRjCCAy6gAwIBAgIQXd+x2lqj7V2+WmUgZQOQ7zANBgkqhkiG9w0BAQsFADA9 +MQswCQYDVQQGEwJDTjERMA8GA1UECgwIVW5pVHJ1c3QxGzAZBgNVBAMMElVDQSBH +bG9iYWwgRzIgUm9vdDAeFw0xNjAzMTEwMDAwMDBaFw00MDEyMzEwMDAwMDBaMD0x +CzAJBgNVBAYTAkNOMREwDwYDVQQKDAhVbmlUcnVzdDEbMBkGA1UEAwwSVUNBIEds +b2JhbCBHMiBSb290MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAxeYr +b3zvJgUno4Ek2m/LAfmZmqkywiKHYUGRO8vDaBsGxUypK8FnFyIdK+35KYmToni9 +kmugow2ifsqTs6bRjDXVdfkX9s9FxeV67HeToI8jrg4aA3++1NDtLnurRiNb/yzm +VHqUwCoV8MmNsHo7JOHXaOIxPAYzRrZUEaalLyJUKlgNAQLx+hVRZ2zA+te2G3/R +VogvGjqNO7uCEeBHANBSh6v7hn4PJGtAnTRnvI3HLYZveT6OqTwXS3+wmeOwcWDc +C/Vkw85DvG1xudLeJ1uK6NjGruFZfc8oLTW4lVYa8bJYS7cSN8h8s+1LgOGN+jIj +tm+3SJUIsUROhYw6AlQgL9+/V087OpAh18EmNVQg7Mc/R+zvWr9LesGtOxdQXGLY +D0tK3Cv6brxzks3sx1DoQZbXqX5t2Okdj4q1uViSukqSKwxW/YDrCPBeKW4bHAyv +j5OJrdu9o54hyokZ7N+1wxrrFv54NkzWbtA+FxyQF2smuvt6L78RHBgOLXMDj6Dl +NaBa4kx1HXHhOThTeEDMg5PXCp6dW4+K5OXgSORIskfNTip1KnvyIvbJvgmRlld6 +iIis7nCs+dwp4wwcOxJORNanTrAmyPPZGpeRaOrvjUYG0lZFWJo8DA+DuAUlwznP +O6Q0ibd5Ei9Hxeepl2n8pndntd978XplFeRhVmUCAwEAAaNCMEAwDgYDVR0PAQH/ +BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFIHEjMz15DD/pQwIX4wV +ZyF0Ad/fMA0GCSqGSIb3DQEBCwUAA4ICAQATZSL1jiutROTL/7lo5sOASD0Ee/oj +L3rtNtqyzm325p7lX1iPyzcyochltq44PTUbPrw7tgTQvPlJ9Zv3hcU2tsu8+Mg5 +1eRfB70VVJd0ysrtT7q6ZHafgbiERUlMjW+i67HM0cOU2kTC5uLqGOiiHycFutfl +1qnN3e92mI0ADs0b+gO3joBYDic/UvuUospeZcnWhNq5NXHzJsBPd+aBJ9J3O5oU +b3n09tDh05S60FdRvScFDcH9yBIw7m+NESsIndTUv4BFFJqIRNow6rSn4+7vW4LV +PtateJLbXDzz2K36uGt/xDYotgIVilQsnLAXc47QN6MUPJiVAAwpBVueSUmxX8fj +y88nZY41F7dXyDDZQVu5FLbowg+UMaeUmMxq67XhJ/UQqAHojhJi6IjMtX9Gl8Cb +EGY4GjZGXyJoPd/JxhMnq1MGrKI8hgZlb7F+sSlEmqO6SWkoaY/X5V+tBIZkbxqg +DMUIYs6Ao9Dz7GjevjPHF1t/gMRMTLGmhIrDO7gJzRSBuhjjVFc2/tsvfEehOjPI ++Vg7RE+xygKJBJYoaMVLuCaJu9YzL1DV/pqJuhgyklTGW+Cd+V7lDSKb9triyCGy +YiGqhkCyLmTTX8jjfhFnRR8F/uOi77Oos/N9j/gMHyIfLXC0uAE0djAA5SN4p1bX +UB+K+wb1whnw0A== +-----END CERTIFICATE----- + +# Issuer: CN=UCA Extended Validation Root O=UniTrust +# Subject: CN=UCA Extended Validation Root O=UniTrust +# Label: "UCA Extended Validation Root" +# Serial: 106100277556486529736699587978573607008 +# MD5 Fingerprint: a1:f3:5f:43:c6:34:9b:da:bf:8c:7e:05:53:ad:96:e2 +# SHA1 Fingerprint: a3:a1:b0:6f:24:61:23:4a:e3:36:a5:c2:37:fc:a6:ff:dd:f0:d7:3a +# SHA256 Fingerprint: d4:3a:f9:b3:54:73:75:5c:96:84:fc:06:d7:d8:cb:70:ee:5c:28:e7:73:fb:29:4e:b4:1e:e7:17:22:92:4d:24 +-----BEGIN CERTIFICATE----- +MIIFWjCCA0KgAwIBAgIQT9Irj/VkyDOeTzRYZiNwYDANBgkqhkiG9w0BAQsFADBH +MQswCQYDVQQGEwJDTjERMA8GA1UECgwIVW5pVHJ1c3QxJTAjBgNVBAMMHFVDQSBF +eHRlbmRlZCBWYWxpZGF0aW9uIFJvb3QwHhcNMTUwMzEzMDAwMDAwWhcNMzgxMjMx +MDAwMDAwWjBHMQswCQYDVQQGEwJDTjERMA8GA1UECgwIVW5pVHJ1c3QxJTAjBgNV +BAMMHFVDQSBFeHRlbmRlZCBWYWxpZGF0aW9uIFJvb3QwggIiMA0GCSqGSIb3DQEB +AQUAA4ICDwAwggIKAoICAQCpCQcoEwKwmeBkqh5DFnpzsZGgdT6o+uM4AHrsiWog +D4vFsJszA1qGxliG1cGFu0/GnEBNyr7uaZa4rYEwmnySBesFK5pI0Lh2PpbIILvS 
+sPGP2KxFRv+qZ2C0d35qHzwaUnoEPQc8hQ2E0B92CvdqFN9y4zR8V05WAT558aop +O2z6+I9tTcg1367r3CTueUWnhbYFiN6IXSV8l2RnCdm/WhUFhvMJHuxYMjMR83dk +sHYf5BA1FxvyDrFspCqjc/wJHx4yGVMR59mzLC52LqGj3n5qiAno8geK+LLNEOfi +c0CTuwjRP+H8C5SzJe98ptfRr5//lpr1kXuYC3fUfugH0mK1lTnj8/FtDw5lhIpj +VMWAtuCeS31HJqcBCF3RiJ7XwzJE+oJKCmhUfzhTA8ykADNkUVkLo4KRel7sFsLz +KuZi2irbWWIQJUoqgQtHB0MGcIfS+pMRKXpITeuUx3BNr2fVUbGAIAEBtHoIppB/ +TuDvB0GHr2qlXov7z1CymlSvw4m6WC31MJixNnI5fkkE/SmnTHnkBVfblLkWU41G +sx2VYVdWf6/wFlthWG82UBEL2KwrlRYaDh8IzTY0ZRBiZtWAXxQgXy0MoHgKaNYs +1+lvK9JKBZP8nm9rZ/+I8U6laUpSNwXqxhaN0sSZ0YIrO7o1dfdRUVjzyAfd5LQD +fwIDAQABo0IwQDAdBgNVHQ4EFgQU2XQ65DA9DfcS3H5aBZ8eNJr34RQwDwYDVR0T +AQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYwDQYJKoZIhvcNAQELBQADggIBADaN +l8xCFWQpN5smLNb7rhVpLGsaGvdftvkHTFnq88nIua7Mui563MD1sC3AO6+fcAUR +ap8lTwEpcOPlDOHqWnzcSbvBHiqB9RZLcpHIojG5qtr8nR/zXUACE/xOHAbKsxSQ +VBcZEhrxH9cMaVr2cXj0lH2RC47skFSOvG+hTKv8dGT9cZr4QQehzZHkPJrgmzI5 +c6sq1WnIeJEmMX3ixzDx/BR4dxIOE/TdFpS/S2d7cFOFyrC78zhNLJA5wA3CXWvp +4uXViI3WLL+rG761KIcSF3Ru/H38j9CHJrAb+7lsq+KePRXBOy5nAliRn+/4Qh8s +t2j1da3Ptfb/EX3C8CSlrdP6oDyp+l3cpaDvRKS+1ujl5BOWF3sGPjLtx7dCvHaj +2GU4Kzg1USEODm8uNBNA4StnDG1KQTAYI1oyVZnJF+A83vbsea0rWBmirSwiGpWO +vpaQXUJXxPkUAzUrHC1RVwinOt4/5Mi0A3PCwSaAuwtCH60NryZy2sy+s6ODWA2C +xR9GUeOcGMyNm43sSet1UNWMKFnKdDTajAshqx7qG+XH/RU+wBeq+yNuJkbL+vmx +cmtpzyKEC2IPrNkZAJSidjzULZrtBJ4tBmIQN1IchXIbJ+XMxjHsN+xjWZsLHXbM +fjKaiJUINlK73nZfdklJrX+9ZSCyycErdhh2n1ax +-----END CERTIFICATE----- + +# Issuer: CN=Certigna Root CA O=Dhimyotis OU=0002 48146308100036 +# Subject: CN=Certigna Root CA O=Dhimyotis OU=0002 48146308100036 +# Label: "Certigna Root CA" +# Serial: 269714418870597844693661054334862075617 +# MD5 Fingerprint: 0e:5c:30:62:27:eb:5b:bc:d7:ae:62:ba:e9:d5:df:77 +# SHA1 Fingerprint: 2d:0d:52:14:ff:9e:ad:99:24:01:74:20:47:6e:6c:85:27:27:f5:43 +# SHA256 Fingerprint: d4:8d:3d:23:ee:db:50:a4:59:e5:51:97:60:1c:27:77:4b:9d:7b:18:c9:4d:5a:05:95:11:a1:02:50:b9:31:68 +-----BEGIN CERTIFICATE----- +MIIGWzCCBEOgAwIBAgIRAMrpG4nxVQMNo+ZBbcTjpuEwDQYJKoZIhvcNAQELBQAw +WjELMAkGA1UEBhMCRlIxEjAQBgNVBAoMCURoaW15b3RpczEcMBoGA1UECwwTMDAw +MiA0ODE0NjMwODEwMDAzNjEZMBcGA1UEAwwQQ2VydGlnbmEgUm9vdCBDQTAeFw0x +MzEwMDEwODMyMjdaFw0zMzEwMDEwODMyMjdaMFoxCzAJBgNVBAYTAkZSMRIwEAYD +VQQKDAlEaGlteW90aXMxHDAaBgNVBAsMEzAwMDIgNDgxNDYzMDgxMDAwMzYxGTAX +BgNVBAMMEENlcnRpZ25hIFJvb3QgQ0EwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAw +ggIKAoICAQDNGDllGlmx6mQWDoyUJJV8g9PFOSbcDO8WV43X2KyjQn+Cyu3NW9sO +ty3tRQgXstmzy9YXUnIo245Onoq2C/mehJpNdt4iKVzSs9IGPjA5qXSjklYcoW9M +CiBtnyN6tMbaLOQdLNyzKNAT8kxOAkmhVECe5uUFoC2EyP+YbNDrihqECB63aCPu +I9Vwzm1RaRDuoXrC0SIxwoKF0vJVdlB8JXrJhFwLrN1CTivngqIkicuQstDuI7pm +TLtipPlTWmR7fJj6o0ieD5Wupxj0auwuA0Wv8HT4Ks16XdG+RCYyKfHx9WzMfgIh +C59vpD++nVPiz32pLHxYGpfhPTc3GGYo0kDFUYqMwy3OU4gkWGQwFsWq4NYKpkDf +ePb1BHxpE4S80dGnBs8B92jAqFe7OmGtBIyT46388NtEbVncSVmurJqZNjBBe3Yz +IoejwpKGbvlw7q6Hh5UbxHq9MfPU0uWZ/75I7HX1eBYdpnDBfzwboZL7z8g81sWT +Co/1VTp2lc5ZmIoJlXcymoO6LAQ6l73UL77XbJuiyn1tJslV1c/DeVIICZkHJC1k +JWumIWmbat10TWuXekG9qxf5kBdIjzb5LdXF2+6qhUVB+s06RbFo5jZMm5BX7CO5 +hwjCxAnxl4YqKE3idMDaxIzb3+KhF1nOJFl0Mdp//TBt2dzhauH8XwIDAQABo4IB +GjCCARYwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYE +FBiHVuBud+4kNTxOc5of1uHieX4rMB8GA1UdIwQYMBaAFBiHVuBud+4kNTxOc5of +1uHieX4rMEQGA1UdIAQ9MDswOQYEVR0gADAxMC8GCCsGAQUFBwIBFiNodHRwczov +L3d3d3cuY2VydGlnbmEuZnIvYXV0b3JpdGVzLzBtBgNVHR8EZjBkMC+gLaArhilo +dHRwOi8vY3JsLmNlcnRpZ25hLmZyL2NlcnRpZ25hcm9vdGNhLmNybDAxoC+gLYYr +aHR0cDovL2NybC5kaGlteW90aXMuY29tL2NlcnRpZ25hcm9vdGNhLmNybDANBgkq +hkiG9w0BAQsFAAOCAgEAlLieT/DjlQgi581oQfccVdV8AOItOoldaDgvUSILSo3L 
+6btdPrtcPbEo/uRTVRPPoZAbAh1fZkYJMyjhDSSXcNMQH+pkV5a7XdrnxIxPTGRG +HVyH41neQtGbqH6mid2PHMkwgu07nM3A6RngatgCdTer9zQoKJHyBApPNeNgJgH6 +0BGM+RFq7q89w1DTj18zeTyGqHNFkIwgtnJzFyO+B2XleJINugHA64wcZr+shncB +lA2c5uk5jR+mUYyZDDl34bSb+hxnV29qao6pK0xXeXpXIs/NX2NGjVxZOob4Mkdi +o2cNGJHc+6Zr9UhhcyNZjgKnvETq9Emd8VRY+WCv2hikLyhF3HqgiIZd8zvn/yk1 +gPxkQ5Tm4xxvvq0OKmOZK8l+hfZx6AYDlf7ej0gcWtSS6Cvu5zHbugRqh5jnxV/v +faci9wHYTfmJ0A6aBVmknpjZbyvKcL5kwlWj9Omvw5Ip3IgWJJk8jSaYtlu3zM63 +Nwf9JtmYhST/WSMDmu2dnajkXjjO11INb9I/bbEFa0nOipFGc/T2L/Coc3cOZayh +jWZSaX5LaAzHHjcng6WMxwLkFM1JAbBzs/3GkDpv0mztO+7skb6iQ12LAEpmJURw +3kAP+HwV96LOPNdeE4yBFxgX0b3xdxA61GU5wSesVywlVP+i2k+KYTlerj1KjL0= +-----END CERTIFICATE----- + +# Issuer: CN=emSign Root CA - G1 O=eMudhra Technologies Limited OU=emSign PKI +# Subject: CN=emSign Root CA - G1 O=eMudhra Technologies Limited OU=emSign PKI +# Label: "emSign Root CA - G1" +# Serial: 235931866688319308814040 +# MD5 Fingerprint: 9c:42:84:57:dd:cb:0b:a7:2e:95:ad:b6:f3:da:bc:ac +# SHA1 Fingerprint: 8a:c7:ad:8f:73:ac:4e:c1:b5:75:4d:a5:40:f4:fc:cf:7c:b5:8e:8c +# SHA256 Fingerprint: 40:f6:af:03:46:a9:9a:a1:cd:1d:55:5a:4e:9c:ce:62:c7:f9:63:46:03:ee:40:66:15:83:3d:c8:c8:d0:03:67 +-----BEGIN CERTIFICATE----- +MIIDlDCCAnygAwIBAgIKMfXkYgxsWO3W2DANBgkqhkiG9w0BAQsFADBnMQswCQYD +VQQGEwJJTjETMBEGA1UECxMKZW1TaWduIFBLSTElMCMGA1UEChMcZU11ZGhyYSBU +ZWNobm9sb2dpZXMgTGltaXRlZDEcMBoGA1UEAxMTZW1TaWduIFJvb3QgQ0EgLSBH +MTAeFw0xODAyMTgxODMwMDBaFw00MzAyMTgxODMwMDBaMGcxCzAJBgNVBAYTAklO +MRMwEQYDVQQLEwplbVNpZ24gUEtJMSUwIwYDVQQKExxlTXVkaHJhIFRlY2hub2xv +Z2llcyBMaW1pdGVkMRwwGgYDVQQDExNlbVNpZ24gUm9vdCBDQSAtIEcxMIIBIjAN +BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAk0u76WaK7p1b1TST0Bsew+eeuGQz +f2N4aLTNLnF115sgxk0pvLZoYIr3IZpWNVrzdr3YzZr/k1ZLpVkGoZM0Kd0WNHVO +8oG0x5ZOrRkVUkr+PHB1cM2vK6sVmjM8qrOLqs1D/fXqcP/tzxE7lM5OMhbTI0Aq +d7OvPAEsbO2ZLIvZTmmYsvePQbAyeGHWDV/D+qJAkh1cF+ZwPjXnorfCYuKrpDhM +tTk1b+oDafo6VGiFbdbyL0NVHpENDtjVaqSW0RM8LHhQ6DqS0hdW5TUaQBw+jSzt +Od9C4INBdN+jzcKGYEho42kLVACL5HZpIQ15TjQIXhTCzLG3rdd8cIrHhQIDAQAB +o0IwQDAdBgNVHQ4EFgQU++8Nhp6w492pufEhF38+/PB3KxowDgYDVR0PAQH/BAQD +AgEGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAFn/8oz1h31x +PaOfG1vR2vjTnGs2vZupYeveFix0PZ7mddrXuqe8QhfnPZHr5X3dPpzxz5KsbEjM +wiI/aTvFthUvozXGaCocV685743QNcMYDHsAVhzNixl03r4PEuDQqqE/AjSxcM6d +GNYIAwlG7mDgfrbESQRRfXBgvKqy/3lyeqYdPV8q+Mri/Tm3R7nrft8EI6/6nAYH +6ftjk4BAtcZsCjEozgyfz7MjNYBBjWzEN3uBL4ChQEKF6dk4jeihU80Bv2noWgby +RQuQ+q7hv53yrlc8pa6yVvSLZUDp/TGBLPQ5Cdjua6e0ph0VpZj3AYHYhX3zUVxx +iN66zB+Afko= +-----END CERTIFICATE----- + +# Issuer: CN=emSign ECC Root CA - G3 O=eMudhra Technologies Limited OU=emSign PKI +# Subject: CN=emSign ECC Root CA - G3 O=eMudhra Technologies Limited OU=emSign PKI +# Label: "emSign ECC Root CA - G3" +# Serial: 287880440101571086945156 +# MD5 Fingerprint: ce:0b:72:d1:9f:88:8e:d0:50:03:e8:e3:b8:8b:67:40 +# SHA1 Fingerprint: 30:43:fa:4f:f2:57:dc:a0:c3:80:ee:2e:58:ea:78:b2:3f:e6:bb:c1 +# SHA256 Fingerprint: 86:a1:ec:ba:08:9c:4a:8d:3b:be:27:34:c6:12:ba:34:1d:81:3e:04:3c:f9:e8:a8:62:cd:5c:57:a3:6b:be:6b +-----BEGIN CERTIFICATE----- +MIICTjCCAdOgAwIBAgIKPPYHqWhwDtqLhDAKBggqhkjOPQQDAzBrMQswCQYDVQQG +EwJJTjETMBEGA1UECxMKZW1TaWduIFBLSTElMCMGA1UEChMcZU11ZGhyYSBUZWNo +bm9sb2dpZXMgTGltaXRlZDEgMB4GA1UEAxMXZW1TaWduIEVDQyBSb290IENBIC0g +RzMwHhcNMTgwMjE4MTgzMDAwWhcNNDMwMjE4MTgzMDAwWjBrMQswCQYDVQQGEwJJ +TjETMBEGA1UECxMKZW1TaWduIFBLSTElMCMGA1UEChMcZU11ZGhyYSBUZWNobm9s +b2dpZXMgTGltaXRlZDEgMB4GA1UEAxMXZW1TaWduIEVDQyBSb290IENBIC0gRzMw +djAQBgcqhkjOPQIBBgUrgQQAIgNiAAQjpQy4LRL1KPOxst3iAhKAnjlfSU2fySU0 
+WXTsuwYc58Byr+iuL+FBVIcUqEqy6HyC5ltqtdyzdc6LBtCGI79G1Y4PPwT01xyS +fvalY8L1X44uT6EYGQIrMgqCZH0Wk9GjQjBAMB0GA1UdDgQWBBR8XQKEE9TMipuB +zhccLikenEhjQjAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAKBggq +hkjOPQQDAwNpADBmAjEAvvNhzwIQHWSVB7gYboiFBS+DCBeQyh+KTOgNG3qxrdWB +CUfvO6wIBHxcmbHtRwfSAjEAnbpV/KlK6O3t5nYBQnvI+GDZjVGLVTv7jHvrZQnD ++JbNR6iC8hZVdyR+EhCVBCyj +-----END CERTIFICATE----- + +# Issuer: CN=emSign Root CA - C1 O=eMudhra Inc OU=emSign PKI +# Subject: CN=emSign Root CA - C1 O=eMudhra Inc OU=emSign PKI +# Label: "emSign Root CA - C1" +# Serial: 825510296613316004955058 +# MD5 Fingerprint: d8:e3:5d:01:21:fa:78:5a:b0:df:ba:d2:ee:2a:5f:68 +# SHA1 Fingerprint: e7:2e:f1:df:fc:b2:09:28:cf:5d:d4:d5:67:37:b1:51:cb:86:4f:01 +# SHA256 Fingerprint: 12:56:09:aa:30:1d:a0:a2:49:b9:7a:82:39:cb:6a:34:21:6f:44:dc:ac:9f:39:54:b1:42:92:f2:e8:c8:60:8f +-----BEGIN CERTIFICATE----- +MIIDczCCAlugAwIBAgILAK7PALrEzzL4Q7IwDQYJKoZIhvcNAQELBQAwVjELMAkG +A1UEBhMCVVMxEzARBgNVBAsTCmVtU2lnbiBQS0kxFDASBgNVBAoTC2VNdWRocmEg +SW5jMRwwGgYDVQQDExNlbVNpZ24gUm9vdCBDQSAtIEMxMB4XDTE4MDIxODE4MzAw +MFoXDTQzMDIxODE4MzAwMFowVjELMAkGA1UEBhMCVVMxEzARBgNVBAsTCmVtU2ln +biBQS0kxFDASBgNVBAoTC2VNdWRocmEgSW5jMRwwGgYDVQQDExNlbVNpZ24gUm9v +dCBDQSAtIEMxMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAz+upufGZ +BczYKCFK83M0UYRWEPWgTywS4/oTmifQz/l5GnRfHXk5/Fv4cI7gklL35CX5VIPZ +HdPIWoU/Xse2B+4+wM6ar6xWQio5JXDWv7V7Nq2s9nPczdcdioOl+yuQFTdrHCZH +3DspVpNqs8FqOp099cGXOFgFixwR4+S0uF2FHYP+eF8LRWgYSKVGczQ7/g/IdrvH +GPMF0Ybzhe3nudkyrVWIzqa2kbBPrH4VI5b2P/AgNBbeCsbEBEV5f6f9vtKppa+c +xSMq9zwhbL2vj07FOrLzNBL834AaSaTUqZX3noleoomslMuoaJuvimUnzYnu3Yy1 +aylwQ6BpC+S5DwIDAQABo0IwQDAdBgNVHQ4EFgQU/qHgcB4qAzlSWkK+XJGFehiq +TbUwDgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEL +BQADggEBAMJKVvoVIXsoounlHfv4LcQ5lkFMOycsxGwYFYDGrK9HWS8mC+M2sO87 +/kOXSTKZEhVb3xEp/6tT+LvBeA+snFOvV71ojD1pM/CjoCNjO2RnIkSt1XHLVip4 +kqNPEjE2NuLe/gDEo2APJ62gsIq1NnpSob0n9CAnYuhNlCQT5AoE6TyrLshDCUrG +YQTlSTR+08TI9Q/Aqum6VF7zYytPT1DU/rl7mYw9wC68AivTxEDkigcxHpvOJpkT ++xHqmiIMERnHXhuBUDDIlhJu58tBf5E7oke3VIAb3ADMmpDqw8NQBmIMMMAVSKeo +WXzhriKi4gp6D/piq1JM4fHfyr6DDUI= +-----END CERTIFICATE----- + +# Issuer: CN=emSign ECC Root CA - C3 O=eMudhra Inc OU=emSign PKI +# Subject: CN=emSign ECC Root CA - C3 O=eMudhra Inc OU=emSign PKI +# Label: "emSign ECC Root CA - C3" +# Serial: 582948710642506000014504 +# MD5 Fingerprint: 3e:53:b3:a3:81:ee:d7:10:f8:d3:b0:1d:17:92:f5:d5 +# SHA1 Fingerprint: b6:af:43:c2:9b:81:53:7d:f6:ef:6b:c3:1f:1f:60:15:0c:ee:48:66 +# SHA256 Fingerprint: bc:4d:80:9b:15:18:9d:78:db:3e:1d:8c:f4:f9:72:6a:79:5d:a1:64:3c:a5:f1:35:8e:1d:db:0e:dc:0d:7e:b3 +-----BEGIN CERTIFICATE----- +MIICKzCCAbGgAwIBAgIKe3G2gla4EnycqDAKBggqhkjOPQQDAzBaMQswCQYDVQQG +EwJVUzETMBEGA1UECxMKZW1TaWduIFBLSTEUMBIGA1UEChMLZU11ZGhyYSBJbmMx +IDAeBgNVBAMTF2VtU2lnbiBFQ0MgUm9vdCBDQSAtIEMzMB4XDTE4MDIxODE4MzAw +MFoXDTQzMDIxODE4MzAwMFowWjELMAkGA1UEBhMCVVMxEzARBgNVBAsTCmVtU2ln +biBQS0kxFDASBgNVBAoTC2VNdWRocmEgSW5jMSAwHgYDVQQDExdlbVNpZ24gRUND +IFJvb3QgQ0EgLSBDMzB2MBAGByqGSM49AgEGBSuBBAAiA2IABP2lYa57JhAd6bci +MK4G9IGzsUJxlTm801Ljr6/58pc1kjZGDoeVjbk5Wum739D+yAdBPLtVb4Ojavti +sIGJAnB9SMVK4+kiVCJNk7tCDK93nCOmfddhEc5lx/h//vXyqaNCMEAwHQYDVR0O +BBYEFPtaSNCAIEDyqOkAB2kZd6fmw/TPMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMB +Af8EBTADAQH/MAoGCCqGSM49BAMDA2gAMGUCMQC02C8Cif22TGK6Q04ThHK1rt0c +3ta13FaPWEBaLd4gTCKDypOofu4SQMfWh0/434UCMBwUZOR8loMRnLDRWmFLpg9J +0wD8ofzkpf9/rdcw0Md3f76BB1UwUCAU9Vc4CqgxUQ== +-----END CERTIFICATE----- + +# Issuer: CN=Hongkong Post Root CA 3 O=Hongkong Post +# Subject: CN=Hongkong Post Root CA 3 O=Hongkong Post +# Label: 
"Hongkong Post Root CA 3" +# Serial: 46170865288971385588281144162979347873371282084 +# MD5 Fingerprint: 11:fc:9f:bd:73:30:02:8a:fd:3f:f3:58:b9:cb:20:f0 +# SHA1 Fingerprint: 58:a2:d0:ec:20:52:81:5b:c1:f3:f8:64:02:24:4e:c2:8e:02:4b:02 +# SHA256 Fingerprint: 5a:2f:c0:3f:0c:83:b0:90:bb:fa:40:60:4b:09:88:44:6c:76:36:18:3d:f9:84:6e:17:10:1a:44:7f:b8:ef:d6 +-----BEGIN CERTIFICATE----- +MIIFzzCCA7egAwIBAgIUCBZfikyl7ADJk0DfxMauI7gcWqQwDQYJKoZIhvcNAQEL +BQAwbzELMAkGA1UEBhMCSEsxEjAQBgNVBAgTCUhvbmcgS29uZzESMBAGA1UEBxMJ +SG9uZyBLb25nMRYwFAYDVQQKEw1Ib25na29uZyBQb3N0MSAwHgYDVQQDExdIb25n +a29uZyBQb3N0IFJvb3QgQ0EgMzAeFw0xNzA2MDMwMjI5NDZaFw00MjA2MDMwMjI5 +NDZaMG8xCzAJBgNVBAYTAkhLMRIwEAYDVQQIEwlIb25nIEtvbmcxEjAQBgNVBAcT +CUhvbmcgS29uZzEWMBQGA1UEChMNSG9uZ2tvbmcgUG9zdDEgMB4GA1UEAxMXSG9u +Z2tvbmcgUG9zdCBSb290IENBIDMwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIK +AoICAQCziNfqzg8gTr7m1gNt7ln8wlffKWihgw4+aMdoWJwcYEuJQwy51BWy7sFO +dem1p+/l6TWZ5Mwc50tfjTMwIDNT2aa71T4Tjukfh0mtUC1Qyhi+AViiE3CWu4mI +VoBc+L0sPOFMV4i707mV78vH9toxdCim5lSJ9UExyuUmGs2C4HDaOym71QP1mbpV +9WTRYA6ziUm4ii8F0oRFKHyPaFASePwLtVPLwpgchKOesL4jpNrcyCse2m5FHomY +2vkALgbpDDtw1VAliJnLzXNg99X/NWfFobxeq81KuEXryGgeDQ0URhLj0mRiikKY +vLTGCAj4/ahMZJx2Ab0vqWwzD9g/KLg8aQFChn5pwckGyuV6RmXpwtZQQS4/t+Tt +bNe/JgERohYpSms0BpDsE9K2+2p20jzt8NYt3eEV7KObLyzJPivkaTv/ciWxNoZb +x39ri1UbSsUgYT2uy1DhCDq+sI9jQVMwCFk8mB13umOResoQUGC/8Ne8lYePl8X+ +l2oBlKN8W4UdKjk60FSh0Tlxnf0h+bV78OLgAo9uliQlLKAeLKjEiafv7ZkGL7YK +TE/bosw3Gq9HhS2KX8Q0NEwA/RiTZxPRN+ZItIsGxVd7GYYKecsAyVKvQv83j+Gj +Hno9UKtjBucVtT+2RTeUN7F+8kjDf8V1/peNRY8apxpyKBpADwIDAQABo2MwYTAP +BgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIBBjAfBgNVHSMEGDAWgBQXnc0e +i9Y5K3DTXNSguB+wAPzFYTAdBgNVHQ4EFgQUF53NHovWOStw01zUoLgfsAD8xWEw +DQYJKoZIhvcNAQELBQADggIBAFbVe27mIgHSQpsY1Q7XZiNc4/6gx5LS6ZStS6LG +7BJ8dNVI0lkUmcDrudHr9EgwW62nV3OZqdPlt9EuWSRY3GguLmLYauRwCy0gUCCk +MpXRAJi70/33MvJJrsZ64Ee+bs7Lo3I6LWldy8joRTnU+kLBEUx3XZL7av9YROXr +gZ6voJmtvqkBZss4HTzfQx/0TW60uhdG/H39h4F5ag0zD/ov+BS5gLNdTaqX4fnk +GMX41TiMJjz98iji7lpJiCzfeT2OnpA8vUFKOt1b9pq0zj8lMH8yfaIDlNDceqFS +3m6TjRgm/VWsvY+b0s+v54Ysyx8Jb6NvqYTUc79NoXQbTiNg8swOqn+knEwlqLJm +Ozj/2ZQw9nKEvmhVEA/GcywWaZMH/rFF7buiVWqw2rVKAiUnhde3t4ZEFolsgCs+ +l6mc1X5VTMbeRRAc6uk7nwNT7u56AQIWeNTowr5GdogTPyK7SBIdUgC0An4hGh6c +JfTzPV4e0hz5sy229zdcxsshTrD3mUcYhcErulWuBurQB7Lcq9CClnXO0lD+mefP +L5/ndtFhKvshuzHQqp9HpLIiyhY6UFfEW0NnxWViA0kB60PZ2Pierc+xYw5F9KBa +LJstxabArahH9CdMOA0uG0k7UvToiIMrVCjU8jVStDKDYmlkDJGcn5fqdBb9HxEG +mpv0 +-----END CERTIFICATE----- + +# Issuer: CN=Entrust Root Certification Authority - G4 O=Entrust, Inc. OU=See www.entrust.net/legal-terms/(c) 2015 Entrust, Inc. - for authorized use only +# Subject: CN=Entrust Root Certification Authority - G4 O=Entrust, Inc. OU=See www.entrust.net/legal-terms/(c) 2015 Entrust, Inc. 
- for authorized use only +# Label: "Entrust Root Certification Authority - G4" +# Serial: 289383649854506086828220374796556676440 +# MD5 Fingerprint: 89:53:f1:83:23:b7:7c:8e:05:f1:8c:71:38:4e:1f:88 +# SHA1 Fingerprint: 14:88:4e:86:26:37:b0:26:af:59:62:5c:40:77:ec:35:29:ba:96:01 +# SHA256 Fingerprint: db:35:17:d1:f6:73:2a:2d:5a:b9:7c:53:3e:c7:07:79:ee:32:70:a6:2f:b4:ac:42:38:37:24:60:e6:f0:1e:88 +-----BEGIN CERTIFICATE----- +MIIGSzCCBDOgAwIBAgIRANm1Q3+vqTkPAAAAAFVlrVgwDQYJKoZIhvcNAQELBQAw +gb4xCzAJBgNVBAYTAlVTMRYwFAYDVQQKEw1FbnRydXN0LCBJbmMuMSgwJgYDVQQL +Ex9TZWUgd3d3LmVudHJ1c3QubmV0L2xlZ2FsLXRlcm1zMTkwNwYDVQQLEzAoYykg +MjAxNSBFbnRydXN0LCBJbmMuIC0gZm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxMjAw +BgNVBAMTKUVudHJ1c3QgUm9vdCBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEc0 +MB4XDTE1MDUyNzExMTExNloXDTM3MTIyNzExNDExNlowgb4xCzAJBgNVBAYTAlVT +MRYwFAYDVQQKEw1FbnRydXN0LCBJbmMuMSgwJgYDVQQLEx9TZWUgd3d3LmVudHJ1 +c3QubmV0L2xlZ2FsLXRlcm1zMTkwNwYDVQQLEzAoYykgMjAxNSBFbnRydXN0LCBJ +bmMuIC0gZm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxMjAwBgNVBAMTKUVudHJ1c3Qg +Um9vdCBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEc0MIICIjANBgkqhkiG9w0B +AQEFAAOCAg8AMIICCgKCAgEAsewsQu7i0TD/pZJH4i3DumSXbcr3DbVZwbPLqGgZ +2K+EbTBwXX7zLtJTmeH+H17ZSK9dE43b/2MzTdMAArzE+NEGCJR5WIoV3imz/f3E +T+iq4qA7ec2/a0My3dl0ELn39GjUu9CH1apLiipvKgS1sqbHoHrmSKvS0VnM1n4j +5pds8ELl3FFLFUHtSUrJ3hCX1nbB76W1NhSXNdh4IjVS70O92yfbYVaCNNzLiGAM +C1rlLAHGVK/XqsEQe9IFWrhAnoanw5CGAlZSCXqc0ieCU0plUmr1POeo8pyvi73T +DtTUXm6Hnmo9RR3RXRv06QqsYJn7ibT/mCzPfB3pAqoEmh643IhuJbNsZvc8kPNX +wbMv9W3y+8qh+CmdRouzavbmZwe+LGcKKh9asj5XxNMhIWNlUpEbsZmOeX7m640A +2Vqq6nPopIICR5b+W45UYaPrL0swsIsjdXJ8ITzI9vF01Bx7owVV7rtNOzK+mndm +nqxpkCIHH2E6lr7lmk/MBTwoWdPBDFSoWWG9yHJM6Nyfh3+9nEg2XpWjDrk4JFX8 +dWbrAuMINClKxuMrLzOg2qOGpRKX/YAr2hRC45K9PvJdXmd0LhyIRyk0X+IyqJwl +N4y6mACXi0mWHv0liqzc2thddG5msP9E36EYxr5ILzeUePiVSj9/E15dWf10hkNj +c0kCAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAQYwHQYD +VR0OBBYEFJ84xFYjwznooHFs6FRM5Og6sb9nMA0GCSqGSIb3DQEBCwUAA4ICAQAS +5UKme4sPDORGpbZgQIeMJX6tuGguW8ZAdjwD+MlZ9POrYs4QjbRaZIxowLByQzTS +Gwv2LFPSypBLhmb8qoMi9IsabyZIrHZ3CL/FmFz0Jomee8O5ZDIBf9PD3Vht7LGr +hFV0d4QEJ1JrhkzO3bll/9bGXp+aEJlLdWr+aumXIOTkdnrG0CSqkM0gkLpHZPt/ +B7NTeLUKYvJzQ85BK4FqLoUWlFPUa19yIqtRLULVAJyZv967lDtX/Zr1hstWO1uI +AeV8KEsD+UmDfLJ/fOPtjqF/YFOOVZ1QNBIPt5d7bIdKROf1beyAN/BYGW5KaHbw +H5Lk6rWS02FREAutp9lfx1/cH6NcjKF+m7ee01ZvZl4HliDtC3T7Zk6LERXpgUl+ +b7DUUH8i119lAg2m9IUe2K4GS0qn0jFmwvjO5QimpAKWRGhXxNUzzxkvFMSUHHuk +2fCfDrGA4tGeEWSpiBE6doLlYsKA2KSD7ZPvfC+QsDJMlhVoSFLUmQjAJOgc47Ol +IQ6SwJAfzyBfyjs4x7dtOvPmRLgOMWuIjnDrnBdSqEGULoe256YSxXXfW8AKbnuk +5F6G+TaU33fD6Q3AOfF5u0aOq0NZJ7cguyPpVkAh7DE9ZapD8j3fcEThuk0mEDuY +n/PIjhs4ViFqUZPTkcpG2om3PVODLAgfi49T3f+sHw== +-----END CERTIFICATE----- diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/core.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/core.py new file mode 100644 index 0000000..7271acf --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/certifi/core.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- + +""" +certifi.py +~~~~~~~~~~ + +This module returns the installation location of cacert.pem. 
+""" +import os + + +def where(): + f = os.path.dirname(__file__) + + return os.path.join(f, 'cacert.pem') diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__init__.py new file mode 100644 index 0000000..0f9f820 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__init__.py @@ -0,0 +1,39 @@ +######################## BEGIN LICENSE BLOCK ######################## +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + + +from .compat import PY2, PY3 +from .universaldetector import UniversalDetector +from .version import __version__, VERSION + + +def detect(byte_str): + """ + Detect the encoding of the given byte string. + + :param byte_str: The byte sequence to examine. + :type byte_str: ``bytes`` or ``bytearray`` + """ + if not isinstance(byte_str, bytearray): + if not isinstance(byte_str, bytes): + raise TypeError('Expected object of type bytes or bytearray, got: ' + '{0}'.format(type(byte_str))) + else: + byte_str = bytearray(byte_str) + detector = UniversalDetector() + detector.feed(byte_str) + return detector.close() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..4f5cfc5 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/big5freq.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/big5freq.cpython-37.pyc new file mode 100644 index 0000000..915ca56 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/big5freq.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/big5prober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/big5prober.cpython-37.pyc new file mode 100644 index 0000000..da4c7ba Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/big5prober.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/chardistribution.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/chardistribution.cpython-37.pyc new file mode 100644 index 0000000..52f4415 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/chardistribution.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/charsetgroupprober.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/charsetgroupprober.cpython-37.pyc new file mode 100644 index 0000000..8ec0dd0 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/charsetgroupprober.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/charsetprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/charsetprober.cpython-37.pyc new file mode 100644 index 0000000..ed36e2e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/charsetprober.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/codingstatemachine.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/codingstatemachine.cpython-37.pyc new file mode 100644 index 0000000..7cc98af Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/codingstatemachine.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/compat.cpython-37.pyc new file mode 100644 index 0000000..94d78c2 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/compat.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/cp949prober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/cp949prober.cpython-37.pyc new file mode 100644 index 0000000..5dc4497 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/cp949prober.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/enums.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/enums.cpython-37.pyc new file mode 100644 index 0000000..1f82015 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/enums.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/escprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/escprober.cpython-37.pyc new file mode 100644 index 0000000..3601b7b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/escprober.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/escsm.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/escsm.cpython-37.pyc new file mode 100644 index 0000000..b4eb58c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/escsm.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/eucjpprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/eucjpprober.cpython-37.pyc new file mode 100644 index 0000000..bcc86ff Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/eucjpprober.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/euckrfreq.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/euckrfreq.cpython-37.pyc new file mode 100644 index 
0000000..9671b8e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/euckrfreq.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/euckrprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/euckrprober.cpython-37.pyc new file mode 100644 index 0000000..c068cdb Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/euckrprober.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/euctwfreq.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/euctwfreq.cpython-37.pyc new file mode 100644 index 0000000..4f5667f Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/euctwfreq.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/euctwprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/euctwprober.cpython-37.pyc new file mode 100644 index 0000000..4e9fb87 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/euctwprober.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/gb2312freq.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/gb2312freq.cpython-37.pyc new file mode 100644 index 0000000..e539c08 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/gb2312freq.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/gb2312prober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/gb2312prober.cpython-37.pyc new file mode 100644 index 0000000..3fc4056 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/gb2312prober.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/hebrewprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/hebrewprober.cpython-37.pyc new file mode 100644 index 0000000..20db6a3 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/hebrewprober.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/jisfreq.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/jisfreq.cpython-37.pyc new file mode 100644 index 0000000..f3ef728 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/jisfreq.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/jpcntx.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/jpcntx.cpython-37.pyc new file mode 100644 index 0000000..9e72a08 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/jpcntx.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langbulgarianmodel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langbulgarianmodel.cpython-37.pyc new file mode 100644 index 0000000..f26f3f0 Binary files /dev/null and 
b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langbulgarianmodel.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langcyrillicmodel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langcyrillicmodel.cpython-37.pyc new file mode 100644 index 0000000..d1a948a Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langcyrillicmodel.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langgreekmodel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langgreekmodel.cpython-37.pyc new file mode 100644 index 0000000..ae5797f Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langgreekmodel.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langhebrewmodel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langhebrewmodel.cpython-37.pyc new file mode 100644 index 0000000..8a8e784 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langhebrewmodel.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langhungarianmodel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langhungarianmodel.cpython-37.pyc new file mode 100644 index 0000000..3629b78 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langhungarianmodel.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langthaimodel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langthaimodel.cpython-37.pyc new file mode 100644 index 0000000..03c0504 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langthaimodel.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langturkishmodel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langturkishmodel.cpython-37.pyc new file mode 100644 index 0000000..af5c95d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/langturkishmodel.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/latin1prober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/latin1prober.cpython-37.pyc new file mode 100644 index 0000000..be1d2a2 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/latin1prober.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/mbcharsetprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/mbcharsetprober.cpython-37.pyc new file mode 100644 index 0000000..364eb0e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/mbcharsetprober.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/mbcsgroupprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/mbcsgroupprober.cpython-37.pyc new file mode 100644 index 0000000..f7503d3 Binary files /dev/null 
and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/mbcsgroupprober.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/mbcssm.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/mbcssm.cpython-37.pyc new file mode 100644 index 0000000..d12c350 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/mbcssm.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/sbcharsetprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/sbcharsetprober.cpython-37.pyc new file mode 100644 index 0000000..c62e766 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/sbcharsetprober.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/sbcsgroupprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/sbcsgroupprober.cpython-37.pyc new file mode 100644 index 0000000..5aebb19 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/sbcsgroupprober.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/sjisprober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/sjisprober.cpython-37.pyc new file mode 100644 index 0000000..7996e84 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/sjisprober.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/universaldetector.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/universaldetector.cpython-37.pyc new file mode 100644 index 0000000..40b3b3f Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/universaldetector.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/utf8prober.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/utf8prober.cpython-37.pyc new file mode 100644 index 0000000..8fcee11 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/utf8prober.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/version.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/version.cpython-37.pyc new file mode 100644 index 0000000..901e50e Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/__pycache__/version.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/big5freq.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/big5freq.py new file mode 100644 index 0000000..38f3251 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/big5freq.py @@ -0,0 +1,386 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. 
+# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# Big5 frequency table +# by Taiwan's Mandarin Promotion Council +# +# +# 128 --> 0.42261 +# 256 --> 0.57851 +# 512 --> 0.74851 +# 1024 --> 0.89384 +# 2048 --> 0.97583 +# +# Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98 +# Random Distribution Ration = 512/(5401-512)=0.105 +# +# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR + +BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75 + +#Char to FreqOrder table +BIG5_TABLE_SIZE = 5376 + +BIG5_CHAR_TO_FREQ_ORDER = ( + 1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16 +3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32 +1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48 + 63,5010,5011, 317,1614, 75, 222, 159,4203,2417,1480,5012,3555,3091, 224,2822, # 64 +3682, 3, 10,3973,1471, 29,2787,1135,2866,1940, 873, 130,3275,1123, 312,5013, # 80 +4511,2052, 507, 252, 682,5014, 142,1915, 124, 206,2947, 34,3556,3204, 64, 604, # 96 +5015,2501,1977,1978, 155,1991, 645, 641,1606,5016,3452, 337, 72, 406,5017, 80, # 112 + 630, 238,3205,1509, 263, 939,1092,2654, 756,1440,1094,3453, 449, 69,2987, 591, # 128 + 179,2096, 471, 115,2035,1844, 60, 50,2988, 134, 806,1869, 734,2036,3454, 180, # 144 + 995,1607, 156, 537,2907, 688,5018, 319,1305, 779,2145, 514,2379, 298,4512, 359, # 160 +2502, 90,2716,1338, 663, 11, 906,1099,2553, 20,2441, 182, 532,1716,5019, 732, # 176 +1376,4204,1311,1420,3206, 25,2317,1056, 113, 399, 382,1950, 242,3455,2474, 529, # 192 +3276, 475,1447,3683,5020, 117, 21, 656, 810,1297,2300,2334,3557,5021, 126,4205, # 208 + 706, 456, 150, 613,4513, 71,1118,2037,4206, 145,3092, 85, 835, 486,2115,1246, # 224 +1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,5022,2128,2359, 347,3815, 221, # 240 +3558,3135,5023,1956,1153,4207, 83, 296,1199,3093, 192, 624, 93,5024, 822,1898, # 256 +2823,3136, 795,2065, 991,1554,1542,1592, 27, 43,2867, 859, 139,1456, 860,4514, # 272 + 437, 712,3974, 164,2397,3137, 695, 211,3037,2097, 195,3975,1608,3559,3560,3684, # 288 +3976, 234, 811,2989,2098,3977,2233,1441,3561,1615,2380, 668,2077,1638, 305, 228, # 304 +1664,4515, 467, 415,5025, 262,2099,1593, 239, 108, 300, 200,1033, 512,1247,2078, # 320 +5026,5027,2176,3207,3685,2682, 593, 845,1062,3277, 88,1723,2038,3978,1951, 212, # 336 + 266, 152, 149, 468,1899,4208,4516, 77, 187,5028,3038, 37, 5,2990,5029,3979, # 352 +5030,5031, 39,2524,4517,2908,3208,2079, 55, 148, 74,4518, 545, 483,1474,1029, # 368 +1665, 217,1870,1531,3138,1104,2655,4209, 24, 172,3562, 900,3980,3563,3564,4519, # 384 + 32,1408,2824,1312, 329, 487,2360,2251,2717, 784,2683, 4,3039,3351,1427,1789, # 400 + 188, 109, 499,5032,3686,1717,1790, 
888,1217,3040,4520,5033,3565,5034,3352,1520, # 416 +3687,3981, 196,1034, 775,5035,5036, 929,1816, 249, 439, 38,5037,1063,5038, 794, # 432 +3982,1435,2301, 46, 178,3278,2066,5039,2381,5040, 214,1709,4521, 804, 35, 707, # 448 + 324,3688,1601,2554, 140, 459,4210,5041,5042,1365, 839, 272, 978,2262,2580,3456, # 464 +2129,1363,3689,1423, 697, 100,3094, 48, 70,1231, 495,3139,2196,5043,1294,5044, # 480 +2080, 462, 586,1042,3279, 853, 256, 988, 185,2382,3457,1698, 434,1084,5045,3458, # 496 + 314,2625,2788,4522,2335,2336, 569,2285, 637,1817,2525, 757,1162,1879,1616,3459, # 512 + 287,1577,2116, 768,4523,1671,2868,3566,2526,1321,3816, 909,2418,5046,4211, 933, # 528 +3817,4212,2053,2361,1222,4524, 765,2419,1322, 786,4525,5047,1920,1462,1677,2909, # 544 +1699,5048,4526,1424,2442,3140,3690,2600,3353,1775,1941,3460,3983,4213, 309,1369, # 560 +1130,2825, 364,2234,1653,1299,3984,3567,3985,3986,2656, 525,1085,3041, 902,2001, # 576 +1475, 964,4527, 421,1845,1415,1057,2286, 940,1364,3141, 376,4528,4529,1381, 7, # 592 +2527, 983,2383, 336,1710,2684,1846, 321,3461, 559,1131,3042,2752,1809,1132,1313, # 608 + 265,1481,1858,5049, 352,1203,2826,3280, 167,1089, 420,2827, 776, 792,1724,3568, # 624 +4214,2443,3281,5050,4215,5051, 446, 229, 333,2753, 901,3818,1200,1557,4530,2657, # 640 +1921, 395,2754,2685,3819,4216,1836, 125, 916,3209,2626,4531,5052,5053,3820,5054, # 656 +5055,5056,4532,3142,3691,1133,2555,1757,3462,1510,2318,1409,3569,5057,2146, 438, # 672 +2601,2910,2384,3354,1068, 958,3043, 461, 311,2869,2686,4217,1916,3210,4218,1979, # 688 + 383, 750,2755,2627,4219, 274, 539, 385,1278,1442,5058,1154,1965, 384, 561, 210, # 704 + 98,1295,2556,3570,5059,1711,2420,1482,3463,3987,2911,1257, 129,5060,3821, 642, # 720 + 523,2789,2790,2658,5061, 141,2235,1333, 68, 176, 441, 876, 907,4220, 603,2602, # 736 + 710, 171,3464, 404, 549, 18,3143,2398,1410,3692,1666,5062,3571,4533,2912,4534, # 752 +5063,2991, 368,5064, 146, 366, 99, 871,3693,1543, 748, 807,1586,1185, 22,2263, # 768 + 379,3822,3211,5065,3212, 505,1942,2628,1992,1382,2319,5066, 380,2362, 218, 702, # 784 +1818,1248,3465,3044,3572,3355,3282,5067,2992,3694, 930,3283,3823,5068, 59,5069, # 800 + 585, 601,4221, 497,3466,1112,1314,4535,1802,5070,1223,1472,2177,5071, 749,1837, # 816 + 690,1900,3824,1773,3988,1476, 429,1043,1791,2236,2117, 917,4222, 447,1086,1629, # 832 +5072, 556,5073,5074,2021,1654, 844,1090, 105, 550, 966,1758,2828,1008,1783, 686, # 848 +1095,5075,2287, 793,1602,5076,3573,2603,4536,4223,2948,2302,4537,3825, 980,2503, # 864 + 544, 353, 527,4538, 908,2687,2913,5077, 381,2629,1943,1348,5078,1341,1252, 560, # 880 +3095,5079,3467,2870,5080,2054, 973, 886,2081, 143,4539,5081,5082, 157,3989, 496, # 896 +4224, 57, 840, 540,2039,4540,4541,3468,2118,1445, 970,2264,1748,1966,2082,4225, # 912 +3144,1234,1776,3284,2829,3695, 773,1206,2130,1066,2040,1326,3990,1738,1725,4226, # 928 + 279,3145, 51,1544,2604, 423,1578,2131,2067, 173,4542,1880,5083,5084,1583, 264, # 944 + 610,3696,4543,2444, 280, 154,5085,5086,5087,1739, 338,1282,3096, 693,2871,1411, # 960 +1074,3826,2445,5088,4544,5089,5090,1240, 952,2399,5091,2914,1538,2688, 685,1483, # 976 +4227,2475,1436, 953,4228,2055,4545, 671,2400, 79,4229,2446,3285, 608, 567,2689, # 992 +3469,4230,4231,1691, 393,1261,1792,2401,5092,4546,5093,5094,5095,5096,1383,1672, # 1008 +3827,3213,1464, 522,1119, 661,1150, 216, 675,4547,3991,1432,3574, 609,4548,2690, # 1024 +2402,5097,5098,5099,4232,3045, 0,5100,2476, 315, 231,2447, 301,3356,4549,2385, # 1040 +5101, 233,4233,3697,1819,4550,4551,5102, 96,1777,1315,2083,5103, 257,5104,1810, # 
1056 +3698,2718,1139,1820,4234,2022,1124,2164,2791,1778,2659,5105,3097, 363,1655,3214, # 1072 +5106,2993,5107,5108,5109,3992,1567,3993, 718, 103,3215, 849,1443, 341,3357,2949, # 1088 +1484,5110,1712, 127, 67, 339,4235,2403, 679,1412, 821,5111,5112, 834, 738, 351, # 1104 +2994,2147, 846, 235,1497,1881, 418,1993,3828,2719, 186,1100,2148,2756,3575,1545, # 1120 +1355,2950,2872,1377, 583,3994,4236,2581,2995,5113,1298,3699,1078,2557,3700,2363, # 1136 + 78,3829,3830, 267,1289,2100,2002,1594,4237, 348, 369,1274,2197,2178,1838,4552, # 1152 +1821,2830,3701,2757,2288,2003,4553,2951,2758, 144,3358, 882,4554,3995,2759,3470, # 1168 +4555,2915,5114,4238,1726, 320,5115,3996,3046, 788,2996,5116,2831,1774,1327,2873, # 1184 +3997,2832,5117,1306,4556,2004,1700,3831,3576,2364,2660, 787,2023, 506, 824,3702, # 1200 + 534, 323,4557,1044,3359,2024,1901, 946,3471,5118,1779,1500,1678,5119,1882,4558, # 1216 + 165, 243,4559,3703,2528, 123, 683,4239, 764,4560, 36,3998,1793, 589,2916, 816, # 1232 + 626,1667,3047,2237,1639,1555,1622,3832,3999,5120,4000,2874,1370,1228,1933, 891, # 1248 +2084,2917, 304,4240,5121, 292,2997,2720,3577, 691,2101,4241,1115,4561, 118, 662, # 1264 +5122, 611,1156, 854,2386,1316,2875, 2, 386, 515,2918,5123,5124,3286, 868,2238, # 1280 +1486, 855,2661, 785,2216,3048,5125,1040,3216,3578,5126,3146, 448,5127,1525,5128, # 1296 +2165,4562,5129,3833,5130,4242,2833,3579,3147, 503, 818,4001,3148,1568, 814, 676, # 1312 +1444, 306,1749,5131,3834,1416,1030, 197,1428, 805,2834,1501,4563,5132,5133,5134, # 1328 +1994,5135,4564,5136,5137,2198, 13,2792,3704,2998,3149,1229,1917,5138,3835,2132, # 1344 +5139,4243,4565,2404,3580,5140,2217,1511,1727,1120,5141,5142, 646,3836,2448, 307, # 1360 +5143,5144,1595,3217,5145,5146,5147,3705,1113,1356,4002,1465,2529,2530,5148, 519, # 1376 +5149, 128,2133, 92,2289,1980,5150,4003,1512, 342,3150,2199,5151,2793,2218,1981, # 1392 +3360,4244, 290,1656,1317, 789, 827,2365,5152,3837,4566, 562, 581,4004,5153, 401, # 1408 +4567,2252, 94,4568,5154,1399,2794,5155,1463,2025,4569,3218,1944,5156, 828,1105, # 1424 +4245,1262,1394,5157,4246, 605,4570,5158,1784,2876,5159,2835, 819,2102, 578,2200, # 1440 +2952,5160,1502, 436,3287,4247,3288,2836,4005,2919,3472,3473,5161,2721,2320,5162, # 1456 +5163,2337,2068, 23,4571, 193, 826,3838,2103, 699,1630,4248,3098, 390,1794,1064, # 1472 +3581,5164,1579,3099,3100,1400,5165,4249,1839,1640,2877,5166,4572,4573, 137,4250, # 1488 + 598,3101,1967, 780, 104, 974,2953,5167, 278, 899, 253, 402, 572, 504, 493,1339, # 1504 +5168,4006,1275,4574,2582,2558,5169,3706,3049,3102,2253, 565,1334,2722, 863, 41, # 1520 +5170,5171,4575,5172,1657,2338, 19, 463,2760,4251, 606,5173,2999,3289,1087,2085, # 1536 +1323,2662,3000,5174,1631,1623,1750,4252,2691,5175,2878, 791,2723,2663,2339, 232, # 1552 +2421,5176,3001,1498,5177,2664,2630, 755,1366,3707,3290,3151,2026,1609, 119,1918, # 1568 +3474, 862,1026,4253,5178,4007,3839,4576,4008,4577,2265,1952,2477,5179,1125, 817, # 1584 +4254,4255,4009,1513,1766,2041,1487,4256,3050,3291,2837,3840,3152,5180,5181,1507, # 1600 +5182,2692, 733, 40,1632,1106,2879, 345,4257, 841,2531, 230,4578,3002,1847,3292, # 1616 +3475,5183,1263, 986,3476,5184, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562, # 1632 +4010,4011,2954, 967,2761,2665,1349, 592,2134,1692,3361,3003,1995,4258,1679,4012, # 1648 +1902,2188,5185, 739,3708,2724,1296,1290,5186,4259,2201,2202,1922,1563,2605,2559, # 1664 +1871,2762,3004,5187, 435,5188, 343,1108, 596, 17,1751,4579,2239,3477,3709,5189, # 1680 +4580, 294,3582,2955,1693, 477, 979, 281,2042,3583, 643,2043,3710,2631,2795,2266, # 1696 
+1031,2340,2135,2303,3584,4581, 367,1249,2560,5190,3585,5191,4582,1283,3362,2005, # 1712 + 240,1762,3363,4583,4584, 836,1069,3153, 474,5192,2149,2532, 268,3586,5193,3219, # 1728 +1521,1284,5194,1658,1546,4260,5195,3587,3588,5196,4261,3364,2693,1685,4262, 961, # 1744 +1673,2632, 190,2006,2203,3841,4585,4586,5197, 570,2504,3711,1490,5198,4587,2633, # 1760 +3293,1957,4588, 584,1514, 396,1045,1945,5199,4589,1968,2449,5200,5201,4590,4013, # 1776 + 619,5202,3154,3294, 215,2007,2796,2561,3220,4591,3221,4592, 763,4263,3842,4593, # 1792 +5203,5204,1958,1767,2956,3365,3712,1174, 452,1477,4594,3366,3155,5205,2838,1253, # 1808 +2387,2189,1091,2290,4264, 492,5206, 638,1169,1825,2136,1752,4014, 648, 926,1021, # 1824 +1324,4595, 520,4596, 997, 847,1007, 892,4597,3843,2267,1872,3713,2405,1785,4598, # 1840 +1953,2957,3103,3222,1728,4265,2044,3714,4599,2008,1701,3156,1551, 30,2268,4266, # 1856 +5207,2027,4600,3589,5208, 501,5209,4267, 594,3478,2166,1822,3590,3479,3591,3223, # 1872 + 829,2839,4268,5210,1680,3157,1225,4269,5211,3295,4601,4270,3158,2341,5212,4602, # 1888 +4271,5213,4015,4016,5214,1848,2388,2606,3367,5215,4603, 374,4017, 652,4272,4273, # 1904 + 375,1140, 798,5216,5217,5218,2366,4604,2269, 546,1659, 138,3051,2450,4605,5219, # 1920 +2254, 612,1849, 910, 796,3844,1740,1371, 825,3845,3846,5220,2920,2562,5221, 692, # 1936 + 444,3052,2634, 801,4606,4274,5222,1491, 244,1053,3053,4275,4276, 340,5223,4018, # 1952 +1041,3005, 293,1168, 87,1357,5224,1539, 959,5225,2240, 721, 694,4277,3847, 219, # 1968 +1478, 644,1417,3368,2666,1413,1401,1335,1389,4019,5226,5227,3006,2367,3159,1826, # 1984 + 730,1515, 184,2840, 66,4607,5228,1660,2958, 246,3369, 378,1457, 226,3480, 975, # 2000 +4020,2959,1264,3592, 674, 696,5229, 163,5230,1141,2422,2167, 713,3593,3370,4608, # 2016 +4021,5231,5232,1186, 15,5233,1079,1070,5234,1522,3224,3594, 276,1050,2725, 758, # 2032 +1126, 653,2960,3296,5235,2342, 889,3595,4022,3104,3007, 903,1250,4609,4023,3481, # 2048 +3596,1342,1681,1718, 766,3297, 286, 89,2961,3715,5236,1713,5237,2607,3371,3008, # 2064 +5238,2962,2219,3225,2880,5239,4610,2505,2533, 181, 387,1075,4024, 731,2190,3372, # 2080 +5240,3298, 310, 313,3482,2304, 770,4278, 54,3054, 189,4611,3105,3848,4025,5241, # 2096 +1230,1617,1850, 355,3597,4279,4612,3373, 111,4280,3716,1350,3160,3483,3055,4281, # 2112 +2150,3299,3598,5242,2797,4026,4027,3009, 722,2009,5243,1071, 247,1207,2343,2478, # 2128 +1378,4613,2010, 864,1437,1214,4614, 373,3849,1142,2220, 667,4615, 442,2763,2563, # 2144 +3850,4028,1969,4282,3300,1840, 837, 170,1107, 934,1336,1883,5244,5245,2119,4283, # 2160 +2841, 743,1569,5246,4616,4284, 582,2389,1418,3484,5247,1803,5248, 357,1395,1729, # 2176 +3717,3301,2423,1564,2241,5249,3106,3851,1633,4617,1114,2086,4285,1532,5250, 482, # 2192 +2451,4618,5251,5252,1492, 833,1466,5253,2726,3599,1641,2842,5254,1526,1272,3718, # 2208 +4286,1686,1795, 416,2564,1903,1954,1804,5255,3852,2798,3853,1159,2321,5256,2881, # 2224 +4619,1610,1584,3056,2424,2764, 443,3302,1163,3161,5257,5258,4029,5259,4287,2506, # 2240 +3057,4620,4030,3162,2104,1647,3600,2011,1873,4288,5260,4289, 431,3485,5261, 250, # 2256 + 97, 81,4290,5262,1648,1851,1558, 160, 848,5263, 866, 740,1694,5264,2204,2843, # 2272 +3226,4291,4621,3719,1687, 950,2479, 426, 469,3227,3720,3721,4031,5265,5266,1188, # 2288 + 424,1996, 861,3601,4292,3854,2205,2694, 168,1235,3602,4293,5267,2087,1674,4622, # 2304 +3374,3303, 220,2565,1009,5268,3855, 670,3010, 332,1208, 717,5269,5270,3603,2452, # 2320 +4032,3375,5271, 513,5272,1209,2882,3376,3163,4623,1080,5273,5274,5275,5276,2534, # 2336 
+3722,3604, 815,1587,4033,4034,5277,3605,3486,3856,1254,4624,1328,3058,1390,4035, # 2352 +1741,4036,3857,4037,5278, 236,3858,2453,3304,5279,5280,3723,3859,1273,3860,4625, # 2368 +5281, 308,5282,4626, 245,4627,1852,2480,1307,2583, 430, 715,2137,2454,5283, 270, # 2384 + 199,2883,4038,5284,3606,2727,1753, 761,1754, 725,1661,1841,4628,3487,3724,5285, # 2400 +5286, 587, 14,3305, 227,2608, 326, 480,2270, 943,2765,3607, 291, 650,1884,5287, # 2416 +1702,1226, 102,1547, 62,3488, 904,4629,3489,1164,4294,5288,5289,1224,1548,2766, # 2432 + 391, 498,1493,5290,1386,1419,5291,2056,1177,4630, 813, 880,1081,2368, 566,1145, # 2448 +4631,2291,1001,1035,2566,2609,2242, 394,1286,5292,5293,2069,5294, 86,1494,1730, # 2464 +4039, 491,1588, 745, 897,2963, 843,3377,4040,2767,2884,3306,1768, 998,2221,2070, # 2480 + 397,1827,1195,1970,3725,3011,3378, 284,5295,3861,2507,2138,2120,1904,5296,4041, # 2496 +2151,4042,4295,1036,3490,1905, 114,2567,4296, 209,1527,5297,5298,2964,2844,2635, # 2512 +2390,2728,3164, 812,2568,5299,3307,5300,1559, 737,1885,3726,1210, 885, 28,2695, # 2528 +3608,3862,5301,4297,1004,1780,4632,5302, 346,1982,2222,2696,4633,3863,1742, 797, # 2544 +1642,4043,1934,1072,1384,2152, 896,4044,3308,3727,3228,2885,3609,5303,2569,1959, # 2560 +4634,2455,1786,5304,5305,5306,4045,4298,1005,1308,3728,4299,2729,4635,4636,1528, # 2576 +2610, 161,1178,4300,1983, 987,4637,1101,4301, 631,4046,1157,3229,2425,1343,1241, # 2592 +1016,2243,2570, 372, 877,2344,2508,1160, 555,1935, 911,4047,5307, 466,1170, 169, # 2608 +1051,2921,2697,3729,2481,3012,1182,2012,2571,1251,2636,5308, 992,2345,3491,1540, # 2624 +2730,1201,2071,2406,1997,2482,5309,4638, 528,1923,2191,1503,1874,1570,2369,3379, # 2640 +3309,5310, 557,1073,5311,1828,3492,2088,2271,3165,3059,3107, 767,3108,2799,4639, # 2656 +1006,4302,4640,2346,1267,2179,3730,3230, 778,4048,3231,2731,1597,2667,5312,4641, # 2672 +5313,3493,5314,5315,5316,3310,2698,1433,3311, 131, 95,1504,4049, 723,4303,3166, # 2688 +1842,3610,2768,2192,4050,2028,2105,3731,5317,3013,4051,1218,5318,3380,3232,4052, # 2704 +4304,2584, 248,1634,3864, 912,5319,2845,3732,3060,3865, 654, 53,5320,3014,5321, # 2720 +1688,4642, 777,3494,1032,4053,1425,5322, 191, 820,2121,2846, 971,4643, 931,3233, # 2736 + 135, 664, 783,3866,1998, 772,2922,1936,4054,3867,4644,2923,3234, 282,2732, 640, # 2752 +1372,3495,1127, 922, 325,3381,5323,5324, 711,2045,5325,5326,4055,2223,2800,1937, # 2768 +4056,3382,2224,2255,3868,2305,5327,4645,3869,1258,3312,4057,3235,2139,2965,4058, # 2784 +4059,5328,2225, 258,3236,4646, 101,1227,5329,3313,1755,5330,1391,3314,5331,2924, # 2800 +2057, 893,5332,5333,5334,1402,4305,2347,5335,5336,3237,3611,5337,5338, 878,1325, # 2816 +1781,2801,4647, 259,1385,2585, 744,1183,2272,4648,5339,4060,2509,5340, 684,1024, # 2832 +4306,5341, 472,3612,3496,1165,3315,4061,4062, 322,2153, 881, 455,1695,1152,1340, # 2848 + 660, 554,2154,4649,1058,4650,4307, 830,1065,3383,4063,4651,1924,5342,1703,1919, # 2864 +5343, 932,2273, 122,5344,4652, 947, 677,5345,3870,2637, 297,1906,1925,2274,4653, # 2880 +2322,3316,5346,5347,4308,5348,4309, 84,4310, 112, 989,5349, 547,1059,4064, 701, # 2896 +3613,1019,5350,4311,5351,3497, 942, 639, 457,2306,2456, 993,2966, 407, 851, 494, # 2912 +4654,3384, 927,5352,1237,5353,2426,3385, 573,4312, 680, 921,2925,1279,1875, 285, # 2928 + 790,1448,1984, 719,2168,5354,5355,4655,4065,4066,1649,5356,1541, 563,5357,1077, # 2944 +5358,3386,3061,3498, 511,3015,4067,4068,3733,4069,1268,2572,3387,3238,4656,4657, # 2960 +5359, 535,1048,1276,1189,2926,2029,3167,1438,1373,2847,2967,1134,2013,5360,4313, # 2976 
+1238,2586,3109,1259,5361, 700,5362,2968,3168,3734,4314,5363,4315,1146,1876,1907, # 2992 +4658,2611,4070, 781,2427, 132,1589, 203, 147, 273,2802,2407, 898,1787,2155,4071, # 3008 +4072,5364,3871,2803,5365,5366,4659,4660,5367,3239,5368,1635,3872, 965,5369,1805, # 3024 +2699,1516,3614,1121,1082,1329,3317,4073,1449,3873, 65,1128,2848,2927,2769,1590, # 3040 +3874,5370,5371, 12,2668, 45, 976,2587,3169,4661, 517,2535,1013,1037,3240,5372, # 3056 +3875,2849,5373,3876,5374,3499,5375,2612, 614,1999,2323,3877,3110,2733,2638,5376, # 3072 +2588,4316, 599,1269,5377,1811,3735,5378,2700,3111, 759,1060, 489,1806,3388,3318, # 3088 +1358,5379,5380,2391,1387,1215,2639,2256, 490,5381,5382,4317,1759,2392,2348,5383, # 3104 +4662,3878,1908,4074,2640,1807,3241,4663,3500,3319,2770,2349, 874,5384,5385,3501, # 3120 +3736,1859, 91,2928,3737,3062,3879,4664,5386,3170,4075,2669,5387,3502,1202,1403, # 3136 +3880,2969,2536,1517,2510,4665,3503,2511,5388,4666,5389,2701,1886,1495,1731,4076, # 3152 +2370,4667,5390,2030,5391,5392,4077,2702,1216, 237,2589,4318,2324,4078,3881,4668, # 3168 +4669,2703,3615,3504, 445,4670,5393,5394,5395,5396,2771, 61,4079,3738,1823,4080, # 3184 +5397, 687,2046, 935, 925, 405,2670, 703,1096,1860,2734,4671,4081,1877,1367,2704, # 3200 +3389, 918,2106,1782,2483, 334,3320,1611,1093,4672, 564,3171,3505,3739,3390, 945, # 3216 +2641,2058,4673,5398,1926, 872,4319,5399,3506,2705,3112, 349,4320,3740,4082,4674, # 3232 +3882,4321,3741,2156,4083,4675,4676,4322,4677,2408,2047, 782,4084, 400, 251,4323, # 3248 +1624,5400,5401, 277,3742, 299,1265, 476,1191,3883,2122,4324,4325,1109, 205,5402, # 3264 +2590,1000,2157,3616,1861,5403,5404,5405,4678,5406,4679,2573, 107,2484,2158,4085, # 3280 +3507,3172,5407,1533, 541,1301, 158, 753,4326,2886,3617,5408,1696, 370,1088,4327, # 3296 +4680,3618, 579, 327, 440, 162,2244, 269,1938,1374,3508, 968,3063, 56,1396,3113, # 3312 +2107,3321,3391,5409,1927,2159,4681,3016,5410,3619,5411,5412,3743,4682,2485,5413, # 3328 +2804,5414,1650,4683,5415,2613,5416,5417,4086,2671,3392,1149,3393,4087,3884,4088, # 3344 +5418,1076, 49,5419, 951,3242,3322,3323, 450,2850, 920,5420,1812,2805,2371,4328, # 3360 +1909,1138,2372,3885,3509,5421,3243,4684,1910,1147,1518,2428,4685,3886,5422,4686, # 3376 +2393,2614, 260,1796,3244,5423,5424,3887,3324, 708,5425,3620,1704,5426,3621,1351, # 3392 +1618,3394,3017,1887, 944,4329,3395,4330,3064,3396,4331,5427,3744, 422, 413,1714, # 3408 +3325, 500,2059,2350,4332,2486,5428,1344,1911, 954,5429,1668,5430,5431,4089,2409, # 3424 +4333,3622,3888,4334,5432,2307,1318,2512,3114, 133,3115,2887,4687, 629, 31,2851, # 3440 +2706,3889,4688, 850, 949,4689,4090,2970,1732,2089,4335,1496,1853,5433,4091, 620, # 3456 +3245, 981,1242,3745,3397,1619,3746,1643,3326,2140,2457,1971,1719,3510,2169,5434, # 3472 +3246,5435,5436,3398,1829,5437,1277,4690,1565,2048,5438,1636,3623,3116,5439, 869, # 3488 +2852, 655,3890,3891,3117,4092,3018,3892,1310,3624,4691,5440,5441,5442,1733, 558, # 3504 +4692,3747, 335,1549,3065,1756,4336,3748,1946,3511,1830,1291,1192, 470,2735,2108, # 3520 +2806, 913,1054,4093,5443,1027,5444,3066,4094,4693, 982,2672,3399,3173,3512,3247, # 3536 +3248,1947,2807,5445, 571,4694,5446,1831,5447,3625,2591,1523,2429,5448,2090, 984, # 3552 +4695,3749,1960,5449,3750, 852, 923,2808,3513,3751, 969,1519, 999,2049,2325,1705, # 3568 +5450,3118, 615,1662, 151, 597,4095,2410,2326,1049, 275,4696,3752,4337, 568,3753, # 3584 +3626,2487,4338,3754,5451,2430,2275, 409,3249,5452,1566,2888,3514,1002, 769,2853, # 3600 + 194,2091,3174,3755,2226,3327,4339, 628,1505,5453,5454,1763,2180,3019,4096, 521, # 3616 
+1161,2592,1788,2206,2411,4697,4097,1625,4340,4341, 412, 42,3119, 464,5455,2642, # 3632 +4698,3400,1760,1571,2889,3515,2537,1219,2207,3893,2643,2141,2373,4699,4700,3328, # 3648 +1651,3401,3627,5456,5457,3628,2488,3516,5458,3756,5459,5460,2276,2092, 460,5461, # 3664 +4701,5462,3020, 962, 588,3629, 289,3250,2644,1116, 52,5463,3067,1797,5464,5465, # 3680 +5466,1467,5467,1598,1143,3757,4342,1985,1734,1067,4702,1280,3402, 465,4703,1572, # 3696 + 510,5468,1928,2245,1813,1644,3630,5469,4704,3758,5470,5471,2673,1573,1534,5472, # 3712 +5473, 536,1808,1761,3517,3894,3175,2645,5474,5475,5476,4705,3518,2929,1912,2809, # 3728 +5477,3329,1122, 377,3251,5478, 360,5479,5480,4343,1529, 551,5481,2060,3759,1769, # 3744 +2431,5482,2930,4344,3330,3120,2327,2109,2031,4706,1404, 136,1468,1479, 672,1171, # 3760 +3252,2308, 271,3176,5483,2772,5484,2050, 678,2736, 865,1948,4707,5485,2014,4098, # 3776 +2971,5486,2737,2227,1397,3068,3760,4708,4709,1735,2931,3403,3631,5487,3895, 509, # 3792 +2854,2458,2890,3896,5488,5489,3177,3178,4710,4345,2538,4711,2309,1166,1010, 552, # 3808 + 681,1888,5490,5491,2972,2973,4099,1287,1596,1862,3179, 358, 453, 736, 175, 478, # 3824 +1117, 905,1167,1097,5492,1854,1530,5493,1706,5494,2181,3519,2292,3761,3520,3632, # 3840 +4346,2093,4347,5495,3404,1193,2489,4348,1458,2193,2208,1863,1889,1421,3331,2932, # 3856 +3069,2182,3521, 595,2123,5496,4100,5497,5498,4349,1707,2646, 223,3762,1359, 751, # 3872 +3121, 183,3522,5499,2810,3021, 419,2374, 633, 704,3897,2394, 241,5500,5501,5502, # 3888 + 838,3022,3763,2277,2773,2459,3898,1939,2051,4101,1309,3122,2246,1181,5503,1136, # 3904 +2209,3899,2375,1446,4350,2310,4712,5504,5505,4351,1055,2615, 484,3764,5506,4102, # 3920 + 625,4352,2278,3405,1499,4353,4103,5507,4104,4354,3253,2279,2280,3523,5508,5509, # 3936 +2774, 808,2616,3765,3406,4105,4355,3123,2539, 526,3407,3900,4356, 955,5510,1620, # 3952 +4357,2647,2432,5511,1429,3766,1669,1832, 994, 928,5512,3633,1260,5513,5514,5515, # 3968 +1949,2293, 741,2933,1626,4358,2738,2460, 867,1184, 362,3408,1392,5516,5517,4106, # 3984 +4359,1770,1736,3254,2934,4713,4714,1929,2707,1459,1158,5518,3070,3409,2891,1292, # 4000 +1930,2513,2855,3767,1986,1187,2072,2015,2617,4360,5519,2574,2514,2170,3768,2490, # 4016 +3332,5520,3769,4715,5521,5522, 666,1003,3023,1022,3634,4361,5523,4716,1814,2257, # 4032 + 574,3901,1603, 295,1535, 705,3902,4362, 283, 858, 417,5524,5525,3255,4717,4718, # 4048 +3071,1220,1890,1046,2281,2461,4107,1393,1599, 689,2575, 388,4363,5526,2491, 802, # 4064 +5527,2811,3903,2061,1405,2258,5528,4719,3904,2110,1052,1345,3256,1585,5529, 809, # 4080 +5530,5531,5532, 575,2739,3524, 956,1552,1469,1144,2328,5533,2329,1560,2462,3635, # 4096 +3257,4108, 616,2210,4364,3180,2183,2294,5534,1833,5535,3525,4720,5536,1319,3770, # 4112 +3771,1211,3636,1023,3258,1293,2812,5537,5538,5539,3905, 607,2311,3906, 762,2892, # 4128 +1439,4365,1360,4721,1485,3072,5540,4722,1038,4366,1450,2062,2648,4367,1379,4723, # 4144 +2593,5541,5542,4368,1352,1414,2330,2935,1172,5543,5544,3907,3908,4724,1798,1451, # 4160 +5545,5546,5547,5548,2936,4109,4110,2492,2351, 411,4111,4112,3637,3333,3124,4725, # 4176 +1561,2674,1452,4113,1375,5549,5550, 47,2974, 316,5551,1406,1591,2937,3181,5552, # 4192 +1025,2142,3125,3182, 354,2740, 884,2228,4369,2412, 508,3772, 726,3638, 996,2433, # 4208 +3639, 729,5553, 392,2194,1453,4114,4726,3773,5554,5555,2463,3640,2618,1675,2813, # 4224 + 919,2352,2975,2353,1270,4727,4115, 73,5556,5557, 647,5558,3259,2856,2259,1550, # 4240 +1346,3024,5559,1332, 883,3526,5560,5561,5562,5563,3334,2775,5564,1212, 831,1347, # 
4256 +4370,4728,2331,3909,1864,3073, 720,3910,4729,4730,3911,5565,4371,5566,5567,4731, # 4272 +5568,5569,1799,4732,3774,2619,4733,3641,1645,2376,4734,5570,2938, 669,2211,2675, # 4288 +2434,5571,2893,5572,5573,1028,3260,5574,4372,2413,5575,2260,1353,5576,5577,4735, # 4304 +3183, 518,5578,4116,5579,4373,1961,5580,2143,4374,5581,5582,3025,2354,2355,3912, # 4320 + 516,1834,1454,4117,2708,4375,4736,2229,2620,1972,1129,3642,5583,2776,5584,2976, # 4336 +1422, 577,1470,3026,1524,3410,5585,5586, 432,4376,3074,3527,5587,2594,1455,2515, # 4352 +2230,1973,1175,5588,1020,2741,4118,3528,4737,5589,2742,5590,1743,1361,3075,3529, # 4368 +2649,4119,4377,4738,2295, 895, 924,4378,2171, 331,2247,3076, 166,1627,3077,1098, # 4384 +5591,1232,2894,2231,3411,4739, 657, 403,1196,2377, 542,3775,3412,1600,4379,3530, # 4400 +5592,4740,2777,3261, 576, 530,1362,4741,4742,2540,2676,3776,4120,5593, 842,3913, # 4416 +5594,2814,2032,1014,4121, 213,2709,3413, 665, 621,4380,5595,3777,2939,2435,5596, # 4432 +2436,3335,3643,3414,4743,4381,2541,4382,4744,3644,1682,4383,3531,1380,5597, 724, # 4448 +2282, 600,1670,5598,1337,1233,4745,3126,2248,5599,1621,4746,5600, 651,4384,5601, # 4464 +1612,4385,2621,5602,2857,5603,2743,2312,3078,5604, 716,2464,3079, 174,1255,2710, # 4480 +4122,3645, 548,1320,1398, 728,4123,1574,5605,1891,1197,3080,4124,5606,3081,3082, # 4496 +3778,3646,3779, 747,5607, 635,4386,4747,5608,5609,5610,4387,5611,5612,4748,5613, # 4512 +3415,4749,2437, 451,5614,3780,2542,2073,4388,2744,4389,4125,5615,1764,4750,5616, # 4528 +4390, 350,4751,2283,2395,2493,5617,4391,4126,2249,1434,4127, 488,4752, 458,4392, # 4544 +4128,3781, 771,1330,2396,3914,2576,3184,2160,2414,1553,2677,3185,4393,5618,2494, # 4560 +2895,2622,1720,2711,4394,3416,4753,5619,2543,4395,5620,3262,4396,2778,5621,2016, # 4576 +2745,5622,1155,1017,3782,3915,5623,3336,2313, 201,1865,4397,1430,5624,4129,5625, # 4592 +5626,5627,5628,5629,4398,1604,5630, 414,1866, 371,2595,4754,4755,3532,2017,3127, # 4608 +4756,1708, 960,4399, 887, 389,2172,1536,1663,1721,5631,2232,4130,2356,2940,1580, # 4624 +5632,5633,1744,4757,2544,4758,4759,5634,4760,5635,2074,5636,4761,3647,3417,2896, # 4640 +4400,5637,4401,2650,3418,2815, 673,2712,2465, 709,3533,4131,3648,4402,5638,1148, # 4656 + 502, 634,5639,5640,1204,4762,3649,1575,4763,2623,3783,5641,3784,3128, 948,3263, # 4672 + 121,1745,3916,1110,5642,4403,3083,2516,3027,4132,3785,1151,1771,3917,1488,4133, # 4688 +1987,5643,2438,3534,5644,5645,2094,5646,4404,3918,1213,1407,2816, 531,2746,2545, # 4704 +3264,1011,1537,4764,2779,4405,3129,1061,5647,3786,3787,1867,2897,5648,2018, 120, # 4720 +4406,4407,2063,3650,3265,2314,3919,2678,3419,1955,4765,4134,5649,3535,1047,2713, # 4736 +1266,5650,1368,4766,2858, 649,3420,3920,2546,2747,1102,2859,2679,5651,5652,2000, # 4752 +5653,1111,3651,2977,5654,2495,3921,3652,2817,1855,3421,3788,5655,5656,3422,2415, # 4768 +2898,3337,3266,3653,5657,2577,5658,3654,2818,4135,1460, 856,5659,3655,5660,2899, # 4784 +2978,5661,2900,3922,5662,4408, 632,2517, 875,3923,1697,3924,2296,5663,5664,4767, # 4800 +3028,1239, 580,4768,4409,5665, 914, 936,2075,1190,4136,1039,2124,5666,5667,5668, # 4816 +5669,3423,1473,5670,1354,4410,3925,4769,2173,3084,4137, 915,3338,4411,4412,3339, # 4832 +1605,1835,5671,2748, 398,3656,4413,3926,4138, 328,1913,2860,4139,3927,1331,4414, # 4848 +3029, 937,4415,5672,3657,4140,4141,3424,2161,4770,3425, 524, 742, 538,3085,1012, # 4864 +5673,5674,3928,2466,5675, 658,1103, 225,3929,5676,5677,4771,5678,4772,5679,3267, # 4880 +1243,5680,4142, 
963,2250,4773,5681,2714,3658,3186,5682,5683,2596,2332,5684,4774, # 4896 +5685,5686,5687,3536, 957,3426,2547,2033,1931,2941,2467, 870,2019,3659,1746,2780, # 4912 +2781,2439,2468,5688,3930,5689,3789,3130,3790,3537,3427,3791,5690,1179,3086,5691, # 4928 +3187,2378,4416,3792,2548,3188,3131,2749,4143,5692,3428,1556,2549,2297, 977,2901, # 4944 +2034,4144,1205,3429,5693,1765,3430,3189,2125,1271, 714,1689,4775,3538,5694,2333, # 4960 +3931, 533,4417,3660,2184, 617,5695,2469,3340,3539,2315,5696,5697,3190,5698,5699, # 4976 +3932,1988, 618, 427,2651,3540,3431,5700,5701,1244,1690,5702,2819,4418,4776,5703, # 4992 +3541,4777,5704,2284,1576, 473,3661,4419,3432, 972,5705,3662,5706,3087,5707,5708, # 5008 +4778,4779,5709,3793,4145,4146,5710, 153,4780, 356,5711,1892,2902,4420,2144, 408, # 5024 + 803,2357,5712,3933,5713,4421,1646,2578,2518,4781,4782,3934,5714,3935,4422,5715, # 5040 +2416,3433, 752,5716,5717,1962,3341,2979,5718, 746,3030,2470,4783,4423,3794, 698, # 5056 +4784,1893,4424,3663,2550,4785,3664,3936,5719,3191,3434,5720,1824,1302,4147,2715, # 5072 +3937,1974,4425,5721,4426,3192, 823,1303,1288,1236,2861,3542,4148,3435, 774,3938, # 5088 +5722,1581,4786,1304,2862,3939,4787,5723,2440,2162,1083,3268,4427,4149,4428, 344, # 5104 +1173, 288,2316, 454,1683,5724,5725,1461,4788,4150,2597,5726,5727,4789, 985, 894, # 5120 +5728,3436,3193,5729,1914,2942,3795,1989,5730,2111,1975,5731,4151,5732,2579,1194, # 5136 + 425,5733,4790,3194,1245,3796,4429,5734,5735,2863,5736, 636,4791,1856,3940, 760, # 5152 +1800,5737,4430,2212,1508,4792,4152,1894,1684,2298,5738,5739,4793,4431,4432,2213, # 5168 + 479,5740,5741, 832,5742,4153,2496,5743,2980,2497,3797, 990,3132, 627,1815,2652, # 5184 +4433,1582,4434,2126,2112,3543,4794,5744, 799,4435,3195,5745,4795,2113,1737,3031, # 5200 +1018, 543, 754,4436,3342,1676,4796,4797,4154,4798,1489,5746,3544,5747,2624,2903, # 5216 +4155,5748,5749,2981,5750,5751,5752,5753,3196,4799,4800,2185,1722,5754,3269,3270, # 5232 +1843,3665,1715, 481, 365,1976,1857,5755,5756,1963,2498,4801,5757,2127,3666,3271, # 5248 + 433,1895,2064,2076,5758, 602,2750,5759,5760,5761,5762,5763,3032,1628,3437,5764, # 5264 +3197,4802,4156,2904,4803,2519,5765,2551,2782,5766,5767,5768,3343,4804,2905,5769, # 5280 +4805,5770,2864,4806,4807,1221,2982,4157,2520,5771,5772,5773,1868,1990,5774,5775, # 5296 +5776,1896,5777,5778,4808,1897,4158, 318,5779,2095,4159,4437,5780,5781, 485,5782, # 5312 + 938,3941, 553,2680, 116,5783,3942,3667,5784,3545,2681,2783,3438,3344,2820,5785, # 5328 +3668,2943,4160,1747,2944,2983,5786,5787, 207,5788,4809,5789,4810,2521,5790,3033, # 5344 + 890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360 +2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376 +) + diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/big5prober.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/big5prober.py new file mode 100644 index 0000000..98f9970 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/big5prober.py @@ -0,0 +1,47 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. 
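Editorial aside (not part of the diff): the tuple that closes above is a character-order-to-frequency-rank table that the distribution analysers later in this diff index into; judging by the files that follow and the constants imported in chardistribution.py, it is BIG5_CHAR_TO_FREQ_ORDER from the vendored big5freq module. A minimal sketch of how such a table is consumed, assuming the vendored package is importable (a standalone chardet install exposes the same names under chardet.big5freq):

from pip._vendor.chardet.big5freq import BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE

order = 0                                  # an order value as computed by a distribution analyser's get_order()
rank = BIG5_CHAR_TO_FREQ_ORDER[order]      # frequency rank of that character
print(BIG5_TABLE_SIZE, rank, rank < 512)   # ranks below 512 count as "frequent" in CharDistributionAnalysis.feed()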
+# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import Big5DistributionAnalysis +from .mbcssm import BIG5_SM_MODEL + + +class Big5Prober(MultiByteCharSetProber): + def __init__(self): + super(Big5Prober, self).__init__() + self.coding_sm = CodingStateMachine(BIG5_SM_MODEL) + self.distribution_analyzer = Big5DistributionAnalysis() + self.reset() + + @property + def charset_name(self): + return "Big5" + + @property + def language(self): + return "Chinese" diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/chardistribution.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/chardistribution.py new file mode 100644 index 0000000..c0395f4 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/chardistribution.py @@ -0,0 +1,233 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
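Editorial aside: Big5Prober above is a thin composition of a Big5 coding state machine and a Big5 distribution analyser. A minimal usage sketch, assuming the vendored package shown in these paths is importable (a standalone chardet install exposes the same class as chardet.big5prober.Big5Prober); the sample text is arbitrary:

from pip._vendor.chardet.big5prober import Big5Prober

prober = Big5Prober()
prober.feed("繁體中文的測試".encode("big5"))   # feed() comes from MultiByteCharSetProber
print(prober.charset_name, prober.language, prober.get_confidence())

With such a short sample the confidence stays low; the detector keeps feeding data until a prober shortcuts out or the input ends.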
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE, + EUCTW_TYPICAL_DISTRIBUTION_RATIO) +from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE, + EUCKR_TYPICAL_DISTRIBUTION_RATIO) +from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE, + GB2312_TYPICAL_DISTRIBUTION_RATIO) +from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE, + BIG5_TYPICAL_DISTRIBUTION_RATIO) +from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE, + JIS_TYPICAL_DISTRIBUTION_RATIO) + + +class CharDistributionAnalysis(object): + ENOUGH_DATA_THRESHOLD = 1024 + SURE_YES = 0.99 + SURE_NO = 0.01 + MINIMUM_DATA_THRESHOLD = 3 + + def __init__(self): + # Mapping table to get frequency order from char order (get from + # GetOrder()) + self._char_to_freq_order = None + self._table_size = None # Size of above table + # This is a constant value which varies from language to language, + # used in calculating confidence. See + # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html + # for further detail. + self.typical_distribution_ratio = None + self._done = None + self._total_chars = None + self._freq_chars = None + self.reset() + + def reset(self): + """reset analyser, clear any state""" + # If this flag is set to True, detection is done and conclusion has + # been made + self._done = False + self._total_chars = 0 # Total characters encountered + # The number of characters whose frequency order is less than 512 + self._freq_chars = 0 + + def feed(self, char, char_len): + """feed a character with known length""" + if char_len == 2: + # we only care about 2-bytes character in our distribution analysis + order = self.get_order(char) + else: + order = -1 + if order >= 0: + self._total_chars += 1 + # order is valid + if order < self._table_size: + if 512 > self._char_to_freq_order[order]: + self._freq_chars += 1 + + def get_confidence(self): + """return confidence based on existing data""" + # if we didn't receive any character in our consideration range, + # return negative answer + if self._total_chars <= 0 or self._freq_chars <= self.MINIMUM_DATA_THRESHOLD: + return self.SURE_NO + + if self._total_chars != self._freq_chars: + r = (self._freq_chars / ((self._total_chars - self._freq_chars) + * self.typical_distribution_ratio)) + if r < self.SURE_YES: + return r + + # normalize confidence (we don't want to be 100% sure) + return self.SURE_YES + + def got_enough_data(self): + # It is not necessary to receive all data to draw conclusion. + # For charset detection, certain amount of data is enough + return self._total_chars > self.ENOUGH_DATA_THRESHOLD + + def get_order(self, byte_str): + # We do not handle characters based on the original encoding string, + # but convert this encoding string to a number, here called order. + # This allows multiple encodings of a language to share one frequency + # table. 
+ return -1 + + +class EUCTWDistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + super(EUCTWDistributionAnalysis, self).__init__() + self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER + self._table_size = EUCTW_TABLE_SIZE + self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, byte_str): + # for euc-TW encoding, we are interested + # first byte range: 0xc4 -- 0xfe + # second byte range: 0xa1 -- 0xfe + # no validation needed here. State machine has done that + first_char = byte_str[0] + if first_char >= 0xC4: + return 94 * (first_char - 0xC4) + byte_str[1] - 0xA1 + else: + return -1 + + +class EUCKRDistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + super(EUCKRDistributionAnalysis, self).__init__() + self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER + self._table_size = EUCKR_TABLE_SIZE + self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, byte_str): + # for euc-KR encoding, we are interested + # first byte range: 0xb0 -- 0xfe + # second byte range: 0xa1 -- 0xfe + # no validation needed here. State machine has done that + first_char = byte_str[0] + if first_char >= 0xB0: + return 94 * (first_char - 0xB0) + byte_str[1] - 0xA1 + else: + return -1 + + +class GB2312DistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + super(GB2312DistributionAnalysis, self).__init__() + self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER + self._table_size = GB2312_TABLE_SIZE + self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, byte_str): + # for GB2312 encoding, we are interested + # first byte range: 0xb0 -- 0xfe + # second byte range: 0xa1 -- 0xfe + # no validation needed here. State machine has done that + first_char, second_char = byte_str[0], byte_str[1] + if (first_char >= 0xB0) and (second_char >= 0xA1): + return 94 * (first_char - 0xB0) + second_char - 0xA1 + else: + return -1 + + +class Big5DistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + super(Big5DistributionAnalysis, self).__init__() + self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER + self._table_size = BIG5_TABLE_SIZE + self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, byte_str): + # for big5 encoding, we are interested + # first byte range: 0xa4 -- 0xfe + # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe + # no validation needed here. State machine has done that + first_char, second_char = byte_str[0], byte_str[1] + if first_char >= 0xA4: + if second_char >= 0xA1: + return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63 + else: + return 157 * (first_char - 0xA4) + second_char - 0x40 + else: + return -1 + + +class SJISDistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + super(SJISDistributionAnalysis, self).__init__() + self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER + self._table_size = JIS_TABLE_SIZE + self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, byte_str): + # for sjis encoding, we are interested + # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe + # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe + # no validation needed here. 
State machine has done that + first_char, second_char = byte_str[0], byte_str[1] + if (first_char >= 0x81) and (first_char <= 0x9F): + order = 188 * (first_char - 0x81) + elif (first_char >= 0xE0) and (first_char <= 0xEF): + order = 188 * (first_char - 0xE0 + 31) + else: + return -1 + order = order + second_char - 0x40 + if second_char > 0x7F: + order = -1 + return order + + +class EUCJPDistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + super(EUCJPDistributionAnalysis, self).__init__() + self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER + self._table_size = JIS_TABLE_SIZE + self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, byte_str): + # for euc-JP encoding, we are interested + # first byte range: 0xa0 -- 0xfe + # second byte range: 0xa1 -- 0xfe + # no validation needed here. State machine has done that + char = byte_str[0] + if char >= 0xA0: + return 94 * (char - 0xA1) + byte_str[1] - 0xa1 + else: + return -1 diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/charsetgroupprober.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/charsetgroupprober.py new file mode 100644 index 0000000..8b3738e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/charsetgroupprober.py @@ -0,0 +1,106 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
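Editorial aside: each distribution analyser above maps a raw two-byte sequence to an integer order with simple arithmetic on the lead and trail bytes, and CharDistributionAnalysis then counts how many of those orders land in the frequent range. A short sketch using the EUC-KR analyser, assuming the vendored package is importable; the byte pair is the first hangul syllable in the EUC-KR table:

from pip._vendor.chardet.chardistribution import EUCKRDistributionAnalysis

analyser = EUCKRDistributionAnalysis()
pair = b"\xb0\xa1"                       # "가" in EUC-KR
print(analyser.get_order(pair))          # 94 * (0xB0 - 0xB0) + 0xA1 - 0xA1 == 0

analyser.feed(pair, 2)                   # counted as frequent: its rank in the EUC-KR table is below 512
print(analyser.get_confidence())         # still 0.01 (SURE_NO): not enough data yet

Once enough frequent characters have been seen, the confidence approaches freq_chars / ((total_chars - freq_chars) * typical_distribution_ratio), capped at 0.99.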
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .enums import ProbingState +from .charsetprober import CharSetProber + + +class CharSetGroupProber(CharSetProber): + def __init__(self, lang_filter=None): + super(CharSetGroupProber, self).__init__(lang_filter=lang_filter) + self._active_num = 0 + self.probers = [] + self._best_guess_prober = None + + def reset(self): + super(CharSetGroupProber, self).reset() + self._active_num = 0 + for prober in self.probers: + if prober: + prober.reset() + prober.active = True + self._active_num += 1 + self._best_guess_prober = None + + @property + def charset_name(self): + if not self._best_guess_prober: + self.get_confidence() + if not self._best_guess_prober: + return None + return self._best_guess_prober.charset_name + + @property + def language(self): + if not self._best_guess_prober: + self.get_confidence() + if not self._best_guess_prober: + return None + return self._best_guess_prober.language + + def feed(self, byte_str): + for prober in self.probers: + if not prober: + continue + if not prober.active: + continue + state = prober.feed(byte_str) + if not state: + continue + if state == ProbingState.FOUND_IT: + self._best_guess_prober = prober + return self.state + elif state == ProbingState.NOT_ME: + prober.active = False + self._active_num -= 1 + if self._active_num <= 0: + self._state = ProbingState.NOT_ME + return self.state + return self.state + + def get_confidence(self): + state = self.state + if state == ProbingState.FOUND_IT: + return 0.99 + elif state == ProbingState.NOT_ME: + return 0.01 + best_conf = 0.0 + self._best_guess_prober = None + for prober in self.probers: + if not prober: + continue + if not prober.active: + self.logger.debug('%s not active', prober.charset_name) + continue + conf = prober.get_confidence() + self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf) + if best_conf < conf: + best_conf = conf + self._best_guess_prober = prober + if not self._best_guess_prober: + return 0.0 + return best_conf diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/charsetprober.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/charsetprober.py new file mode 100644 index 0000000..eac4e59 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/charsetprober.py @@ -0,0 +1,145 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
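Editorial aside: CharSetGroupProber above fans input out to its member probers and keeps whichever reports the highest confidence. A hand-wired sketch with two probers from this diff, assuming the vendored package is importable (in the full package a group subclass such as MBCSGroupProber normally does this wiring); the sample text is arbitrary Korean:

from pip._vendor.chardet.charsetgroupprober import CharSetGroupProber
from pip._vendor.chardet.big5prober import Big5Prober
from pip._vendor.chardet.cp949prober import CP949Prober

group = CharSetGroupProber()
group.probers = [Big5Prober(), CP949Prober()]
group.reset()                                        # marks both members active

group.feed("안녕하세요. 한국어 예시입니다.".encode("euc-kr"))
print(group.charset_name, group.get_confidence())    # best guess among the members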
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import logging +import re + +from .enums import ProbingState + + +class CharSetProber(object): + + SHORTCUT_THRESHOLD = 0.95 + + def __init__(self, lang_filter=None): + self._state = None + self.lang_filter = lang_filter + self.logger = logging.getLogger(__name__) + + def reset(self): + self._state = ProbingState.DETECTING + + @property + def charset_name(self): + return None + + def feed(self, buf): + pass + + @property + def state(self): + return self._state + + def get_confidence(self): + return 0.0 + + @staticmethod + def filter_high_byte_only(buf): + buf = re.sub(b'([\x00-\x7F])+', b' ', buf) + return buf + + @staticmethod + def filter_international_words(buf): + """ + We define three types of bytes: + alphabet: english alphabets [a-zA-Z] + international: international characters [\x80-\xFF] + marker: everything else [^a-zA-Z\x80-\xFF] + + The input buffer can be thought to contain a series of words delimited + by markers. This function works to filter all words that contain at + least one international character. All contiguous sequences of markers + are replaced by a single space ascii character. + + This filter applies to all scripts which do not use English characters. + """ + filtered = bytearray() + + # This regex expression filters out only words that have at-least one + # international character. The word may include one marker character at + # the end. + words = re.findall(b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?', + buf) + + for word in words: + filtered.extend(word[:-1]) + + # If the last character in the word is a marker, replace it with a + # space as markers shouldn't affect our analysis (they are used + # similarly across all languages and may thus have similar + # frequencies). + last_char = word[-1:] + if not last_char.isalpha() and last_char < b'\x80': + last_char = b' ' + filtered.extend(last_char) + + return filtered + + @staticmethod + def filter_with_english_letters(buf): + """ + Returns a copy of ``buf`` that retains only the sequences of English + alphabet and high byte characters that are not between <> characters. + Also retains English alphabet and high byte characters immediately + before occurrences of >. + + This filter can be applied to all scripts which contain both English + characters and extended ASCII characters, but is currently only used by + ``Latin1Prober``. + """ + filtered = bytearray() + in_tag = False + prev = 0 + + for curr in range(len(buf)): + # Slice here to get bytes instead of an int with Python 3 + buf_char = buf[curr:curr + 1] + # Check if we're coming out of or entering an HTML tag + if buf_char == b'>': + in_tag = False + elif buf_char == b'<': + in_tag = True + + # If current character is not extended-ASCII and not alphabetic... + if buf_char < b'\x80' and not buf_char.isalpha(): + # ...and we're not in a tag + if curr > prev and not in_tag: + # Keep everything after last non-extended-ASCII, + # non-alphabetic character + filtered.extend(buf[prev:curr]) + # Output a space to delimit stretch we kept + filtered.extend(b' ') + prev = curr + 1 + + # If we're not in a tag... 
+ if not in_tag: + # Keep everything after last non-extended-ASCII, non-alphabetic + # character + filtered.extend(buf[prev:]) + + return filtered diff --git a/venv/lib/python3.7/site-packages/python_dateutil-2.8.1.dist-info/zip-safe b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/cli/__init__.py similarity index 100% rename from venv/lib/python3.7/site-packages/python_dateutil-2.8.1.dist-info/zip-safe rename to venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/cli/__init__.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/cli/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/cli/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..43bea3b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/cli/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/cli/__pycache__/chardetect.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/cli/__pycache__/chardetect.cpython-37.pyc new file mode 100644 index 0000000..4143335 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/cli/__pycache__/chardetect.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/cli/chardetect.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/cli/chardetect.py new file mode 100644 index 0000000..c61136b --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/cli/chardetect.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python +""" +Script which takes one or more file paths and reports on their detected +encodings + +Example:: + + % chardetect somefile someotherfile + somefile: windows-1252 with confidence 0.5 + someotherfile: ascii with confidence 1.0 + +If no paths are provided, it takes its input from stdin. + +""" + +from __future__ import absolute_import, print_function, unicode_literals + +import argparse +import sys + +from pip._vendor.chardet import __version__ +from pip._vendor.chardet.compat import PY2 +from pip._vendor.chardet.universaldetector import UniversalDetector + + +def description_of(lines, name='stdin'): + """ + Return a string describing the probable encoding of a file or + list of strings. + + :param lines: The lines to get the encoding of. + :type lines: Iterable of bytes + :param name: Name of file or collection of lines + :type name: str + """ + u = UniversalDetector() + for line in lines: + line = bytearray(line) + u.feed(line) + # shortcut out of the loop to save reading further - particularly useful if we read a BOM. + if u.done: + break + u.close() + result = u.result + if PY2: + name = name.decode(sys.getfilesystemencoding(), 'ignore') + if result['encoding']: + return '{0}: {1} with confidence {2}'.format(name, result['encoding'], + result['confidence']) + else: + return '{0}: no result'.format(name) + + +def main(argv=None): + """ + Handles command line arguments and gets things started. + + :param argv: List of arguments, as if specified on the command-line. + If None, ``sys.argv[1:]`` is used instead. + :type argv: list of str + """ + # Get command line arguments + parser = argparse.ArgumentParser( + description="Takes one or more file paths and reports their detected \ + encodings") + parser.add_argument('input', + help='File whose encoding we would like to determine. 
\ + (default: stdin)', + type=argparse.FileType('rb'), nargs='*', + default=[sys.stdin if PY2 else sys.stdin.buffer]) + parser.add_argument('--version', action='version', + version='%(prog)s {0}'.format(__version__)) + args = parser.parse_args(argv) + + for f in args.input: + if f.isatty(): + print("You are running chardetect interactively. Press " + + "CTRL-D twice at the start of a blank line to signal the " + + "end of your input. If you want help, run chardetect " + + "--help\n", file=sys.stderr) + print(description_of(f, f.name)) + + +if __name__ == '__main__': + main() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/codingstatemachine.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/codingstatemachine.py new file mode 100644 index 0000000..68fba44 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/codingstatemachine.py @@ -0,0 +1,88 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import logging + +from .enums import MachineState + + +class CodingStateMachine(object): + """ + A state machine to verify a byte sequence for a particular encoding. For + each byte the detector receives, it will feed that byte to every active + state machine available, one byte at a time. The state machine changes its + state based on its previous state and the byte it receives. There are 3 + states in a state machine that are of interest to an auto-detector: + + START state: This is the state to start with, or a legal byte sequence + (i.e. a valid code point) for character has been identified. + + ME state: This indicates that the state machine identified a byte sequence + that is specific to the charset it is designed for and that + there is no other possible encoding which can contain this byte + sequence. This will to lead to an immediate positive answer for + the detector. + + ERROR state: This indicates the state machine identified an illegal byte + sequence for that encoding. This will lead to an immediate + negative answer for this encoding. Detector will exclude this + encoding from consideration from here on. 
+ """ + def __init__(self, sm): + self._model = sm + self._curr_byte_pos = 0 + self._curr_char_len = 0 + self._curr_state = None + self.logger = logging.getLogger(__name__) + self.reset() + + def reset(self): + self._curr_state = MachineState.START + + def next_state(self, c): + # for each byte we get its class + # if it is first byte, we also get byte length + byte_class = self._model['class_table'][c] + if self._curr_state == MachineState.START: + self._curr_byte_pos = 0 + self._curr_char_len = self._model['char_len_table'][byte_class] + # from byte's class and state_table, we get its next state + curr_state = (self._curr_state * self._model['class_factor'] + + byte_class) + self._curr_state = self._model['state_table'][curr_state] + self._curr_byte_pos += 1 + return self._curr_state + + def get_current_charlen(self): + return self._curr_char_len + + def get_coding_state_machine(self): + return self._model['name'] + + @property + def language(self): + return self._model['language'] diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/compat.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/compat.py new file mode 100644 index 0000000..ddd7468 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/compat.py @@ -0,0 +1,34 @@ +######################## BEGIN LICENSE BLOCK ######################## +# Contributor(s): +# Dan Blanchard +# Ian Cordasco +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import sys + + +if sys.version_info < (3, 0): + PY2 = True + PY3 = False + base_str = (str, unicode) + text_type = unicode +else: + PY2 = False + PY3 = True + base_str = (bytes, str) + text_type = str diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/cp949prober.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/cp949prober.py new file mode 100644 index 0000000..efd793a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/cp949prober.py @@ -0,0 +1,49 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. 
+# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .chardistribution import EUCKRDistributionAnalysis +from .codingstatemachine import CodingStateMachine +from .mbcharsetprober import MultiByteCharSetProber +from .mbcssm import CP949_SM_MODEL + + +class CP949Prober(MultiByteCharSetProber): + def __init__(self): + super(CP949Prober, self).__init__() + self.coding_sm = CodingStateMachine(CP949_SM_MODEL) + # NOTE: CP949 is a superset of EUC-KR, so the distribution should be + # not different. + self.distribution_analyzer = EUCKRDistributionAnalysis() + self.reset() + + @property + def charset_name(self): + return "CP949" + + @property + def language(self): + return "Korean" diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/enums.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/enums.py new file mode 100644 index 0000000..0451207 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/enums.py @@ -0,0 +1,76 @@ +""" +All of the Enums that are used throughout the chardet package. + +:author: Dan Blanchard (dan.blanchard@gmail.com) +""" + + +class InputState(object): + """ + This enum represents the different states a universal detector can be in. + """ + PURE_ASCII = 0 + ESC_ASCII = 1 + HIGH_BYTE = 2 + + +class LanguageFilter(object): + """ + This enum represents the different language filters we can apply to a + ``UniversalDetector``. + """ + CHINESE_SIMPLIFIED = 0x01 + CHINESE_TRADITIONAL = 0x02 + JAPANESE = 0x04 + KOREAN = 0x08 + NON_CJK = 0x10 + ALL = 0x1F + CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL + CJK = CHINESE | JAPANESE | KOREAN + + +class ProbingState(object): + """ + This enum represents the different states a prober can be in. + """ + DETECTING = 0 + FOUND_IT = 1 + NOT_ME = 2 + + +class MachineState(object): + """ + This enum represents the different states a state machine can be in. + """ + START = 0 + ERROR = 1 + ITS_ME = 2 + + +class SequenceLikelihood(object): + """ + This enum represents the likelihood of a character following the previous one. + """ + NEGATIVE = 0 + UNLIKELY = 1 + LIKELY = 2 + POSITIVE = 3 + + @classmethod + def get_num_categories(cls): + """:returns: The number of likelihood categories in the enum.""" + return 4 + + +class CharacterCategory(object): + """ + This enum represents the different categories language models for + ``SingleByteCharsetProber`` put characters into. + + Anything less than CONTROL is considered a letter. + """ + UNDEFINED = 255 + LINE_BREAK = 254 + SYMBOL = 253 + DIGIT = 252 + CONTROL = 251 diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/escprober.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/escprober.py new file mode 100644 index 0000000..c70493f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/escprober.py @@ -0,0 +1,101 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. 
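Editorial aside: the LanguageFilter values defined in enums.py above are bit flags, so filters compose with | and are tested with &, which is exactly how escprober.py below decides which escape state machines to activate. A tiny self-contained check, assuming the vendored package is importable:

from pip._vendor.chardet.enums import LanguageFilter

wanted = LanguageFilter.JAPANESE | LanguageFilter.KOREAN
assert wanted & LanguageFilter.JAPANESE
assert not (wanted & LanguageFilter.CHINESE_SIMPLIFIED)
assert LanguageFilter.CJK == (LanguageFilter.CHINESE
                              | LanguageFilter.JAPANESE
                              | LanguageFilter.KOREAN)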
+# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetprober import CharSetProber +from .codingstatemachine import CodingStateMachine +from .enums import LanguageFilter, ProbingState, MachineState +from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL, + ISO2022KR_SM_MODEL) + + +class EscCharSetProber(CharSetProber): + """ + This CharSetProber uses a "code scheme" approach for detecting encodings, + whereby easily recognizable escape or shift sequences are relied on to + identify these encodings. + """ + + def __init__(self, lang_filter=None): + super(EscCharSetProber, self).__init__(lang_filter=lang_filter) + self.coding_sm = [] + if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED: + self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL)) + self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL)) + if self.lang_filter & LanguageFilter.JAPANESE: + self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL)) + if self.lang_filter & LanguageFilter.KOREAN: + self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL)) + self.active_sm_count = None + self._detected_charset = None + self._detected_language = None + self._state = None + self.reset() + + def reset(self): + super(EscCharSetProber, self).reset() + for coding_sm in self.coding_sm: + if not coding_sm: + continue + coding_sm.active = True + coding_sm.reset() + self.active_sm_count = len(self.coding_sm) + self._detected_charset = None + self._detected_language = None + + @property + def charset_name(self): + return self._detected_charset + + @property + def language(self): + return self._detected_language + + def get_confidence(self): + if self._detected_charset: + return 0.99 + else: + return 0.00 + + def feed(self, byte_str): + for c in byte_str: + for coding_sm in self.coding_sm: + if not coding_sm or not coding_sm.active: + continue + coding_state = coding_sm.next_state(c) + if coding_state == MachineState.ERROR: + coding_sm.active = False + self.active_sm_count -= 1 + if self.active_sm_count <= 0: + self._state = ProbingState.NOT_ME + return self.state + elif coding_state == MachineState.ITS_ME: + self._state = ProbingState.FOUND_IT + self._detected_charset = coding_sm.get_coding_state_machine() + self._detected_language = coding_sm.language + return self.state + + return self.state diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/escsm.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/escsm.py new file mode 100644 index 0000000..0069523 --- /dev/null +++ 
b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/escsm.py @@ -0,0 +1,246 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .enums import MachineState + +HZ_CLS = ( +1,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,0,0,0,0, # 20 - 27 +0,0,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,0,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,4,0,5,2,0, # 78 - 7f +1,1,1,1,1,1,1,1, # 80 - 87 +1,1,1,1,1,1,1,1, # 88 - 8f +1,1,1,1,1,1,1,1, # 90 - 97 +1,1,1,1,1,1,1,1, # 98 - 9f +1,1,1,1,1,1,1,1, # a0 - a7 +1,1,1,1,1,1,1,1, # a8 - af +1,1,1,1,1,1,1,1, # b0 - b7 +1,1,1,1,1,1,1,1, # b8 - bf +1,1,1,1,1,1,1,1, # c0 - c7 +1,1,1,1,1,1,1,1, # c8 - cf +1,1,1,1,1,1,1,1, # d0 - d7 +1,1,1,1,1,1,1,1, # d8 - df +1,1,1,1,1,1,1,1, # e0 - e7 +1,1,1,1,1,1,1,1, # e8 - ef +1,1,1,1,1,1,1,1, # f0 - f7 +1,1,1,1,1,1,1,1, # f8 - ff +) + +HZ_ST = ( +MachineState.START,MachineState.ERROR, 3,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07 +MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f +MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START, 4,MachineState.ERROR,# 10-17 + 5,MachineState.ERROR, 6,MachineState.ERROR, 5, 5, 4,MachineState.ERROR,# 18-1f + 4,MachineState.ERROR, 4, 4, 4,MachineState.ERROR, 4,MachineState.ERROR,# 20-27 + 4,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 28-2f +) + +HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0) + +HZ_SM_MODEL = {'class_table': HZ_CLS, + 'class_factor': 6, + 'state_table': HZ_ST, + 'char_len_table': HZ_CHAR_LEN_TABLE, + 'name': "HZ-GB-2312", + 'language': 'Chinese'} + +ISO2022CN_CLS = ( +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,0,0,0,0, # 20 - 27 +0,3,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,4,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 
+0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f +2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 +2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022CN_ST = ( +MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07 +MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f +MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17 +MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,# 18-1f +MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 20-27 + 5, 6,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 28-2f +MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 30-37 +MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,# 38-3f +) + +ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0) + +ISO2022CN_SM_MODEL = {'class_table': ISO2022CN_CLS, + 'class_factor': 9, + 'state_table': ISO2022CN_ST, + 'char_len_table': ISO2022CN_CHAR_LEN_TABLE, + 'name': "ISO-2022-CN", + 'language': 'Chinese'} + +ISO2022JP_CLS = ( +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,2,2, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,7,0,0,0, # 20 - 27 +3,0,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +6,0,4,0,8,0,0,0, # 40 - 47 +0,9,5,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f +2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 +2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022JP_ST = ( +MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07 +MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f +MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17 
+MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,# 18-1f +MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 20-27 +MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 6,MachineState.ITS_ME,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,# 28-2f +MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,# 30-37 +MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 38-3f +MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.START,# 40-47 +) + +ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + +ISO2022JP_SM_MODEL = {'class_table': ISO2022JP_CLS, + 'class_factor': 10, + 'state_table': ISO2022JP_ST, + 'char_len_table': ISO2022JP_CHAR_LEN_TABLE, + 'name': "ISO-2022-JP", + 'language': 'Japanese'} + +ISO2022KR_CLS = ( +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,3,0,0,0, # 20 - 27 +0,4,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,5,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f +2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 +2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022KR_ST = ( +MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07 +MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f +MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 10-17 +MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 18-1f +MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 20-27 +) + +ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0) + +ISO2022KR_SM_MODEL = {'class_table': ISO2022KR_CLS, + 'class_factor': 6, + 'state_table': ISO2022KR_ST, + 'char_len_table': ISO2022KR_CHAR_LEN_TABLE, + 'name': "ISO-2022-KR", + 'language': 'Korean'} + + diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/eucjpprober.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/eucjpprober.py new file mode 100644 index 0000000..20ce8f7 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/eucjpprober.py @@ -0,0 +1,92 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The 
Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .enums import ProbingState, MachineState +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import EUCJPDistributionAnalysis +from .jpcntx import EUCJPContextAnalysis +from .mbcssm import EUCJP_SM_MODEL + + +class EUCJPProber(MultiByteCharSetProber): + def __init__(self): + super(EUCJPProber, self).__init__() + self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL) + self.distribution_analyzer = EUCJPDistributionAnalysis() + self.context_analyzer = EUCJPContextAnalysis() + self.reset() + + def reset(self): + super(EUCJPProber, self).reset() + self.context_analyzer.reset() + + @property + def charset_name(self): + return "EUC-JP" + + @property + def language(self): + return "Japanese" + + def feed(self, byte_str): + for i in range(len(byte_str)): + # PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte + coding_state = self.coding_sm.next_state(byte_str[i]) + if coding_state == MachineState.ERROR: + self.logger.debug('%s %s prober hit error at byte %s', + self.charset_name, self.language, i) + self._state = ProbingState.NOT_ME + break + elif coding_state == MachineState.ITS_ME: + self._state = ProbingState.FOUND_IT + break + elif coding_state == MachineState.START: + char_len = self.coding_sm.get_current_charlen() + if i == 0: + self._last_char[1] = byte_str[0] + self.context_analyzer.feed(self._last_char, char_len) + self.distribution_analyzer.feed(self._last_char, char_len) + else: + self.context_analyzer.feed(byte_str[i - 1:i + 1], + char_len) + self.distribution_analyzer.feed(byte_str[i - 1:i + 1], + char_len) + + self._last_char[0] = byte_str[-1] + + if self.state == ProbingState.DETECTING: + if (self.context_analyzer.got_enough_data() and + (self.get_confidence() > self.SHORTCUT_THRESHOLD)): + self._state = ProbingState.FOUND_IT + + return self.state + + def get_confidence(self): + context_conf = self.context_analyzer.get_confidence() + distrib_conf = self.distribution_analyzer.get_confidence() + return max(context_conf, distrib_conf) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/euckrfreq.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/euckrfreq.py new file mode 100644 index 0000000..b68078c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/euckrfreq.py @@ -0,0 +1,195 @@ +######################## BEGIN LICENSE BLOCK 
######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# Sampling from about 20M text materials include literature and computer technology + +# 128 --> 0.79 +# 256 --> 0.92 +# 512 --> 0.986 +# 1024 --> 0.99944 +# 2048 --> 0.99999 +# +# Idea Distribution Ratio = 0.98653 / (1-0.98653) = 73.24 +# Random Distribution Ration = 512 / (2350-512) = 0.279. +# +# Typical Distribution Ratio + +EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0 + +EUCKR_TABLE_SIZE = 2352 + +# Char to FreqOrder table , +EUCKR_CHAR_TO_FREQ_ORDER = ( + 13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87, +1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398, +1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488, 20,1733,1269,1734, + 945,1400,1735, 47, 904,1270,1736,1737, 773, 248,1738, 409, 313, 786, 429,1739, + 116, 987, 813,1401, 683, 75,1204, 145,1740,1741,1742,1743, 16, 847, 667, 622, + 708,1744,1745,1746, 966, 787, 304, 129,1747, 60, 820, 123, 676,1748,1749,1750, +1751, 617,1752, 626,1753,1754,1755,1756, 653,1757,1758,1759,1760,1761,1762, 856, + 344,1763,1764,1765,1766, 89, 401, 418, 806, 905, 848,1767,1768,1769, 946,1205, + 709,1770,1118,1771, 241,1772,1773,1774,1271,1775, 569,1776, 999,1777,1778,1779, +1780, 337, 751,1058, 28, 628, 254,1781, 177, 906, 270, 349, 891,1079,1782, 19, +1783, 379,1784, 315,1785, 629, 754,1402, 559,1786, 636, 203,1206,1787, 710, 567, +1788, 935, 814,1789,1790,1207, 766, 528,1791,1792,1208,1793,1794,1795,1796,1797, +1403,1798,1799, 533,1059,1404,1405,1156,1406, 936, 884,1080,1800, 351,1801,1802, +1803,1804,1805, 801,1806,1807,1808,1119,1809,1157, 714, 474,1407,1810, 298, 899, + 885,1811,1120, 802,1158,1812, 892,1813,1814,1408, 659,1815,1816,1121,1817,1818, +1819,1820,1821,1822, 319,1823, 594, 545,1824, 815, 937,1209,1825,1826, 573,1409, +1022,1827,1210,1828,1829,1830,1831,1832,1833, 556, 722, 807,1122,1060,1834, 697, +1835, 900, 557, 715,1836,1410, 540,1411, 752,1159, 294, 597,1211, 976, 803, 770, +1412,1837,1838, 39, 794,1413, 358,1839, 371, 925,1840, 453, 661, 788, 531, 723, + 544,1023,1081, 869, 91,1841, 392, 430, 790, 602,1414, 677,1082, 457,1415,1416, +1842,1843, 475, 327,1024,1417, 795, 121,1844, 733, 403,1418,1845,1846,1847, 300, + 119, 711,1212, 627,1848,1272, 207,1849,1850, 796,1213, 382,1851, 519,1852,1083, + 893,1853,1854,1855, 367, 809, 487, 671,1856, 663,1857,1858, 956, 471, 306, 857, 
+1859,1860,1160,1084,1861,1862,1863,1864,1865,1061,1866,1867,1868,1869,1870,1871, + 282, 96, 574,1872, 502,1085,1873,1214,1874, 907,1875,1876, 827, 977,1419,1420, +1421, 268,1877,1422,1878,1879,1880, 308,1881, 2, 537,1882,1883,1215,1884,1885, + 127, 791,1886,1273,1423,1887, 34, 336, 404, 643,1888, 571, 654, 894, 840,1889, + 0, 886,1274, 122, 575, 260, 908, 938,1890,1275, 410, 316,1891,1892, 100,1893, +1894,1123, 48,1161,1124,1025,1895, 633, 901,1276,1896,1897, 115, 816,1898, 317, +1899, 694,1900, 909, 734,1424, 572, 866,1425, 691, 85, 524,1010, 543, 394, 841, +1901,1902,1903,1026,1904,1905,1906,1907,1908,1909, 30, 451, 651, 988, 310,1910, +1911,1426, 810,1216, 93,1912,1913,1277,1217,1914, 858, 759, 45, 58, 181, 610, + 269,1915,1916, 131,1062, 551, 443,1000, 821,1427, 957, 895,1086,1917,1918, 375, +1919, 359,1920, 687,1921, 822,1922, 293,1923,1924, 40, 662, 118, 692, 29, 939, + 887, 640, 482, 174,1925, 69,1162, 728,1428, 910,1926,1278,1218,1279, 386, 870, + 217, 854,1163, 823,1927,1928,1929,1930, 834,1931, 78,1932, 859,1933,1063,1934, +1935,1936,1937, 438,1164, 208, 595,1938,1939,1940,1941,1219,1125,1942, 280, 888, +1429,1430,1220,1431,1943,1944,1945,1946,1947,1280, 150, 510,1432,1948,1949,1950, +1951,1952,1953,1954,1011,1087,1955,1433,1043,1956, 881,1957, 614, 958,1064,1065, +1221,1958, 638,1001, 860, 967, 896,1434, 989, 492, 553,1281,1165,1959,1282,1002, +1283,1222,1960,1961,1962,1963, 36, 383, 228, 753, 247, 454,1964, 876, 678,1965, +1966,1284, 126, 464, 490, 835, 136, 672, 529, 940,1088,1435, 473,1967,1968, 467, + 50, 390, 227, 587, 279, 378, 598, 792, 968, 240, 151, 160, 849, 882,1126,1285, + 639,1044, 133, 140, 288, 360, 811, 563,1027, 561, 142, 523,1969,1970,1971, 7, + 103, 296, 439, 407, 506, 634, 990,1972,1973,1974,1975, 645,1976,1977,1978,1979, +1980,1981, 236,1982,1436,1983,1984,1089, 192, 828, 618, 518,1166, 333,1127,1985, + 818,1223,1986,1987,1988,1989,1990,1991,1992,1993, 342,1128,1286, 746, 842,1994, +1995, 560, 223,1287, 98, 8, 189, 650, 978,1288,1996,1437,1997, 17, 345, 250, + 423, 277, 234, 512, 226, 97, 289, 42, 167,1998, 201,1999,2000, 843, 836, 824, + 532, 338, 783,1090, 182, 576, 436,1438,1439, 527, 500,2001, 947, 889,2002,2003, +2004,2005, 262, 600, 314, 447,2006, 547,2007, 693, 738,1129,2008, 71,1440, 745, + 619, 688,2009, 829,2010,2011, 147,2012, 33, 948,2013,2014, 74, 224,2015, 61, + 191, 918, 399, 637,2016,1028,1130, 257, 902,2017,2018,2019,2020,2021,2022,2023, +2024,2025,2026, 837,2027,2028,2029,2030, 179, 874, 591, 52, 724, 246,2031,2032, +2033,2034,1167, 969,2035,1289, 630, 605, 911,1091,1168,2036,2037,2038,1441, 912, +2039, 623,2040,2041, 253,1169,1290,2042,1442, 146, 620, 611, 577, 433,2043,1224, + 719,1170, 959, 440, 437, 534, 84, 388, 480,1131, 159, 220, 198, 679,2044,1012, + 819,1066,1443, 113,1225, 194, 318,1003,1029,2045,2046,2047,2048,1067,2049,2050, +2051,2052,2053, 59, 913, 112,2054, 632,2055, 455, 144, 739,1291,2056, 273, 681, + 499,2057, 448,2058,2059, 760,2060,2061, 970, 384, 169, 245,1132,2062,2063, 414, +1444,2064,2065, 41, 235,2066, 157, 252, 877, 568, 919, 789, 580,2067, 725,2068, +2069,1292,2070,2071,1445,2072,1446,2073,2074, 55, 588, 66,1447, 271,1092,2075, +1226,2076, 960,1013, 372,2077,2078,2079,2080,2081,1293,2082,2083,2084,2085, 850, +2086,2087,2088,2089,2090, 186,2091,1068, 180,2092,2093,2094, 109,1227, 522, 606, +2095, 867,1448,1093, 991,1171, 926, 353,1133,2096, 581,2097,2098,2099,1294,1449, +1450,2100, 596,1172,1014,1228,2101,1451,1295,1173,1229,2102,2103,1296,1134,1452, + 
949,1135,2104,2105,1094,1453,1454,1455,2106,1095,2107,2108,2109,2110,2111,2112, +2113,2114,2115,2116,2117, 804,2118,2119,1230,1231, 805,1456, 405,1136,2120,2121, +2122,2123,2124, 720, 701,1297, 992,1457, 927,1004,2125,2126,2127,2128,2129,2130, + 22, 417,2131, 303,2132, 385,2133, 971, 520, 513,2134,1174, 73,1096, 231, 274, + 962,1458, 673,2135,1459,2136, 152,1137,2137,2138,2139,2140,1005,1138,1460,1139, +2141,2142,2143,2144, 11, 374, 844,2145, 154,1232, 46,1461,2146, 838, 830, 721, +1233, 106,2147, 90, 428, 462, 578, 566,1175, 352,2148,2149, 538,1234, 124,1298, +2150,1462, 761, 565,2151, 686,2152, 649,2153, 72, 173,2154, 460, 415,2155,1463, +2156,1235, 305,2157,2158,2159,2160,2161,2162, 579,2163,2164,2165,2166,2167, 747, +2168,2169,2170,2171,1464, 669,2172,2173,2174,2175,2176,1465,2177, 23, 530, 285, +2178, 335, 729,2179, 397,2180,2181,2182,1030,2183,2184, 698,2185,2186, 325,2187, +2188, 369,2189, 799,1097,1015, 348,2190,1069, 680,2191, 851,1466,2192,2193, 10, +2194, 613, 424,2195, 979, 108, 449, 589, 27, 172, 81,1031, 80, 774, 281, 350, +1032, 525, 301, 582,1176,2196, 674,1045,2197,2198,1467, 730, 762,2199,2200,2201, +2202,1468,2203, 993,2204,2205, 266,1070, 963,1140,2206,2207,2208, 664,1098, 972, +2209,2210,2211,1177,1469,1470, 871,2212,2213,2214,2215,2216,1471,2217,2218,2219, +2220,2221,2222,2223,2224,2225,2226,2227,1472,1236,2228,2229,2230,2231,2232,2233, +2234,2235,1299,2236,2237, 200,2238, 477, 373,2239,2240, 731, 825, 777,2241,2242, +2243, 521, 486, 548,2244,2245,2246,1473,1300, 53, 549, 137, 875, 76, 158,2247, +1301,1474, 469, 396,1016, 278, 712,2248, 321, 442, 503, 767, 744, 941,1237,1178, +1475,2249, 82, 178,1141,1179, 973,2250,1302,2251, 297,2252,2253, 570,2254,2255, +2256, 18, 450, 206,2257, 290, 292,1142,2258, 511, 162, 99, 346, 164, 735,2259, +1476,1477, 4, 554, 343, 798,1099,2260,1100,2261, 43, 171,1303, 139, 215,2262, +2263, 717, 775,2264,1033, 322, 216,2265, 831,2266, 149,2267,1304,2268,2269, 702, +1238, 135, 845, 347, 309,2270, 484,2271, 878, 655, 238,1006,1478,2272, 67,2273, + 295,2274,2275, 461,2276, 478, 942, 412,2277,1034,2278,2279,2280, 265,2281, 541, +2282,2283,2284,2285,2286, 70, 852,1071,2287,2288,2289,2290, 21, 56, 509, 117, + 432,2291,2292, 331, 980, 552,1101, 148, 284, 105, 393,1180,1239, 755,2293, 187, +2294,1046,1479,2295, 340,2296, 63,1047, 230,2297,2298,1305, 763,1306, 101, 800, + 808, 494,2299,2300,2301, 903,2302, 37,1072, 14, 5,2303, 79, 675,2304, 312, +2305,2306,2307,2308,2309,1480, 6,1307,2310,2311,2312, 1, 470, 35, 24, 229, +2313, 695, 210, 86, 778, 15, 784, 592, 779, 32, 77, 855, 964,2314, 259,2315, + 501, 380,2316,2317, 83, 981, 153, 689,1308,1481,1482,1483,2318,2319, 716,1484, +2320,2321,2322,2323,2324,2325,1485,2326,2327, 128, 57, 68, 261,1048, 211, 170, +1240, 31,2328, 51, 435, 742,2329,2330,2331, 635,2332, 264, 456,2333,2334,2335, + 425,2336,1486, 143, 507, 263, 943,2337, 363, 920,1487, 256,1488,1102, 243, 601, +1489,2338,2339,2340,2341,2342,2343,2344, 861,2345,2346,2347,2348,2349,2350, 395, +2351,1490,1491, 62, 535, 166, 225,2352,2353, 668, 419,1241, 138, 604, 928,2354, +1181,2355,1492,1493,2356,2357,2358,1143,2359, 696,2360, 387, 307,1309, 682, 476, +2361,2362, 332, 12, 222, 156,2363, 232,2364, 641, 276, 656, 517,1494,1495,1035, + 416, 736,1496,2365,1017, 586,2366,2367,2368,1497,2369, 242,2370,2371,2372,1498, +2373, 965, 713,2374,2375,2376,2377, 740, 982,1499, 944,1500,1007,2378,2379,1310, +1501,2380,2381,2382, 785, 329,2383,2384,1502,2385,2386,2387, 932,2388,1503,2389, +2390,2391,2392,1242,2393,2394,2395,2396,2397, 994, 
950,2398,2399,2400,2401,1504, +1311,2402,2403,2404,2405,1049, 749,2406,2407, 853, 718,1144,1312,2408,1182,1505, +2409,2410, 255, 516, 479, 564, 550, 214,1506,1507,1313, 413, 239, 444, 339,1145, +1036,1508,1509,1314,1037,1510,1315,2411,1511,2412,2413,2414, 176, 703, 497, 624, + 593, 921, 302,2415, 341, 165,1103,1512,2416,1513,2417,2418,2419, 376,2420, 700, +2421,2422,2423, 258, 768,1316,2424,1183,2425, 995, 608,2426,2427,2428,2429, 221, +2430,2431,2432,2433,2434,2435,2436,2437, 195, 323, 726, 188, 897, 983,1317, 377, + 644,1050, 879,2438, 452,2439,2440,2441,2442,2443,2444, 914,2445,2446,2447,2448, + 915, 489,2449,1514,1184,2450,2451, 515, 64, 427, 495,2452, 583,2453, 483, 485, +1038, 562, 213,1515, 748, 666,2454,2455,2456,2457, 334,2458, 780, 996,1008, 705, +1243,2459,2460,2461,2462,2463, 114,2464, 493,1146, 366, 163,1516, 961,1104,2465, + 291,2466,1318,1105,2467,1517, 365,2468, 355, 951,1244,2469,1319,2470, 631,2471, +2472, 218,1320, 364, 320, 756,1518,1519,1321,1520,1322,2473,2474,2475,2476, 997, +2477,2478,2479,2480, 665,1185,2481, 916,1521,2482,2483,2484, 584, 684,2485,2486, + 797,2487,1051,1186,2488,2489,2490,1522,2491,2492, 370,2493,1039,1187, 65,2494, + 434, 205, 463,1188,2495, 125, 812, 391, 402, 826, 699, 286, 398, 155, 781, 771, + 585,2496, 590, 505,1073,2497, 599, 244, 219, 917,1018, 952, 646,1523,2498,1323, +2499,2500, 49, 984, 354, 741,2501, 625,2502,1324,2503,1019, 190, 357, 757, 491, + 95, 782, 868,2504,2505,2506,2507,2508,2509, 134,1524,1074, 422,1525, 898,2510, + 161,2511,2512,2513,2514, 769,2515,1526,2516,2517, 411,1325,2518, 472,1527,2519, +2520,2521,2522,2523,2524, 985,2525,2526,2527,2528,2529,2530, 764,2531,1245,2532, +2533, 25, 204, 311,2534, 496,2535,1052,2536,2537,2538,2539,2540,2541,2542, 199, + 704, 504, 468, 758, 657,1528, 196, 44, 839,1246, 272, 750,2543, 765, 862,2544, +2545,1326,2546, 132, 615, 933,2547, 732,2548,2549,2550,1189,1529,2551, 283,1247, +1053, 607, 929,2552,2553,2554, 930, 183, 872, 616,1040,1147,2555,1148,1020, 441, + 249,1075,2556,2557,2558, 466, 743,2559,2560,2561, 92, 514, 426, 420, 526,2562, +2563,2564,2565,2566,2567,2568, 185,2569,2570,2571,2572, 776,1530, 658,2573, 362, +2574, 361, 922,1076, 793,2575,2576,2577,2578,2579,2580,1531, 251,2581,2582,2583, +2584,1532, 54, 612, 237,1327,2585,2586, 275, 408, 647, 111,2587,1533,1106, 465, + 3, 458, 9, 38,2588, 107, 110, 890, 209, 26, 737, 498,2589,1534,2590, 431, + 202, 88,1535, 356, 287,1107, 660,1149,2591, 381,1536, 986,1150, 445,1248,1151, + 974,2592,2593, 846,2594, 446, 953, 184,1249,1250, 727,2595, 923, 193, 883,2596, +2597,2598, 102, 324, 539, 817,2599, 421,1041,2600, 832,2601, 94, 175, 197, 406, +2602, 459,2603,2604,2605,2606,2607, 330, 555,2608,2609,2610, 706,1108, 389,2611, +2612,2613,2614, 233,2615, 833, 558, 931, 954,1251,2616,2617,1537, 546,2618,2619, +1009,2620,2621,2622,1538, 690,1328,2623, 955,2624,1539,2625,2626, 772,2627,2628, +2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042, + 670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256 +) + diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/euckrprober.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/euckrprober.py new file mode 100644 index 0000000..345a060 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/euckrprober.py @@ -0,0 +1,47 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. 
+# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import EUCKRDistributionAnalysis +from .mbcssm import EUCKR_SM_MODEL + + +class EUCKRProber(MultiByteCharSetProber): + def __init__(self): + super(EUCKRProber, self).__init__() + self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL) + self.distribution_analyzer = EUCKRDistributionAnalysis() + self.reset() + + @property + def charset_name(self): + return "EUC-KR" + + @property + def language(self): + return "Korean" diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/euctwfreq.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/euctwfreq.py new file mode 100644 index 0000000..ed7a995 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/euctwfreq.py @@ -0,0 +1,387 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# EUCTW frequency table +# Converted from big5 work +# by Taiwan's Mandarin Promotion Council +# + +# 128 --> 0.42261 +# 256 --> 0.57851 +# 512 --> 0.74851 +# 1024 --> 0.89384 +# 2048 --> 0.97583 +# +# Idea Distribution Ratio = 0.74851/(1-0.74851) =2.98 +# Random Distribution Ration = 512/(5401-512)=0.105 +# +# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR + +EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75 + +# Char to FreqOrder table , +EUCTW_TABLE_SIZE = 5376 + +EUCTW_CHAR_TO_FREQ_ORDER = ( + 1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742 +3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758 +1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774 + 63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790 +3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806 +4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822 +7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838 + 630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854 + 179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870 + 995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886 +2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902 +1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918 +3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934 + 706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950 +1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966 +3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982 +2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998 + 437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014 +3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030 +1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046 +7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062 + 266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078 +7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094 +1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110 + 32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126 + 188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142 +3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158 +3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174 + 324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190 +2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206 +2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222 + 314,2615,2775,4308,2330,2331, 
569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238 + 287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254 +3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270 +1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286 +1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302 +1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318 +2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334 + 265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350 +4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366 +1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382 +7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398 +2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414 + 383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430 + 98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446 + 523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462 + 710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478 +7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494 + 379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510 +1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526 + 585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542 + 690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558 +7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574 +1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590 + 544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606 +3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622 +4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638 +3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654 + 279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670 + 610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686 +1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702 +4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718 +3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734 +3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750 +2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766 +7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782 +3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798 +7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814 +1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830 +2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846 +1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862 + 78,3750,3751, 
267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878 +1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894 +4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910 +3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926 + 534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942 + 165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958 + 626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974 +2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990 +7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006 +1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022 +2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038 +1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054 +1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070 +7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086 +7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102 +7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118 +3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134 +4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150 +1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166 +7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182 +2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198 +7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214 +3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230 +3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246 +7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262 +2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278 +7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294 + 862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310 +4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326 +2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342 +7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358 +3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374 +2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390 +2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406 + 294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422 +2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438 +1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454 +1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470 +2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486 +1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502 +7505,3129,3261, 
215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518 +7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534 +2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550 +4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566 +1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582 +7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598 + 829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614 +4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630 + 375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646 +2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662 + 444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678 +1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694 +1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710 + 730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726 +3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742 +3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758 +1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774 +3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790 +7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806 +7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822 +1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838 +2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854 +1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870 +3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886 +2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902 +3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918 +2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934 +4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950 +4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966 +3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982 + 97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998 +3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014 + 424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030 +3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046 +3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062 +3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078 +1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094 +7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110 + 199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126 +7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142 +1702,1226, 
102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158 + 391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174 +4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190 +3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206 + 397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222 +2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238 +2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254 +3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270 +1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286 +4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302 +2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318 +1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334 +1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350 +2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366 +3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382 +1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398 +7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414 +1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430 +4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446 +1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462 + 135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478 +1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494 +3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510 +3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526 +2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542 +1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558 +4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574 + 660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590 +7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606 +2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622 +3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638 +4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654 + 790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670 +7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686 +7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702 +1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718 +4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734 +3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750 +2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766 +3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782 
+3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798 +2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814 +1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830 +4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846 +3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862 +3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878 +2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894 +4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910 +7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926 +3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942 +2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958 +3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974 +1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990 +2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006 +3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022 +4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038 +2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054 +2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070 +7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086 +1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102 +2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118 +1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134 +3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150 +4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166 +2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182 +3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198 +3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214 +2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230 +4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246 +2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262 +3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278 +4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294 +7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310 +3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326 + 194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342 +1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358 +4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374 +1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390 +4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406 +7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422 + 
510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438 +7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454 +2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470 +1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486 +1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502 +3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518 + 509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534 + 552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550 + 478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566 +3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582 +2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598 + 751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614 +7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630 +1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646 +3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662 +7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678 +1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694 +7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710 +4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726 +1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742 +2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758 +2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774 +4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790 + 802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806 + 809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822 +3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838 +3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854 +1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870 +2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886 +7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902 +1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918 +1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934 +3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950 + 919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966 +1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982 +4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998 +7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014 +2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030 +3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046 + 516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 
7062 +1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078 +2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094 +2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110 +7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126 +7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142 +7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158 +2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174 +2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190 +1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206 +4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222 +3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238 +3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254 +4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270 +4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286 +2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302 +2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318 +7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334 +4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350 +7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366 +2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382 +1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398 +3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414 +4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430 +2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446 + 120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462 +2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478 +1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494 +2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510 +2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526 +4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542 +7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558 +1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574 +3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590 +7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606 +1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622 +8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638 +2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654 +8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670 +2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686 +2328,3852, 533,4273,3605,2181, 
617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702 +8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718 +8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734 +8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750 + 408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766 +8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782 +4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798 +3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814 +8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830 +1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846 +8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862 + 425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878 +1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894 + 479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910 +4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926 +1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942 +4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958 +1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974 + 433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990 +3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006 +4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022 +8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038 + 938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054 +3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070 + 890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086 +2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102 +) + diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/euctwprober.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/euctwprober.py new file mode 100644 index 0000000..35669cc --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/euctwprober.py @@ -0,0 +1,46 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import EUCTWDistributionAnalysis +from .mbcssm import EUCTW_SM_MODEL + +class EUCTWProber(MultiByteCharSetProber): + def __init__(self): + super(EUCTWProber, self).__init__() + self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL) + self.distribution_analyzer = EUCTWDistributionAnalysis() + self.reset() + + @property + def charset_name(self): + return "EUC-TW" + + @property + def language(self): + return "Taiwan" diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/gb2312freq.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/gb2312freq.py new file mode 100644 index 0000000..697837b --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/gb2312freq.py @@ -0,0 +1,283 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# GB2312 most frequently used character table +# +# Char to FreqOrder table , from hz6763 + +# 512 --> 0.79 -- 0.79 +# 1024 --> 0.92 -- 0.13 +# 2048 --> 0.98 -- 0.06 +# 6768 --> 1.00 -- 0.02 +# +# Ideal Distribution Ratio = 0.79135/(1-0.79135) = 3.79 +# Random Distribution Ration = 512 / (3755 - 512) = 0.157 +# +# Typical Distribution Ratio about 25% of Ideal one, still much higher that RDR + +GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9 + +GB2312_TABLE_SIZE = 3760 + +GB2312_CHAR_TO_FREQ_ORDER = ( +1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205, +2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842, +2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409, + 249,4088,1746,1873,2047,1774, 581,1813, 358,1174,3590,1014,1561,4844,2245, 670, +1636,3112, 889,1286, 953, 556,2327,3060,1290,3141, 613, 185,3477,1367, 850,3820, +1715,2428,2642,2303,2732,3041,2562,2648,3566,3946,1349, 388,3098,2091,1360,3585, + 152,1687,1539, 738,1559, 59,1232,2925,2267,1388,1249,1741,1679,2960, 151,1566, +1125,1352,4271, 924,4296, 385,3166,4459, 310,1245,2850, 70,3285,2729,3534,3575, +2398,3298,3466,1960,2265, 217,3647, 864,1909,2084,4401,2773,1010,3269,5152, 853, +3051,3121,1244,4251,1895, 364,1499,1540,2313,1180,3655,2268, 562, 715,2417,3061, + 544, 336,3768,2380,1752,4075, 950, 280,2425,4382, 183,2759,3272, 333,4297,2155, +1688,2356,1444,1039,4540, 736,1177,3349,2443,2368,2144,2225, 565, 196,1482,3406, + 927,1335,4147, 692, 878,1311,1653,3911,3622,1378,4200,1840,2969,3149,2126,1816, +2534,1546,2393,2760, 737,2494, 13, 447, 245,2747, 38,2765,2129,2589,1079, 606, + 360, 471,3755,2890, 404, 848, 699,1785,1236, 370,2221,1023,3746,2074,2026,2023, +2388,1581,2119, 812,1141,3091,2536,1519, 804,2053, 406,1596,1090, 784, 548,4414, +1806,2264,2936,1100, 343,4114,5096, 622,3358, 743,3668,1510,1626,5020,3567,2513, +3195,4115,5627,2489,2991, 24,2065,2697,1087,2719, 48,1634, 315, 68, 985,2052, + 198,2239,1347,1107,1439, 597,2366,2172, 871,3307, 919,2487,2790,1867, 236,2570, +1413,3794, 906,3365,3381,1701,1982,1818,1524,2924,1205, 616,2586,2072,2004, 575, + 253,3099, 32,1365,1182, 197,1714,2454,1201, 554,3388,3224,2748, 756,2587, 250, +2567,1507,1517,3529,1922,2761,2337,3416,1961,1677,2452,2238,3153, 615, 911,1506, +1474,2495,1265,1906,2749,3756,3280,2161, 898,2714,1759,3450,2243,2444, 563, 26, +3286,2266,3769,3344,2707,3677, 611,1402, 531,1028,2871,4548,1375, 261,2948, 835, +1190,4134, 353, 840,2684,1900,3082,1435,2109,1207,1674, 329,1872,2781,4055,2686, +2104, 608,3318,2423,2957,2768,1108,3739,3512,3271,3985,2203,1771,3520,1418,2054, +1681,1153, 225,1627,2929, 162,2050,2511,3687,1954, 124,1859,2431,1684,3032,2894, + 585,4805,3969,2869,2704,2088,2032,2095,3656,2635,4362,2209, 256, 518,2042,2105, +3777,3657, 643,2298,1148,1779, 190, 989,3544, 414, 11,2135,2063,2979,1471, 403, +3678, 126, 770,1563, 671,2499,3216,2877, 600,1179, 307,2805,4937,1268,1297,2694, + 252,4032,1448,1494,1331,1394, 127,2256, 222,1647,1035,1481,3056,1915,1048, 873, +3651, 210, 33,1608,2516, 200,1520, 415, 102, 0,3389,1287, 817, 91,3299,2940, + 836,1814, 549,2197,1396,1669,2987,3582,2297,2848,4528,1070, 687, 20,1819, 121, +1552,1364,1461,1968,2617,3540,2824,2083, 177, 948,4938,2291, 
110,4549,2066, 648, +3359,1755,2110,2114,4642,4845,1693,3937,3308,1257,1869,2123, 208,1804,3159,2992, +2531,2549,3361,2418,1350,2347,2800,2568,1291,2036,2680, 72, 842,1990, 212,1233, +1154,1586, 75,2027,3410,4900,1823,1337,2710,2676, 728,2810,1522,3026,4995, 157, + 755,1050,4022, 710, 785,1936,2194,2085,1406,2777,2400, 150,1250,4049,1206, 807, +1910, 534, 529,3309,1721,1660, 274, 39,2827, 661,2670,1578, 925,3248,3815,1094, +4278,4901,4252, 41,1150,3747,2572,2227,4501,3658,4902,3813,3357,3617,2884,2258, + 887, 538,4187,3199,1294,2439,3042,2329,2343,2497,1255, 107, 543,1527, 521,3478, +3568, 194,5062, 15, 961,3870,1241,1192,2664, 66,5215,3260,2111,1295,1127,2152, +3805,4135, 901,1164,1976, 398,1278, 530,1460, 748, 904,1054,1966,1426, 53,2909, + 509, 523,2279,1534, 536,1019, 239,1685, 460,2353, 673,1065,2401,3600,4298,2272, +1272,2363, 284,1753,3679,4064,1695, 81, 815,2677,2757,2731,1386, 859, 500,4221, +2190,2566, 757,1006,2519,2068,1166,1455, 337,2654,3203,1863,1682,1914,3025,1252, +1409,1366, 847, 714,2834,2038,3209, 964,2970,1901, 885,2553,1078,1756,3049, 301, +1572,3326, 688,2130,1996,2429,1805,1648,2930,3421,2750,3652,3088, 262,1158,1254, + 389,1641,1812, 526,1719, 923,2073,1073,1902, 468, 489,4625,1140, 857,2375,3070, +3319,2863, 380, 116,1328,2693,1161,2244, 273,1212,1884,2769,3011,1775,1142, 461, +3066,1200,2147,2212, 790, 702,2695,4222,1601,1058, 434,2338,5153,3640, 67,2360, +4099,2502, 618,3472,1329, 416,1132, 830,2782,1807,2653,3211,3510,1662, 192,2124, + 296,3979,1739,1611,3684, 23, 118, 324, 446,1239,1225, 293,2520,3814,3795,2535, +3116, 17,1074, 467,2692,2201, 387,2922, 45,1326,3055,1645,3659,2817, 958, 243, +1903,2320,1339,2825,1784,3289, 356, 576, 865,2315,2381,3377,3916,1088,3122,1713, +1655, 935, 628,4689,1034,1327, 441, 800, 720, 894,1979,2183,1528,5289,2702,1071, +4046,3572,2399,1571,3281, 79, 761,1103, 327, 134, 758,1899,1371,1615, 879, 442, + 215,2605,2579, 173,2048,2485,1057,2975,3317,1097,2253,3801,4263,1403,1650,2946, + 814,4968,3487,1548,2644,1567,1285, 2, 295,2636, 97, 946,3576, 832, 141,4257, +3273, 760,3821,3521,3156,2607, 949,1024,1733,1516,1803,1920,2125,2283,2665,3180, +1501,2064,3560,2171,1592, 803,3518,1416, 732,3897,4258,1363,1362,2458, 119,1427, + 602,1525,2608,1605,1639,3175, 694,3064, 10, 465, 76,2000,4846,4208, 444,3781, +1619,3353,2206,1273,3796, 740,2483, 320,1723,2377,3660,2619,1359,1137,1762,1724, +2345,2842,1850,1862, 912, 821,1866, 612,2625,1735,2573,3369,1093, 844, 89, 937, + 930,1424,3564,2413,2972,1004,3046,3019,2011, 711,3171,1452,4178, 428, 801,1943, + 432, 445,2811, 206,4136,1472, 730, 349, 73, 397,2802,2547, 998,1637,1167, 789, + 396,3217, 154,1218, 716,1120,1780,2819,4826,1931,3334,3762,2139,1215,2627, 552, +3664,3628,3232,1405,2383,3111,1356,2652,3577,3320,3101,1703, 640,1045,1370,1246, +4996, 371,1575,2436,1621,2210, 984,4033,1734,2638, 16,4529, 663,2755,3255,1451, +3917,2257,1253,1955,2234,1263,2951, 214,1229, 617, 485, 359,1831,1969, 473,2310, + 750,2058, 165, 80,2864,2419, 361,4344,2416,2479,1134, 796,3726,1266,2943, 860, +2715, 938, 390,2734,1313,1384, 248, 202, 877,1064,2854, 522,3907, 279,1602, 297, +2357, 395,3740, 137,2075, 944,4089,2584,1267,3802, 62,1533,2285, 178, 176, 780, +2440, 201,3707, 590, 478,1560,4354,2117,1075, 30, 74,4643,4004,1635,1441,2745, + 776,2596, 238,1077,1692,1912,2844, 605, 499,1742,3947, 241,3053, 980,1749, 936, +2640,4511,2582, 515,1543,2162,5322,2892,2993, 890,2148,1924, 665,1827,3581,1032, + 968,3163, 339,1044,1896, 270, 583,1791,1720,4367,1194,3488,3669, 43,2523,1657, + 163,2167, 290,1209,1622,3378, 
550, 634,2508,2510, 695,2634,2384,2512,1476,1414, + 220,1469,2341,2138,2852,3183,2900,4939,2865,3502,1211,3680, 854,3227,1299,2976, +3172, 186,2998,1459, 443,1067,3251,1495, 321,1932,3054, 909, 753,1410,1828, 436, +2441,1119,1587,3164,2186,1258, 227, 231,1425,1890,3200,3942, 247, 959, 725,5254, +2741, 577,2158,2079, 929, 120, 174, 838,2813, 591,1115, 417,2024, 40,3240,1536, +1037, 291,4151,2354, 632,1298,2406,2500,3535,1825,1846,3451, 205,1171, 345,4238, + 18,1163, 811, 685,2208,1217, 425,1312,1508,1175,4308,2552,1033, 587,1381,3059, +2984,3482, 340,1316,4023,3972, 792,3176, 519, 777,4690, 918, 933,4130,2981,3741, + 90,3360,2911,2200,5184,4550, 609,3079,2030, 272,3379,2736, 363,3881,1130,1447, + 286, 779, 357,1169,3350,3137,1630,1220,2687,2391, 747,1277,3688,2618,2682,2601, +1156,3196,5290,4034,3102,1689,3596,3128, 874, 219,2783, 798, 508,1843,2461, 269, +1658,1776,1392,1913,2983,3287,2866,2159,2372, 829,4076, 46,4253,2873,1889,1894, + 915,1834,1631,2181,2318, 298, 664,2818,3555,2735, 954,3228,3117, 527,3511,2173, + 681,2712,3033,2247,2346,3467,1652, 155,2164,3382, 113,1994, 450, 899, 494, 994, +1237,2958,1875,2336,1926,3727, 545,1577,1550, 633,3473, 204,1305,3072,2410,1956, +2471, 707,2134, 841,2195,2196,2663,3843,1026,4940, 990,3252,4997, 368,1092, 437, +3212,3258,1933,1829, 675,2977,2893, 412, 943,3723,4644,3294,3283,2230,2373,5154, +2389,2241,2661,2323,1404,2524, 593, 787, 677,3008,1275,2059, 438,2709,2609,2240, +2269,2246,1446, 36,1568,1373,3892,1574,2301,1456,3962, 693,2276,5216,2035,1143, +2720,1919,1797,1811,2763,4137,2597,1830,1699,1488,1198,2090, 424,1694, 312,3634, +3390,4179,3335,2252,1214, 561,1059,3243,2295,2561, 975,5155,2321,2751,3772, 472, +1537,3282,3398,1047,2077,2348,2878,1323,3340,3076, 690,2906, 51, 369, 170,3541, +1060,2187,2688,3670,2541,1083,1683, 928,3918, 459, 109,4427, 599,3744,4286, 143, +2101,2730,2490, 82,1588,3036,2121, 281,1860, 477,4035,1238,2812,3020,2716,3312, +1530,2188,2055,1317, 843, 636,1808,1173,3495, 649, 181,1002, 147,3641,1159,2414, +3750,2289,2795, 813,3123,2610,1136,4368, 5,3391,4541,2174, 420, 429,1728, 754, +1228,2115,2219, 347,2223,2733, 735,1518,3003,2355,3134,1764,3948,3329,1888,2424, +1001,1234,1972,3321,3363,1672,1021,1450,1584, 226, 765, 655,2526,3404,3244,2302, +3665, 731, 594,2184, 319,1576, 621, 658,2656,4299,2099,3864,1279,2071,2598,2739, + 795,3086,3699,3908,1707,2352,2402,1382,3136,2475,1465,4847,3496,3865,1085,3004, +2591,1084, 213,2287,1963,3565,2250, 822, 793,4574,3187,1772,1789,3050, 595,1484, +1959,2770,1080,2650, 456, 422,2996, 940,3322,4328,4345,3092,2742, 965,2784, 739, +4124, 952,1358,2498,2949,2565, 332,2698,2378, 660,2260,2473,4194,3856,2919, 535, +1260,2651,1208,1428,1300,1949,1303,2942, 433,2455,2450,1251,1946, 614,1269, 641, +1306,1810,2737,3078,2912, 564,2365,1419,1415,1497,4460,2367,2185,1379,3005,1307, +3218,2175,1897,3063, 682,1157,4040,4005,1712,1160,1941,1399, 394, 402,2952,1573, +1151,2986,2404, 862, 299,2033,1489,3006, 346, 171,2886,3401,1726,2932, 168,2533, + 47,2507,1030,3735,1145,3370,1395,1318,1579,3609,4560,2857,4116,1457,2529,1965, + 504,1036,2690,2988,2405, 745,5871, 849,2397,2056,3081, 863,2359,3857,2096, 99, +1397,1769,2300,4428,1643,3455,1978,1757,3718,1440, 35,4879,3742,1296,4228,2280, + 160,5063,1599,2013, 166, 520,3479,1646,3345,3012, 490,1937,1545,1264,2182,2505, +1096,1188,1369,1436,2421,1667,2792,2460,1270,2122, 727,3167,2143, 806,1706,1012, +1800,3037, 960,2218,1882, 805, 139,2456,1139,1521, 851,1052,3093,3089, 342,2039, + 744,5097,1468,1502,1585,2087, 223, 939, 326,2140,2577, 
892,2481,1623,4077, 982, +3708, 135,2131, 87,2503,3114,2326,1106, 876,1616, 547,2997,2831,2093,3441,4530, +4314, 9,3256,4229,4148, 659,1462,1986,1710,2046,2913,2231,4090,4880,5255,3392, +3274,1368,3689,4645,1477, 705,3384,3635,1068,1529,2941,1458,3782,1509, 100,1656, +2548, 718,2339, 408,1590,2780,3548,1838,4117,3719,1345,3530, 717,3442,2778,3220, +2898,1892,4590,3614,3371,2043,1998,1224,3483, 891, 635, 584,2559,3355, 733,1766, +1729,1172,3789,1891,2307, 781,2982,2271,1957,1580,5773,2633,2005,4195,3097,1535, +3213,1189,1934,5693,3262, 586,3118,1324,1598, 517,1564,2217,1868,1893,4445,3728, +2703,3139,1526,1787,1992,3882,2875,1549,1199,1056,2224,1904,2711,5098,4287, 338, +1993,3129,3489,2689,1809,2815,1997, 957,1855,3898,2550,3275,3057,1105,1319, 627, +1505,1911,1883,3526, 698,3629,3456,1833,1431, 746, 77,1261,2017,2296,1977,1885, + 125,1334,1600, 525,1798,1109,2222,1470,1945, 559,2236,1186,3443,2476,1929,1411, +2411,3135,1777,3372,2621,1841,1613,3229, 668,1430,1839,2643,2916, 195,1989,2671, +2358,1387, 629,3205,2293,5256,4439, 123,1310, 888,1879,4300,3021,3605,1003,1162, +3192,2910,2010, 140,2395,2859, 55,1082,2012,2901, 662, 419,2081,1438, 680,2774, +4654,3912,1620,1731,1625,5035,4065,2328, 512,1344, 802,5443,2163,2311,2537, 524, +3399, 98,1155,2103,1918,2606,3925,2816,1393,2465,1504,3773,2177,3963,1478,4346, + 180,1113,4655,3461,2028,1698, 833,2696,1235,1322,1594,4408,3623,3013,3225,2040, +3022, 541,2881, 607,3632,2029,1665,1219, 639,1385,1686,1099,2803,3231,1938,3188, +2858, 427, 676,2772,1168,2025, 454,3253,2486,3556, 230,1950, 580, 791,1991,1280, +1086,1974,2034, 630, 257,3338,2788,4903,1017, 86,4790, 966,2789,1995,1696,1131, + 259,3095,4188,1308, 179,1463,5257, 289,4107,1248, 42,3413,1725,2288, 896,1947, + 774,4474,4254, 604,3430,4264, 392,2514,2588, 452, 237,1408,3018, 988,4531,1970, +3034,3310, 540,2370,1562,1288,2990, 502,4765,1147, 4,1853,2708, 207, 294,2814, +4078,2902,2509, 684, 34,3105,3532,2551, 644, 709,2801,2344, 573,1727,3573,3557, +2021,1081,3100,4315,2100,3681, 199,2263,1837,2385, 146,3484,1195,2776,3949, 997, +1939,3973,1008,1091,1202,1962,1847,1149,4209,5444,1076, 493, 117,5400,2521, 972, +1490,2934,1796,4542,2374,1512,2933,2657, 413,2888,1135,2762,2314,2156,1355,2369, + 766,2007,2527,2170,3124,2491,2593,2632,4757,2437, 234,3125,3591,1898,1750,1376, +1942,3468,3138, 570,2127,2145,3276,4131, 962, 132,1445,4196, 19, 941,3624,3480, +3366,1973,1374,4461,3431,2629, 283,2415,2275, 808,2887,3620,2112,2563,1353,3610, + 955,1089,3103,1053, 96, 88,4097, 823,3808,1583, 399, 292,4091,3313, 421,1128, + 642,4006, 903,2539,1877,2082, 596, 29,4066,1790, 722,2157, 130, 995,1569, 769, +1485, 464, 513,2213, 288,1923,1101,2453,4316, 133, 486,2445, 50, 625, 487,2207, + 57, 423, 481,2962, 159,3729,1558, 491, 303, 482, 501, 240,2837, 112,3648,2392, +1783, 362, 8,3433,3422, 610,2793,3277,1390,1284,1654, 21,3823, 734, 367, 623, + 193, 287, 374,1009,1483, 816, 476, 313,2255,2340,1262,2150,2899,1146,2581, 782, +2116,1659,2018,1880, 255,3586,3314,1110,2867,2137,2564, 986,2767,5185,2006, 650, + 158, 926, 762, 881,3157,2717,2362,3587, 306,3690,3245,1542,3077,2427,1691,2478, +2118,2985,3490,2438, 539,2305, 983, 129,1754, 355,4201,2386, 827,2923, 104,1773, +2838,2771, 411,2905,3919, 376, 767, 122,1114, 828,2422,1817,3506, 266,3460,1007, +1609,4998, 945,2612,4429,2274, 726,1247,1964,2914,2199,2070,4002,4108, 657,3323, +1422, 579, 455,2764,4737,1222,2895,1670, 824,1223,1487,2525, 558, 861,3080, 598, +2659,2515,1967, 752,2583,2376,2214,4180, 977, 704,2464,4999,2622,4109,1210,2961, + 819,1541, 142,2284, 
44, 418, 457,1126,3730,4347,4626,1644,1876,3671,1864, 302, +1063,5694, 624, 723,1984,3745,1314,1676,2488,1610,1449,3558,3569,2166,2098, 409, +1011,2325,3704,2306, 818,1732,1383,1824,1844,3757, 999,2705,3497,1216,1423,2683, +2426,2954,2501,2726,2229,1475,2554,5064,1971,1794,1666,2014,1343, 783, 724, 191, +2434,1354,2220,5065,1763,2752,2472,4152, 131, 175,2885,3434, 92,1466,4920,2616, +3871,3872,3866, 128,1551,1632, 669,1854,3682,4691,4125,1230, 188,2973,3290,1302, +1213, 560,3266, 917, 763,3909,3249,1760, 868,1958, 764,1782,2097, 145,2277,3774, +4462, 64,1491,3062, 971,2132,3606,2442, 221,1226,1617, 218, 323,1185,3207,3147, + 571, 619,1473,1005,1744,2281, 449,1887,2396,3685, 275, 375,3816,1743,3844,3731, + 845,1983,2350,4210,1377, 773, 967,3499,3052,3743,2725,4007,1697,1022,3943,1464, +3264,2855,2722,1952,1029,2839,2467, 84,4383,2215, 820,1391,2015,2448,3672, 377, +1948,2168, 797,2545,3536,2578,2645, 94,2874,1678, 405,1259,3071, 771, 546,1315, + 470,1243,3083, 895,2468, 981, 969,2037, 846,4181, 653,1276,2928, 14,2594, 557, +3007,2474, 156, 902,1338,1740,2574, 537,2518, 973,2282,2216,2433,1928, 138,2903, +1293,2631,1612, 646,3457, 839,2935, 111, 496,2191,2847, 589,3186, 149,3994,2060, +4031,2641,4067,3145,1870, 37,3597,2136,1025,2051,3009,3383,3549,1121,1016,3261, +1301, 251,2446,2599,2153, 872,3246, 637, 334,3705, 831, 884, 921,3065,3140,4092, +2198,1944, 246,2964, 108,2045,1152,1921,2308,1031, 203,3173,4170,1907,3890, 810, +1401,2003,1690, 506, 647,1242,2828,1761,1649,3208,2249,1589,3709,2931,5156,1708, + 498, 666,2613, 834,3817,1231, 184,2851,1124, 883,3197,2261,3710,1765,1553,2658, +1178,2639,2351, 93,1193, 942,2538,2141,4402, 235,1821, 870,1591,2192,1709,1871, +3341,1618,4126,2595,2334, 603, 651, 69, 701, 268,2662,3411,2555,1380,1606, 503, + 448, 254,2371,2646, 574,1187,2309,1770, 322,2235,1292,1801, 305, 566,1133, 229, +2067,2057, 706, 167, 483,2002,2672,3295,1820,3561,3067, 316, 378,2746,3452,1112, + 136,1981, 507,1651,2917,1117, 285,4591, 182,2580,3522,1304, 335,3303,1835,2504, +1795,1792,2248, 674,1018,2106,2449,1857,2292,2845, 976,3047,1781,2600,2727,1389, +1281, 52,3152, 153, 265,3950, 672,3485,3951,4463, 430,1183, 365, 278,2169, 27, +1407,1336,2304, 209,1340,1730,2202,1852,2403,2883, 979,1737,1062, 631,2829,2542, +3876,2592, 825,2086,2226,3048,3625, 352,1417,3724, 542, 991, 431,1351,3938,1861, +2294, 826,1361,2927,3142,3503,1738, 463,2462,2723, 582,1916,1595,2808, 400,3845, +3891,2868,3621,2254, 58,2492,1123, 910,2160,2614,1372,1603,1196,1072,3385,1700, +3267,1980, 696, 480,2430, 920, 799,1570,2920,1951,2041,4047,2540,1321,4223,2469, +3562,2228,1271,2602, 401,2833,3351,2575,5157, 907,2312,1256, 410, 263,3507,1582, + 996, 678,1849,2316,1480, 908,3545,2237, 703,2322, 667,1826,2849,1531,2604,2999, +2407,3146,2151,2630,1786,3711, 469,3542, 497,3899,2409, 858, 837,4446,3393,1274, + 786, 620,1845,2001,3311, 484, 308,3367,1204,1815,3691,2332,1532,2557,1842,2020, +2724,1927,2333,4440, 567, 22,1673,2728,4475,1987,1858,1144,1597, 101,1832,3601, + 12, 974,3783,4391, 951,1412, 1,3720, 453,4608,4041, 528,1041,1027,3230,2628, +1129, 875,1051,3291,1203,2262,1069,2860,2799,2149,2615,3278, 144,1758,3040, 31, + 475,1680, 366,2685,3184, 311,1642,4008,2466,5036,1593,1493,2809, 216,1420,1668, + 233, 304,2128,3284, 232,1429,1768,1040,2008,3407,2740,2967,2543, 242,2133, 778, +1565,2022,2620, 505,2189,2756,1098,2273, 372,1614, 708, 553,2846,2094,2278, 169, +3626,2835,4161, 228,2674,3165, 809,1454,1309, 466,1705,1095, 900,3423, 880,2667, +3751,5258,2317,3109,2571,4317,2766,1503,1342, 866,4447,1118, 
63,2076, 314,1881, +1348,1061, 172, 978,3515,1747, 532, 511,3970, 6, 601, 905,2699,3300,1751, 276, +1467,3725,2668, 65,4239,2544,2779,2556,1604, 578,2451,1802, 992,2331,2624,1320, +3446, 713,1513,1013, 103,2786,2447,1661, 886,1702, 916, 654,3574,2031,1556, 751, +2178,2821,2179,1498,1538,2176, 271, 914,2251,2080,1325, 638,1953,2937,3877,2432, +2754, 95,3265,1716, 260,1227,4083, 775, 106,1357,3254, 426,1607, 555,2480, 772, +1985, 244,2546, 474, 495,1046,2611,1851,2061, 71,2089,1675,2590, 742,3758,2843, +3222,1433, 267,2180,2576,2826,2233,2092,3913,2435, 956,1745,3075, 856,2113,1116, + 451, 3,1988,2896,1398, 993,2463,1878,2049,1341,2718,2721,2870,2108, 712,2904, +4363,2753,2324, 277,2872,2349,2649, 384, 987, 435, 691,3000, 922, 164,3939, 652, +1500,1184,4153,2482,3373,2165,4848,2335,3775,3508,3154,2806,2830,1554,2102,1664, +2530,1434,2408, 893,1547,2623,3447,2832,2242,2532,3169,2856,3223,2078, 49,3770, +3469, 462, 318, 656,2259,3250,3069, 679,1629,2758, 344,1138,1104,3120,1836,1283, +3115,2154,1437,4448, 934, 759,1999, 794,2862,1038, 533,2560,1722,2342, 855,2626, +1197,1663,4476,3127, 85,4240,2528, 25,1111,1181,3673, 407,3470,4561,2679,2713, + 768,1925,2841,3986,1544,1165, 932, 373,1240,2146,1930,2673, 721,4766, 354,4333, + 391,2963, 187, 61,3364,1442,1102, 330,1940,1767, 341,3809,4118, 393,2496,2062, +2211, 105, 331, 300, 439, 913,1332, 626, 379,3304,1557, 328, 689,3952, 309,1555, + 931, 317,2517,3027, 325, 569, 686,2107,3084, 60,1042,1333,2794, 264,3177,4014, +1628, 258,3712, 7,4464,1176,1043,1778, 683, 114,1975, 78,1492, 383,1886, 510, + 386, 645,5291,2891,2069,3305,4138,3867,2939,2603,2493,1935,1066,1848,3588,1015, +1282,1289,4609, 697,1453,3044,2666,3611,1856,2412, 54, 719,1330, 568,3778,2459, +1748, 788, 492, 551,1191,1000, 488,3394,3763, 282,1799, 348,2016,1523,3155,2390, +1049, 382,2019,1788,1170, 729,2968,3523, 897,3926,2785,2938,3292, 350,2319,3238, +1718,1717,2655,3453,3143,4465, 161,2889,2980,2009,1421, 56,1908,1640,2387,2232, +1917,1874,2477,4921, 148, 83,3438, 592,4245,2882,1822,1055, 741, 115,1496,1624, + 381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189, + 852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, #last 512 +) + diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/gb2312prober.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/gb2312prober.py new file mode 100644 index 0000000..8446d2d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/gb2312prober.py @@ -0,0 +1,46 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import GB2312DistributionAnalysis +from .mbcssm import GB2312_SM_MODEL + +class GB2312Prober(MultiByteCharSetProber): + def __init__(self): + super(GB2312Prober, self).__init__() + self.coding_sm = CodingStateMachine(GB2312_SM_MODEL) + self.distribution_analyzer = GB2312DistributionAnalysis() + self.reset() + + @property + def charset_name(self): + return "GB2312" + + @property + def language(self): + return "Chinese" diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/hebrewprober.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/hebrewprober.py new file mode 100644 index 0000000..b0e1bf4 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/hebrewprober.py @@ -0,0 +1,292 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Shy Shalom +# Portions created by the Initial Developer are Copyright (C) 2005 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetprober import CharSetProber +from .enums import ProbingState + +# This prober doesn't actually recognize a language or a charset. +# It is a helper prober for the use of the Hebrew model probers + +### General ideas of the Hebrew charset recognition ### +# +# Four main charsets exist in Hebrew: +# "ISO-8859-8" - Visual Hebrew +# "windows-1255" - Logical Hebrew +# "ISO-8859-8-I" - Logical Hebrew +# "x-mac-hebrew" - ?? Logical Hebrew ?? +# +# Both "ISO" charsets use a completely identical set of code points, whereas +# "windows-1255" and "x-mac-hebrew" are two different proper supersets of +# these code points. windows-1255 defines additional characters in the range +# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific +# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6. +# x-mac-hebrew defines similar additional code points but with a different +# mapping. +# +# As far as an average Hebrew text with no diacritics is concerned, all four +# charsets are identical with respect to code points. Meaning that for the +# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters +# (including final letters). 
+# +# The dominant difference between these charsets is their directionality. +# "Visual" directionality means that the text is ordered as if the renderer is +# not aware of a BIDI rendering algorithm. The renderer sees the text and +# draws it from left to right. The text itself when ordered naturally is read +# backwards. A buffer of Visual Hebrew generally looks like so: +# "[last word of first line spelled backwards] [whole line ordered backwards +# and spelled backwards] [first word of first line spelled backwards] +# [end of line] [last word of second line] ... etc' " +# adding punctuation marks, numbers and English text to visual text is +# naturally also "visual" and from left to right. +# +# "Logical" directionality means the text is ordered "naturally" according to +# the order it is read. It is the responsibility of the renderer to display +# the text from right to left. A BIDI algorithm is used to place general +# punctuation marks, numbers and English text in the text. +# +# Texts in x-mac-hebrew are almost impossible to find on the Internet. From +# what little evidence I could find, it seems that its general directionality +# is Logical. +# +# To sum up all of the above, the Hebrew probing mechanism knows about two +# charsets: +# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are +# backwards while line order is natural. For charset recognition purposes +# the line order is unimportant (In fact, for this implementation, even +# word order is unimportant). +# Logical Hebrew - "windows-1255" - normal, naturally ordered text. +# +# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be +# specifically identified. +# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew +# that contain special punctuation marks or diacritics is displayed with +# some unconverted characters showing as question marks. This problem might +# be corrected using another model prober for x-mac-hebrew. Due to the fact +# that x-mac-hebrew texts are so rare, writing another model prober isn't +# worth the effort and performance hit. +# +#### The Prober #### +# +# The prober is divided between two SBCharSetProbers and a HebrewProber, +# all of which are managed, created, fed data, inquired and deleted by the +# SBCSGroupProber. The two SBCharSetProbers identify that the text is in +# fact some kind of Hebrew, Logical or Visual. The final decision about which +# one is it is made by the HebrewProber by combining final-letter scores +# with the scores of the two SBCharSetProbers to produce a final answer. +# +# The SBCSGroupProber is responsible for stripping the original text of HTML +# tags, English characters, numbers, low-ASCII punctuation characters, spaces +# and new lines. It reduces any sequence of such characters to a single space. +# The buffer fed to each prober in the SBCS group prober is pure text in +# high-ASCII. +# The two SBCharSetProbers (model probers) share the same language model: +# Win1255Model. +# The first SBCharSetProber uses the model normally as any other +# SBCharSetProber does, to recognize windows-1255, upon which this model was +# built. The second SBCharSetProber is told to make the pair-of-letter +# lookup in the language model backwards. This in practice exactly simulates +# a visual Hebrew model using the windows-1255 logical Hebrew model. +# +# The HebrewProber is not using any language model. All it does is look for +# final-letter evidence suggesting the text is either logical Hebrew or visual +# Hebrew. 
Disjointed from the model probers, the results of the HebrewProber +# alone are meaningless. HebrewProber always returns 0.00 as confidence +# since it never identifies a charset by itself. Instead, the pointer to the +# HebrewProber is passed to the model probers as a helper "Name Prober". +# When the Group prober receives a positive identification from any prober, +# it asks for the name of the charset identified. If the prober queried is a +# Hebrew model prober, the model prober forwards the call to the +# HebrewProber to make the final decision. In the HebrewProber, the +# decision is made according to the final-letters scores maintained and Both +# model probers scores. The answer is returned in the form of the name of the +# charset identified, either "windows-1255" or "ISO-8859-8". + +class HebrewProber(CharSetProber): + # windows-1255 / ISO-8859-8 code points of interest + FINAL_KAF = 0xea + NORMAL_KAF = 0xeb + FINAL_MEM = 0xed + NORMAL_MEM = 0xee + FINAL_NUN = 0xef + NORMAL_NUN = 0xf0 + FINAL_PE = 0xf3 + NORMAL_PE = 0xf4 + FINAL_TSADI = 0xf5 + NORMAL_TSADI = 0xf6 + + # Minimum Visual vs Logical final letter score difference. + # If the difference is below this, don't rely solely on the final letter score + # distance. + MIN_FINAL_CHAR_DISTANCE = 5 + + # Minimum Visual vs Logical model score difference. + # If the difference is below this, don't rely at all on the model score + # distance. + MIN_MODEL_DISTANCE = 0.01 + + VISUAL_HEBREW_NAME = "ISO-8859-8" + LOGICAL_HEBREW_NAME = "windows-1255" + + def __init__(self): + super(HebrewProber, self).__init__() + self._final_char_logical_score = None + self._final_char_visual_score = None + self._prev = None + self._before_prev = None + self._logical_prober = None + self._visual_prober = None + self.reset() + + def reset(self): + self._final_char_logical_score = 0 + self._final_char_visual_score = 0 + # The two last characters seen in the previous buffer, + # mPrev and mBeforePrev are initialized to space in order to simulate + # a word delimiter at the beginning of the data + self._prev = ' ' + self._before_prev = ' ' + # These probers are owned by the group prober. + + def set_model_probers(self, logicalProber, visualProber): + self._logical_prober = logicalProber + self._visual_prober = visualProber + + def is_final(self, c): + return c in [self.FINAL_KAF, self.FINAL_MEM, self.FINAL_NUN, + self.FINAL_PE, self.FINAL_TSADI] + + def is_non_final(self, c): + # The normal Tsadi is not a good Non-Final letter due to words like + # 'lechotet' (to chat) containing an apostrophe after the tsadi. This + # apostrophe is converted to a space in FilterWithoutEnglishLetters + # causing the Non-Final tsadi to appear at an end of a word even + # though this is not the case in the original text. + # The letters Pe and Kaf rarely display a related behavior of not being + # a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' + # for example legally end with a Non-Final Pe or Kaf. However, the + # benefit of these letters as Non-Final letters outweighs the damage + # since these words are quite rare. + return c in [self.NORMAL_KAF, self.NORMAL_MEM, + self.NORMAL_NUN, self.NORMAL_PE] + + def feed(self, byte_str): + # Final letter analysis for logical-visual decision. + # Look for evidence that the received buffer is either logical Hebrew + # or visual Hebrew. + # The following cases are checked: + # 1) A word longer than 1 letter, ending with a final letter. 
This is + # an indication that the text is laid out "naturally" since the + # final letter really appears at the end. +1 for logical score. + # 2) A word longer than 1 letter, ending with a Non-Final letter. In + # normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi, + # should not end with the Non-Final form of that letter. Exceptions + # to this rule are mentioned above in isNonFinal(). This is an + # indication that the text is laid out backwards. +1 for visual + # score + # 3) A word longer than 1 letter, starting with a final letter. Final + # letters should not appear at the beginning of a word. This is an + # indication that the text is laid out backwards. +1 for visual + # score. + # + # The visual score and logical score are accumulated throughout the + # text and are finally checked against each other in GetCharSetName(). + # No checking for final letters in the middle of words is done since + # that case is not an indication for either Logical or Visual text. + # + # We automatically filter out all 7-bit characters (replace them with + # spaces) so the word boundary detection works properly. [MAP] + + if self.state == ProbingState.NOT_ME: + # Both model probers say it's not them. No reason to continue. + return ProbingState.NOT_ME + + byte_str = self.filter_high_byte_only(byte_str) + + for cur in byte_str: + if cur == ' ': + # We stand on a space - a word just ended + if self._before_prev != ' ': + # next-to-last char was not a space so self._prev is not a + # 1 letter word + if self.is_final(self._prev): + # case (1) [-2:not space][-1:final letter][cur:space] + self._final_char_logical_score += 1 + elif self.is_non_final(self._prev): + # case (2) [-2:not space][-1:Non-Final letter][ + # cur:space] + self._final_char_visual_score += 1 + else: + # Not standing on a space + if ((self._before_prev == ' ') and + (self.is_final(self._prev)) and (cur != ' ')): + # case (3) [-2:space][-1:final letter][cur:not space] + self._final_char_visual_score += 1 + self._before_prev = self._prev + self._prev = cur + + # Forever detecting, till the end or until both model probers return + # ProbingState.NOT_ME (handled above) + return ProbingState.DETECTING + + @property + def charset_name(self): + # Make the decision: is it Logical or Visual? + # If the final letter score distance is dominant enough, rely on it. + finalsub = self._final_char_logical_score - self._final_char_visual_score + if finalsub >= self.MIN_FINAL_CHAR_DISTANCE: + return self.LOGICAL_HEBREW_NAME + if finalsub <= -self.MIN_FINAL_CHAR_DISTANCE: + return self.VISUAL_HEBREW_NAME + + # It's not dominant enough, try to rely on the model scores instead. + modelsub = (self._logical_prober.get_confidence() + - self._visual_prober.get_confidence()) + if modelsub > self.MIN_MODEL_DISTANCE: + return self.LOGICAL_HEBREW_NAME + if modelsub < -self.MIN_MODEL_DISTANCE: + return self.VISUAL_HEBREW_NAME + + # Still no good, back to final letter distance, maybe it'll save the + # day. + if finalsub < 0.0: + return self.VISUAL_HEBREW_NAME + + # (finalsub > 0 - Logical) or (don't know what to do) default to + # Logical. + return self.LOGICAL_HEBREW_NAME + + @property + def language(self): + return 'Hebrew' + + @property + def state(self): + # Remain active as long as any of the model probers are active. 
+ if (self._logical_prober.state == ProbingState.NOT_ME) and \ + (self._visual_prober.state == ProbingState.NOT_ME): + return ProbingState.NOT_ME + return ProbingState.DETECTING diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/jisfreq.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/jisfreq.py new file mode 100644 index 0000000..83fc082 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/jisfreq.py @@ -0,0 +1,325 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# Sampling from about 20M text materials include literature and computer technology +# +# Japanese frequency table, applied to both S-JIS and EUC-JP +# They are sorted in order. 
+ +# 128 --> 0.77094 +# 256 --> 0.85710 +# 512 --> 0.92635 +# 1024 --> 0.97130 +# 2048 --> 0.99431 +# +# Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58 +# Random Distribution Ration = 512 / (2965+62+83+86-512) = 0.191 +# +# Typical Distribution Ratio, 25% of IDR + +JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0 + +# Char to FreqOrder table , +JIS_TABLE_SIZE = 4368 + +JIS_CHAR_TO_FREQ_ORDER = ( + 40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16 +3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32 +1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48 +2042,1061,1062, 48, 49, 44, 45, 433, 434,1040,1041, 996, 787,2997,1255,4305, # 64 +2108,4609,1684,1648,5073,5074,5075,5076,5077,5078,3687,5079,4610,5080,3927,3928, # 80 +5081,3296,3432, 290,2285,1471,2187,5082,2580,2825,1303,2140,1739,1445,2691,3375, # 96 +1691,3297,4306,4307,4611, 452,3376,1182,2713,3688,3069,4308,5083,5084,5085,5086, # 112 +5087,5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102, # 128 +5103,5104,5105,5106,5107,5108,5109,5110,5111,5112,4097,5113,5114,5115,5116,5117, # 144 +5118,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,5130,5131,5132,5133, # 160 +5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,5149, # 176 +5150,5151,5152,4612,5153,5154,5155,5156,5157,5158,5159,5160,5161,5162,5163,5164, # 192 +5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,1472, 598, 618, 820,1205, # 208 +1309,1412,1858,1307,1692,5176,5177,5178,5179,5180,5181,5182,1142,1452,1234,1172, # 224 +1875,2043,2149,1793,1382,2973, 925,2404,1067,1241, 960,1377,2935,1491, 919,1217, # 240 +1865,2030,1406,1499,2749,4098,5183,5184,5185,5186,5187,5188,2561,4099,3117,1804, # 256 +2049,3689,4309,3513,1663,5189,3166,3118,3298,1587,1561,3433,5190,3119,1625,2998, # 272 +3299,4613,1766,3690,2786,4614,5191,5192,5193,5194,2161, 26,3377, 2,3929, 20, # 288 +3691, 47,4100, 50, 17, 16, 35, 268, 27, 243, 42, 155, 24, 154, 29, 184, # 304 + 4, 91, 14, 92, 53, 396, 33, 289, 9, 37, 64, 620, 21, 39, 321, 5, # 320 + 12, 11, 52, 13, 3, 208, 138, 0, 7, 60, 526, 141, 151,1069, 181, 275, # 336 +1591, 83, 132,1475, 126, 331, 829, 15, 69, 160, 59, 22, 157, 55,1079, 312, # 352 + 109, 38, 23, 25, 10, 19, 79,5195, 61, 382,1124, 8, 30,5196,5197,5198, # 368 +5199,5200,5201,5202,5203,5204,5205,5206, 89, 62, 74, 34,2416, 112, 139, 196, # 384 + 271, 149, 84, 607, 131, 765, 46, 88, 153, 683, 76, 874, 101, 258, 57, 80, # 400 + 32, 364, 121,1508, 169,1547, 68, 235, 145,2999, 41, 360,3027, 70, 63, 31, # 416 + 43, 259, 262,1383, 99, 533, 194, 66, 93, 846, 217, 192, 56, 106, 58, 565, # 432 + 280, 272, 311, 256, 146, 82, 308, 71, 100, 128, 214, 655, 110, 261, 104,1140, # 448 + 54, 51, 36, 87, 67,3070, 185,2618,2936,2020, 28,1066,2390,2059,5207,5208, # 464 +5209,5210,5211,5212,5213,5214,5215,5216,4615,5217,5218,5219,5220,5221,5222,5223, # 480 +5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,5235,5236,3514,5237,5238, # 496 +5239,5240,5241,5242,5243,5244,2297,2031,4616,4310,3692,5245,3071,5246,3598,5247, # 512 +4617,3231,3515,5248,4101,4311,4618,3808,4312,4102,5249,4103,4104,3599,5250,5251, # 528 +5252,5253,5254,5255,5256,5257,5258,5259,5260,5261,5262,5263,5264,5265,5266,5267, # 544 +5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,5279,5280,5281,5282,5283, # 560 +5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,5294,5295,5296,5297,5298,5299, # 576 +5300,5301,5302,5303,5304,5305,5306,5307,5308,5309,5310,5311,5312,5313,5314,5315, # 592 
+5316,5317,5318,5319,5320,5321,5322,5323,5324,5325,5326,5327,5328,5329,5330,5331, # 608 +5332,5333,5334,5335,5336,5337,5338,5339,5340,5341,5342,5343,5344,5345,5346,5347, # 624 +5348,5349,5350,5351,5352,5353,5354,5355,5356,5357,5358,5359,5360,5361,5362,5363, # 640 +5364,5365,5366,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376,5377,5378,5379, # 656 +5380,5381, 363, 642,2787,2878,2788,2789,2316,3232,2317,3434,2011, 165,1942,3930, # 672 +3931,3932,3933,5382,4619,5383,4620,5384,5385,5386,5387,5388,5389,5390,5391,5392, # 688 +5393,5394,5395,5396,5397,5398,5399,5400,5401,5402,5403,5404,5405,5406,5407,5408, # 704 +5409,5410,5411,5412,5413,5414,5415,5416,5417,5418,5419,5420,5421,5422,5423,5424, # 720 +5425,5426,5427,5428,5429,5430,5431,5432,5433,5434,5435,5436,5437,5438,5439,5440, # 736 +5441,5442,5443,5444,5445,5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456, # 752 +5457,5458,5459,5460,5461,5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472, # 768 +5473,5474,5475,5476,5477,5478,5479,5480,5481,5482,5483,5484,5485,5486,5487,5488, # 784 +5489,5490,5491,5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504, # 800 +5505,5506,5507,5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520, # 816 +5521,5522,5523,5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536, # 832 +5537,5538,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548,5549,5550,5551,5552, # 848 +5553,5554,5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568, # 864 +5569,5570,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584, # 880 +5585,5586,5587,5588,5589,5590,5591,5592,5593,5594,5595,5596,5597,5598,5599,5600, # 896 +5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,5615,5616, # 912 +5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,5632, # 928 +5633,5634,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,5647,5648, # 944 +5649,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,5661,5662,5663,5664, # 960 +5665,5666,5667,5668,5669,5670,5671,5672,5673,5674,5675,5676,5677,5678,5679,5680, # 976 +5681,5682,5683,5684,5685,5686,5687,5688,5689,5690,5691,5692,5693,5694,5695,5696, # 992 +5697,5698,5699,5700,5701,5702,5703,5704,5705,5706,5707,5708,5709,5710,5711,5712, # 1008 +5713,5714,5715,5716,5717,5718,5719,5720,5721,5722,5723,5724,5725,5726,5727,5728, # 1024 +5729,5730,5731,5732,5733,5734,5735,5736,5737,5738,5739,5740,5741,5742,5743,5744, # 1040 +5745,5746,5747,5748,5749,5750,5751,5752,5753,5754,5755,5756,5757,5758,5759,5760, # 1056 +5761,5762,5763,5764,5765,5766,5767,5768,5769,5770,5771,5772,5773,5774,5775,5776, # 1072 +5777,5778,5779,5780,5781,5782,5783,5784,5785,5786,5787,5788,5789,5790,5791,5792, # 1088 +5793,5794,5795,5796,5797,5798,5799,5800,5801,5802,5803,5804,5805,5806,5807,5808, # 1104 +5809,5810,5811,5812,5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824, # 1120 +5825,5826,5827,5828,5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840, # 1136 +5841,5842,5843,5844,5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856, # 1152 +5857,5858,5859,5860,5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872, # 1168 +5873,5874,5875,5876,5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888, # 1184 +5889,5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904, # 1200 +5905,5906,5907,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, # 1216 +5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936, # 1232 
+5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952, # 1248 +5953,5954,5955,5956,5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968, # 1264 +5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984, # 1280 +5985,5986,5987,5988,5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000, # 1296 +6001,6002,6003,6004,6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016, # 1312 +6017,6018,6019,6020,6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032, # 1328 +6033,6034,6035,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048, # 1344 +6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064, # 1360 +6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080, # 1376 +6081,6082,6083,6084,6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096, # 1392 +6097,6098,6099,6100,6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112, # 1408 +6113,6114,2044,2060,4621, 997,1235, 473,1186,4622, 920,3378,6115,6116, 379,1108, # 1424 +4313,2657,2735,3934,6117,3809, 636,3233, 573,1026,3693,3435,2974,3300,2298,4105, # 1440 + 854,2937,2463, 393,2581,2417, 539, 752,1280,2750,2480, 140,1161, 440, 708,1569, # 1456 + 665,2497,1746,1291,1523,3000, 164,1603, 847,1331, 537,1997, 486, 508,1693,2418, # 1472 +1970,2227, 878,1220, 299,1030, 969, 652,2751, 624,1137,3301,2619, 65,3302,2045, # 1488 +1761,1859,3120,1930,3694,3516, 663,1767, 852, 835,3695, 269, 767,2826,2339,1305, # 1504 + 896,1150, 770,1616,6118, 506,1502,2075,1012,2519, 775,2520,2975,2340,2938,4314, # 1520 +3028,2086,1224,1943,2286,6119,3072,4315,2240,1273,1987,3935,1557, 175, 597, 985, # 1536 +3517,2419,2521,1416,3029, 585, 938,1931,1007,1052,1932,1685,6120,3379,4316,4623, # 1552 + 804, 599,3121,1333,2128,2539,1159,1554,2032,3810, 687,2033,2904, 952, 675,1467, # 1568 +3436,6121,2241,1096,1786,2440,1543,1924, 980,1813,2228, 781,2692,1879, 728,1918, # 1584 +3696,4624, 548,1950,4625,1809,1088,1356,3303,2522,1944, 502, 972, 373, 513,2827, # 1600 + 586,2377,2391,1003,1976,1631,6122,2464,1084, 648,1776,4626,2141, 324, 962,2012, # 1616 +2177,2076,1384, 742,2178,1448,1173,1810, 222, 102, 301, 445, 125,2420, 662,2498, # 1632 + 277, 200,1476,1165,1068, 224,2562,1378,1446, 450,1880, 659, 791, 582,4627,2939, # 1648 +3936,1516,1274, 555,2099,3697,1020,1389,1526,3380,1762,1723,1787,2229, 412,2114, # 1664 +1900,2392,3518, 512,2597, 427,1925,2341,3122,1653,1686,2465,2499, 697, 330, 273, # 1680 + 380,2162, 951, 832, 780, 991,1301,3073, 965,2270,3519, 668,2523,2636,1286, 535, # 1696 +1407, 518, 671, 957,2658,2378, 267, 611,2197,3030,6123, 248,2299, 967,1799,2356, # 1712 + 850,1418,3437,1876,1256,1480,2828,1718,6124,6125,1755,1664,2405,6126,4628,2879, # 1728 +2829, 499,2179, 676,4629, 557,2329,2214,2090, 325,3234, 464, 811,3001, 992,2342, # 1744 +2481,1232,1469, 303,2242, 466,1070,2163, 603,1777,2091,4630,2752,4631,2714, 322, # 1760 +2659,1964,1768, 481,2188,1463,2330,2857,3600,2092,3031,2421,4632,2318,2070,1849, # 1776 +2598,4633,1302,2254,1668,1701,2422,3811,2905,3032,3123,2046,4106,1763,1694,4634, # 1792 +1604, 943,1724,1454, 917, 868,2215,1169,2940, 552,1145,1800,1228,1823,1955, 316, # 1808 +1080,2510, 361,1807,2830,4107,2660,3381,1346,1423,1134,4108,6127, 541,1263,1229, # 1824 +1148,2540, 545, 465,1833,2880,3438,1901,3074,2482, 816,3937, 713,1788,2500, 122, # 1840 +1575, 195,1451,2501,1111,6128, 859, 374,1225,2243,2483,4317, 390,1033,3439,3075, # 1856 +2524,1687, 266, 793,1440,2599, 946, 779, 802, 507, 897,1081, 528,2189,1292, 711, # 
1872 +1866,1725,1167,1640, 753, 398,2661,1053, 246, 348,4318, 137,1024,3440,1600,2077, # 1888 +2129, 825,4319, 698, 238, 521, 187,2300,1157,2423,1641,1605,1464,1610,1097,2541, # 1904 +1260,1436, 759,2255,1814,2150, 705,3235, 409,2563,3304, 561,3033,2005,2564, 726, # 1920 +1956,2343,3698,4109, 949,3812,3813,3520,1669, 653,1379,2525, 881,2198, 632,2256, # 1936 +1027, 778,1074, 733,1957, 514,1481,2466, 554,2180, 702,3938,1606,1017,1398,6129, # 1952 +1380,3521, 921, 993,1313, 594, 449,1489,1617,1166, 768,1426,1360, 495,1794,3601, # 1968 +1177,3602,1170,4320,2344, 476, 425,3167,4635,3168,1424, 401,2662,1171,3382,1998, # 1984 +1089,4110, 477,3169, 474,6130,1909, 596,2831,1842, 494, 693,1051,1028,1207,3076, # 2000 + 606,2115, 727,2790,1473,1115, 743,3522, 630, 805,1532,4321,2021, 366,1057, 838, # 2016 + 684,1114,2142,4322,2050,1492,1892,1808,2271,3814,2424,1971,1447,1373,3305,1090, # 2032 +1536,3939,3523,3306,1455,2199, 336, 369,2331,1035, 584,2393, 902, 718,2600,6131, # 2048 +2753, 463,2151,1149,1611,2467, 715,1308,3124,1268, 343,1413,3236,1517,1347,2663, # 2064 +2093,3940,2022,1131,1553,2100,2941,1427,3441,2942,1323,2484,6132,1980, 872,2368, # 2080 +2441,2943, 320,2369,2116,1082, 679,1933,3941,2791,3815, 625,1143,2023, 422,2200, # 2096 +3816,6133, 730,1695, 356,2257,1626,2301,2858,2637,1627,1778, 937, 883,2906,2693, # 2112 +3002,1769,1086, 400,1063,1325,3307,2792,4111,3077, 456,2345,1046, 747,6134,1524, # 2128 + 884,1094,3383,1474,2164,1059, 974,1688,2181,2258,1047, 345,1665,1187, 358, 875, # 2144 +3170, 305, 660,3524,2190,1334,1135,3171,1540,1649,2542,1527, 927, 968,2793, 885, # 2160 +1972,1850, 482, 500,2638,1218,1109,1085,2543,1654,2034, 876, 78,2287,1482,1277, # 2176 + 861,1675,1083,1779, 724,2754, 454, 397,1132,1612,2332, 893, 672,1237, 257,2259, # 2192 +2370, 135,3384, 337,2244, 547, 352, 340, 709,2485,1400, 788,1138,2511, 540, 772, # 2208 +1682,2260,2272,2544,2013,1843,1902,4636,1999,1562,2288,4637,2201,1403,1533, 407, # 2224 + 576,3308,1254,2071, 978,3385, 170, 136,1201,3125,2664,3172,2394, 213, 912, 873, # 2240 +3603,1713,2202, 699,3604,3699, 813,3442, 493, 531,1054, 468,2907,1483, 304, 281, # 2256 +4112,1726,1252,2094, 339,2319,2130,2639, 756,1563,2944, 748, 571,2976,1588,2425, # 2272 +2715,1851,1460,2426,1528,1392,1973,3237, 288,3309, 685,3386, 296, 892,2716,2216, # 2288 +1570,2245, 722,1747,2217, 905,3238,1103,6135,1893,1441,1965, 251,1805,2371,3700, # 2304 +2601,1919,1078, 75,2182,1509,1592,1270,2640,4638,2152,6136,3310,3817, 524, 706, # 2320 +1075, 292,3818,1756,2602, 317, 98,3173,3605,3525,1844,2218,3819,2502, 814, 567, # 2336 + 385,2908,1534,6137, 534,1642,3239, 797,6138,1670,1529, 953,4323, 188,1071, 538, # 2352 + 178, 729,3240,2109,1226,1374,2000,2357,2977, 731,2468,1116,2014,2051,6139,1261, # 2368 +1593, 803,2859,2736,3443, 556, 682, 823,1541,6140,1369,2289,1706,2794, 845, 462, # 2384 +2603,2665,1361, 387, 162,2358,1740, 739,1770,1720,1304,1401,3241,1049, 627,1571, # 2400 +2427,3526,1877,3942,1852,1500, 431,1910,1503, 677, 297,2795, 286,1433,1038,1198, # 2416 +2290,1133,1596,4113,4639,2469,1510,1484,3943,6141,2442, 108, 712,4640,2372, 866, # 2432 +3701,2755,3242,1348, 834,1945,1408,3527,2395,3243,1811, 824, 994,1179,2110,1548, # 2448 +1453, 790,3003, 690,4324,4325,2832,2909,3820,1860,3821, 225,1748, 310, 346,1780, # 2464 +2470, 821,1993,2717,2796, 828, 877,3528,2860,2471,1702,2165,2910,2486,1789, 453, # 2480 + 359,2291,1676, 73,1164,1461,1127,3311, 421, 604, 314,1037, 589, 116,2487, 737, # 2496 + 837,1180, 111, 244, 735,6142,2261,1861,1362, 986, 523, 418, 581,2666,3822, 103, 
# 2512 + 855, 503,1414,1867,2488,1091, 657,1597, 979, 605,1316,4641,1021,2443,2078,2001, # 2528 +1209, 96, 587,2166,1032, 260,1072,2153, 173, 94, 226,3244, 819,2006,4642,4114, # 2544 +2203, 231,1744, 782, 97,2667, 786,3387, 887, 391, 442,2219,4326,1425,6143,2694, # 2560 + 633,1544,1202, 483,2015, 592,2052,1958,2472,1655, 419, 129,4327,3444,3312,1714, # 2576 +1257,3078,4328,1518,1098, 865,1310,1019,1885,1512,1734, 469,2444, 148, 773, 436, # 2592 +1815,1868,1128,1055,4329,1245,2756,3445,2154,1934,1039,4643, 579,1238, 932,2320, # 2608 + 353, 205, 801, 115,2428, 944,2321,1881, 399,2565,1211, 678, 766,3944, 335,2101, # 2624 +1459,1781,1402,3945,2737,2131,1010, 844, 981,1326,1013, 550,1816,1545,2620,1335, # 2640 +1008, 371,2881, 936,1419,1613,3529,1456,1395,2273,1834,2604,1317,2738,2503, 416, # 2656 +1643,4330, 806,1126, 229, 591,3946,1314,1981,1576,1837,1666, 347,1790, 977,3313, # 2672 + 764,2861,1853, 688,2429,1920,1462, 77, 595, 415,2002,3034, 798,1192,4115,6144, # 2688 +2978,4331,3035,2695,2582,2072,2566, 430,2430,1727, 842,1396,3947,3702, 613, 377, # 2704 + 278, 236,1417,3388,3314,3174, 757,1869, 107,3530,6145,1194, 623,2262, 207,1253, # 2720 +2167,3446,3948, 492,1117,1935, 536,1838,2757,1246,4332, 696,2095,2406,1393,1572, # 2736 +3175,1782, 583, 190, 253,1390,2230, 830,3126,3389, 934,3245,1703,1749,2979,1870, # 2752 +2545,1656,2204, 869,2346,4116,3176,1817, 496,1764,4644, 942,1504, 404,1903,1122, # 2768 +1580,3606,2945,1022, 515, 372,1735, 955,2431,3036,6146,2797,1110,2302,2798, 617, # 2784 +6147, 441, 762,1771,3447,3607,3608,1904, 840,3037, 86, 939,1385, 572,1370,2445, # 2800 +1336, 114,3703, 898, 294, 203,3315, 703,1583,2274, 429, 961,4333,1854,1951,3390, # 2816 +2373,3704,4334,1318,1381, 966,1911,2322,1006,1155, 309, 989, 458,2718,1795,1372, # 2832 +1203, 252,1689,1363,3177, 517,1936, 168,1490, 562, 193,3823,1042,4117,1835, 551, # 2848 + 470,4645, 395, 489,3448,1871,1465,2583,2641, 417,1493, 279,1295, 511,1236,1119, # 2864 + 72,1231,1982,1812,3004, 871,1564, 984,3449,1667,2696,2096,4646,2347,2833,1673, # 2880 +3609, 695,3246,2668, 807,1183,4647, 890, 388,2333,1801,1457,2911,1765,1477,1031, # 2896 +3316,3317,1278,3391,2799,2292,2526, 163,3450,4335,2669,1404,1802,6148,2323,2407, # 2912 +1584,1728,1494,1824,1269, 298, 909,3318,1034,1632, 375, 776,1683,2061, 291, 210, # 2928 +1123, 809,1249,1002,2642,3038, 206,1011,2132, 144, 975, 882,1565, 342, 667, 754, # 2944 +1442,2143,1299,2303,2062, 447, 626,2205,1221,2739,2912,1144,1214,2206,2584, 760, # 2960 +1715, 614, 950,1281,2670,2621, 810, 577,1287,2546,4648, 242,2168, 250,2643, 691, # 2976 + 123,2644, 647, 313,1029, 689,1357,2946,1650, 216, 771,1339,1306, 808,2063, 549, # 2992 + 913,1371,2913,2914,6149,1466,1092,1174,1196,1311,2605,2396,1783,1796,3079, 406, # 3008 +2671,2117,3949,4649, 487,1825,2220,6150,2915, 448,2348,1073,6151,2397,1707, 130, # 3024 + 900,1598, 329, 176,1959,2527,1620,6152,2275,4336,3319,1983,2191,3705,3610,2155, # 3040 +3706,1912,1513,1614,6153,1988, 646, 392,2304,1589,3320,3039,1826,1239,1352,1340, # 3056 +2916, 505,2567,1709,1437,2408,2547, 906,6154,2672, 384,1458,1594,1100,1329, 710, # 3072 + 423,3531,2064,2231,2622,1989,2673,1087,1882, 333, 841,3005,1296,2882,2379, 580, # 3088 +1937,1827,1293,2585, 601, 574, 249,1772,4118,2079,1120, 645, 901,1176,1690, 795, # 3104 +2207, 478,1434, 516,1190,1530, 761,2080, 930,1264, 355, 435,1552, 644,1791, 987, # 3120 + 220,1364,1163,1121,1538, 306,2169,1327,1222, 546,2645, 218, 241, 610,1704,3321, # 3136 +1984,1839,1966,2528, 451,6155,2586,3707,2568, 907,3178, 254,2947, 186,1845,4650, 
# 3152 + 745, 432,1757, 428,1633, 888,2246,2221,2489,3611,2118,1258,1265, 956,3127,1784, # 3168 +4337,2490, 319, 510, 119, 457,3612, 274,2035,2007,4651,1409,3128, 970,2758, 590, # 3184 +2800, 661,2247,4652,2008,3950,1420,1549,3080,3322,3951,1651,1375,2111, 485,2491, # 3200 +1429,1156,6156,2548,2183,1495, 831,1840,2529,2446, 501,1657, 307,1894,3247,1341, # 3216 + 666, 899,2156,1539,2549,1559, 886, 349,2208,3081,2305,1736,3824,2170,2759,1014, # 3232 +1913,1386, 542,1397,2948, 490, 368, 716, 362, 159, 282,2569,1129,1658,1288,1750, # 3248 +2674, 276, 649,2016, 751,1496, 658,1818,1284,1862,2209,2087,2512,3451, 622,2834, # 3264 + 376, 117,1060,2053,1208,1721,1101,1443, 247,1250,3179,1792,3952,2760,2398,3953, # 3280 +6157,2144,3708, 446,2432,1151,2570,3452,2447,2761,2835,1210,2448,3082, 424,2222, # 3296 +1251,2449,2119,2836, 504,1581,4338, 602, 817, 857,3825,2349,2306, 357,3826,1470, # 3312 +1883,2883, 255, 958, 929,2917,3248, 302,4653,1050,1271,1751,2307,1952,1430,2697, # 3328 +2719,2359, 354,3180, 777, 158,2036,4339,1659,4340,4654,2308,2949,2248,1146,2232, # 3344 +3532,2720,1696,2623,3827,6158,3129,1550,2698,1485,1297,1428, 637, 931,2721,2145, # 3360 + 914,2550,2587, 81,2450, 612, 827,2646,1242,4655,1118,2884, 472,1855,3181,3533, # 3376 +3534, 569,1353,2699,1244,1758,2588,4119,2009,2762,2171,3709,1312,1531,6159,1152, # 3392 +1938, 134,1830, 471,3710,2276,1112,1535,3323,3453,3535, 982,1337,2950, 488, 826, # 3408 + 674,1058,1628,4120,2017, 522,2399, 211, 568,1367,3454, 350, 293,1872,1139,3249, # 3424 +1399,1946,3006,1300,2360,3324, 588, 736,6160,2606, 744, 669,3536,3828,6161,1358, # 3440 + 199, 723, 848, 933, 851,1939,1505,1514,1338,1618,1831,4656,1634,3613, 443,2740, # 3456 +3829, 717,1947, 491,1914,6162,2551,1542,4121,1025,6163,1099,1223, 198,3040,2722, # 3472 + 370, 410,1905,2589, 998,1248,3182,2380, 519,1449,4122,1710, 947, 928,1153,4341, # 3488 +2277, 344,2624,1511, 615, 105, 161,1212,1076,1960,3130,2054,1926,1175,1906,2473, # 3504 + 414,1873,2801,6164,2309, 315,1319,3325, 318,2018,2146,2157, 963, 631, 223,4342, # 3520 +4343,2675, 479,3711,1197,2625,3712,2676,2361,6165,4344,4123,6166,2451,3183,1886, # 3536 +2184,1674,1330,1711,1635,1506, 799, 219,3250,3083,3954,1677,3713,3326,2081,3614, # 3552 +1652,2073,4657,1147,3041,1752, 643,1961, 147,1974,3955,6167,1716,2037, 918,3007, # 3568 +1994, 120,1537, 118, 609,3184,4345, 740,3455,1219, 332,1615,3830,6168,1621,2980, # 3584 +1582, 783, 212, 553,2350,3714,1349,2433,2082,4124, 889,6169,2310,1275,1410, 973, # 3600 + 166,1320,3456,1797,1215,3185,2885,1846,2590,2763,4658, 629, 822,3008, 763, 940, # 3616 +1990,2862, 439,2409,1566,1240,1622, 926,1282,1907,2764, 654,2210,1607, 327,1130, # 3632 +3956,1678,1623,6170,2434,2192, 686, 608,3831,3715, 903,3957,3042,6171,2741,1522, # 3648 +1915,1105,1555,2552,1359, 323,3251,4346,3457, 738,1354,2553,2311,2334,1828,2003, # 3664 +3832,1753,2351,1227,6172,1887,4125,1478,6173,2410,1874,1712,1847, 520,1204,2607, # 3680 + 264,4659, 836,2677,2102, 600,4660,3833,2278,3084,6174,4347,3615,1342, 640, 532, # 3696 + 543,2608,1888,2400,2591,1009,4348,1497, 341,1737,3616,2723,1394, 529,3252,1321, # 3712 + 983,4661,1515,2120, 971,2592, 924, 287,1662,3186,4349,2700,4350,1519, 908,1948, # 3728 +2452, 156, 796,1629,1486,2223,2055, 694,4126,1259,1036,3392,1213,2249,2742,1889, # 3744 +1230,3958,1015, 910, 408, 559,3617,4662, 746, 725, 935,4663,3959,3009,1289, 563, # 3760 + 867,4664,3960,1567,2981,2038,2626, 988,2263,2381,4351, 143,2374, 704,1895,6175, # 3776 +1188,3716,2088, 673,3085,2362,4352, 484,1608,1921,2765,2918, 215, 
904,3618,3537, # 3792 + 894, 509, 976,3043,2701,3961,4353,2837,2982, 498,6176,6177,1102,3538,1332,3393, # 3808 +1487,1636,1637, 233, 245,3962, 383, 650, 995,3044, 460,1520,1206,2352, 749,3327, # 3824 + 530, 700, 389,1438,1560,1773,3963,2264, 719,2951,2724,3834, 870,1832,1644,1000, # 3840 + 839,2474,3717, 197,1630,3394, 365,2886,3964,1285,2133, 734, 922, 818,1106, 732, # 3856 + 480,2083,1774,3458, 923,2279,1350, 221,3086, 85,2233,2234,3835,1585,3010,2147, # 3872 +1387,1705,2382,1619,2475, 133, 239,2802,1991,1016,2084,2383, 411,2838,1113, 651, # 3888 +1985,1160,3328, 990,1863,3087,1048,1276,2647, 265,2627,1599,3253,2056, 150, 638, # 3904 +2019, 656, 853, 326,1479, 680,1439,4354,1001,1759, 413,3459,3395,2492,1431, 459, # 3920 +4355,1125,3329,2265,1953,1450,2065,2863, 849, 351,2678,3131,3254,3255,1104,1577, # 3936 + 227,1351,1645,2453,2193,1421,2887, 812,2121, 634, 95,2435, 201,2312,4665,1646, # 3952 +1671,2743,1601,2554,2702,2648,2280,1315,1366,2089,3132,1573,3718,3965,1729,1189, # 3968 + 328,2679,1077,1940,1136, 558,1283, 964,1195, 621,2074,1199,1743,3460,3619,1896, # 3984 +1916,1890,3836,2952,1154,2112,1064, 862, 378,3011,2066,2113,2803,1568,2839,6178, # 4000 +3088,2919,1941,1660,2004,1992,2194, 142, 707,1590,1708,1624,1922,1023,1836,1233, # 4016 +1004,2313, 789, 741,3620,6179,1609,2411,1200,4127,3719,3720,4666,2057,3721, 593, # 4032 +2840, 367,2920,1878,6180,3461,1521, 628,1168, 692,2211,2649, 300, 720,2067,2571, # 4048 +2953,3396, 959,2504,3966,3539,3462,1977, 701,6181, 954,1043, 800, 681, 183,3722, # 4064 +1803,1730,3540,4128,2103, 815,2314, 174, 467, 230,2454,1093,2134, 755,3541,3397, # 4080 +1141,1162,6182,1738,2039, 270,3256,2513,1005,1647,2185,3837, 858,1679,1897,1719, # 4096 +2954,2324,1806, 402, 670, 167,4129,1498,2158,2104, 750,6183, 915, 189,1680,1551, # 4112 + 455,4356,1501,2455, 405,1095,2955, 338,1586,1266,1819, 570, 641,1324, 237,1556, # 4128 +2650,1388,3723,6184,1368,2384,1343,1978,3089,2436, 879,3724, 792,1191, 758,3012, # 4144 +1411,2135,1322,4357, 240,4667,1848,3725,1574,6185, 420,3045,1546,1391, 714,4358, # 4160 +1967, 941,1864, 863, 664, 426, 560,1731,2680,1785,2864,1949,2363, 403,3330,1415, # 4176 +1279,2136,1697,2335, 204, 721,2097,3838, 90,6186,2085,2505, 191,3967, 124,2148, # 4192 +1376,1798,1178,1107,1898,1405, 860,4359,1243,1272,2375,2983,1558,2456,1638, 113, # 4208 +3621, 578,1923,2609, 880, 386,4130, 784,2186,2266,1422,2956,2172,1722, 497, 263, # 4224 +2514,1267,2412,2610, 177,2703,3542, 774,1927,1344, 616,1432,1595,1018, 172,4360, # 4240 +2325, 911,4361, 438,1468,3622, 794,3968,2024,2173,1681,1829,2957, 945, 895,3090, # 4256 + 575,2212,2476, 475,2401,2681, 785,2744,1745,2293,2555,1975,3133,2865, 394,4668, # 4272 +3839, 635,4131, 639, 202,1507,2195,2766,1345,1435,2572,3726,1908,1184,1181,2457, # 4288 +3727,3134,4362, 843,2611, 437, 916,4669, 234, 769,1884,3046,3047,3623, 833,6187, # 4304 +1639,2250,2402,1355,1185,2010,2047, 999, 525,1732,1290,1488,2612, 948,1578,3728, # 4320 +2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, # 4336 +1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352 +2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512 +) + + diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/jpcntx.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/jpcntx.py new file mode 100644 index 0000000..20044e4 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/jpcntx.py @@ -0,0 +1,233 @@ 
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + + +# This is hiragana 2-char sequence table, the number in each cell represents its frequency category +jp2CharContext = ( +(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1), +(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4), +(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2), +(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4), +(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4), +(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3), +(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3), 
+(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3), +(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4), +(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3), +(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4), +(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3), +(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5), +(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3), +(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5), +(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4), +(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4), +(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3), +(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3), +(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3), +(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5), +(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4), +(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5), +(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3), +(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4), +(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4), +(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4), 
+(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1), +(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0), +(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3), +(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0), +(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3), +(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3), +(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5), +(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4), +(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5), +(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3), +(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3), +(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3), +(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3), +(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4), +(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4), +(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2), +(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3), +(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3), +(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3), +(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3), 
+(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4), +(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3), +(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4), +(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3), +(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3), +(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4), +(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4), +(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3), +(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4), +(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4), +(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3), +(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4), +(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4), +(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4), +(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3), +(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2), +(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2), +(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3), +(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3), +(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5), 
+(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3), +(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4), +(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4), +(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1), +(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2), +(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3), +(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1), +) + +class JapaneseContextAnalysis(object): + NUM_OF_CATEGORY = 6 + DONT_KNOW = -1 + ENOUGH_REL_THRESHOLD = 100 + MAX_REL_THRESHOLD = 1000 + MINIMUM_DATA_THRESHOLD = 4 + + def __init__(self): + self._total_rel = None + self._rel_sample = None + self._need_to_skip_char_num = None + self._last_char_order = None + self._done = None + self.reset() + + def reset(self): + self._total_rel = 0 # total sequence received + # category counters, each integer counts sequence in its category + self._rel_sample = [0] * self.NUM_OF_CATEGORY + # if last byte in current buffer is not the last byte of a character, + # we need to know how many bytes to skip in next buffer + self._need_to_skip_char_num = 0 + self._last_char_order = -1 # The order of previous char + # If this flag is set to True, detection is done and conclusion has + # been made + self._done = False + + def feed(self, byte_str, num_bytes): + if self._done: + return + + # The buffer we got is byte oriented, and a character may span in more than one + # buffers. In case the last one or two byte in last buffer is not + # complete, we record how many byte needed to complete that character + # and skip these bytes here. We can choose to record those bytes as + # well and analyse the character once it is complete, but since a + # character will not make much difference, by simply skipping + # this character will simply our logic and improve performance. 
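+        # Illustrative example of the skip logic below: if the previous buffer ended
+        # one byte into a two-byte character, self._need_to_skip_char_num is 1, so the
+        # loop starts at i = 1 and the orphaned trailing byte is ignored.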
+ i = self._need_to_skip_char_num + while i < num_bytes: + order, char_len = self.get_order(byte_str[i:i + 2]) + i += char_len + if i > num_bytes: + self._need_to_skip_char_num = i - num_bytes + self._last_char_order = -1 + else: + if (order != -1) and (self._last_char_order != -1): + self._total_rel += 1 + if self._total_rel > self.MAX_REL_THRESHOLD: + self._done = True + break + self._rel_sample[jp2CharContext[self._last_char_order][order]] += 1 + self._last_char_order = order + + def got_enough_data(self): + return self._total_rel > self.ENOUGH_REL_THRESHOLD + + def get_confidence(self): + # This is just one way to calculate confidence. It works well for me. + if self._total_rel > self.MINIMUM_DATA_THRESHOLD: + return (self._total_rel - self._rel_sample[0]) / self._total_rel + else: + return self.DONT_KNOW + + def get_order(self, byte_str): + return -1, 1 + +class SJISContextAnalysis(JapaneseContextAnalysis): + def __init__(self): + super(SJISContextAnalysis, self).__init__() + self._charset_name = "SHIFT_JIS" + + @property + def charset_name(self): + return self._charset_name + + def get_order(self, byte_str): + if not byte_str: + return -1, 1 + # find out current char's byte length + first_char = byte_str[0] + if (0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC): + char_len = 2 + if (first_char == 0x87) or (0xFA <= first_char <= 0xFC): + self._charset_name = "CP932" + else: + char_len = 1 + + # return its order if it is hiragana + if len(byte_str) > 1: + second_char = byte_str[1] + if (first_char == 202) and (0x9F <= second_char <= 0xF1): + return second_char - 0x9F, char_len + + return -1, char_len + +class EUCJPContextAnalysis(JapaneseContextAnalysis): + def get_order(self, byte_str): + if not byte_str: + return -1, 1 + # find out current char's byte length + first_char = byte_str[0] + if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE): + char_len = 2 + elif first_char == 0x8F: + char_len = 3 + else: + char_len = 1 + + # return its order if it is hiragana + if len(byte_str) > 1: + second_char = byte_str[1] + if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3): + return second_char - 0xA1, char_len + + return -1, char_len + + diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langbulgarianmodel.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langbulgarianmodel.py new file mode 100644 index 0000000..2aa4fb2 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langbulgarianmodel.py @@ -0,0 +1,228 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +# this table is modified base on win1251BulgarianCharToOrderMap, so +# only number <64 is sure valid + +Latin5_BulgarianCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40 +110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50 +253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60 +116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70 +194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, # 80 +210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, # 90 + 81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, # a0 + 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0 + 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, # c0 + 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # d0 + 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, # e0 + 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0 +) + +win1251BulgarianCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40 +110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50 +253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60 +116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70 +206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, # 80 +221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, # 90 + 88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, # a0 + 73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, # b0 + 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0 + 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, # d0 + 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # e0 + 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, # f0 +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 96.9392% +# first 1024 sequences:3.0618% +# rest sequences: 0.2992% +# negative sequences: 0.0020% +BulgarianLangModel = ( +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2, +3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1, +0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0, +0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, 
+3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0, +0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0, +0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0, +0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3, +2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1, +3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2, +1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0, +3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1, +1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0, +2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2, +2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0, +3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2, +1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0, +2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2, +2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2, +1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0, +2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2, +2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0, +2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2, +1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0, +2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2, +1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0, +3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2, +1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0, +3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1, +1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0, +2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1, +1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0, +2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2, +1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0, +2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1, +1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2, +1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1, +2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2, +1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2, +1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1, +0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2, +1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1, +1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0, +1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1, +0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1, +0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, +0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, 
+2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1, +1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +) + +Latin5BulgarianModel = { + 'char_to_order_map': Latin5_BulgarianCharToOrderMap, + 'precedence_matrix': BulgarianLangModel, + 'typical_positive_ratio': 0.969392, + 'keep_english_letter': False, + 'charset_name': "ISO-8859-5", + 'language': 'Bulgairan', +} + +Win1251BulgarianModel = { + 'char_to_order_map': win1251BulgarianCharToOrderMap, + 'precedence_matrix': BulgarianLangModel, + 'typical_positive_ratio': 0.969392, + 'keep_english_letter': False, + 'charset_name': "windows-1251", + 'language': 'Bulgarian', +} diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langcyrillicmodel.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langcyrillicmodel.py new file mode 100644 index 0000000..e5f9a1f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langcyrillicmodel.py @@ -0,0 +1,333 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# KOI8-R language model +# Character Mapping Table: +KOI8R_char_to_order_map = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80 +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90 +223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0 +238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0 + 27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0 + 15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0 + 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0 + 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0 +) + +win1251_char_to_order_map = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, +239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253, + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +) + +latin5_char_to_order_map = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 
43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, +) + +macCyrillic_char_to_order_map = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, +239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255, +) + +IBM855_char_to_order_map = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205, +206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70, + 3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219, +220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229, +230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243, + 8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248, + 43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249, +250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255, +) + +IBM866_char_to_order_map = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +239, 
68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 97.6601% +# first 1024 sequences: 2.3389% +# rest sequences: 0.1237% +# negative sequences: 0.0009% +RussianLangModel = ( +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2, +3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, +0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, +0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1, +1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1, +1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0, +2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1, +1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0, +3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1, +1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0, +2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2, +1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1, +1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, +2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1, +1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0, +3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2, +1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1, +2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1, +1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0, +2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1, +1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0, +1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1, +1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0, +3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1, +3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1, +1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1, +1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1, +0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1, +1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0, +1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1, +0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1, +1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2, +2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1, +1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0, +1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0, +2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0, +1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1, 
+0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, +2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1, +1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1, +1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0, +0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1, +0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1, +0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1, +0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0, +0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1, +0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, +2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0, +0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +) + +Koi8rModel = { + 'char_to_order_map': KOI8R_char_to_order_map, + 'precedence_matrix': RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "KOI8-R", + 'language': 'Russian', +} + +Win1251CyrillicModel = { + 'char_to_order_map': win1251_char_to_order_map, + 'precedence_matrix': RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "windows-1251", + 'language': 'Russian', +} + +Latin5CyrillicModel = { + 'char_to_order_map': latin5_char_to_order_map, + 'precedence_matrix': RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "ISO-8859-5", + 'language': 'Russian', +} + +MacCyrillicModel = { + 'char_to_order_map': macCyrillic_char_to_order_map, + 'precedence_matrix': RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "MacCyrillic", + 'language': 'Russian', +} + +Ibm866Model = { + 'char_to_order_map': IBM866_char_to_order_map, + 'precedence_matrix': RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "IBM866", + 'language': 'Russian', +} + +Ibm855Model = { + 'char_to_order_map': IBM855_char_to_order_map, + 'precedence_matrix': RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "IBM855", + 'language': 'Russian', +} diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langgreekmodel.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langgreekmodel.py new file mode 100644 index 0000000..5332221 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langgreekmodel.py @@ -0,0 +1,225 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. 
+# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin7_char_to_order_map = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 + 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 +253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 +253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 +124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 + 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 +) + +win1253_char_to_order_map = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 + 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 +253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 +253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 +124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 + 9, 8, 14, 7, 2, 12, 28, 23, 42, 
24, 64, 75, 19, 26, 27,253, # f0 +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 98.2851% +# first 1024 sequences:1.7001% +# rest sequences: 0.0359% +# negative sequences: 0.0148% +GreekLangModel = ( +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0, +2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0, +2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0, +2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0, +0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0, +2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0, +3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0, +2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0, +2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0, +0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0, +0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2, +0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0, +0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2, +0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0, +0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2, +0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2, +0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2, +0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0, +0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0, +0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0, +0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2, +0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2, +0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0, +0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2, 
+0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0, +0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0, +0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1, +0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0, +0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0, +0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +Latin7GreekModel = { + 'char_to_order_map': Latin7_char_to_order_map, + 'precedence_matrix': GreekLangModel, + 'typical_positive_ratio': 0.982851, + 'keep_english_letter': False, + 'charset_name': "ISO-8859-7", + 'language': 'Greek', +} + +Win1253GreekModel = { + 'char_to_order_map': win1253_char_to_order_map, + 'precedence_matrix': GreekLangModel, + 'typical_positive_ratio': 0.982851, + 'keep_english_letter': False, + 'charset_name': "windows-1253", + 'language': 'Greek', +} diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langhebrewmodel.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langhebrewmodel.py new file mode 100644 index 0000000..58f4c87 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langhebrewmodel.py @@ -0,0 +1,200 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Simon Montagu +# Portions created by the Initial Developer are Copyright (C) 2005 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# Shoshannah Forbes - original C code (?) +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Windows-1255 language model +# Character Mapping Table: +WIN1255_CHAR_TO_ORDER_MAP = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40 + 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50 +253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60 + 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70 +124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214, +215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221, + 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227, +106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234, + 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237, +238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250, + 9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23, + 12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 98.4004% +# first 1024 sequences: 1.5981% +# rest sequences: 0.087% +# negative sequences: 0.0015% +HEBREW_LANG_MODEL = ( +0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0, +3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2, +1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2, +1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3, +1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2, +1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2, +1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2, +0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2, +0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2, +1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2, +0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1, +0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0, +0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, 
+3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2, +0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2, +0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2, +0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2, +0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1, +0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2, +0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2, +0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2, +0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2, +0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0, +1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2, +0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3, +0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0, +0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0, +0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0, +2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0, +0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0, +0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1, +1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1, +0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1, +2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1, +2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1, +1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1, +2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, +0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0, +) + +Win1255HebrewModel = { + 'char_to_order_map': WIN1255_CHAR_TO_ORDER_MAP, + 'precedence_matrix': HEBREW_LANG_MODEL, + 'typical_positive_ratio': 0.984004, + 'keep_english_letter': False, + 'charset_name': "windows-1255", + 'language': 'Hebrew', +} diff --git 
a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langhungarianmodel.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langhungarianmodel.py new file mode 100644 index 0000000..bb7c095 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langhungarianmodel.py @@ -0,0 +1,225 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin2_HungarianCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, + 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, +253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, + 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, +159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174, +175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190, +191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205, + 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, +221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231, +232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241, + 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85, +245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253, +) + +win1250HungarianCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, + 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, +253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, + 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, +161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176, +177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190, 
+191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205, + 81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, +221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231, +232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241, + 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87, +245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 94.7368% +# first 1024 sequences:5.2623% +# rest sequences: 0.8894% +# negative sequences: 0.0009% +HungarianLangModel = ( +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2, +3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0, +3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3, +0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2, +0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, 
+3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0, +1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0, +1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0, +1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1, +3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1, +2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1, +2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1, +2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1, +2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0, +2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, +3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1, +2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1, +2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1, +2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1, +1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1, +1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1, +3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0, +1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1, +1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1, +2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1, +2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0, +2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1, +3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1, +2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1, +1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0, +1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1, +2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1, +1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0, +1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1, +2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0, +1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0, +1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0, 
+2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1, +2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, +1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1, +1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1, +1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0, +0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0, +2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1, +2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1, +1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1, +2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1, +1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0, +1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0, +2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0, +2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1, +2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0, +1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0, +2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0, +0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0, +0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +) + +Latin2HungarianModel = { + 'char_to_order_map': Latin2_HungarianCharToOrderMap, + 'precedence_matrix': HungarianLangModel, + 'typical_positive_ratio': 0.947368, + 'keep_english_letter': True, + 'charset_name': "ISO-8859-2", + 'language': 'Hungarian', +} + +Win1250HungarianModel = { + 'char_to_order_map': win1250HungarianCharToOrderMap, + 'precedence_matrix': HungarianLangModel, + 'typical_positive_ratio': 0.947368, + 'keep_english_letter': True, + 'charset_name': "windows-1250", + 'language': 'Hungarian', +} diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langthaimodel.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langthaimodel.py new file mode 100644 index 0000000..15f94c2 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langthaimodel.py @@ -0,0 +1,199 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. 
+# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# The following result for thai was collected from a limited sample (1M). + +# Character Mapping Table: +TIS620CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40 +188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50 +253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60 + 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70 +209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222, +223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235, +236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57, + 49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54, + 45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63, + 22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244, + 11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247, + 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 92.6386% +# first 1024 sequences:7.3177% +# rest sequences: 1.0230% +# negative sequences: 0.0436% +ThaiLangModel = ( +0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3, +0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2, +3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3, +0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2, +3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2, +3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1, +3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1, +3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1, +2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1, +3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1, +0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1, +0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2, +1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3, +3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0, +1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2, 
+0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3, +0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0, +3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1, +2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2, +0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2, +3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0, +2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, +3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1, +2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1, +3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0, +3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1, +3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1, +3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1, +1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2, +0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3, +0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, +3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0, +3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1, +1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0, +3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1, +3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2, +0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0, +0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0, +1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1, +1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1, +3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1, +0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0, +3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0, +0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1, +0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0, +0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1, +0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1, +0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0, +0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1, +0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0, +0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0, +0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, +3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1, +2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, +0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0, +3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0, +1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +TIS620ThaiModel = { + 'char_to_order_map': TIS620CharToOrderMap, + 'precedence_matrix': ThaiLangModel, + 'typical_positive_ratio': 0.926386, + 'keep_english_letter': False, + 'charset_name': "TIS-620", + 'language': 'Thai', +} diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langturkishmodel.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langturkishmodel.py new file mode 100644 index 0000000..a427a45 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/langturkishmodel.py @@ -0,0 +1,193 @@ +# -*- coding: utf-8 -*- +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Özgür Baskın - Turkish Language Model +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin5_TurkishCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255, 23, 37, 47, 39, 29, 52, 36, 45, 53, 60, 16, 49, 20, 46, 42, + 48, 69, 44, 35, 31, 51, 38, 62, 65, 43, 56,255,255,255,255,255, +255, 1, 21, 28, 12, 2, 18, 27, 25, 3, 24, 10, 5, 13, 4, 15, + 26, 64, 7, 8, 9, 14, 32, 57, 58, 11, 22,255,255,255,255,255, +180,179,178,177,176,175,174,173,172,171,170,169,168,167,166,165, +164,163,162,161,160,159,101,158,157,156,155,154,153,152,151,106, +150,149,148,147,146,145,144,100,143,142,141,140,139,138,137,136, + 94, 80, 93,135,105,134,133, 63,132,131,130,129,128,127,126,125, +124,104, 73, 99, 79, 85,123, 54,122, 98, 92,121,120, 91,103,119, + 68,118,117, 97,116,115, 50, 90,114,113,112,111, 55, 41, 40, 86, + 89, 70, 59, 78, 71, 82, 88, 33, 77, 66, 84, 83,110, 75, 61, 96, + 30, 67,109, 74, 87,102, 34, 95, 81,108, 76, 72, 17, 6, 19,107, +) + +TurkishLangModel = ( +3,2,3,3,3,1,3,3,3,3,3,3,3,3,2,1,1,3,3,1,3,3,0,3,3,3,3,3,0,3,1,3, +3,2,1,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1, +3,2,2,3,3,0,3,3,3,3,3,3,3,2,3,1,0,3,3,1,3,3,0,3,3,3,3,3,0,3,0,3, +3,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,0,1,0,1, +3,3,2,3,3,0,3,3,3,3,3,3,3,2,3,1,1,3,3,0,3,3,1,2,3,3,3,3,0,3,0,3, 
+3,1,1,0,0,0,1,0,0,0,0,1,1,0,1,2,1,0,0,0,1,0,0,0,0,2,0,0,0,0,0,1, +3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,2,0,3,2,1,2,2,1,3,3,0,0,0,2, +2,2,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1, +3,3,3,2,3,3,1,2,3,3,3,3,3,3,3,1,3,2,1,0,3,2,0,1,2,3,3,2,1,0,0,2, +2,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,0,0, +1,0,1,3,3,1,3,3,3,3,3,3,3,1,2,0,0,2,3,0,2,3,0,0,2,2,2,3,0,3,0,1, +2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,2,0,2,3,2,3,3,1,0,0,2, +3,2,0,0,1,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,2,0,0,1, +3,3,3,2,3,3,2,3,3,3,3,2,3,3,3,0,3,3,0,0,2,1,0,0,2,3,2,2,0,0,0,2, +2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,2,0,0,1, +3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,0,1,3,2,1,1,3,2,3,2,1,0,0,2, +2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0, +3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,2,0,2,3,0,0,2,2,2,2,0,0,0,2, +3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0, +3,3,3,3,3,3,3,2,2,2,2,3,2,3,3,0,3,3,1,1,2,2,0,0,2,2,3,2,0,0,1,3, +0,3,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1, +3,3,3,2,3,3,3,2,1,2,2,3,2,3,3,0,3,2,0,0,1,1,0,1,1,2,1,2,0,0,0,1, +0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0, +3,3,3,2,3,3,2,3,2,2,2,3,3,3,3,1,3,1,1,0,3,2,1,1,3,3,2,3,1,0,0,1, +1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,1, +3,2,2,3,3,0,3,3,3,3,3,3,3,2,2,1,0,3,3,1,3,3,0,1,3,3,2,3,0,3,0,3, +2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +2,2,2,3,3,0,3,3,3,3,3,3,3,3,3,0,0,3,2,0,3,3,0,3,2,3,3,3,0,3,1,3, +2,0,0,0,0,0,0,0,0,0,0,1,0,1,2,0,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1, +3,3,3,1,2,3,3,1,0,0,1,0,0,3,3,2,3,0,0,2,0,0,2,0,2,0,0,0,2,0,2,0, +0,3,1,0,1,0,0,0,2,2,1,0,1,1,2,1,2,2,2,0,2,1,1,0,0,0,2,0,0,0,0,0, +1,2,1,3,3,0,3,3,3,3,3,2,3,0,0,0,0,2,3,0,2,3,1,0,2,3,1,3,0,3,0,2, +3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,1,3,3,2,2,3,2,2,0,1,2,3,0,1,2,1,0,1,0,0,0,1,0,2,2,0,0,0,1, +1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0, +3,3,3,1,3,3,1,1,3,3,1,1,3,3,1,0,2,1,2,0,2,1,0,0,1,1,2,1,0,0,0,2, +2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,1,0,2,1,3,0,0,2,0,0,3,3,0,3,0,0,1,0,1,2,0,0,1,1,2,2,0,1,0, +0,1,2,1,1,0,1,0,1,1,1,1,1,0,1,1,1,2,2,1,2,0,1,0,0,0,0,0,0,1,0,0, +3,3,3,2,3,2,3,3,0,2,2,2,3,3,3,0,3,0,0,0,2,2,0,1,2,1,1,1,0,0,0,1, +0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +3,3,3,3,3,3,2,1,2,2,3,3,3,3,2,0,2,0,0,0,2,2,0,0,2,1,3,3,0,0,1,1, +1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0, +1,1,2,3,3,0,3,3,3,3,3,3,2,2,0,2,0,2,3,2,3,2,2,2,2,2,2,2,1,3,2,3, +2,0,2,1,2,2,2,2,1,1,2,2,1,2,2,1,2,0,0,2,1,1,0,2,1,0,0,1,0,0,0,1, +2,3,3,1,1,1,0,1,1,1,2,3,2,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0, +0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,2,3,2,2,1,3,3,3,0,2,1,2,0,2,1,0,0,1,1,1,1,1,0,0,1, +2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0, +3,3,3,2,3,3,3,3,3,2,3,1,2,3,3,1,2,0,0,0,0,0,0,0,3,2,1,1,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +3,3,3,2,2,3,3,2,1,1,1,1,1,3,3,0,3,1,0,0,1,1,0,0,3,1,2,1,0,0,0,0, +0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0, +3,3,3,2,2,3,2,2,2,3,2,1,1,3,3,0,3,0,0,0,0,1,0,0,3,1,1,2,0,0,0,1, +1,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,1,3,3,0,3,3,3,3,3,2,2,2,1,2,0,2,1,2,2,1,1,0,1,2,2,2,2,2,2,2, +0,0,2,1,2,1,2,1,0,1,1,3,1,2,1,1,2,0,0,2,0,1,0,1,0,1,0,0,0,1,0,1, 
+3,3,3,1,3,3,3,0,1,1,0,2,2,3,1,0,3,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,0,0,2,2,1,0,0,1,0,0,3,3,1,3,0,0,1,1,0,2,0,3,0,0,0,2,0,1,1, +0,1,2,0,1,2,2,0,2,2,2,2,1,0,2,1,1,0,2,0,2,1,2,0,0,0,0,0,0,0,0,0, +3,3,3,1,3,2,3,2,0,2,2,2,1,3,2,0,2,1,2,0,1,2,0,0,1,0,2,2,0,0,0,2, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0, +3,3,3,0,3,3,1,1,2,3,1,0,3,2,3,0,3,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0, +1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,3,3,0,3,3,2,3,3,2,2,0,0,0,0,1,2,0,1,3,0,0,0,3,1,1,0,3,0,2, +2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,1,2,2,1,0,3,1,1,1,1,3,3,2,3,0,0,1,0,1,2,0,2,2,0,2,2,0,2,1, +0,2,2,1,1,1,1,0,2,1,1,0,1,1,1,1,2,1,2,1,2,0,1,0,1,0,0,0,0,0,0,0, +3,3,3,0,1,1,3,0,0,1,1,0,0,2,2,0,3,0,0,1,1,0,1,0,0,0,0,0,2,0,0,0, +0,3,1,0,1,0,1,0,2,0,0,1,0,1,0,1,1,1,2,1,1,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,0,2,0,2,0,1,1,1,0,0,3,3,0,2,0,0,1,0,0,2,1,1,0,1,0,1,0,1,0, +0,2,0,1,2,0,2,0,2,1,1,0,1,0,2,1,1,0,2,1,1,0,1,0,0,0,1,1,0,0,0,0, +3,2,3,0,1,0,0,0,0,0,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,0,2,0,0,0, +0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,2,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,0,0,2,3,0,0,1,0,1,0,2,3,2,3,0,0,1,3,0,2,1,0,0,0,0,2,0,1,0, +0,2,1,0,0,1,1,0,2,1,0,0,1,0,0,1,1,0,1,1,2,0,1,0,0,0,0,1,0,0,0,0, +3,2,2,0,0,1,1,0,0,0,0,0,0,3,1,1,1,0,0,0,0,0,1,0,0,0,0,0,2,0,1,0, +0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,3,3,0,2,3,2,2,1,2,2,1,1,2,0,1,3,2,2,2,0,0,2,2,0,0,0,1,2,1, +3,0,2,1,1,0,1,1,1,0,1,2,2,2,1,1,2,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0, +0,1,1,2,3,0,3,3,3,2,2,2,2,1,0,1,0,1,0,1,2,2,0,0,2,2,1,3,1,1,2,1, +0,0,1,1,2,0,1,1,0,0,1,2,0,2,1,1,2,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0, +3,3,2,0,0,3,1,0,0,0,0,0,0,3,2,1,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0, +0,2,1,1,0,0,1,0,1,2,0,0,1,1,0,0,2,1,1,1,1,0,2,0,0,0,0,0,0,0,0,0, +3,3,2,0,0,1,0,0,0,0,1,0,0,3,3,2,2,0,0,1,0,0,2,0,1,0,0,0,2,0,1,0, +0,0,1,1,0,0,2,0,2,1,0,0,1,1,2,1,2,0,2,1,2,1,1,1,0,0,1,1,0,0,0,0, +3,3,2,0,0,2,2,0,0,0,1,1,0,2,2,1,3,1,0,1,0,1,2,0,0,0,0,0,1,0,1,0, +0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,0,0,0,1,0,0,1,0,0,2,3,1,2,0,0,1,0,0,2,0,0,0,1,0,2,0,2,0, +0,1,1,2,2,1,2,0,2,1,1,0,0,1,1,0,1,1,1,1,2,1,1,0,0,0,0,0,0,0,0,0, +3,3,3,0,2,1,2,1,0,0,1,1,0,3,3,1,2,0,0,1,0,0,2,0,2,0,1,1,2,0,0,0, +0,0,1,1,1,1,2,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0, +3,3,3,0,2,2,3,2,0,0,1,0,0,2,3,1,0,0,0,0,0,0,2,0,2,0,0,0,2,0,0,0, +0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,0,0,0,0,0,0,0,1,0,0,2,2,2,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0, +0,0,2,1,1,0,1,0,2,1,1,0,0,1,1,2,1,0,2,0,2,0,1,0,0,0,2,0,0,0,0,0, +0,0,0,2,2,0,2,1,1,1,1,2,2,0,0,1,0,1,0,0,1,3,0,0,0,0,1,0,0,2,1,0, +0,0,1,0,1,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +2,0,0,2,3,0,2,3,1,2,2,0,2,0,0,2,0,2,1,1,1,2,1,0,0,1,2,1,1,2,1,0, +1,0,2,0,1,0,1,1,0,0,2,2,1,2,1,1,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,0,2,1,2,0,0,0,1,0,0,3,2,0,1,0,0,1,0,0,2,0,0,0,1,2,1,0,1,0, +0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,2,2,0,2,2,1,1,0,1,1,1,1,1,0,0,1,2,1,1,1,0,1,0,0,0,1,1,1,1, +0,0,2,1,0,1,1,1,0,1,1,2,1,2,1,1,2,0,1,1,2,1,0,2,0,0,0,0,0,0,0,0, +3,2,2,0,0,2,0,0,0,0,0,0,0,2,2,0,2,0,0,1,0,0,2,0,0,0,0,0,2,0,0,0, +0,2,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,3,2,0,2,2,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0, +2,0,1,0,1,0,1,1,0,0,1,2,0,1,0,1,1,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0, +2,2,2,0,1,1,0,0,0,1,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,1,2,0,1,0, 
+0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,1,0,1,1,1,0,0,0,0,1,2,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +1,1,2,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,1, +0,0,1,2,2,0,2,1,2,1,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +2,2,2,0,0,0,1,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +Latin5TurkishModel = { + 'char_to_order_map': Latin5_TurkishCharToOrderMap, + 'precedence_matrix': TurkishLangModel, + 'typical_positive_ratio': 0.970290, + 'keep_english_letter': True, + 'charset_name': "ISO-8859-9", + 'language': 'Turkish', +} diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/latin1prober.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/latin1prober.py new file mode 100644 index 0000000..7d1e8c2 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/latin1prober.py @@ -0,0 +1,145 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetprober import CharSetProber +from .enums import ProbingState + +FREQ_CAT_NUM = 4 + +UDF = 0 # undefined +OTH = 1 # other +ASC = 2 # ascii capital letter +ASS = 3 # ascii small letter +ACV = 4 # accent capital vowel +ACO = 5 # accent capital other +ASV = 6 # accent small vowel +ASO = 7 # accent small other +CLASS_NUM = 8 # total classes + +Latin1_CharToClass = ( + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F + OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47 + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57 + ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F + OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67 + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77 + ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F + OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87 + OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F + UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97 + OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF + ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7 + ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF + ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7 + ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF + ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7 + ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF + ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7 + ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF +) + +# 0 : illegal +# 1 : very unlikely +# 2 : normal +# 3 : very likely +Latin1ClassModel = ( +# UDF OTH ASC ASS ACV ACO ASV ASO + 0, 0, 0, 0, 0, 0, 0, 0, # UDF + 0, 3, 3, 3, 3, 3, 3, 3, # OTH + 0, 3, 3, 3, 3, 3, 3, 3, # ASC + 0, 3, 3, 3, 1, 1, 3, 3, # ASS + 0, 3, 3, 3, 1, 2, 1, 2, # ACV + 0, 3, 3, 3, 3, 3, 3, 3, # ACO + 0, 3, 1, 3, 1, 1, 1, 3, # ASV + 0, 3, 1, 3, 1, 1, 3, 3, # ASO +) + + +class Latin1Prober(CharSetProber): + def __init__(self): + super(Latin1Prober, self).__init__() + self._last_char_class = None + self._freq_counter = None + self.reset() + + def reset(self): + self._last_char_class = OTH + self._freq_counter = [0] * FREQ_CAT_NUM + CharSetProber.reset(self) + + @property + def charset_name(self): + return "ISO-8859-1" + + @property + def language(self): + return "" + + def feed(self, byte_str): + byte_str = self.filter_with_english_letters(byte_str) + for c in byte_str: + char_class = Latin1_CharToClass[c] + freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM) + + char_class] + if freq == 0: + self._state = ProbingState.NOT_ME + break + self._freq_counter[freq] += 1 + self._last_char_class = char_class + + return self.state + + def get_confidence(self): + if self.state == 
ProbingState.NOT_ME: + return 0.01 + + total = sum(self._freq_counter) + if total < 0.01: + confidence = 0.0 + else: + confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0) + / total) + if confidence < 0.0: + confidence = 0.0 + # lower the confidence of latin1 so that other more accurate + # detector can take priority. + confidence = confidence * 0.73 + return confidence diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/mbcharsetprober.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/mbcharsetprober.py new file mode 100644 index 0000000..6256ecf --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/mbcharsetprober.py @@ -0,0 +1,91 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# Proofpoint, Inc. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetprober import CharSetProber +from .enums import ProbingState, MachineState + + +class MultiByteCharSetProber(CharSetProber): + """ + MultiByteCharSetProber + """ + + def __init__(self, lang_filter=None): + super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter) + self.distribution_analyzer = None + self.coding_sm = None + self._last_char = [0, 0] + + def reset(self): + super(MultiByteCharSetProber, self).reset() + if self.coding_sm: + self.coding_sm.reset() + if self.distribution_analyzer: + self.distribution_analyzer.reset() + self._last_char = [0, 0] + + @property + def charset_name(self): + raise NotImplementedError + + @property + def language(self): + raise NotImplementedError + + def feed(self, byte_str): + for i in range(len(byte_str)): + coding_state = self.coding_sm.next_state(byte_str[i]) + if coding_state == MachineState.ERROR: + self.logger.debug('%s %s prober hit error at byte %s', + self.charset_name, self.language, i) + self._state = ProbingState.NOT_ME + break + elif coding_state == MachineState.ITS_ME: + self._state = ProbingState.FOUND_IT + break + elif coding_state == MachineState.START: + char_len = self.coding_sm.get_current_charlen() + if i == 0: + self._last_char[1] = byte_str[0] + self.distribution_analyzer.feed(self._last_char, char_len) + else: + self.distribution_analyzer.feed(byte_str[i - 1:i + 1], + char_len) + + self._last_char[0] = byte_str[-1] + + if self.state == ProbingState.DETECTING: + if (self.distribution_analyzer.got_enough_data() and + (self.get_confidence() > 
self.SHORTCUT_THRESHOLD)): + self._state = ProbingState.FOUND_IT + + return self.state + + def get_confidence(self): + return self.distribution_analyzer.get_confidence() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/mbcsgroupprober.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/mbcsgroupprober.py new file mode 100644 index 0000000..530abe7 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/mbcsgroupprober.py @@ -0,0 +1,54 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# Proofpoint, Inc. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetgroupprober import CharSetGroupProber +from .utf8prober import UTF8Prober +from .sjisprober import SJISProber +from .eucjpprober import EUCJPProber +from .gb2312prober import GB2312Prober +from .euckrprober import EUCKRProber +from .cp949prober import CP949Prober +from .big5prober import Big5Prober +from .euctwprober import EUCTWProber + + +class MBCSGroupProber(CharSetGroupProber): + def __init__(self, lang_filter=None): + super(MBCSGroupProber, self).__init__(lang_filter=lang_filter) + self.probers = [ + UTF8Prober(), + SJISProber(), + EUCJPProber(), + GB2312Prober(), + EUCKRProber(), + CP949Prober(), + Big5Prober(), + EUCTWProber() + ] + self.reset() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/mbcssm.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/mbcssm.py new file mode 100644 index 0000000..8360d0f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/mbcssm.py @@ -0,0 +1,572 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. 
+# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .enums import MachineState + +# BIG5 + +BIG5_CLS = ( + 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,1, # 78 - 7f + 4,4,4,4,4,4,4,4, # 80 - 87 + 4,4,4,4,4,4,4,4, # 88 - 8f + 4,4,4,4,4,4,4,4, # 90 - 97 + 4,4,4,4,4,4,4,4, # 98 - 9f + 4,3,3,3,3,3,3,3, # a0 - a7 + 3,3,3,3,3,3,3,3, # a8 - af + 3,3,3,3,3,3,3,3, # b0 - b7 + 3,3,3,3,3,3,3,3, # b8 - bf + 3,3,3,3,3,3,3,3, # c0 - c7 + 3,3,3,3,3,3,3,3, # c8 - cf + 3,3,3,3,3,3,3,3, # d0 - d7 + 3,3,3,3,3,3,3,3, # d8 - df + 3,3,3,3,3,3,3,3, # e0 - e7 + 3,3,3,3,3,3,3,3, # e8 - ef + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,3,3,0 # f8 - ff +) + +BIG5_ST = ( + MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 + MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f + MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17 +) + +BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0) + +BIG5_SM_MODEL = {'class_table': BIG5_CLS, + 'class_factor': 5, + 'state_table': BIG5_ST, + 'char_len_table': BIG5_CHAR_LEN_TABLE, + 'name': 'Big5'} + +# CP949 + +CP949_CLS = ( + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f + 1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f + 1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f + 4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f + 1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f + 5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f + 0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f + 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f + 6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af + 7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf + 7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf + 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df + 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef + 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff +) + +CP949_ST = ( +#cls= 0 1 2 3 4 5 6 7 8 9 # previous state = + MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START, 4, 5,MachineState.ERROR, 6, # MachineState.START + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, # MachineState.ERROR + 
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME + MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 3 + MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 4 + MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5 + MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6 +) + +CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2) + +CP949_SM_MODEL = {'class_table': CP949_CLS, + 'class_factor': 10, + 'state_table': CP949_ST, + 'char_len_table': CP949_CHAR_LEN_TABLE, + 'name': 'CP949'} + +# EUC-JP + +EUCJP_CLS = ( + 4,4,4,4,4,4,4,4, # 00 - 07 + 4,4,4,4,4,4,5,5, # 08 - 0f + 4,4,4,4,4,4,4,4, # 10 - 17 + 4,4,4,5,4,4,4,4, # 18 - 1f + 4,4,4,4,4,4,4,4, # 20 - 27 + 4,4,4,4,4,4,4,4, # 28 - 2f + 4,4,4,4,4,4,4,4, # 30 - 37 + 4,4,4,4,4,4,4,4, # 38 - 3f + 4,4,4,4,4,4,4,4, # 40 - 47 + 4,4,4,4,4,4,4,4, # 48 - 4f + 4,4,4,4,4,4,4,4, # 50 - 57 + 4,4,4,4,4,4,4,4, # 58 - 5f + 4,4,4,4,4,4,4,4, # 60 - 67 + 4,4,4,4,4,4,4,4, # 68 - 6f + 4,4,4,4,4,4,4,4, # 70 - 77 + 4,4,4,4,4,4,4,4, # 78 - 7f + 5,5,5,5,5,5,5,5, # 80 - 87 + 5,5,5,5,5,5,1,3, # 88 - 8f + 5,5,5,5,5,5,5,5, # 90 - 97 + 5,5,5,5,5,5,5,5, # 98 - 9f + 5,2,2,2,2,2,2,2, # a0 - a7 + 2,2,2,2,2,2,2,2, # a8 - af + 2,2,2,2,2,2,2,2, # b0 - b7 + 2,2,2,2,2,2,2,2, # b8 - bf + 2,2,2,2,2,2,2,2, # c0 - c7 + 2,2,2,2,2,2,2,2, # c8 - cf + 2,2,2,2,2,2,2,2, # d0 - d7 + 2,2,2,2,2,2,2,2, # d8 - df + 0,0,0,0,0,0,0,0, # e0 - e7 + 0,0,0,0,0,0,0,0, # e8 - ef + 0,0,0,0,0,0,0,0, # f0 - f7 + 0,0,0,0,0,0,0,5 # f8 - ff +) + +EUCJP_ST = ( + 3, 4, 3, 5,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f + MachineState.ITS_ME,MachineState.ITS_ME,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17 + MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f + 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27 +) + +EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0) + +EUCJP_SM_MODEL = {'class_table': EUCJP_CLS, + 'class_factor': 6, + 'state_table': EUCJP_ST, + 'char_len_table': EUCJP_CHAR_LEN_TABLE, + 'name': 'EUC-JP'} + +# EUC-KR + +EUCKR_CLS = ( + 1,1,1,1,1,1,1,1, # 00 - 07 + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 1,1,1,1,1,1,1,1, # 40 - 47 + 1,1,1,1,1,1,1,1, # 48 - 4f + 1,1,1,1,1,1,1,1, # 50 - 57 + 1,1,1,1,1,1,1,1, # 58 - 5f + 1,1,1,1,1,1,1,1, # 60 - 67 + 1,1,1,1,1,1,1,1, # 68 - 6f 
+ 1,1,1,1,1,1,1,1, # 70 - 77 + 1,1,1,1,1,1,1,1, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,0,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,2,2,2,2,2,2,2, # a0 - a7 + 2,2,2,2,2,3,3,3, # a8 - af + 2,2,2,2,2,2,2,2, # b0 - b7 + 2,2,2,2,2,2,2,2, # b8 - bf + 2,2,2,2,2,2,2,2, # c0 - c7 + 2,3,2,2,2,2,2,2, # c8 - cf + 2,2,2,2,2,2,2,2, # d0 - d7 + 2,2,2,2,2,2,2,2, # d8 - df + 2,2,2,2,2,2,2,2, # e0 - e7 + 2,2,2,2,2,2,2,2, # e8 - ef + 2,2,2,2,2,2,2,2, # f0 - f7 + 2,2,2,2,2,2,2,0 # f8 - ff +) + +EUCKR_ST = ( + MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 + MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f +) + +EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0) + +EUCKR_SM_MODEL = {'class_table': EUCKR_CLS, + 'class_factor': 4, + 'state_table': EUCKR_ST, + 'char_len_table': EUCKR_CHAR_LEN_TABLE, + 'name': 'EUC-KR'} + +# EUC-TW + +EUCTW_CLS = ( + 2,2,2,2,2,2,2,2, # 00 - 07 + 2,2,2,2,2,2,0,0, # 08 - 0f + 2,2,2,2,2,2,2,2, # 10 - 17 + 2,2,2,0,2,2,2,2, # 18 - 1f + 2,2,2,2,2,2,2,2, # 20 - 27 + 2,2,2,2,2,2,2,2, # 28 - 2f + 2,2,2,2,2,2,2,2, # 30 - 37 + 2,2,2,2,2,2,2,2, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,2, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,6,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,3,4,4,4,4,4,4, # a0 - a7 + 5,5,1,1,1,1,1,1, # a8 - af + 1,1,1,1,1,1,1,1, # b0 - b7 + 1,1,1,1,1,1,1,1, # b8 - bf + 1,1,3,1,3,3,3,3, # c0 - c7 + 3,3,3,3,3,3,3,3, # c8 - cf + 3,3,3,3,3,3,3,3, # d0 - d7 + 3,3,3,3,3,3,3,3, # d8 - df + 3,3,3,3,3,3,3,3, # e0 - e7 + 3,3,3,3,3,3,3,3, # e8 - ef + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,3,3,0 # f8 - ff +) + +EUCTW_ST = ( + MachineState.ERROR,MachineState.ERROR,MachineState.START, 3, 3, 3, 4,MachineState.ERROR,#00-07 + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f + MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.ERROR,#10-17 + MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f + 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27 + MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f +) + +EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3) + +EUCTW_SM_MODEL = {'class_table': EUCTW_CLS, + 'class_factor': 7, + 'state_table': EUCTW_ST, + 'char_len_table': EUCTW_CHAR_LEN_TABLE, + 'name': 'x-euc-tw'} + +# GB2312 + +GB2312_CLS = ( + 1,1,1,1,1,1,1,1, # 00 - 07 + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 3,3,3,3,3,3,3,3, # 30 - 37 + 3,3,1,1,1,1,1,1, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 
68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,4, # 78 - 7f + 5,6,6,6,6,6,6,6, # 80 - 87 + 6,6,6,6,6,6,6,6, # 88 - 8f + 6,6,6,6,6,6,6,6, # 90 - 97 + 6,6,6,6,6,6,6,6, # 98 - 9f + 6,6,6,6,6,6,6,6, # a0 - a7 + 6,6,6,6,6,6,6,6, # a8 - af + 6,6,6,6,6,6,6,6, # b0 - b7 + 6,6,6,6,6,6,6,6, # b8 - bf + 6,6,6,6,6,6,6,6, # c0 - c7 + 6,6,6,6,6,6,6,6, # c8 - cf + 6,6,6,6,6,6,6,6, # d0 - d7 + 6,6,6,6,6,6,6,6, # d8 - df + 6,6,6,6,6,6,6,6, # e0 - e7 + 6,6,6,6,6,6,6,6, # e8 - ef + 6,6,6,6,6,6,6,6, # f0 - f7 + 6,6,6,6,6,6,6,0 # f8 - ff +) + +GB2312_ST = ( + MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, 3,MachineState.ERROR,#00-07 + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f + MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,#10-17 + 4,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f + MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27 + MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f +) + +# To be accurate, the length of class 6 can be either 2 or 4. +# But it is not necessary to discriminate between the two since +# it is used for frequency analysis only, and we are validating +# each code range there as well. So it is safe to set it to be +# 2 here. +GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2) + +GB2312_SM_MODEL = {'class_table': GB2312_CLS, + 'class_factor': 7, + 'state_table': GB2312_ST, + 'char_len_table': GB2312_CHAR_LEN_TABLE, + 'name': 'GB2312'} + +# Shift_JIS + +SJIS_CLS = ( + 1,1,1,1,1,1,1,1, # 00 - 07 + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,1, # 78 - 7f + 3,3,3,3,3,2,2,3, # 80 - 87 + 3,3,3,3,3,3,3,3, # 88 - 8f + 3,3,3,3,3,3,3,3, # 90 - 97 + 3,3,3,3,3,3,3,3, # 98 - 9f + #0xa0 is illegal in sjis encoding, but some pages does + #contain such byte. We need to be more error forgiven. 
+ 2,2,2,2,2,2,2,2, # a0 - a7 + 2,2,2,2,2,2,2,2, # a8 - af + 2,2,2,2,2,2,2,2, # b0 - b7 + 2,2,2,2,2,2,2,2, # b8 - bf + 2,2,2,2,2,2,2,2, # c0 - c7 + 2,2,2,2,2,2,2,2, # c8 - cf + 2,2,2,2,2,2,2,2, # d0 - d7 + 2,2,2,2,2,2,2,2, # d8 - df + 3,3,3,3,3,3,3,3, # e0 - e7 + 3,3,3,3,3,4,4,4, # e8 - ef + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,0,0,0) # f8 - ff + + +SJIS_ST = ( + MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f + MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17 +) + +SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0) + +SJIS_SM_MODEL = {'class_table': SJIS_CLS, + 'class_factor': 6, + 'state_table': SJIS_ST, + 'char_len_table': SJIS_CHAR_LEN_TABLE, + 'name': 'Shift_JIS'} + +# UCS2-BE + +UCS2BE_CLS = ( + 0,0,0,0,0,0,0,0, # 00 - 07 + 0,0,1,0,0,2,0,0, # 08 - 0f + 0,0,0,0,0,0,0,0, # 10 - 17 + 0,0,0,3,0,0,0,0, # 18 - 1f + 0,0,0,0,0,0,0,0, # 20 - 27 + 0,3,3,3,3,3,0,0, # 28 - 2f + 0,0,0,0,0,0,0,0, # 30 - 37 + 0,0,0,0,0,0,0,0, # 38 - 3f + 0,0,0,0,0,0,0,0, # 40 - 47 + 0,0,0,0,0,0,0,0, # 48 - 4f + 0,0,0,0,0,0,0,0, # 50 - 57 + 0,0,0,0,0,0,0,0, # 58 - 5f + 0,0,0,0,0,0,0,0, # 60 - 67 + 0,0,0,0,0,0,0,0, # 68 - 6f + 0,0,0,0,0,0,0,0, # 70 - 77 + 0,0,0,0,0,0,0,0, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,0,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,0,0,0,0,0,0,0, # a0 - a7 + 0,0,0,0,0,0,0,0, # a8 - af + 0,0,0,0,0,0,0,0, # b0 - b7 + 0,0,0,0,0,0,0,0, # b8 - bf + 0,0,0,0,0,0,0,0, # c0 - c7 + 0,0,0,0,0,0,0,0, # c8 - cf + 0,0,0,0,0,0,0,0, # d0 - d7 + 0,0,0,0,0,0,0,0, # d8 - df + 0,0,0,0,0,0,0,0, # e0 - e7 + 0,0,0,0,0,0,0,0, # e8 - ef + 0,0,0,0,0,0,0,0, # f0 - f7 + 0,0,0,0,0,0,4,5 # f8 - ff +) + +UCS2BE_ST = ( + 5, 7, 7,MachineState.ERROR, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07 + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f + MachineState.ITS_ME,MachineState.ITS_ME, 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,#10-17 + 6, 6, 6, 6, 6,MachineState.ITS_ME, 6, 6,#18-1f + 6, 6, 6, 6, 5, 7, 7,MachineState.ERROR,#20-27 + 5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f + 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37 +) + +UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2) + +UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS, + 'class_factor': 6, + 'state_table': UCS2BE_ST, + 'char_len_table': UCS2BE_CHAR_LEN_TABLE, + 'name': 'UTF-16BE'} + +# UCS2-LE + +UCS2LE_CLS = ( + 0,0,0,0,0,0,0,0, # 00 - 07 + 0,0,1,0,0,2,0,0, # 08 - 0f + 0,0,0,0,0,0,0,0, # 10 - 17 + 0,0,0,3,0,0,0,0, # 18 - 1f + 0,0,0,0,0,0,0,0, # 20 - 27 + 0,3,3,3,3,3,0,0, # 28 - 2f + 0,0,0,0,0,0,0,0, # 30 - 37 + 0,0,0,0,0,0,0,0, # 38 - 3f + 0,0,0,0,0,0,0,0, # 40 - 47 + 0,0,0,0,0,0,0,0, # 48 - 4f + 0,0,0,0,0,0,0,0, # 50 - 57 + 0,0,0,0,0,0,0,0, # 58 - 5f + 0,0,0,0,0,0,0,0, # 60 - 67 + 0,0,0,0,0,0,0,0, # 68 - 6f + 0,0,0,0,0,0,0,0, # 70 - 77 + 0,0,0,0,0,0,0,0, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,0,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,0,0,0,0,0,0,0, # a0 - a7 + 0,0,0,0,0,0,0,0, # a8 - af + 0,0,0,0,0,0,0,0, # b0 - b7 + 0,0,0,0,0,0,0,0, # b8 - bf + 
0,0,0,0,0,0,0,0, # c0 - c7 + 0,0,0,0,0,0,0,0, # c8 - cf + 0,0,0,0,0,0,0,0, # d0 - d7 + 0,0,0,0,0,0,0,0, # d8 - df + 0,0,0,0,0,0,0,0, # e0 - e7 + 0,0,0,0,0,0,0,0, # e8 - ef + 0,0,0,0,0,0,0,0, # f0 - f7 + 0,0,0,0,0,0,4,5 # f8 - ff +) + +UCS2LE_ST = ( + 6, 6, 7, 6, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07 + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f + MachineState.ITS_ME,MachineState.ITS_ME, 5, 5, 5,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#10-17 + 5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR, 6, 6,#18-1f + 7, 6, 8, 8, 5, 5, 5,MachineState.ERROR,#20-27 + 5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f + 5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37 +) + +UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2) + +UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS, + 'class_factor': 6, + 'state_table': UCS2LE_ST, + 'char_len_table': UCS2LE_CHAR_LEN_TABLE, + 'name': 'UTF-16LE'} + +# UTF-8 + +UTF8_CLS = ( + 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 1,1,1,1,1,1,1,1, # 40 - 47 + 1,1,1,1,1,1,1,1, # 48 - 4f + 1,1,1,1,1,1,1,1, # 50 - 57 + 1,1,1,1,1,1,1,1, # 58 - 5f + 1,1,1,1,1,1,1,1, # 60 - 67 + 1,1,1,1,1,1,1,1, # 68 - 6f + 1,1,1,1,1,1,1,1, # 70 - 77 + 1,1,1,1,1,1,1,1, # 78 - 7f + 2,2,2,2,3,3,3,3, # 80 - 87 + 4,4,4,4,4,4,4,4, # 88 - 8f + 4,4,4,4,4,4,4,4, # 90 - 97 + 4,4,4,4,4,4,4,4, # 98 - 9f + 5,5,5,5,5,5,5,5, # a0 - a7 + 5,5,5,5,5,5,5,5, # a8 - af + 5,5,5,5,5,5,5,5, # b0 - b7 + 5,5,5,5,5,5,5,5, # b8 - bf + 0,0,6,6,6,6,6,6, # c0 - c7 + 6,6,6,6,6,6,6,6, # c8 - cf + 6,6,6,6,6,6,6,6, # d0 - d7 + 6,6,6,6,6,6,6,6, # d8 - df + 7,8,8,8,8,8,8,8, # e0 - e7 + 8,8,8,8,8,9,8,8, # e8 - ef + 10,11,11,11,11,11,11,11, # f0 - f7 + 12,13,13,13,14,15,0,0 # f8 - ff +) + +UTF8_ST = ( + MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12, 10,#00-07 + 9, 11, 8, 7, 6, 5, 4, 3,#08-0f + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17 + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f + MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#20-27 + MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#28-2f + MachineState.ERROR,MachineState.ERROR, 5, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#30-37 + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#38-3f + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#40-47 + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#48-4f + MachineState.ERROR,MachineState.ERROR, 7, 7, 7, 
7,MachineState.ERROR,MachineState.ERROR,#50-57 + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#58-5f + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 7, 7,MachineState.ERROR,MachineState.ERROR,#60-67 + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#68-6f + MachineState.ERROR,MachineState.ERROR, 9, 9, 9, 9,MachineState.ERROR,MachineState.ERROR,#70-77 + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#78-7f + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 9,MachineState.ERROR,MachineState.ERROR,#80-87 + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#88-8f + MachineState.ERROR,MachineState.ERROR, 12, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,#90-97 + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#98-9f + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12,MachineState.ERROR,MachineState.ERROR,#a0-a7 + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#a8-af + MachineState.ERROR,MachineState.ERROR, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b0-b7 + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b8-bf + MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7 + MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf +) + +UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) + +UTF8_SM_MODEL = {'class_table': UTF8_CLS, + 'class_factor': 16, + 'state_table': UTF8_ST, + 'char_len_table': UTF8_CHAR_LEN_TABLE, + 'name': 'UTF-8'} diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/sbcharsetprober.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/sbcharsetprober.py new file mode 100644 index 0000000..0adb51d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/sbcharsetprober.py @@ -0,0 +1,132 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. 
+# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetprober import CharSetProber +from .enums import CharacterCategory, ProbingState, SequenceLikelihood + + +class SingleByteCharSetProber(CharSetProber): + SAMPLE_SIZE = 64 + SB_ENOUGH_REL_THRESHOLD = 1024 # 0.25 * SAMPLE_SIZE^2 + POSITIVE_SHORTCUT_THRESHOLD = 0.95 + NEGATIVE_SHORTCUT_THRESHOLD = 0.05 + + def __init__(self, model, reversed=False, name_prober=None): + super(SingleByteCharSetProber, self).__init__() + self._model = model + # TRUE if we need to reverse every pair in the model lookup + self._reversed = reversed + # Optional auxiliary prober for name decision + self._name_prober = name_prober + self._last_order = None + self._seq_counters = None + self._total_seqs = None + self._total_char = None + self._freq_char = None + self.reset() + + def reset(self): + super(SingleByteCharSetProber, self).reset() + # char order of last character + self._last_order = 255 + self._seq_counters = [0] * SequenceLikelihood.get_num_categories() + self._total_seqs = 0 + self._total_char = 0 + # characters that fall in our sampling range + self._freq_char = 0 + + @property + def charset_name(self): + if self._name_prober: + return self._name_prober.charset_name + else: + return self._model['charset_name'] + + @property + def language(self): + if self._name_prober: + return self._name_prober.language + else: + return self._model.get('language') + + def feed(self, byte_str): + if not self._model['keep_english_letter']: + byte_str = self.filter_international_words(byte_str) + if not byte_str: + return self.state + char_to_order_map = self._model['char_to_order_map'] + for i, c in enumerate(byte_str): + # XXX: Order is in range 1-64, so one would think we want 0-63 here, + # but that leads to 27 more test failures than before. + order = char_to_order_map[c] + # XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but + # CharacterCategory.SYMBOL is actually 253, so we use CONTROL + # to make it closer to the original intent. The only difference + # is whether or not we count digits and control characters for + # _total_char purposes. 
+            if order < CharacterCategory.CONTROL:
+                self._total_char += 1
+            if order < self.SAMPLE_SIZE:
+                self._freq_char += 1
+                if self._last_order < self.SAMPLE_SIZE:
+                    self._total_seqs += 1
+                    if not self._reversed:
+                        i = (self._last_order * self.SAMPLE_SIZE) + order
+                        model = self._model['precedence_matrix'][i]
+                    else:  # reverse the order of the letters in the lookup
+                        i = (order * self.SAMPLE_SIZE) + self._last_order
+                        model = self._model['precedence_matrix'][i]
+                    self._seq_counters[model] += 1
+            self._last_order = order
+
+        charset_name = self._model['charset_name']
+        if self.state == ProbingState.DETECTING:
+            if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
+                confidence = self.get_confidence()
+                if confidence > self.POSITIVE_SHORTCUT_THRESHOLD:
+                    self.logger.debug('%s confidence = %s, we have a winner',
+                                      charset_name, confidence)
+                    self._state = ProbingState.FOUND_IT
+                elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD:
+                    self.logger.debug('%s confidence = %s, below negative '
+                                      'shortcut threshold %s', charset_name,
+                                      confidence,
+                                      self.NEGATIVE_SHORTCUT_THRESHOLD)
+                    self._state = ProbingState.NOT_ME
+
+        return self.state
+
+    def get_confidence(self):
+        r = 0.01
+        if self._total_seqs > 0:
+            r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) /
+                 self._total_seqs / self._model['typical_positive_ratio'])
+            r = r * self._freq_char / self._total_char
+        if r >= 1.0:
+            r = 0.99
+        return r
diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/sbcsgroupprober.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/sbcsgroupprober.py
new file mode 100644
index 0000000..98e95dc
--- /dev/null
+++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/sbcsgroupprober.py
@@ -0,0 +1,73 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#   Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetgroupprober import CharSetGroupProber +from .sbcharsetprober import SingleByteCharSetProber +from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, + Latin5CyrillicModel, MacCyrillicModel, + Ibm866Model, Ibm855Model) +from .langgreekmodel import Latin7GreekModel, Win1253GreekModel +from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel +# from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel +from .langthaimodel import TIS620ThaiModel +from .langhebrewmodel import Win1255HebrewModel +from .hebrewprober import HebrewProber +from .langturkishmodel import Latin5TurkishModel + + +class SBCSGroupProber(CharSetGroupProber): + def __init__(self): + super(SBCSGroupProber, self).__init__() + self.probers = [ + SingleByteCharSetProber(Win1251CyrillicModel), + SingleByteCharSetProber(Koi8rModel), + SingleByteCharSetProber(Latin5CyrillicModel), + SingleByteCharSetProber(MacCyrillicModel), + SingleByteCharSetProber(Ibm866Model), + SingleByteCharSetProber(Ibm855Model), + SingleByteCharSetProber(Latin7GreekModel), + SingleByteCharSetProber(Win1253GreekModel), + SingleByteCharSetProber(Latin5BulgarianModel), + SingleByteCharSetProber(Win1251BulgarianModel), + # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250) + # after we retrain model. + # SingleByteCharSetProber(Latin2HungarianModel), + # SingleByteCharSetProber(Win1250HungarianModel), + SingleByteCharSetProber(TIS620ThaiModel), + SingleByteCharSetProber(Latin5TurkishModel), + ] + hebrew_prober = HebrewProber() + logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, + False, hebrew_prober) + visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True, + hebrew_prober) + hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober) + self.probers.extend([hebrew_prober, logical_hebrew_prober, + visual_hebrew_prober]) + + self.reset() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/sjisprober.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/sjisprober.py new file mode 100644 index 0000000..9e29623 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/sjisprober.py @@ -0,0 +1,92 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import SJISDistributionAnalysis +from .jpcntx import SJISContextAnalysis +from .mbcssm import SJIS_SM_MODEL +from .enums import ProbingState, MachineState + + +class SJISProber(MultiByteCharSetProber): + def __init__(self): + super(SJISProber, self).__init__() + self.coding_sm = CodingStateMachine(SJIS_SM_MODEL) + self.distribution_analyzer = SJISDistributionAnalysis() + self.context_analyzer = SJISContextAnalysis() + self.reset() + + def reset(self): + super(SJISProber, self).reset() + self.context_analyzer.reset() + + @property + def charset_name(self): + return self.context_analyzer.charset_name + + @property + def language(self): + return "Japanese" + + def feed(self, byte_str): + for i in range(len(byte_str)): + coding_state = self.coding_sm.next_state(byte_str[i]) + if coding_state == MachineState.ERROR: + self.logger.debug('%s %s prober hit error at byte %s', + self.charset_name, self.language, i) + self._state = ProbingState.NOT_ME + break + elif coding_state == MachineState.ITS_ME: + self._state = ProbingState.FOUND_IT + break + elif coding_state == MachineState.START: + char_len = self.coding_sm.get_current_charlen() + if i == 0: + self._last_char[1] = byte_str[0] + self.context_analyzer.feed(self._last_char[2 - char_len:], + char_len) + self.distribution_analyzer.feed(self._last_char, char_len) + else: + self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3 + - char_len], char_len) + self.distribution_analyzer.feed(byte_str[i - 1:i + 1], + char_len) + + self._last_char[0] = byte_str[-1] + + if self.state == ProbingState.DETECTING: + if (self.context_analyzer.got_enough_data() and + (self.get_confidence() > self.SHORTCUT_THRESHOLD)): + self._state = ProbingState.FOUND_IT + + return self.state + + def get_confidence(self): + context_conf = self.context_analyzer.get_confidence() + distrib_conf = self.distribution_analyzer.get_confidence() + return max(context_conf, distrib_conf) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/universaldetector.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/universaldetector.py new file mode 100644 index 0000000..7b4e92d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/universaldetector.py @@ -0,0 +1,286 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. 
+# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### +""" +Module containing the UniversalDetector detector class, which is the primary +class a user of ``chardet`` should use. + +:author: Mark Pilgrim (initial port to Python) +:author: Shy Shalom (original C code) +:author: Dan Blanchard (major refactoring for 3.0) +:author: Ian Cordasco +""" + + +import codecs +import logging +import re + +from .charsetgroupprober import CharSetGroupProber +from .enums import InputState, LanguageFilter, ProbingState +from .escprober import EscCharSetProber +from .latin1prober import Latin1Prober +from .mbcsgroupprober import MBCSGroupProber +from .sbcsgroupprober import SBCSGroupProber + + +class UniversalDetector(object): + """ + The ``UniversalDetector`` class underlies the ``chardet.detect`` function + and coordinates all of the different charset probers. + + To get a ``dict`` containing an encoding and its confidence, you can simply + run: + + .. code:: + + u = UniversalDetector() + u.feed(some_bytes) + u.close() + detected = u.result + + """ + + MINIMUM_THRESHOLD = 0.20 + HIGH_BYTE_DETECTOR = re.compile(b'[\x80-\xFF]') + ESC_DETECTOR = re.compile(b'(\033|~{)') + WIN_BYTE_DETECTOR = re.compile(b'[\x80-\x9F]') + ISO_WIN_MAP = {'iso-8859-1': 'Windows-1252', + 'iso-8859-2': 'Windows-1250', + 'iso-8859-5': 'Windows-1251', + 'iso-8859-6': 'Windows-1256', + 'iso-8859-7': 'Windows-1253', + 'iso-8859-8': 'Windows-1255', + 'iso-8859-9': 'Windows-1254', + 'iso-8859-13': 'Windows-1257'} + + def __init__(self, lang_filter=LanguageFilter.ALL): + self._esc_charset_prober = None + self._charset_probers = [] + self.result = None + self.done = None + self._got_data = None + self._input_state = None + self._last_char = None + self.lang_filter = lang_filter + self.logger = logging.getLogger(__name__) + self._has_win_bytes = None + self.reset() + + def reset(self): + """ + Reset the UniversalDetector and all of its probers back to their + initial states. This is called by ``__init__``, so you only need to + call this directly in between analyses of different documents. + """ + self.result = {'encoding': None, 'confidence': 0.0, 'language': None} + self.done = False + self._got_data = False + self._has_win_bytes = False + self._input_state = InputState.PURE_ASCII + self._last_char = b'' + if self._esc_charset_prober: + self._esc_charset_prober.reset() + for prober in self._charset_probers: + prober.reset() + + def feed(self, byte_str): + """ + Takes a chunk of a document and feeds it through all of the relevant + charset probers. + + After calling ``feed``, you can check the value of the ``done`` + attribute to see if you need to continue feeding the + ``UniversalDetector`` more data, or if it has made a prediction + (in the ``result`` attribute). + + .. note:: + You should always call ``close`` when you're done feeding in your + document if ``done`` is not already ``True``. 
+ """ + if self.done: + return + + if not len(byte_str): + return + + if not isinstance(byte_str, bytearray): + byte_str = bytearray(byte_str) + + # First check for known BOMs, since these are guaranteed to be correct + if not self._got_data: + # If the data starts with BOM, we know it is UTF + if byte_str.startswith(codecs.BOM_UTF8): + # EF BB BF UTF-8 with BOM + self.result = {'encoding': "UTF-8-SIG", + 'confidence': 1.0, + 'language': ''} + elif byte_str.startswith((codecs.BOM_UTF32_LE, + codecs.BOM_UTF32_BE)): + # FF FE 00 00 UTF-32, little-endian BOM + # 00 00 FE FF UTF-32, big-endian BOM + self.result = {'encoding': "UTF-32", + 'confidence': 1.0, + 'language': ''} + elif byte_str.startswith(b'\xFE\xFF\x00\x00'): + # FE FF 00 00 UCS-4, unusual octet order BOM (3412) + self.result = {'encoding': "X-ISO-10646-UCS-4-3412", + 'confidence': 1.0, + 'language': ''} + elif byte_str.startswith(b'\x00\x00\xFF\xFE'): + # 00 00 FF FE UCS-4, unusual octet order BOM (2143) + self.result = {'encoding': "X-ISO-10646-UCS-4-2143", + 'confidence': 1.0, + 'language': ''} + elif byte_str.startswith((codecs.BOM_LE, codecs.BOM_BE)): + # FF FE UTF-16, little endian BOM + # FE FF UTF-16, big endian BOM + self.result = {'encoding': "UTF-16", + 'confidence': 1.0, + 'language': ''} + + self._got_data = True + if self.result['encoding'] is not None: + self.done = True + return + + # If none of those matched and we've only see ASCII so far, check + # for high bytes and escape sequences + if self._input_state == InputState.PURE_ASCII: + if self.HIGH_BYTE_DETECTOR.search(byte_str): + self._input_state = InputState.HIGH_BYTE + elif self._input_state == InputState.PURE_ASCII and \ + self.ESC_DETECTOR.search(self._last_char + byte_str): + self._input_state = InputState.ESC_ASCII + + self._last_char = byte_str[-1:] + + # If we've seen escape sequences, use the EscCharSetProber, which + # uses a simple state machine to check for known escape sequences in + # HZ and ISO-2022 encodings, since those are the only encodings that + # use such sequences. + if self._input_state == InputState.ESC_ASCII: + if not self._esc_charset_prober: + self._esc_charset_prober = EscCharSetProber(self.lang_filter) + if self._esc_charset_prober.feed(byte_str) == ProbingState.FOUND_IT: + self.result = {'encoding': + self._esc_charset_prober.charset_name, + 'confidence': + self._esc_charset_prober.get_confidence(), + 'language': + self._esc_charset_prober.language} + self.done = True + # If we've seen high bytes (i.e., those with values greater than 127), + # we need to do more complicated checks using all our multi-byte and + # single-byte probers that are left. The single-byte probers + # use character bigram distributions to determine the encoding, whereas + # the multi-byte probers use a combination of character unigram and + # bigram distributions. 
+ elif self._input_state == InputState.HIGH_BYTE: + if not self._charset_probers: + self._charset_probers = [MBCSGroupProber(self.lang_filter)] + # If we're checking non-CJK encodings, use single-byte prober + if self.lang_filter & LanguageFilter.NON_CJK: + self._charset_probers.append(SBCSGroupProber()) + self._charset_probers.append(Latin1Prober()) + for prober in self._charset_probers: + if prober.feed(byte_str) == ProbingState.FOUND_IT: + self.result = {'encoding': prober.charset_name, + 'confidence': prober.get_confidence(), + 'language': prober.language} + self.done = True + break + if self.WIN_BYTE_DETECTOR.search(byte_str): + self._has_win_bytes = True + + def close(self): + """ + Stop analyzing the current document and come up with a final + prediction. + + :returns: The ``result`` attribute, a ``dict`` with the keys + `encoding`, `confidence`, and `language`. + """ + # Don't bother with checks if we're already done + if self.done: + return self.result + self.done = True + + if not self._got_data: + self.logger.debug('no data received!') + + # Default to ASCII if it is all we've seen so far + elif self._input_state == InputState.PURE_ASCII: + self.result = {'encoding': 'ascii', + 'confidence': 1.0, + 'language': ''} + + # If we have seen non-ASCII, return the best that met MINIMUM_THRESHOLD + elif self._input_state == InputState.HIGH_BYTE: + prober_confidence = None + max_prober_confidence = 0.0 + max_prober = None + for prober in self._charset_probers: + if not prober: + continue + prober_confidence = prober.get_confidence() + if prober_confidence > max_prober_confidence: + max_prober_confidence = prober_confidence + max_prober = prober + if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD): + charset_name = max_prober.charset_name + lower_charset_name = max_prober.charset_name.lower() + confidence = max_prober.get_confidence() + # Use Windows encoding name instead of ISO-8859 if we saw any + # extra Windows-specific bytes + if lower_charset_name.startswith('iso-8859'): + if self._has_win_bytes: + charset_name = self.ISO_WIN_MAP.get(lower_charset_name, + charset_name) + self.result = {'encoding': charset_name, + 'confidence': confidence, + 'language': max_prober.language} + + # Log all prober confidences if none met MINIMUM_THRESHOLD + if self.logger.getEffectiveLevel() == logging.DEBUG: + if self.result['encoding'] is None: + self.logger.debug('no probers hit minimum threshold') + for group_prober in self._charset_probers: + if not group_prober: + continue + if isinstance(group_prober, CharSetGroupProber): + for prober in group_prober.probers: + self.logger.debug('%s %s confidence = %s', + prober.charset_name, + prober.language, + prober.get_confidence()) + else: + self.logger.debug('%s %s confidence = %s', + prober.charset_name, + prober.language, + prober.get_confidence()) + return self.result diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/utf8prober.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/utf8prober.py new file mode 100644 index 0000000..6c3196c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/utf8prober.py @@ -0,0 +1,82 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. 
+# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetprober import CharSetProber +from .enums import ProbingState, MachineState +from .codingstatemachine import CodingStateMachine +from .mbcssm import UTF8_SM_MODEL + + + +class UTF8Prober(CharSetProber): + ONE_CHAR_PROB = 0.5 + + def __init__(self): + super(UTF8Prober, self).__init__() + self.coding_sm = CodingStateMachine(UTF8_SM_MODEL) + self._num_mb_chars = None + self.reset() + + def reset(self): + super(UTF8Prober, self).reset() + self.coding_sm.reset() + self._num_mb_chars = 0 + + @property + def charset_name(self): + return "utf-8" + + @property + def language(self): + return "" + + def feed(self, byte_str): + for c in byte_str: + coding_state = self.coding_sm.next_state(c) + if coding_state == MachineState.ERROR: + self._state = ProbingState.NOT_ME + break + elif coding_state == MachineState.ITS_ME: + self._state = ProbingState.FOUND_IT + break + elif coding_state == MachineState.START: + if self.coding_sm.get_current_charlen() >= 2: + self._num_mb_chars += 1 + + if self.state == ProbingState.DETECTING: + if self.get_confidence() > self.SHORTCUT_THRESHOLD: + self._state = ProbingState.FOUND_IT + + return self.state + + def get_confidence(self): + unlike = 0.99 + if self._num_mb_chars < 6: + unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars + return 1.0 - unlike + else: + return unlike diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/version.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/version.py new file mode 100644 index 0000000..bb2a34a --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/chardet/version.py @@ -0,0 +1,9 @@ +""" +This module exists only to simplify retrieving the version number of chardet +from within setup.py and from chardet subpackages. + +:author: Dan Blanchard (dan.blanchard@gmail.com) +""" + +__version__ = "3.0.4" +VERSION = __version__.split('.') diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__init__.py new file mode 100644 index 0000000..34c263c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__init__.py @@ -0,0 +1,6 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. 
+from .initialise import init, deinit, reinit, colorama_text
+from .ansi import Fore, Back, Style, Cursor
+from .ansitowin32 import AnsiToWin32
+
+__version__ = '0.4.3'
diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..2cfdde0
Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/__init__.cpython-37.pyc differ
diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/ansi.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/ansi.cpython-37.pyc
new file mode 100644
index 0000000..b9aa12d
Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/ansi.cpython-37.pyc differ
diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/ansitowin32.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/ansitowin32.cpython-37.pyc
new file mode 100644
index 0000000..948602f
Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/ansitowin32.cpython-37.pyc differ
diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/initialise.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/initialise.cpython-37.pyc
new file mode 100644
index 0000000..aa4a3b8
Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/initialise.cpython-37.pyc differ
diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/win32.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/win32.cpython-37.pyc
new file mode 100644
index 0000000..c398990
Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/win32.cpython-37.pyc differ
diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/winterm.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/winterm.cpython-37.pyc
new file mode 100644
index 0000000..12edab0
Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/__pycache__/winterm.cpython-37.pyc differ
diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/ansi.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/ansi.py
new file mode 100644
index 0000000..7877658
--- /dev/null
+++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/ansi.py
@@ -0,0 +1,102 @@
+# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
+'''
+This module generates ANSI character codes for printing colors to terminals.
+See: http://en.wikipedia.org/wiki/ANSI_escape_code
+'''
+
+CSI = '\033['
+OSC = '\033]'
+BEL = '\007'
+
+
+def code_to_chars(code):
+    return CSI + str(code) + 'm'
+
+def set_title(title):
+    return OSC + '2;' + title + BEL
+
+def clear_screen(mode=2):
+    return CSI + str(mode) + 'J'
+
+def clear_line(mode=2):
+    return CSI + str(mode) + 'K'
+
+
+class AnsiCodes(object):
+    def __init__(self):
+        # the subclasses declare class attributes which are numbers.
+ # Upon instantiation we define instance attributes, which are the same + # as the class attributes but wrapped with the ANSI escape sequence + for name in dir(self): + if not name.startswith('_'): + value = getattr(self, name) + setattr(self, name, code_to_chars(value)) + + +class AnsiCursor(object): + def UP(self, n=1): + return CSI + str(n) + 'A' + def DOWN(self, n=1): + return CSI + str(n) + 'B' + def FORWARD(self, n=1): + return CSI + str(n) + 'C' + def BACK(self, n=1): + return CSI + str(n) + 'D' + def POS(self, x=1, y=1): + return CSI + str(y) + ';' + str(x) + 'H' + + +class AnsiFore(AnsiCodes): + BLACK = 30 + RED = 31 + GREEN = 32 + YELLOW = 33 + BLUE = 34 + MAGENTA = 35 + CYAN = 36 + WHITE = 37 + RESET = 39 + + # These are fairly well supported, but not part of the standard. + LIGHTBLACK_EX = 90 + LIGHTRED_EX = 91 + LIGHTGREEN_EX = 92 + LIGHTYELLOW_EX = 93 + LIGHTBLUE_EX = 94 + LIGHTMAGENTA_EX = 95 + LIGHTCYAN_EX = 96 + LIGHTWHITE_EX = 97 + + +class AnsiBack(AnsiCodes): + BLACK = 40 + RED = 41 + GREEN = 42 + YELLOW = 43 + BLUE = 44 + MAGENTA = 45 + CYAN = 46 + WHITE = 47 + RESET = 49 + + # These are fairly well supported, but not part of the standard. + LIGHTBLACK_EX = 100 + LIGHTRED_EX = 101 + LIGHTGREEN_EX = 102 + LIGHTYELLOW_EX = 103 + LIGHTBLUE_EX = 104 + LIGHTMAGENTA_EX = 105 + LIGHTCYAN_EX = 106 + LIGHTWHITE_EX = 107 + + +class AnsiStyle(AnsiCodes): + BRIGHT = 1 + DIM = 2 + NORMAL = 22 + RESET_ALL = 0 + +Fore = AnsiFore() +Back = AnsiBack() +Style = AnsiStyle() +Cursor = AnsiCursor() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/ansitowin32.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/ansitowin32.py new file mode 100644 index 0000000..359c92b --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/ansitowin32.py @@ -0,0 +1,257 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +import re +import sys +import os + +from .ansi import AnsiFore, AnsiBack, AnsiStyle, Style +from .winterm import WinTerm, WinColor, WinStyle +from .win32 import windll, winapi_test + + +winterm = None +if windll is not None: + winterm = WinTerm() + + +class StreamWrapper(object): + ''' + Wraps a stream (such as stdout), acting as a transparent proxy for all + attribute access apart from method 'write()', which is delegated to our + Converter instance. + ''' + def __init__(self, wrapped, converter): + # double-underscore everything to prevent clashes with names of + # attributes on the wrapped stream object. 
+        self.__wrapped = wrapped
+        self.__convertor = converter
+
+    def __getattr__(self, name):
+        return getattr(self.__wrapped, name)
+
+    def __enter__(self, *args, **kwargs):
+        # special method lookup bypasses __getattr__/__getattribute__, see
+        # https://stackoverflow.com/questions/12632894/why-doesnt-getattr-work-with-exit
+        # thus, contextlib magic methods are not proxied via __getattr__
+        return self.__wrapped.__enter__(*args, **kwargs)
+
+    def __exit__(self, *args, **kwargs):
+        return self.__wrapped.__exit__(*args, **kwargs)
+
+    def write(self, text):
+        self.__convertor.write(text)
+
+    def isatty(self):
+        stream = self.__wrapped
+        if 'PYCHARM_HOSTED' in os.environ:
+            if stream is not None and (stream is sys.__stdout__ or stream is sys.__stderr__):
+                return True
+        try:
+            stream_isatty = stream.isatty
+        except AttributeError:
+            return False
+        else:
+            return stream_isatty()
+
+    @property
+    def closed(self):
+        stream = self.__wrapped
+        try:
+            return stream.closed
+        except AttributeError:
+            return True
+
+
+class AnsiToWin32(object):
+    '''
+    Implements a 'write()' method which, on Windows, will strip ANSI character
+    sequences from the text, and if outputting to a tty, will convert them into
+    win32 function calls.
+    '''
+    ANSI_CSI_RE = re.compile('\001?\033\\[((?:\\d|;)*)([a-zA-Z])\002?')  # Control Sequence Introducer
+    ANSI_OSC_RE = re.compile('\001?\033\\]((?:.|;)*?)(\x07)\002?')  # Operating System Command
+
+    def __init__(self, wrapped, convert=None, strip=None, autoreset=False):
+        # The wrapped stream (normally sys.stdout or sys.stderr)
+        self.wrapped = wrapped
+
+        # should we reset colors to defaults after every .write()
+        self.autoreset = autoreset
+
+        # create the proxy wrapping our output stream
+        self.stream = StreamWrapper(wrapped, self)
+
+        on_windows = os.name == 'nt'
+        # We test if the WinAPI works, because even if we are on Windows
+        # we may be using a terminal that doesn't support the WinAPI
+        # (e.g. Cygwin Terminal). In this case it's up to the terminal
+        # to support the ANSI codes.
+        conversion_supported = on_windows and winapi_test()
+
+        # should we strip ANSI sequences from our output?
+        if strip is None:
+            strip = conversion_supported or (not self.stream.closed and not self.stream.isatty())
+        self.strip = strip
+
+        # should we convert ANSI sequences into win32 calls?
+        if convert is None:
+            convert = conversion_supported and not self.stream.closed and self.stream.isatty()
+        self.convert = convert
+
+        # dict of ansi codes to win32 functions and parameters
+        self.win32_calls = self.get_win32_calls()
+
+        # are we wrapping stderr?
+        self.on_stderr = self.wrapped is sys.stderr
+
+    def should_wrap(self):
+        '''
+        True if this class is actually needed. If false, then the output
+        stream will not be affected, nor will win32 calls be issued, so
+        wrapping stdout is not actually required.
This will generally be + False on non-Windows platforms, unless optional functionality like + autoreset has been requested using kwargs to init() + ''' + return self.convert or self.strip or self.autoreset + + def get_win32_calls(self): + if self.convert and winterm: + return { + AnsiStyle.RESET_ALL: (winterm.reset_all, ), + AnsiStyle.BRIGHT: (winterm.style, WinStyle.BRIGHT), + AnsiStyle.DIM: (winterm.style, WinStyle.NORMAL), + AnsiStyle.NORMAL: (winterm.style, WinStyle.NORMAL), + AnsiFore.BLACK: (winterm.fore, WinColor.BLACK), + AnsiFore.RED: (winterm.fore, WinColor.RED), + AnsiFore.GREEN: (winterm.fore, WinColor.GREEN), + AnsiFore.YELLOW: (winterm.fore, WinColor.YELLOW), + AnsiFore.BLUE: (winterm.fore, WinColor.BLUE), + AnsiFore.MAGENTA: (winterm.fore, WinColor.MAGENTA), + AnsiFore.CYAN: (winterm.fore, WinColor.CYAN), + AnsiFore.WHITE: (winterm.fore, WinColor.GREY), + AnsiFore.RESET: (winterm.fore, ), + AnsiFore.LIGHTBLACK_EX: (winterm.fore, WinColor.BLACK, True), + AnsiFore.LIGHTRED_EX: (winterm.fore, WinColor.RED, True), + AnsiFore.LIGHTGREEN_EX: (winterm.fore, WinColor.GREEN, True), + AnsiFore.LIGHTYELLOW_EX: (winterm.fore, WinColor.YELLOW, True), + AnsiFore.LIGHTBLUE_EX: (winterm.fore, WinColor.BLUE, True), + AnsiFore.LIGHTMAGENTA_EX: (winterm.fore, WinColor.MAGENTA, True), + AnsiFore.LIGHTCYAN_EX: (winterm.fore, WinColor.CYAN, True), + AnsiFore.LIGHTWHITE_EX: (winterm.fore, WinColor.GREY, True), + AnsiBack.BLACK: (winterm.back, WinColor.BLACK), + AnsiBack.RED: (winterm.back, WinColor.RED), + AnsiBack.GREEN: (winterm.back, WinColor.GREEN), + AnsiBack.YELLOW: (winterm.back, WinColor.YELLOW), + AnsiBack.BLUE: (winterm.back, WinColor.BLUE), + AnsiBack.MAGENTA: (winterm.back, WinColor.MAGENTA), + AnsiBack.CYAN: (winterm.back, WinColor.CYAN), + AnsiBack.WHITE: (winterm.back, WinColor.GREY), + AnsiBack.RESET: (winterm.back, ), + AnsiBack.LIGHTBLACK_EX: (winterm.back, WinColor.BLACK, True), + AnsiBack.LIGHTRED_EX: (winterm.back, WinColor.RED, True), + AnsiBack.LIGHTGREEN_EX: (winterm.back, WinColor.GREEN, True), + AnsiBack.LIGHTYELLOW_EX: (winterm.back, WinColor.YELLOW, True), + AnsiBack.LIGHTBLUE_EX: (winterm.back, WinColor.BLUE, True), + AnsiBack.LIGHTMAGENTA_EX: (winterm.back, WinColor.MAGENTA, True), + AnsiBack.LIGHTCYAN_EX: (winterm.back, WinColor.CYAN, True), + AnsiBack.LIGHTWHITE_EX: (winterm.back, WinColor.GREY, True), + } + return dict() + + def write(self, text): + if self.strip or self.convert: + self.write_and_convert(text) + else: + self.wrapped.write(text) + self.wrapped.flush() + if self.autoreset: + self.reset_all() + + + def reset_all(self): + if self.convert: + self.call_win32('m', (0,)) + elif not self.strip and not self.stream.closed: + self.wrapped.write(Style.RESET_ALL) + + + def write_and_convert(self, text): + ''' + Write the given text to our wrapped stream, stripping any ANSI + sequences from the text, and optionally converting them into win32 + calls. 
+ ''' + cursor = 0 + text = self.convert_osc(text) + for match in self.ANSI_CSI_RE.finditer(text): + start, end = match.span() + self.write_plain_text(text, cursor, start) + self.convert_ansi(*match.groups()) + cursor = end + self.write_plain_text(text, cursor, len(text)) + + + def write_plain_text(self, text, start, end): + if start < end: + self.wrapped.write(text[start:end]) + self.wrapped.flush() + + + def convert_ansi(self, paramstring, command): + if self.convert: + params = self.extract_params(command, paramstring) + self.call_win32(command, params) + + + def extract_params(self, command, paramstring): + if command in 'Hf': + params = tuple(int(p) if len(p) != 0 else 1 for p in paramstring.split(';')) + while len(params) < 2: + # defaults: + params = params + (1,) + else: + params = tuple(int(p) for p in paramstring.split(';') if len(p) != 0) + if len(params) == 0: + # defaults: + if command in 'JKm': + params = (0,) + elif command in 'ABCD': + params = (1,) + + return params + + + def call_win32(self, command, params): + if command == 'm': + for param in params: + if param in self.win32_calls: + func_args = self.win32_calls[param] + func = func_args[0] + args = func_args[1:] + kwargs = dict(on_stderr=self.on_stderr) + func(*args, **kwargs) + elif command in 'J': + winterm.erase_screen(params[0], on_stderr=self.on_stderr) + elif command in 'K': + winterm.erase_line(params[0], on_stderr=self.on_stderr) + elif command in 'Hf': # cursor position - absolute + winterm.set_cursor_position(params, on_stderr=self.on_stderr) + elif command in 'ABCD': # cursor position - relative + n = params[0] + # A - up, B - down, C - forward, D - back + x, y = {'A': (0, -n), 'B': (0, n), 'C': (n, 0), 'D': (-n, 0)}[command] + winterm.cursor_adjust(x, y, on_stderr=self.on_stderr) + + + def convert_osc(self, text): + for match in self.ANSI_OSC_RE.finditer(text): + start, end = match.span() + text = text[:start] + text[end:] + paramstring, command = match.groups() + if command in '\x07': # \x07 = BEL + params = paramstring.split(";") + # 0 - change title and icon (we will only change title) + # 1 - change icon (we don't support this) + # 2 - change title + if params[0] in '02': + winterm.set_title(params[1]) + return text diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/initialise.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/initialise.py new file mode 100644 index 0000000..430d066 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/initialise.py @@ -0,0 +1,80 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. 
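As a quick orientation before the next module: the AnsiToWin32 class above can also be used on its own, with should_wrap() reporting whether wrapping changes anything and .stream acting as the write()-intercepting proxy. A minimal sketch, using the vendored import path from this diff:

# Minimal sketch: wrap sys.stdout with the AnsiToWin32 converter shown above.
import sys
from pip._vendor.colorama.ansitowin32 import AnsiToWin32

converter = AnsiToWin32(sys.stdout)
if converter.should_wrap():
    out = converter.stream       # StreamWrapper: write() strips or converts ANSI sequences
else:
    out = sys.stdout             # e.g. an ANSI-capable terminal on POSIX; nothing to do

out.write('\x1b[31mred text\x1b[0m\n')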
+import atexit +import contextlib +import sys + +from .ansitowin32 import AnsiToWin32 + + +orig_stdout = None +orig_stderr = None + +wrapped_stdout = None +wrapped_stderr = None + +atexit_done = False + + +def reset_all(): + if AnsiToWin32 is not None: # Issue #74: objects might become None at exit + AnsiToWin32(orig_stdout).reset_all() + + +def init(autoreset=False, convert=None, strip=None, wrap=True): + + if not wrap and any([autoreset, convert, strip]): + raise ValueError('wrap=False conflicts with any other arg=True') + + global wrapped_stdout, wrapped_stderr + global orig_stdout, orig_stderr + + orig_stdout = sys.stdout + orig_stderr = sys.stderr + + if sys.stdout is None: + wrapped_stdout = None + else: + sys.stdout = wrapped_stdout = \ + wrap_stream(orig_stdout, convert, strip, autoreset, wrap) + if sys.stderr is None: + wrapped_stderr = None + else: + sys.stderr = wrapped_stderr = \ + wrap_stream(orig_stderr, convert, strip, autoreset, wrap) + + global atexit_done + if not atexit_done: + atexit.register(reset_all) + atexit_done = True + + +def deinit(): + if orig_stdout is not None: + sys.stdout = orig_stdout + if orig_stderr is not None: + sys.stderr = orig_stderr + + +@contextlib.contextmanager +def colorama_text(*args, **kwargs): + init(*args, **kwargs) + try: + yield + finally: + deinit() + + +def reinit(): + if wrapped_stdout is not None: + sys.stdout = wrapped_stdout + if wrapped_stderr is not None: + sys.stderr = wrapped_stderr + + +def wrap_stream(stream, convert, strip, autoreset, wrap): + if wrap: + wrapper = AnsiToWin32(stream, + convert=convert, strip=strip, autoreset=autoreset) + if wrapper.should_wrap(): + stream = wrapper.stream + return stream diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/win32.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/win32.py new file mode 100644 index 0000000..c2d8360 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/win32.py @@ -0,0 +1,152 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. 
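For context, the initialise module just shown is colorama's public entry point: init() swaps sys.stdout and sys.stderr for wrapped streams, deinit() restores them, and colorama_text() scopes the two. A minimal usage sketch, importing directly from the vendored modules in this tree (ordinarily one would import from the top-level colorama package):

# Minimal sketch of the init()/deinit() lifecycle defined in initialise.py above.
from pip._vendor.colorama.initialise import init, deinit, colorama_text
from pip._vendor.colorama.ansi import Fore, Style

init(autoreset=True)        # replace sys.stdout/sys.stderr with wrapped streams
print(Fore.GREEN + 'ok')    # autoreset issues a reset after every write
deinit()                    # restore the original streams

# Or scoped, via the context manager defined above:
with colorama_text():
    print(Fore.RED + 'inside' + Style.RESET_ALL)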
+ +# from winbase.h +STDOUT = -11 +STDERR = -12 + +try: + import ctypes + from ctypes import LibraryLoader + windll = LibraryLoader(ctypes.WinDLL) + from ctypes import wintypes +except (AttributeError, ImportError): + windll = None + SetConsoleTextAttribute = lambda *_: None + winapi_test = lambda *_: None +else: + from ctypes import byref, Structure, c_char, POINTER + + COORD = wintypes._COORD + + class CONSOLE_SCREEN_BUFFER_INFO(Structure): + """struct in wincon.h.""" + _fields_ = [ + ("dwSize", COORD), + ("dwCursorPosition", COORD), + ("wAttributes", wintypes.WORD), + ("srWindow", wintypes.SMALL_RECT), + ("dwMaximumWindowSize", COORD), + ] + def __str__(self): + return '(%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)' % ( + self.dwSize.Y, self.dwSize.X + , self.dwCursorPosition.Y, self.dwCursorPosition.X + , self.wAttributes + , self.srWindow.Top, self.srWindow.Left, self.srWindow.Bottom, self.srWindow.Right + , self.dwMaximumWindowSize.Y, self.dwMaximumWindowSize.X + ) + + _GetStdHandle = windll.kernel32.GetStdHandle + _GetStdHandle.argtypes = [ + wintypes.DWORD, + ] + _GetStdHandle.restype = wintypes.HANDLE + + _GetConsoleScreenBufferInfo = windll.kernel32.GetConsoleScreenBufferInfo + _GetConsoleScreenBufferInfo.argtypes = [ + wintypes.HANDLE, + POINTER(CONSOLE_SCREEN_BUFFER_INFO), + ] + _GetConsoleScreenBufferInfo.restype = wintypes.BOOL + + _SetConsoleTextAttribute = windll.kernel32.SetConsoleTextAttribute + _SetConsoleTextAttribute.argtypes = [ + wintypes.HANDLE, + wintypes.WORD, + ] + _SetConsoleTextAttribute.restype = wintypes.BOOL + + _SetConsoleCursorPosition = windll.kernel32.SetConsoleCursorPosition + _SetConsoleCursorPosition.argtypes = [ + wintypes.HANDLE, + COORD, + ] + _SetConsoleCursorPosition.restype = wintypes.BOOL + + _FillConsoleOutputCharacterA = windll.kernel32.FillConsoleOutputCharacterA + _FillConsoleOutputCharacterA.argtypes = [ + wintypes.HANDLE, + c_char, + wintypes.DWORD, + COORD, + POINTER(wintypes.DWORD), + ] + _FillConsoleOutputCharacterA.restype = wintypes.BOOL + + _FillConsoleOutputAttribute = windll.kernel32.FillConsoleOutputAttribute + _FillConsoleOutputAttribute.argtypes = [ + wintypes.HANDLE, + wintypes.WORD, + wintypes.DWORD, + COORD, + POINTER(wintypes.DWORD), + ] + _FillConsoleOutputAttribute.restype = wintypes.BOOL + + _SetConsoleTitleW = windll.kernel32.SetConsoleTitleW + _SetConsoleTitleW.argtypes = [ + wintypes.LPCWSTR + ] + _SetConsoleTitleW.restype = wintypes.BOOL + + def _winapi_test(handle): + csbi = CONSOLE_SCREEN_BUFFER_INFO() + success = _GetConsoleScreenBufferInfo( + handle, byref(csbi)) + return bool(success) + + def winapi_test(): + return any(_winapi_test(h) for h in + (_GetStdHandle(STDOUT), _GetStdHandle(STDERR))) + + def GetConsoleScreenBufferInfo(stream_id=STDOUT): + handle = _GetStdHandle(stream_id) + csbi = CONSOLE_SCREEN_BUFFER_INFO() + success = _GetConsoleScreenBufferInfo( + handle, byref(csbi)) + return csbi + + def SetConsoleTextAttribute(stream_id, attrs): + handle = _GetStdHandle(stream_id) + return _SetConsoleTextAttribute(handle, attrs) + + def SetConsoleCursorPosition(stream_id, position, adjust=True): + position = COORD(*position) + # If the position is out of range, do nothing. + if position.Y <= 0 or position.X <= 0: + return + # Adjust for Windows' SetConsoleCursorPosition: + # 1. being 0-based, while ANSI is 1-based. + # 2. expecting (x,y), while ANSI uses (y,x). 
+ adjusted_position = COORD(position.Y - 1, position.X - 1) + if adjust: + # Adjust for viewport's scroll position + sr = GetConsoleScreenBufferInfo(STDOUT).srWindow + adjusted_position.Y += sr.Top + adjusted_position.X += sr.Left + # Resume normal processing + handle = _GetStdHandle(stream_id) + return _SetConsoleCursorPosition(handle, adjusted_position) + + def FillConsoleOutputCharacter(stream_id, char, length, start): + handle = _GetStdHandle(stream_id) + char = c_char(char.encode()) + length = wintypes.DWORD(length) + num_written = wintypes.DWORD(0) + # Note that this is hard-coded for ANSI (vs wide) bytes. + success = _FillConsoleOutputCharacterA( + handle, char, length, start, byref(num_written)) + return num_written.value + + def FillConsoleOutputAttribute(stream_id, attr, length, start): + ''' FillConsoleOutputAttribute( hConsole, csbi.wAttributes, dwConSize, coordScreen, &cCharsWritten )''' + handle = _GetStdHandle(stream_id) + attribute = wintypes.WORD(attr) + length = wintypes.DWORD(length) + num_written = wintypes.DWORD(0) + # Note that this is hard-coded for ANSI (vs wide) bytes. + return _FillConsoleOutputAttribute( + handle, attribute, length, start, byref(num_written)) + + def SetConsoleTitle(title): + return _SetConsoleTitleW(title) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/winterm.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/winterm.py new file mode 100644 index 0000000..0fdb4ec --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/colorama/winterm.py @@ -0,0 +1,169 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +from . import win32 + + +# from wincon.h +class WinColor(object): + BLACK = 0 + BLUE = 1 + GREEN = 2 + CYAN = 3 + RED = 4 + MAGENTA = 5 + YELLOW = 6 + GREY = 7 + +# from wincon.h +class WinStyle(object): + NORMAL = 0x00 # dim text, dim background + BRIGHT = 0x08 # bright text, dim background + BRIGHT_BACKGROUND = 0x80 # dim text, bright background + +class WinTerm(object): + + def __init__(self): + self._default = win32.GetConsoleScreenBufferInfo(win32.STDOUT).wAttributes + self.set_attrs(self._default) + self._default_fore = self._fore + self._default_back = self._back + self._default_style = self._style + # In order to emulate LIGHT_EX in windows, we borrow the BRIGHT style. + # So that LIGHT_EX colors and BRIGHT style do not clobber each other, + # we track them separately, since LIGHT_EX is overwritten by Fore/Back + # and BRIGHT is overwritten by Style codes. 
+ self._light = 0 + + def get_attrs(self): + return self._fore + self._back * 16 + (self._style | self._light) + + def set_attrs(self, value): + self._fore = value & 7 + self._back = (value >> 4) & 7 + self._style = value & (WinStyle.BRIGHT | WinStyle.BRIGHT_BACKGROUND) + + def reset_all(self, on_stderr=None): + self.set_attrs(self._default) + self.set_console(attrs=self._default) + self._light = 0 + + def fore(self, fore=None, light=False, on_stderr=False): + if fore is None: + fore = self._default_fore + self._fore = fore + # Emulate LIGHT_EX with BRIGHT Style + if light: + self._light |= WinStyle.BRIGHT + else: + self._light &= ~WinStyle.BRIGHT + self.set_console(on_stderr=on_stderr) + + def back(self, back=None, light=False, on_stderr=False): + if back is None: + back = self._default_back + self._back = back + # Emulate LIGHT_EX with BRIGHT_BACKGROUND Style + if light: + self._light |= WinStyle.BRIGHT_BACKGROUND + else: + self._light &= ~WinStyle.BRIGHT_BACKGROUND + self.set_console(on_stderr=on_stderr) + + def style(self, style=None, on_stderr=False): + if style is None: + style = self._default_style + self._style = style + self.set_console(on_stderr=on_stderr) + + def set_console(self, attrs=None, on_stderr=False): + if attrs is None: + attrs = self.get_attrs() + handle = win32.STDOUT + if on_stderr: + handle = win32.STDERR + win32.SetConsoleTextAttribute(handle, attrs) + + def get_position(self, handle): + position = win32.GetConsoleScreenBufferInfo(handle).dwCursorPosition + # Because Windows coordinates are 0-based, + # and win32.SetConsoleCursorPosition expects 1-based. + position.X += 1 + position.Y += 1 + return position + + def set_cursor_position(self, position=None, on_stderr=False): + if position is None: + # I'm not currently tracking the position, so there is no default. + # position = self.get_position() + return + handle = win32.STDOUT + if on_stderr: + handle = win32.STDERR + win32.SetConsoleCursorPosition(handle, position) + + def cursor_adjust(self, x, y, on_stderr=False): + handle = win32.STDOUT + if on_stderr: + handle = win32.STDERR + position = self.get_position(handle) + adjusted_position = (position.Y + y, position.X + x) + win32.SetConsoleCursorPosition(handle, adjusted_position, adjust=False) + + def erase_screen(self, mode=0, on_stderr=False): + # 0 should clear from the cursor to the end of the screen. + # 1 should clear from the cursor to the beginning of the screen. 
+ # 2 should clear the entire screen, and move cursor to (1,1) + handle = win32.STDOUT + if on_stderr: + handle = win32.STDERR + csbi = win32.GetConsoleScreenBufferInfo(handle) + # get the number of character cells in the current buffer + cells_in_screen = csbi.dwSize.X * csbi.dwSize.Y + # get number of character cells before current cursor position + cells_before_cursor = csbi.dwSize.X * csbi.dwCursorPosition.Y + csbi.dwCursorPosition.X + if mode == 0: + from_coord = csbi.dwCursorPosition + cells_to_erase = cells_in_screen - cells_before_cursor + elif mode == 1: + from_coord = win32.COORD(0, 0) + cells_to_erase = cells_before_cursor + elif mode == 2: + from_coord = win32.COORD(0, 0) + cells_to_erase = cells_in_screen + else: + # invalid mode + return + # fill the entire screen with blanks + win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord) + # now set the buffer's attributes accordingly + win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord) + if mode == 2: + # put the cursor where needed + win32.SetConsoleCursorPosition(handle, (1, 1)) + + def erase_line(self, mode=0, on_stderr=False): + # 0 should clear from the cursor to the end of the line. + # 1 should clear from the cursor to the beginning of the line. + # 2 should clear the entire line. + handle = win32.STDOUT + if on_stderr: + handle = win32.STDERR + csbi = win32.GetConsoleScreenBufferInfo(handle) + if mode == 0: + from_coord = csbi.dwCursorPosition + cells_to_erase = csbi.dwSize.X - csbi.dwCursorPosition.X + elif mode == 1: + from_coord = win32.COORD(0, csbi.dwCursorPosition.Y) + cells_to_erase = csbi.dwCursorPosition.X + elif mode == 2: + from_coord = win32.COORD(0, csbi.dwCursorPosition.Y) + cells_to_erase = csbi.dwSize.X + else: + # invalid mode + return + # fill the entire screen with blanks + win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord) + # now set the buffer's attributes accordingly + win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord) + + def set_title(self, title): + win32.SetConsoleTitle(title) diff --git a/venv/lib/python3.7/site-packages/pip/_vendor/contextlib2.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/contextlib2.py similarity index 100% rename from venv/lib/python3.7/site-packages/pip/_vendor/contextlib2.py rename to venv.bak/lib/python3.7/site-packages/pip/_vendor/contextlib2.py diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__init__.py new file mode 100644 index 0000000..e19aebd --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__init__.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2019 Vinay Sajip. +# Licensed to the Python Software Foundation under a contributor agreement. +# See LICENSE.txt and CONTRIBUTORS.txt. 
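Tying the two preceding modules together: when conversion is active, an ANSI erase sequence written through AnsiToWin32 is parsed by write_and_convert() and dispatched to WinTerm, so the sketch below only does real work on a Windows console where winapi_test() succeeds:

# Illustrative: '\x1b[2J' (erase display, mode 2) is routed through
# call_win32('J', (2,)) and ends up in WinTerm.erase_screen(2, ...).
import sys
from pip._vendor.colorama.ansitowin32 import AnsiToWin32

wrapper = AnsiToWin32(sys.stdout)
if wrapper.convert:                  # True only when the win32 console API is usable
    wrapper.stream.write('\x1b[2J')  # cleared via FillConsoleOutputCharacter/Attribute
else:
    sys.stdout.write('\x1b[2J')      # let an ANSI-capable terminal interpret the sequence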
+# +import logging + +__version__ = '0.3.0' + +class DistlibException(Exception): + pass + +try: + from logging import NullHandler +except ImportError: # pragma: no cover + class NullHandler(logging.Handler): + def handle(self, record): pass + def emit(self, record): pass + def createLock(self): self.lock = None + +logger = logging.getLogger(__name__) +logger.addHandler(NullHandler()) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..03e5b1d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/compat.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/compat.cpython-37.pyc new file mode 100644 index 0000000..cb9b13f Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/compat.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/database.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/database.cpython-37.pyc new file mode 100644 index 0000000..226cf1c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/database.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/index.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/index.cpython-37.pyc new file mode 100644 index 0000000..f07abea Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/index.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/locators.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/locators.cpython-37.pyc new file mode 100644 index 0000000..5828b11 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/locators.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/manifest.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/manifest.cpython-37.pyc new file mode 100644 index 0000000..2255abb Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/manifest.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/markers.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/markers.cpython-37.pyc new file mode 100644 index 0000000..6135af4 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/markers.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/metadata.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/metadata.cpython-37.pyc new file mode 100644 index 0000000..4ba0440 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/metadata.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/resources.cpython-37.pyc 
b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/resources.cpython-37.pyc new file mode 100644 index 0000000..4fb048a Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/resources.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/scripts.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/scripts.cpython-37.pyc new file mode 100644 index 0000000..3cc6a57 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/scripts.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/util.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000..a8e89c6 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/util.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/version.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/version.cpython-37.pyc new file mode 100644 index 0000000..9f94ffb Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/version.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/wheel.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/wheel.cpython-37.pyc new file mode 100644 index 0000000..e11c4ad Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/__pycache__/wheel.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__init__.py new file mode 100644 index 0000000..f7dbf4c --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__init__.py @@ -0,0 +1,6 @@ +"""Modules copied from Python 3 standard libraries, for internal use only. + +Individual classes and functions are found in d2._backport.misc. Intended +usage is to always import things missing from 3.1 from that module: the +built-in/stdlib objects will be used if found. 
+""" diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..5e24f28 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__pycache__/misc.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__pycache__/misc.cpython-37.pyc new file mode 100644 index 0000000..a18f461 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__pycache__/misc.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__pycache__/shutil.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__pycache__/shutil.cpython-37.pyc new file mode 100644 index 0000000..a2fa78b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__pycache__/shutil.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__pycache__/sysconfig.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__pycache__/sysconfig.cpython-37.pyc new file mode 100644 index 0000000..3df819c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__pycache__/sysconfig.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__pycache__/tarfile.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__pycache__/tarfile.cpython-37.pyc new file mode 100644 index 0000000..93632d9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/__pycache__/tarfile.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/misc.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/misc.py new file mode 100644 index 0000000..cfb318d --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/misc.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012 The Python Software Foundation. +# See LICENSE.txt and CONTRIBUTORS.txt. 
+# +"""Backports for individual classes and functions.""" + +import os +import sys + +__all__ = ['cache_from_source', 'callable', 'fsencode'] + + +try: + from imp import cache_from_source +except ImportError: + def cache_from_source(py_file, debug=__debug__): + ext = debug and 'c' or 'o' + return py_file + ext + + +try: + callable = callable +except NameError: + from collections import Callable + + def callable(obj): + return isinstance(obj, Callable) + + +try: + fsencode = os.fsencode +except AttributeError: + def fsencode(filename): + if isinstance(filename, bytes): + return filename + elif isinstance(filename, str): + return filename.encode(sys.getfilesystemencoding()) + else: + raise TypeError("expect bytes or str, not %s" % + type(filename).__name__) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/shutil.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/shutil.py new file mode 100644 index 0000000..159e49e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/shutil.py @@ -0,0 +1,761 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012 The Python Software Foundation. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +"""Utility functions for copying and archiving files and directory trees. + +XXX The functions here don't copy the resource fork or other metadata on Mac. + +""" + +import os +import sys +import stat +from os.path import abspath +import fnmatch +import collections +import errno +from . import tarfile + +try: + import bz2 + _BZ2_SUPPORTED = True +except ImportError: + _BZ2_SUPPORTED = False + +try: + from pwd import getpwnam +except ImportError: + getpwnam = None + +try: + from grp import getgrnam +except ImportError: + getgrnam = None + +__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2", + "copytree", "move", "rmtree", "Error", "SpecialFileError", + "ExecError", "make_archive", "get_archive_formats", + "register_archive_format", "unregister_archive_format", + "get_unpack_formats", "register_unpack_format", + "unregister_unpack_format", "unpack_archive", "ignore_patterns"] + +class Error(EnvironmentError): + pass + +class SpecialFileError(EnvironmentError): + """Raised when trying to do a kind of operation (e.g. copying) which is + not supported on a special file (e.g. a named pipe)""" + +class ExecError(EnvironmentError): + """Raised when a command could not be executed""" + +class ReadError(EnvironmentError): + """Raised when an archive cannot be read""" + +class RegistryError(Exception): + """Raised when a registry operation with the archiving + and unpacking registries fails""" + + +try: + WindowsError +except NameError: + WindowsError = None + +def copyfileobj(fsrc, fdst, length=16*1024): + """copy data from file-like object fsrc to file-like object fdst""" + while 1: + buf = fsrc.read(length) + if not buf: + break + fdst.write(buf) + +def _samefile(src, dst): + # Macintosh, Unix. + if hasattr(os.path, 'samefile'): + try: + return os.path.samefile(src, dst) + except OSError: + return False + + # All other platforms: check for same pathname. + return (os.path.normcase(os.path.abspath(src)) == + os.path.normcase(os.path.abspath(dst))) + +def copyfile(src, dst): + """Copy data from src to dst""" + if _samefile(src, dst): + raise Error("`%s` and `%s` are the same file" % (src, dst)) + + for fn in [src, dst]: + try: + st = os.stat(fn) + except OSError: + # File most likely does not exist + pass + else: + # XXX What about other special files? (sockets, devices...) 
+ if stat.S_ISFIFO(st.st_mode): + raise SpecialFileError("`%s` is a named pipe" % fn) + + with open(src, 'rb') as fsrc: + with open(dst, 'wb') as fdst: + copyfileobj(fsrc, fdst) + +def copymode(src, dst): + """Copy mode bits from src to dst""" + if hasattr(os, 'chmod'): + st = os.stat(src) + mode = stat.S_IMODE(st.st_mode) + os.chmod(dst, mode) + +def copystat(src, dst): + """Copy all stat info (mode bits, atime, mtime, flags) from src to dst""" + st = os.stat(src) + mode = stat.S_IMODE(st.st_mode) + if hasattr(os, 'utime'): + os.utime(dst, (st.st_atime, st.st_mtime)) + if hasattr(os, 'chmod'): + os.chmod(dst, mode) + if hasattr(os, 'chflags') and hasattr(st, 'st_flags'): + try: + os.chflags(dst, st.st_flags) + except OSError as why: + if (not hasattr(errno, 'EOPNOTSUPP') or + why.errno != errno.EOPNOTSUPP): + raise + +def copy(src, dst): + """Copy data and mode bits ("cp src dst"). + + The destination may be a directory. + + """ + if os.path.isdir(dst): + dst = os.path.join(dst, os.path.basename(src)) + copyfile(src, dst) + copymode(src, dst) + +def copy2(src, dst): + """Copy data and all stat info ("cp -p src dst"). + + The destination may be a directory. + + """ + if os.path.isdir(dst): + dst = os.path.join(dst, os.path.basename(src)) + copyfile(src, dst) + copystat(src, dst) + +def ignore_patterns(*patterns): + """Function that can be used as copytree() ignore parameter. + + Patterns is a sequence of glob-style patterns + that are used to exclude files""" + def _ignore_patterns(path, names): + ignored_names = [] + for pattern in patterns: + ignored_names.extend(fnmatch.filter(names, pattern)) + return set(ignored_names) + return _ignore_patterns + +def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, + ignore_dangling_symlinks=False): + """Recursively copy a directory tree. + + The destination directory must not already exist. + If exception(s) occur, an Error is raised with a list of reasons. + + If the optional symlinks flag is true, symbolic links in the + source tree result in symbolic links in the destination tree; if + it is false, the contents of the files pointed to by symbolic + links are copied. If the file pointed by the symlink doesn't + exist, an exception will be added in the list of errors raised in + an Error exception at the end of the copy process. + + You can set the optional ignore_dangling_symlinks flag to true if you + want to silence this exception. Notice that this has no effect on + platforms that don't support os.symlink. + + The optional ignore argument is a callable. If given, it + is called with the `src` parameter, which is the directory + being visited by copytree(), and `names` which is the list of + `src` contents, as returned by os.listdir(): + + callable(src, names) -> ignored_names + + Since copytree() is called recursively, the callable will be + called once for each directory that is copied. It returns a + list of names relative to the `src` directory that should + not be copied. + + The optional copy_function argument is a callable that will be used + to copy each file. It will be called with the source path and the + destination path as arguments. By default, copy2() is used, but any + function that supports the same signature (like copy()) can be used. 
+ + """ + names = os.listdir(src) + if ignore is not None: + ignored_names = ignore(src, names) + else: + ignored_names = set() + + os.makedirs(dst) + errors = [] + for name in names: + if name in ignored_names: + continue + srcname = os.path.join(src, name) + dstname = os.path.join(dst, name) + try: + if os.path.islink(srcname): + linkto = os.readlink(srcname) + if symlinks: + os.symlink(linkto, dstname) + else: + # ignore dangling symlink if the flag is on + if not os.path.exists(linkto) and ignore_dangling_symlinks: + continue + # otherwise let the copy occurs. copy2 will raise an error + copy_function(srcname, dstname) + elif os.path.isdir(srcname): + copytree(srcname, dstname, symlinks, ignore, copy_function) + else: + # Will raise a SpecialFileError for unsupported file types + copy_function(srcname, dstname) + # catch the Error from the recursive copytree so that we can + # continue with other files + except Error as err: + errors.extend(err.args[0]) + except EnvironmentError as why: + errors.append((srcname, dstname, str(why))) + try: + copystat(src, dst) + except OSError as why: + if WindowsError is not None and isinstance(why, WindowsError): + # Copying file access times may fail on Windows + pass + else: + errors.extend((src, dst, str(why))) + if errors: + raise Error(errors) + +def rmtree(path, ignore_errors=False, onerror=None): + """Recursively delete a directory tree. + + If ignore_errors is set, errors are ignored; otherwise, if onerror + is set, it is called to handle the error with arguments (func, + path, exc_info) where func is os.listdir, os.remove, or os.rmdir; + path is the argument to that function that caused it to fail; and + exc_info is a tuple returned by sys.exc_info(). If ignore_errors + is false and onerror is None, an exception is raised. + + """ + if ignore_errors: + def onerror(*args): + pass + elif onerror is None: + def onerror(*args): + raise + try: + if os.path.islink(path): + # symlinks to directories are forbidden, see bug #1669 + raise OSError("Cannot call rmtree on a symbolic link") + except OSError: + onerror(os.path.islink, path, sys.exc_info()) + # can't continue even if onerror hook returns + return + names = [] + try: + names = os.listdir(path) + except os.error: + onerror(os.listdir, path, sys.exc_info()) + for name in names: + fullname = os.path.join(path, name) + try: + mode = os.lstat(fullname).st_mode + except os.error: + mode = 0 + if stat.S_ISDIR(mode): + rmtree(fullname, ignore_errors, onerror) + else: + try: + os.remove(fullname) + except os.error: + onerror(os.remove, fullname, sys.exc_info()) + try: + os.rmdir(path) + except os.error: + onerror(os.rmdir, path, sys.exc_info()) + + +def _basename(path): + # A basename() variant which first strips the trailing slash, if present. + # Thus we always get the last component of the path, even for directories. + return os.path.basename(path.rstrip(os.path.sep)) + +def move(src, dst): + """Recursively move a file or directory to another location. This is + similar to the Unix "mv" command. + + If the destination is a directory or a symlink to a directory, the source + is moved inside the directory. The destination path must not already + exist. + + If the destination already exists but is not a directory, it may be + overwritten depending on os.rename() semantics. + + If the destination is on our current filesystem, then rename() is used. + Otherwise, src is copied to the destination and then removed. + A lot more could be done here... 
A look at a mv.c shows a lot of + the issues this implementation glosses over. + + """ + real_dst = dst + if os.path.isdir(dst): + if _samefile(src, dst): + # We might be on a case insensitive filesystem, + # perform the rename anyway. + os.rename(src, dst) + return + + real_dst = os.path.join(dst, _basename(src)) + if os.path.exists(real_dst): + raise Error("Destination path '%s' already exists" % real_dst) + try: + os.rename(src, real_dst) + except OSError: + if os.path.isdir(src): + if _destinsrc(src, dst): + raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst)) + copytree(src, real_dst, symlinks=True) + rmtree(src) + else: + copy2(src, real_dst) + os.unlink(src) + +def _destinsrc(src, dst): + src = abspath(src) + dst = abspath(dst) + if not src.endswith(os.path.sep): + src += os.path.sep + if not dst.endswith(os.path.sep): + dst += os.path.sep + return dst.startswith(src) + +def _get_gid(name): + """Returns a gid, given a group name.""" + if getgrnam is None or name is None: + return None + try: + result = getgrnam(name) + except KeyError: + result = None + if result is not None: + return result[2] + return None + +def _get_uid(name): + """Returns an uid, given a user name.""" + if getpwnam is None or name is None: + return None + try: + result = getpwnam(name) + except KeyError: + result = None + if result is not None: + return result[2] + return None + +def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0, + owner=None, group=None, logger=None): + """Create a (possibly compressed) tar file from all the files under + 'base_dir'. + + 'compress' must be "gzip" (the default), "bzip2", or None. + + 'owner' and 'group' can be used to define an owner and a group for the + archive that is being built. If not provided, the current owner and group + will be used. + + The output tar file will be named 'base_name' + ".tar", possibly plus + the appropriate compression extension (".gz", or ".bz2"). + + Returns the output filename. 
+ """ + tar_compression = {'gzip': 'gz', None: ''} + compress_ext = {'gzip': '.gz'} + + if _BZ2_SUPPORTED: + tar_compression['bzip2'] = 'bz2' + compress_ext['bzip2'] = '.bz2' + + # flags for compression program, each element of list will be an argument + if compress is not None and compress not in compress_ext: + raise ValueError("bad value for 'compress', or compression format not " + "supported : {0}".format(compress)) + + archive_name = base_name + '.tar' + compress_ext.get(compress, '') + archive_dir = os.path.dirname(archive_name) + + if not os.path.exists(archive_dir): + if logger is not None: + logger.info("creating %s", archive_dir) + if not dry_run: + os.makedirs(archive_dir) + + # creating the tarball + if logger is not None: + logger.info('Creating tar archive') + + uid = _get_uid(owner) + gid = _get_gid(group) + + def _set_uid_gid(tarinfo): + if gid is not None: + tarinfo.gid = gid + tarinfo.gname = group + if uid is not None: + tarinfo.uid = uid + tarinfo.uname = owner + return tarinfo + + if not dry_run: + tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress]) + try: + tar.add(base_dir, filter=_set_uid_gid) + finally: + tar.close() + + return archive_name + +def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False): + # XXX see if we want to keep an external call here + if verbose: + zipoptions = "-r" + else: + zipoptions = "-rq" + from distutils.errors import DistutilsExecError + from distutils.spawn import spawn + try: + spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run) + except DistutilsExecError: + # XXX really should distinguish between "couldn't find + # external 'zip' command" and "zip failed". + raise ExecError("unable to create zip file '%s': " + "could neither import the 'zipfile' module nor " + "find a standalone zip utility") % zip_filename + +def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None): + """Create a zip file from all the files under 'base_dir'. + + The output zip file will be named 'base_name' + ".zip". Uses either the + "zipfile" Python module (if available) or the InfoZIP "zip" utility + (if installed and found on the default search path). If neither tool is + available, raises ExecError. Returns the name of the output zip + file. + """ + zip_filename = base_name + ".zip" + archive_dir = os.path.dirname(base_name) + + if not os.path.exists(archive_dir): + if logger is not None: + logger.info("creating %s", archive_dir) + if not dry_run: + os.makedirs(archive_dir) + + # If zipfile module is not available, try spawning an external 'zip' + # command. 
+ try: + import zipfile + except ImportError: + zipfile = None + + if zipfile is None: + _call_external_zip(base_dir, zip_filename, verbose, dry_run) + else: + if logger is not None: + logger.info("creating '%s' and adding '%s' to it", + zip_filename, base_dir) + + if not dry_run: + zip = zipfile.ZipFile(zip_filename, "w", + compression=zipfile.ZIP_DEFLATED) + + for dirpath, dirnames, filenames in os.walk(base_dir): + for name in filenames: + path = os.path.normpath(os.path.join(dirpath, name)) + if os.path.isfile(path): + zip.write(path, path) + if logger is not None: + logger.info("adding '%s'", path) + zip.close() + + return zip_filename + +_ARCHIVE_FORMATS = { + 'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"), + 'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"), + 'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"), + 'zip': (_make_zipfile, [], "ZIP file"), + } + +if _BZ2_SUPPORTED: + _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')], + "bzip2'ed tar-file") + +def get_archive_formats(): + """Returns a list of supported formats for archiving and unarchiving. + + Each element of the returned sequence is a tuple (name, description) + """ + formats = [(name, registry[2]) for name, registry in + _ARCHIVE_FORMATS.items()] + formats.sort() + return formats + +def register_archive_format(name, function, extra_args=None, description=''): + """Registers an archive format. + + name is the name of the format. function is the callable that will be + used to create archives. If provided, extra_args is a sequence of + (name, value) tuples that will be passed as arguments to the callable. + description can be provided to describe the format, and will be returned + by the get_archive_formats() function. + """ + if extra_args is None: + extra_args = [] + if not isinstance(function, collections.Callable): + raise TypeError('The %s object is not callable' % function) + if not isinstance(extra_args, (tuple, list)): + raise TypeError('extra_args needs to be a sequence') + for element in extra_args: + if not isinstance(element, (tuple, list)) or len(element) !=2: + raise TypeError('extra_args elements are : (arg_name, value)') + + _ARCHIVE_FORMATS[name] = (function, extra_args, description) + +def unregister_archive_format(name): + del _ARCHIVE_FORMATS[name] + +def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0, + dry_run=0, owner=None, group=None, logger=None): + """Create an archive file (eg. zip or tar). + + 'base_name' is the name of the file to create, minus any format-specific + extension; 'format' is the archive format: one of "zip", "tar", "bztar" + or "gztar". + + 'root_dir' is a directory that will be the root directory of the + archive; ie. we typically chdir into 'root_dir' before creating the + archive. 'base_dir' is the directory where we start archiving from; + ie. 'base_dir' will be the common prefix of all files and + directories in the archive. 'root_dir' and 'base_dir' both default + to the current directory. Returns the name of the archive file. + + 'owner' and 'group' are used when creating a tar archive. By default, + uses the current owner and group. 
+ """ + save_cwd = os.getcwd() + if root_dir is not None: + if logger is not None: + logger.debug("changing into '%s'", root_dir) + base_name = os.path.abspath(base_name) + if not dry_run: + os.chdir(root_dir) + + if base_dir is None: + base_dir = os.curdir + + kwargs = {'dry_run': dry_run, 'logger': logger} + + try: + format_info = _ARCHIVE_FORMATS[format] + except KeyError: + raise ValueError("unknown archive format '%s'" % format) + + func = format_info[0] + for arg, val in format_info[1]: + kwargs[arg] = val + + if format != 'zip': + kwargs['owner'] = owner + kwargs['group'] = group + + try: + filename = func(base_name, base_dir, **kwargs) + finally: + if root_dir is not None: + if logger is not None: + logger.debug("changing back to '%s'", save_cwd) + os.chdir(save_cwd) + + return filename + + +def get_unpack_formats(): + """Returns a list of supported formats for unpacking. + + Each element of the returned sequence is a tuple + (name, extensions, description) + """ + formats = [(name, info[0], info[3]) for name, info in + _UNPACK_FORMATS.items()] + formats.sort() + return formats + +def _check_unpack_options(extensions, function, extra_args): + """Checks what gets registered as an unpacker.""" + # first make sure no other unpacker is registered for this extension + existing_extensions = {} + for name, info in _UNPACK_FORMATS.items(): + for ext in info[0]: + existing_extensions[ext] = name + + for extension in extensions: + if extension in existing_extensions: + msg = '%s is already registered for "%s"' + raise RegistryError(msg % (extension, + existing_extensions[extension])) + + if not isinstance(function, collections.Callable): + raise TypeError('The registered function must be a callable') + + +def register_unpack_format(name, extensions, function, extra_args=None, + description=''): + """Registers an unpack format. + + `name` is the name of the format. `extensions` is a list of extensions + corresponding to the format. + + `function` is the callable that will be + used to unpack archives. The callable will receive archives to unpack. + If it's unable to handle an archive, it needs to raise a ReadError + exception. + + If provided, `extra_args` is a sequence of + (name, value) tuples that will be passed as arguments to the callable. + description can be provided to describe the format, and will be returned + by the get_unpack_formats() function. + """ + if extra_args is None: + extra_args = [] + _check_unpack_options(extensions, function, extra_args) + _UNPACK_FORMATS[name] = extensions, function, extra_args, description + +def unregister_unpack_format(name): + """Removes the pack format from the registry.""" + del _UNPACK_FORMATS[name] + +def _ensure_directory(path): + """Ensure that the parent directory of `path` exists""" + dirname = os.path.dirname(path) + if not os.path.isdir(dirname): + os.makedirs(dirname) + +def _unpack_zipfile(filename, extract_dir): + """Unpack zip `filename` to `extract_dir` + """ + try: + import zipfile + except ImportError: + raise ReadError('zlib not supported, cannot unpack this archive.') + + if not zipfile.is_zipfile(filename): + raise ReadError("%s is not a zip file" % filename) + + zip = zipfile.ZipFile(filename) + try: + for info in zip.infolist(): + name = info.filename + + # don't extract absolute paths or ones with .. in them + if name.startswith('/') or '..' 
in name: + continue + + target = os.path.join(extract_dir, *name.split('/')) + if not target: + continue + + _ensure_directory(target) + if not name.endswith('/'): + # file + data = zip.read(info.filename) + f = open(target, 'wb') + try: + f.write(data) + finally: + f.close() + del data + finally: + zip.close() + +def _unpack_tarfile(filename, extract_dir): + """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir` + """ + try: + tarobj = tarfile.open(filename) + except tarfile.TarError: + raise ReadError( + "%s is not a compressed or uncompressed tar file" % filename) + try: + tarobj.extractall(extract_dir) + finally: + tarobj.close() + +_UNPACK_FORMATS = { + 'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"), + 'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"), + 'zip': (['.zip'], _unpack_zipfile, [], "ZIP file") + } + +if _BZ2_SUPPORTED: + _UNPACK_FORMATS['bztar'] = (['.bz2'], _unpack_tarfile, [], + "bzip2'ed tar-file") + +def _find_unpack_format(filename): + for name, info in _UNPACK_FORMATS.items(): + for extension in info[0]: + if filename.endswith(extension): + return name + return None + +def unpack_archive(filename, extract_dir=None, format=None): + """Unpack an archive. + + `filename` is the name of the archive. + + `extract_dir` is the name of the target directory, where the archive + is unpacked. If not provided, the current working directory is used. + + `format` is the archive format: one of "zip", "tar", or "gztar". Or any + other registered format. If not provided, unpack_archive will use the + filename extension and see if an unpacker was registered for that + extension. + + In case none is found, a ValueError is raised. + """ + if extract_dir is None: + extract_dir = os.getcwd() + + if format is not None: + try: + format_info = _UNPACK_FORMATS[format] + except KeyError: + raise ValueError("Unknown unpack format '{0}'".format(format)) + + func = format_info[1] + func(filename, extract_dir, **dict(format_info[2])) + else: + # we need to look at the registered unpackers supported extensions + format = _find_unpack_format(filename) + if format is None: + raise ReadError("Unknown archive format '{0}'".format(filename)) + + func = _UNPACK_FORMATS[format][1] + kwargs = dict(_UNPACK_FORMATS[format][2]) + func(filename, extract_dir, **kwargs) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/sysconfig.cfg b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/sysconfig.cfg new file mode 100644 index 0000000..1746bd0 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/sysconfig.cfg @@ -0,0 +1,84 @@ +[posix_prefix] +# Configuration directories. Some of these come straight out of the +# configure script. They are for implementing the other variables, not to +# be used directly in [resource_locations]. 
+confdir = /etc +datadir = /usr/share +libdir = /usr/lib +statedir = /var +# User resource directory +local = ~/.local/{distribution.name} + +stdlib = {base}/lib/python{py_version_short} +platstdlib = {platbase}/lib/python{py_version_short} +purelib = {base}/lib/python{py_version_short}/site-packages +platlib = {platbase}/lib/python{py_version_short}/site-packages +include = {base}/include/python{py_version_short}{abiflags} +platinclude = {platbase}/include/python{py_version_short}{abiflags} +data = {base} + +[posix_home] +stdlib = {base}/lib/python +platstdlib = {base}/lib/python +purelib = {base}/lib/python +platlib = {base}/lib/python +include = {base}/include/python +platinclude = {base}/include/python +scripts = {base}/bin +data = {base} + +[nt] +stdlib = {base}/Lib +platstdlib = {base}/Lib +purelib = {base}/Lib/site-packages +platlib = {base}/Lib/site-packages +include = {base}/Include +platinclude = {base}/Include +scripts = {base}/Scripts +data = {base} + +[os2] +stdlib = {base}/Lib +platstdlib = {base}/Lib +purelib = {base}/Lib/site-packages +platlib = {base}/Lib/site-packages +include = {base}/Include +platinclude = {base}/Include +scripts = {base}/Scripts +data = {base} + +[os2_home] +stdlib = {userbase}/lib/python{py_version_short} +platstdlib = {userbase}/lib/python{py_version_short} +purelib = {userbase}/lib/python{py_version_short}/site-packages +platlib = {userbase}/lib/python{py_version_short}/site-packages +include = {userbase}/include/python{py_version_short} +scripts = {userbase}/bin +data = {userbase} + +[nt_user] +stdlib = {userbase}/Python{py_version_nodot} +platstdlib = {userbase}/Python{py_version_nodot} +purelib = {userbase}/Python{py_version_nodot}/site-packages +platlib = {userbase}/Python{py_version_nodot}/site-packages +include = {userbase}/Python{py_version_nodot}/Include +scripts = {userbase}/Scripts +data = {userbase} + +[posix_user] +stdlib = {userbase}/lib/python{py_version_short} +platstdlib = {userbase}/lib/python{py_version_short} +purelib = {userbase}/lib/python{py_version_short}/site-packages +platlib = {userbase}/lib/python{py_version_short}/site-packages +include = {userbase}/include/python{py_version_short} +scripts = {userbase}/bin +data = {userbase} + +[osx_framework_user] +stdlib = {userbase}/lib/python +platstdlib = {userbase}/lib/python +purelib = {userbase}/lib/python/site-packages +platlib = {userbase}/lib/python/site-packages +include = {userbase}/include +scripts = {userbase}/bin +data = {userbase} diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/sysconfig.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/sysconfig.py new file mode 100644 index 0000000..b470a37 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/sysconfig.py @@ -0,0 +1,786 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012 The Python Software Foundation. +# See LICENSE.txt and CONTRIBUTORS.txt. 
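Stepping back from the backported shutil module above: it mirrors Python 3's archive helpers, so creating and unpacking archives round-trips through the format registries defined there. A short sketch, where 'project' (an existing directory) and 'project-1.0' are purely illustrative names:

# Round-trip with the backported archive helpers shown above.
from pip._vendor.distlib._backport import shutil as backport_shutil

archive = backport_shutil.make_archive('project-1.0', 'gztar', base_dir='project')
print(backport_shutil.get_archive_formats())    # [('bztar', ...), ('gztar', ...), ('tar', ...), ('zip', ...)]
backport_shutil.unpack_archive(archive, extract_dir='unpacked')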
+# +"""Access to Python's configuration information.""" + +import codecs +import os +import re +import sys +from os.path import pardir, realpath +try: + import configparser +except ImportError: + import ConfigParser as configparser + + +__all__ = [ + 'get_config_h_filename', + 'get_config_var', + 'get_config_vars', + 'get_makefile_filename', + 'get_path', + 'get_path_names', + 'get_paths', + 'get_platform', + 'get_python_version', + 'get_scheme_names', + 'parse_config_h', +] + + +def _safe_realpath(path): + try: + return realpath(path) + except OSError: + return path + + +if sys.executable: + _PROJECT_BASE = os.path.dirname(_safe_realpath(sys.executable)) +else: + # sys.executable can be empty if argv[0] has been changed and Python is + # unable to retrieve the real program name + _PROJECT_BASE = _safe_realpath(os.getcwd()) + +if os.name == "nt" and "pcbuild" in _PROJECT_BASE[-8:].lower(): + _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir)) +# PC/VS7.1 +if os.name == "nt" and "\\pc\\v" in _PROJECT_BASE[-10:].lower(): + _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir)) +# PC/AMD64 +if os.name == "nt" and "\\pcbuild\\amd64" in _PROJECT_BASE[-14:].lower(): + _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir)) + + +def is_python_build(): + for fn in ("Setup.dist", "Setup.local"): + if os.path.isfile(os.path.join(_PROJECT_BASE, "Modules", fn)): + return True + return False + +_PYTHON_BUILD = is_python_build() + +_cfg_read = False + +def _ensure_cfg_read(): + global _cfg_read + if not _cfg_read: + from ..resources import finder + backport_package = __name__.rsplit('.', 1)[0] + _finder = finder(backport_package) + _cfgfile = _finder.find('sysconfig.cfg') + assert _cfgfile, 'sysconfig.cfg exists' + with _cfgfile.as_stream() as s: + _SCHEMES.readfp(s) + if _PYTHON_BUILD: + for scheme in ('posix_prefix', 'posix_home'): + _SCHEMES.set(scheme, 'include', '{srcdir}/Include') + _SCHEMES.set(scheme, 'platinclude', '{projectbase}/.') + + _cfg_read = True + + +_SCHEMES = configparser.RawConfigParser() +_VAR_REPL = re.compile(r'\{([^{]*?)\}') + +def _expand_globals(config): + _ensure_cfg_read() + if config.has_section('globals'): + globals = config.items('globals') + else: + globals = tuple() + + sections = config.sections() + for section in sections: + if section == 'globals': + continue + for option, value in globals: + if config.has_option(section, option): + continue + config.set(section, option, value) + config.remove_section('globals') + + # now expanding local variables defined in the cfg file + # + for section in config.sections(): + variables = dict(config.items(section)) + + def _replacer(matchobj): + name = matchobj.group(1) + if name in variables: + return variables[name] + return matchobj.group(0) + + for option, value in config.items(section): + config.set(section, option, _VAR_REPL.sub(_replacer, value)) + +#_expand_globals(_SCHEMES) + +_PY_VERSION = '%s.%s.%s' % sys.version_info[:3] +_PY_VERSION_SHORT = '%s.%s' % sys.version_info[:2] +_PY_VERSION_SHORT_NO_DOT = '%s%s' % sys.version_info[:2] +_PREFIX = os.path.normpath(sys.prefix) +_EXEC_PREFIX = os.path.normpath(sys.exec_prefix) +_CONFIG_VARS = None +_USER_BASE = None + + +def _subst_vars(path, local_vars): + """In the string `path`, replace tokens like {some.thing} with the + corresponding value from the map `local_vars`. + + If there is no corresponding value, leave the token unchanged. 
+ """ + def _replacer(matchobj): + name = matchobj.group(1) + if name in local_vars: + return local_vars[name] + elif name in os.environ: + return os.environ[name] + return matchobj.group(0) + return _VAR_REPL.sub(_replacer, path) + + +def _extend_dict(target_dict, other_dict): + target_keys = target_dict.keys() + for key, value in other_dict.items(): + if key in target_keys: + continue + target_dict[key] = value + + +def _expand_vars(scheme, vars): + res = {} + if vars is None: + vars = {} + _extend_dict(vars, get_config_vars()) + + for key, value in _SCHEMES.items(scheme): + if os.name in ('posix', 'nt'): + value = os.path.expanduser(value) + res[key] = os.path.normpath(_subst_vars(value, vars)) + return res + + +def format_value(value, vars): + def _replacer(matchobj): + name = matchobj.group(1) + if name in vars: + return vars[name] + return matchobj.group(0) + return _VAR_REPL.sub(_replacer, value) + + +def _get_default_scheme(): + if os.name == 'posix': + # the default scheme for posix is posix_prefix + return 'posix_prefix' + return os.name + + +def _getuserbase(): + env_base = os.environ.get("PYTHONUSERBASE", None) + + def joinuser(*args): + return os.path.expanduser(os.path.join(*args)) + + # what about 'os2emx', 'riscos' ? + if os.name == "nt": + base = os.environ.get("APPDATA") or "~" + if env_base: + return env_base + else: + return joinuser(base, "Python") + + if sys.platform == "darwin": + framework = get_config_var("PYTHONFRAMEWORK") + if framework: + if env_base: + return env_base + else: + return joinuser("~", "Library", framework, "%d.%d" % + sys.version_info[:2]) + + if env_base: + return env_base + else: + return joinuser("~", ".local") + + +def _parse_makefile(filename, vars=None): + """Parse a Makefile-style file. + + A dictionary containing name/value pairs is returned. If an + optional dictionary is passed in as the second argument, it is + used instead of a new dictionary. + """ + # Regexes needed for parsing Makefile (and similar syntaxes, + # like old-style Setup files). + _variable_rx = re.compile(r"([a-zA-Z][a-zA-Z0-9_]+)\s*=\s*(.*)") + _findvar1_rx = re.compile(r"\$\(([A-Za-z][A-Za-z0-9_]*)\)") + _findvar2_rx = re.compile(r"\${([A-Za-z][A-Za-z0-9_]*)}") + + if vars is None: + vars = {} + done = {} + notdone = {} + + with codecs.open(filename, encoding='utf-8', errors="surrogateescape") as f: + lines = f.readlines() + + for line in lines: + if line.startswith('#') or line.strip() == '': + continue + m = _variable_rx.match(line) + if m: + n, v = m.group(1, 2) + v = v.strip() + # `$$' is a literal `$' in make + tmpv = v.replace('$$', '') + + if "$" in tmpv: + notdone[n] = v + else: + try: + v = int(v) + except ValueError: + # insert literal `$' + done[n] = v.replace('$$', '$') + else: + done[n] = v + + # do variable interpolation here + variables = list(notdone.keys()) + + # Variables with a 'PY_' prefix in the makefile. These need to + # be made available without that prefix through sysconfig. + # Special care is needed to ensure that variable expansion works, even + # if the expansion uses the name without a prefix. 
+ renamed_variables = ('CFLAGS', 'LDFLAGS', 'CPPFLAGS') + + while len(variables) > 0: + for name in tuple(variables): + value = notdone[name] + m = _findvar1_rx.search(value) or _findvar2_rx.search(value) + if m is not None: + n = m.group(1) + found = True + if n in done: + item = str(done[n]) + elif n in notdone: + # get it on a subsequent round + found = False + elif n in os.environ: + # do it like make: fall back to environment + item = os.environ[n] + + elif n in renamed_variables: + if (name.startswith('PY_') and + name[3:] in renamed_variables): + item = "" + + elif 'PY_' + n in notdone: + found = False + + else: + item = str(done['PY_' + n]) + + else: + done[n] = item = "" + + if found: + after = value[m.end():] + value = value[:m.start()] + item + after + if "$" in after: + notdone[name] = value + else: + try: + value = int(value) + except ValueError: + done[name] = value.strip() + else: + done[name] = value + variables.remove(name) + + if (name.startswith('PY_') and + name[3:] in renamed_variables): + + name = name[3:] + if name not in done: + done[name] = value + + else: + # bogus variable reference (e.g. "prefix=$/opt/python"); + # just drop it since we can't deal + done[name] = value + variables.remove(name) + + # strip spurious spaces + for k, v in done.items(): + if isinstance(v, str): + done[k] = v.strip() + + # save the results in the global dictionary + vars.update(done) + return vars + + +def get_makefile_filename(): + """Return the path of the Makefile.""" + if _PYTHON_BUILD: + return os.path.join(_PROJECT_BASE, "Makefile") + if hasattr(sys, 'abiflags'): + config_dir_name = 'config-%s%s' % (_PY_VERSION_SHORT, sys.abiflags) + else: + config_dir_name = 'config' + return os.path.join(get_path('stdlib'), config_dir_name, 'Makefile') + + +def _init_posix(vars): + """Initialize the module as appropriate for POSIX systems.""" + # load the installed Makefile: + makefile = get_makefile_filename() + try: + _parse_makefile(makefile, vars) + except IOError as e: + msg = "invalid Python installation: unable to open %s" % makefile + if hasattr(e, "strerror"): + msg = msg + " (%s)" % e.strerror + raise IOError(msg) + # load the installed pyconfig.h: + config_h = get_config_h_filename() + try: + with open(config_h) as f: + parse_config_h(f, vars) + except IOError as e: + msg = "invalid Python installation: unable to open %s" % config_h + if hasattr(e, "strerror"): + msg = msg + " (%s)" % e.strerror + raise IOError(msg) + # On AIX, there are wrong paths to the linker scripts in the Makefile + # -- these paths are relative to the Python source, but when installed + # the scripts are in another directory. + if _PYTHON_BUILD: + vars['LDSHARED'] = vars['BLDSHARED'] + + +def _init_non_posix(vars): + """Initialize the module as appropriate for NT""" + # set basic install directories + vars['LIBDEST'] = get_path('stdlib') + vars['BINLIBDEST'] = get_path('platstdlib') + vars['INCLUDEPY'] = get_path('include') + vars['SO'] = '.pyd' + vars['EXE'] = '.exe' + vars['VERSION'] = _PY_VERSION_SHORT_NO_DOT + vars['BINDIR'] = os.path.dirname(_safe_realpath(sys.executable)) + +# +# public APIs +# + + +def parse_config_h(fp, vars=None): + """Parse a config.h-style file. + + A dictionary containing name/value pairs is returned. If an + optional dictionary is passed in as the second argument, it is + used instead of a new dictionary. 
+ """ + if vars is None: + vars = {} + define_rx = re.compile("#define ([A-Z][A-Za-z0-9_]+) (.*)\n") + undef_rx = re.compile("/[*] #undef ([A-Z][A-Za-z0-9_]+) [*]/\n") + + while True: + line = fp.readline() + if not line: + break + m = define_rx.match(line) + if m: + n, v = m.group(1, 2) + try: + v = int(v) + except ValueError: + pass + vars[n] = v + else: + m = undef_rx.match(line) + if m: + vars[m.group(1)] = 0 + return vars + + +def get_config_h_filename(): + """Return the path of pyconfig.h.""" + if _PYTHON_BUILD: + if os.name == "nt": + inc_dir = os.path.join(_PROJECT_BASE, "PC") + else: + inc_dir = _PROJECT_BASE + else: + inc_dir = get_path('platinclude') + return os.path.join(inc_dir, 'pyconfig.h') + + +def get_scheme_names(): + """Return a tuple containing the schemes names.""" + return tuple(sorted(_SCHEMES.sections())) + + +def get_path_names(): + """Return a tuple containing the paths names.""" + # xxx see if we want a static list + return _SCHEMES.options('posix_prefix') + + +def get_paths(scheme=_get_default_scheme(), vars=None, expand=True): + """Return a mapping containing an install scheme. + + ``scheme`` is the install scheme name. If not provided, it will + return the default scheme for the current platform. + """ + _ensure_cfg_read() + if expand: + return _expand_vars(scheme, vars) + else: + return dict(_SCHEMES.items(scheme)) + + +def get_path(name, scheme=_get_default_scheme(), vars=None, expand=True): + """Return a path corresponding to the scheme. + + ``scheme`` is the install scheme name. + """ + return get_paths(scheme, vars, expand)[name] + + +def get_config_vars(*args): + """With no arguments, return a dictionary of all configuration + variables relevant for the current platform. + + On Unix, this means every variable defined in Python's installed Makefile; + On Windows and Mac OS it's a much smaller set. + + With arguments, return a list of values that result from looking up + each argument in the configuration variable dictionary. + """ + global _CONFIG_VARS + if _CONFIG_VARS is None: + _CONFIG_VARS = {} + # Normalized versions of prefix and exec_prefix are handy to have; + # in fact, these are the standard versions used most places in the + # distutils2 module. + _CONFIG_VARS['prefix'] = _PREFIX + _CONFIG_VARS['exec_prefix'] = _EXEC_PREFIX + _CONFIG_VARS['py_version'] = _PY_VERSION + _CONFIG_VARS['py_version_short'] = _PY_VERSION_SHORT + _CONFIG_VARS['py_version_nodot'] = _PY_VERSION[0] + _PY_VERSION[2] + _CONFIG_VARS['base'] = _PREFIX + _CONFIG_VARS['platbase'] = _EXEC_PREFIX + _CONFIG_VARS['projectbase'] = _PROJECT_BASE + try: + _CONFIG_VARS['abiflags'] = sys.abiflags + except AttributeError: + # sys.abiflags may not be defined on all platforms. + _CONFIG_VARS['abiflags'] = '' + + if os.name in ('nt', 'os2'): + _init_non_posix(_CONFIG_VARS) + if os.name == 'posix': + _init_posix(_CONFIG_VARS) + # Setting 'userbase' is done below the call to the + # init function to enable using 'get_config_var' in + # the init-function. + if sys.version >= '2.6': + _CONFIG_VARS['userbase'] = _getuserbase() + + if 'srcdir' not in _CONFIG_VARS: + _CONFIG_VARS['srcdir'] = _PROJECT_BASE + else: + _CONFIG_VARS['srcdir'] = _safe_realpath(_CONFIG_VARS['srcdir']) + + # Convert srcdir into an absolute path if it appears necessary. + # Normally it is relative to the build directory. However, during + # testing, for example, we might be running a non-installed python + # from a different directory. 
+ if _PYTHON_BUILD and os.name == "posix": + base = _PROJECT_BASE + try: + cwd = os.getcwd() + except OSError: + cwd = None + if (not os.path.isabs(_CONFIG_VARS['srcdir']) and + base != cwd): + # srcdir is relative and we are not in the same directory + # as the executable. Assume executable is in the build + # directory and make srcdir absolute. + srcdir = os.path.join(base, _CONFIG_VARS['srcdir']) + _CONFIG_VARS['srcdir'] = os.path.normpath(srcdir) + + if sys.platform == 'darwin': + kernel_version = os.uname()[2] # Kernel version (8.4.3) + major_version = int(kernel_version.split('.')[0]) + + if major_version < 8: + # On Mac OS X before 10.4, check if -arch and -isysroot + # are in CFLAGS or LDFLAGS and remove them if they are. + # This is needed when building extensions on a 10.3 system + # using a universal build of python. + for key in ('LDFLAGS', 'BASECFLAGS', + # a number of derived variables. These need to be + # patched up as well. + 'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'): + flags = _CONFIG_VARS[key] + flags = re.sub(r'-arch\s+\w+\s', ' ', flags) + flags = re.sub('-isysroot [^ \t]*', ' ', flags) + _CONFIG_VARS[key] = flags + else: + # Allow the user to override the architecture flags using + # an environment variable. + # NOTE: This name was introduced by Apple in OSX 10.5 and + # is used by several scripting languages distributed with + # that OS release. + if 'ARCHFLAGS' in os.environ: + arch = os.environ['ARCHFLAGS'] + for key in ('LDFLAGS', 'BASECFLAGS', + # a number of derived variables. These need to be + # patched up as well. + 'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'): + + flags = _CONFIG_VARS[key] + flags = re.sub(r'-arch\s+\w+\s', ' ', flags) + flags = flags + ' ' + arch + _CONFIG_VARS[key] = flags + + # If we're on OSX 10.5 or later and the user tries to + # compiles an extension using an SDK that is not present + # on the current machine it is better to not use an SDK + # than to fail. + # + # The major usecase for this is users using a Python.org + # binary installer on OSX 10.6: that installer uses + # the 10.4u SDK, but that SDK is not installed by default + # when you install Xcode. + # + CFLAGS = _CONFIG_VARS.get('CFLAGS', '') + m = re.search(r'-isysroot\s+(\S+)', CFLAGS) + if m is not None: + sdk = m.group(1) + if not os.path.exists(sdk): + for key in ('LDFLAGS', 'BASECFLAGS', + # a number of derived variables. These need to be + # patched up as well. + 'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'): + + flags = _CONFIG_VARS[key] + flags = re.sub(r'-isysroot\s+\S+(\s|$)', ' ', flags) + _CONFIG_VARS[key] = flags + + if args: + vals = [] + for name in args: + vals.append(_CONFIG_VARS.get(name)) + return vals + else: + return _CONFIG_VARS + + +def get_config_var(name): + """Return the value of a single variable using the dictionary returned by + 'get_config_vars()'. + + Equivalent to get_config_vars().get(name) + """ + return get_config_vars().get(name) + + +def get_platform(): + """Return a string that identifies the current platform. + + This is used mainly to distinguish platform-specific build directories and + platform-specific built distributions. Typically includes the OS name + and version and the architecture (as supplied by 'os.uname()'), + although the exact information included depends on the OS; eg. for IRIX + the architecture isn't particularly important (IRIX only runs on SGI + hardware), but for Linux the kernel version isn't particularly + important. + + Examples of returned values: + linux-i586 + linux-alpha (?) 
+       solaris-2.6-sun4u
+       irix-5.3
+       irix64-6.2
+
+    Windows will return one of:
+       win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc)
+       win-ia64 (64bit Windows on Itanium)
+       win32 (all others - specifically, sys.platform is returned)
+
+    For other non-POSIX platforms, currently just returns 'sys.platform'.
+    """
+    if os.name == 'nt':
+        # sniff sys.version for architecture.
+        prefix = " bit ("
+        i = sys.version.find(prefix)
+        if i == -1:
+            return sys.platform
+        j = sys.version.find(")", i)
+        look = sys.version[i+len(prefix):j].lower()
+        if look == 'amd64':
+            return 'win-amd64'
+        if look == 'itanium':
+            return 'win-ia64'
+        return sys.platform
+
+    if os.name != "posix" or not hasattr(os, 'uname'):
+        # XXX what about the architecture? NT is Intel or Alpha,
+        # Mac OS is M68k or PPC, etc.
+        return sys.platform
+
+    # Try to distinguish various flavours of Unix
+    osname, host, release, version, machine = os.uname()
+
+    # Convert the OS name to lowercase, remove '/' characters
+    # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
+    osname = osname.lower().replace('/', '')
+    machine = machine.replace(' ', '_')
+    machine = machine.replace('/', '-')
+
+    if osname[:5] == "linux":
+        # At least on Linux/Intel, 'machine' is the processor --
+        # i386, etc.
+        # XXX what about Alpha, SPARC, etc?
+        return "%s-%s" % (osname, machine)
+    elif osname[:5] == "sunos":
+        if release[0] >= "5":           # SunOS 5 == Solaris 2
+            osname = "solaris"
+            release = "%d.%s" % (int(release[0]) - 3, release[2:])
+        # fall through to standard osname-release-machine representation
+    elif osname[:4] == "irix":          # could be "irix64"!
+        return "%s-%s" % (osname, release)
+    elif osname[:3] == "aix":
+        return "%s-%s.%s" % (osname, version, release)
+    elif osname[:6] == "cygwin":
+        osname = "cygwin"
+        rel_re = re.compile(r'[\d.]+')
+        m = rel_re.match(release)
+        if m:
+            release = m.group()
+    elif osname[:6] == "darwin":
+        #
+        # For our purposes, we'll assume that the system version from
+        # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
+        # to. This makes the compatibility story a bit more sane because the
+        # machine is going to compile and link as if it were
+        # MACOSX_DEPLOYMENT_TARGET.
+        cfgvars = get_config_vars()
+        macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET')
+
+        if True:
+            # Always calculate the release of the running machine,
+            # needed to determine if we can build fat binaries or not.
+
+            macrelease = macver
+            # Get the system version. Reading this plist is a documented
+            # way to get the system version (see the documentation for
+            # the Gestalt Manager)
+            try:
+                f = open('/System/Library/CoreServices/SystemVersion.plist')
+            except IOError:
+                # We're on a plain darwin box, fall back to the default
+                # behaviour.
+                pass
+            else:
+                try:
+                    m = re.search(r'<key>ProductUserVisibleVersion</key>\s*'
+                                  r'<string>(.*?)</string>', f.read())
+                finally:
+                    f.close()
+                if m is not None:
+                    macrelease = '.'.join(m.group(1).split('.')[:2])
+                # else: fall back to the default behaviour
+
+        if not macver:
+            macver = macrelease
+
+        if macver:
+            release = macver
+            osname = "macosx"
+
+            if ((macrelease + '.') >= '10.4.' and
+                '-arch' in get_config_vars().get('CFLAGS', '').strip()):
+                # The universal build will build fat binaries, but not on
+                # systems before 10.4
+                #
+                # Try to detect 4-way universal builds, those have machine-type
+                # 'universal' instead of 'fat'.
+
+                machine = 'fat'
+                cflags = get_config_vars().get('CFLAGS')
+
+                archs = re.findall(r'-arch\s+(\S+)', cflags)
+                archs = tuple(sorted(set(archs)))
+
+                if len(archs) == 1:
+                    machine = archs[0]
+                elif archs == ('i386', 'ppc'):
+                    machine = 'fat'
+                elif archs == ('i386', 'x86_64'):
+                    machine = 'intel'
+                elif archs == ('i386', 'ppc', 'x86_64'):
+                    machine = 'fat3'
+                elif archs == ('ppc64', 'x86_64'):
+                    machine = 'fat64'
+                elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'):
+                    machine = 'universal'
+                else:
+                    raise ValueError(
+                        "Don't know machine value for archs=%r" % (archs,))
+
+            elif machine == 'i386':
+                # On OSX the machine type returned by uname is always the
+                # 32-bit variant, even if the executable architecture is
+                # the 64-bit variant
+                if sys.maxsize >= 2**32:
+                    machine = 'x86_64'
+
+            elif machine in ('PowerPC', 'Power_Macintosh'):
+                # Pick a sane name for the PPC architecture.
+                # See 'i386' case
+                if sys.maxsize >= 2**32:
+                    machine = 'ppc64'
+                else:
+                    machine = 'ppc'
+
+    return "%s-%s-%s" % (osname, release, machine)
+
+
+def get_python_version():
+    return _PY_VERSION_SHORT
+
+
+def _print_dict(title, data):
+    for index, (key, value) in enumerate(sorted(data.items())):
+        if index == 0:
+            print('%s: ' % (title))
+        print('\t%s = "%s"' % (key, value))
+
+
+def _main():
+    """Display all information sysconfig detains."""
+    print('Platform: "%s"' % get_platform())
+    print('Python version: "%s"' % get_python_version())
+    print('Current installation scheme: "%s"' % _get_default_scheme())
+    print()
+    _print_dict('Paths', get_paths())
+    print()
+    _print_dict('Variables', get_config_vars())
+
+
+if __name__ == '__main__':
+    _main()
diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/tarfile.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/tarfile.py
new file mode 100644
index 0000000..d66d856
--- /dev/null
+++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/_backport/tarfile.py
@@ -0,0 +1,2607 @@
+#-------------------------------------------------------------------
+# tarfile.py
+#-------------------------------------------------------------------
+# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
+# All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+from __future__ import print_function
+
+"""Read from and write to tar format archives.
+""" + +__version__ = "$Revision$" + +version = "0.9.0" +__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)" +__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $" +__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $" +__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend." + +#--------- +# Imports +#--------- +import sys +import os +import stat +import errno +import time +import struct +import copy +import re + +try: + import grp, pwd +except ImportError: + grp = pwd = None + +# os.symlink on Windows prior to 6.0 raises NotImplementedError +symlink_exception = (AttributeError, NotImplementedError) +try: + # WindowsError (1314) will be raised if the caller does not hold the + # SeCreateSymbolicLinkPrivilege privilege + symlink_exception += (WindowsError,) +except NameError: + pass + +# from tarfile import * +__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"] + +if sys.version_info[0] < 3: + import __builtin__ as builtins +else: + import builtins + +_open = builtins.open # Since 'open' is TarFile.open + +#--------------------------------------------------------- +# tar constants +#--------------------------------------------------------- +NUL = b"\0" # the null character +BLOCKSIZE = 512 # length of processing blocks +RECORDSIZE = BLOCKSIZE * 20 # length of records +GNU_MAGIC = b"ustar \0" # magic gnu tar string +POSIX_MAGIC = b"ustar\x0000" # magic posix tar string + +LENGTH_NAME = 100 # maximum length of a filename +LENGTH_LINK = 100 # maximum length of a linkname +LENGTH_PREFIX = 155 # maximum length of the prefix field + +REGTYPE = b"0" # regular file +AREGTYPE = b"\0" # regular file +LNKTYPE = b"1" # link (inside tarfile) +SYMTYPE = b"2" # symbolic link +CHRTYPE = b"3" # character special device +BLKTYPE = b"4" # block special device +DIRTYPE = b"5" # directory +FIFOTYPE = b"6" # fifo special device +CONTTYPE = b"7" # contiguous file + +GNUTYPE_LONGNAME = b"L" # GNU tar longname +GNUTYPE_LONGLINK = b"K" # GNU tar longlink +GNUTYPE_SPARSE = b"S" # GNU tar sparse file + +XHDTYPE = b"x" # POSIX.1-2001 extended header +XGLTYPE = b"g" # POSIX.1-2001 global header +SOLARIS_XHDTYPE = b"X" # Solaris extended header + +USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format +GNU_FORMAT = 1 # GNU tar format +PAX_FORMAT = 2 # POSIX.1-2001 (pax) format +DEFAULT_FORMAT = GNU_FORMAT + +#--------------------------------------------------------- +# tarfile constants +#--------------------------------------------------------- +# File types that tarfile supports: +SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, + SYMTYPE, DIRTYPE, FIFOTYPE, + CONTTYPE, CHRTYPE, BLKTYPE, + GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, + GNUTYPE_SPARSE) + +# File types that will be treated as a regular file. +REGULAR_TYPES = (REGTYPE, AREGTYPE, + CONTTYPE, GNUTYPE_SPARSE) + +# File types that are part of the GNU tar format. +GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, + GNUTYPE_SPARSE) + +# Fields from a pax header that override a TarInfo attribute. +PAX_FIELDS = ("path", "linkpath", "size", "mtime", + "uid", "gid", "uname", "gname") + +# Fields from a pax header that are affected by hdrcharset. +PAX_NAME_FIELDS = set(("path", "linkpath", "uname", "gname")) + +# Fields in a pax header that are numbers, all other fields +# are treated as strings. 
+PAX_NUMBER_FIELDS = { + "atime": float, + "ctime": float, + "mtime": float, + "uid": int, + "gid": int, + "size": int +} + +#--------------------------------------------------------- +# Bits used in the mode field, values in octal. +#--------------------------------------------------------- +S_IFLNK = 0o120000 # symbolic link +S_IFREG = 0o100000 # regular file +S_IFBLK = 0o060000 # block device +S_IFDIR = 0o040000 # directory +S_IFCHR = 0o020000 # character device +S_IFIFO = 0o010000 # fifo + +TSUID = 0o4000 # set UID on execution +TSGID = 0o2000 # set GID on execution +TSVTX = 0o1000 # reserved + +TUREAD = 0o400 # read by owner +TUWRITE = 0o200 # write by owner +TUEXEC = 0o100 # execute/search by owner +TGREAD = 0o040 # read by group +TGWRITE = 0o020 # write by group +TGEXEC = 0o010 # execute/search by group +TOREAD = 0o004 # read by other +TOWRITE = 0o002 # write by other +TOEXEC = 0o001 # execute/search by other + +#--------------------------------------------------------- +# initialization +#--------------------------------------------------------- +if os.name in ("nt", "ce"): + ENCODING = "utf-8" +else: + ENCODING = sys.getfilesystemencoding() + +#--------------------------------------------------------- +# Some useful functions +#--------------------------------------------------------- + +def stn(s, length, encoding, errors): + """Convert a string to a null-terminated bytes object. + """ + s = s.encode(encoding, errors) + return s[:length] + (length - len(s)) * NUL + +def nts(s, encoding, errors): + """Convert a null-terminated bytes object to a string. + """ + p = s.find(b"\0") + if p != -1: + s = s[:p] + return s.decode(encoding, errors) + +def nti(s): + """Convert a number field to a python number. + """ + # There are two possible encodings for a number field, see + # itn() below. + if s[0] != chr(0o200): + try: + n = int(nts(s, "ascii", "strict") or "0", 8) + except ValueError: + raise InvalidHeaderError("invalid header") + else: + n = 0 + for i in range(len(s) - 1): + n <<= 8 + n += ord(s[i + 1]) + return n + +def itn(n, digits=8, format=DEFAULT_FORMAT): + """Convert a python number to a number field. + """ + # POSIX 1003.1-1988 requires numbers to be encoded as a string of + # octal digits followed by a null-byte, this allows values up to + # (8**(digits-1))-1. GNU tar allows storing numbers greater than + # that if necessary. A leading 0o200 byte indicates this particular + # encoding, the following digits-1 bytes are a big-endian + # representation. This allows values up to (256**(digits-1))-1. + if 0 <= n < 8 ** (digits - 1): + s = ("%0*o" % (digits - 1, n)).encode("ascii") + NUL + else: + if format != GNU_FORMAT or n >= 256 ** (digits - 1): + raise ValueError("overflow in number field") + + if n < 0: + # XXX We mimic GNU tar's behaviour with negative numbers, + # this could raise OverflowError. + n = struct.unpack("L", struct.pack("l", n))[0] + + s = bytearray() + for i in range(digits - 1): + s.insert(0, n & 0o377) + n >>= 8 + s.insert(0, 0o200) + return s + +def calc_chksums(buf): + """Calculate the checksum for a member's header by summing up all + characters except for the chksum field which is treated as if + it was filled with spaces. According to the GNU tar sources, + some tars (Sun and NeXT) calculate chksum with signed char, + which will be different if there are chars in the buffer with + the high bit set. So we calculate two checksums, unsigned and + signed. 
+ """ + unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512])) + signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512])) + return unsigned_chksum, signed_chksum + +def copyfileobj(src, dst, length=None): + """Copy length bytes from fileobj src to fileobj dst. + If length is None, copy the entire content. + """ + if length == 0: + return + if length is None: + while True: + buf = src.read(16*1024) + if not buf: + break + dst.write(buf) + return + + BUFSIZE = 16 * 1024 + blocks, remainder = divmod(length, BUFSIZE) + for b in range(blocks): + buf = src.read(BUFSIZE) + if len(buf) < BUFSIZE: + raise IOError("end of file reached") + dst.write(buf) + + if remainder != 0: + buf = src.read(remainder) + if len(buf) < remainder: + raise IOError("end of file reached") + dst.write(buf) + return + +filemode_table = ( + ((S_IFLNK, "l"), + (S_IFREG, "-"), + (S_IFBLK, "b"), + (S_IFDIR, "d"), + (S_IFCHR, "c"), + (S_IFIFO, "p")), + + ((TUREAD, "r"),), + ((TUWRITE, "w"),), + ((TUEXEC|TSUID, "s"), + (TSUID, "S"), + (TUEXEC, "x")), + + ((TGREAD, "r"),), + ((TGWRITE, "w"),), + ((TGEXEC|TSGID, "s"), + (TSGID, "S"), + (TGEXEC, "x")), + + ((TOREAD, "r"),), + ((TOWRITE, "w"),), + ((TOEXEC|TSVTX, "t"), + (TSVTX, "T"), + (TOEXEC, "x")) +) + +def filemode(mode): + """Convert a file's mode to a string of the form + -rwxrwxrwx. + Used by TarFile.list() + """ + perm = [] + for table in filemode_table: + for bit, char in table: + if mode & bit == bit: + perm.append(char) + break + else: + perm.append("-") + return "".join(perm) + +class TarError(Exception): + """Base exception.""" + pass +class ExtractError(TarError): + """General exception for extract errors.""" + pass +class ReadError(TarError): + """Exception for unreadable tar archives.""" + pass +class CompressionError(TarError): + """Exception for unavailable compression methods.""" + pass +class StreamError(TarError): + """Exception for unsupported operations on stream-like TarFiles.""" + pass +class HeaderError(TarError): + """Base exception for header errors.""" + pass +class EmptyHeaderError(HeaderError): + """Exception for empty headers.""" + pass +class TruncatedHeaderError(HeaderError): + """Exception for truncated headers.""" + pass +class EOFHeaderError(HeaderError): + """Exception for end of file headers.""" + pass +class InvalidHeaderError(HeaderError): + """Exception for invalid headers.""" + pass +class SubsequentHeaderError(HeaderError): + """Exception for missing and invalid extended headers.""" + pass + +#--------------------------- +# internal stream interface +#--------------------------- +class _LowLevelFile(object): + """Low-level file object. Supports reading and writing. + It is used instead of a regular file object for streaming + access. + """ + + def __init__(self, name, mode): + mode = { + "r": os.O_RDONLY, + "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC, + }[mode] + if hasattr(os, "O_BINARY"): + mode |= os.O_BINARY + self.fd = os.open(name, mode, 0o666) + + def close(self): + os.close(self.fd) + + def read(self, size): + return os.read(self.fd, size) + + def write(self, s): + os.write(self.fd, s) + +class _Stream(object): + """Class that serves as an adapter between TarFile and + a stream-like object. The stream-like object only + needs to have a read() or write() method and is accessed + blockwise. Use of gzip or bzip2 compression is possible. + A stream-like object could be for example: sys.stdin, + sys.stdout, a socket, a tape device etc. 
+
+       _Stream is intended to be used only internally.
+    """
+
+    def __init__(self, name, mode, comptype, fileobj, bufsize):
+        """Construct a _Stream object.
+        """
+        self._extfileobj = True
+        if fileobj is None:
+            fileobj = _LowLevelFile(name, mode)
+            self._extfileobj = False
+
+        if comptype == '*':
+            # Enable transparent compression detection for the
+            # stream interface
+            fileobj = _StreamProxy(fileobj)
+            comptype = fileobj.getcomptype()
+
+        self.name = name or ""
+        self.mode = mode
+        self.comptype = comptype
+        self.fileobj = fileobj
+        self.bufsize = bufsize
+        self.buf = b""
+        self.pos = 0
+        self.closed = False
+
+        try:
+            if comptype == "gz":
+                try:
+                    import zlib
+                except ImportError:
+                    raise CompressionError("zlib module is not available")
+                self.zlib = zlib
+                self.crc = zlib.crc32(b"")
+                if mode == "r":
+                    self._init_read_gz()
+                else:
+                    self._init_write_gz()
+
+            if comptype == "bz2":
+                try:
+                    import bz2
+                except ImportError:
+                    raise CompressionError("bz2 module is not available")
+                if mode == "r":
+                    self.dbuf = b""
+                    self.cmp = bz2.BZ2Decompressor()
+                else:
+                    self.cmp = bz2.BZ2Compressor()
+        except:
+            if not self._extfileobj:
+                self.fileobj.close()
+            self.closed = True
+            raise
+
+    def __del__(self):
+        if hasattr(self, "closed") and not self.closed:
+            self.close()
+
+    def _init_write_gz(self):
+        """Initialize for writing with gzip compression.
+        """
+        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
+                                         -self.zlib.MAX_WBITS,
+                                         self.zlib.DEF_MEM_LEVEL,
+                                         0)
+        timestamp = struct.pack("<L", int(time.time()))
+        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
+        if self.name.endswith(".gz"):
+            self.name = self.name[:-3]
+        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
+        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
+
+    def write(self, s):
+        """Write string s to the stream.
+        """
+        if self.comptype == "gz":
+            self.crc = self.zlib.crc32(s, self.crc)
+        self.pos += len(s)
+        if self.comptype != "tar":
+            s = self.cmp.compress(s)
+        self.__write(s)
+
+    def __write(self, s):
+        """Write string s to the stream if a whole new block
+           is ready to be written.
+        """
+        self.buf += s
+        while len(self.buf) > self.bufsize:
+            self.fileobj.write(self.buf[:self.bufsize])
+            self.buf = self.buf[self.bufsize:]
+
+    def close(self):
+        """Close the _Stream object. No operation should be
+           done on it afterwards.
+        """
+        if self.closed:
+            return
+
+        if self.mode == "w" and self.comptype != "tar":
+            self.buf += self.cmp.flush()
+
+        if self.mode == "w" and self.buf:
+            self.fileobj.write(self.buf)
+            self.buf = b""
+            if self.comptype == "gz":
+                # The native zlib crc is an unsigned 32-bit integer, but
+                # the Python wrapper implicitly casts that to a signed C
+                # long. So, on a 32-bit box self.crc may "look negative",
+                # while the same crc on a 64-bit box may "look positive".
+                # To avoid irksome warnings from the `struct` module, force
+                # it to look positive on all boxes.
+                self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
+                self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
+
+        if not self._extfileobj:
+            self.fileobj.close()
+
+        self.closed = True
+
+    def _init_read_gz(self):
+        """Initialize for reading a gzip compressed fileobj.
+        """
+        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
+        self.dbuf = b""
+
+        # taken from gzip.GzipFile with some alterations
+        if self.__read(2) != b"\037\213":
+            raise ReadError("not a gzip file")
+        if self.__read(1) != b"\010":
+            raise CompressionError("unsupported compression method")
+
+        flag = ord(self.__read(1))
+        self.__read(6)
+
+        if flag & 4:
+            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
+            self.read(xlen)
+        if flag & 8:
+            while True:
+                s = self.__read(1)
+                if not s or s == NUL:
+                    break
+        if flag & 16:
+            while True:
+                s = self.__read(1)
+                if not s or s == NUL:
+                    break
+        if flag & 2:
+            self.__read(2)
+
+    def tell(self):
+        """Return the stream's file pointer position.
+        """
+        return self.pos
+
+    def seek(self, pos=0):
+        """Set the stream's file pointer to pos. Negative seeking
+           is forbidden.
+        """
+        if pos - self.pos >= 0:
+            blocks, remainder = divmod(pos - self.pos, self.bufsize)
+            for i in range(blocks):
+                self.read(self.bufsize)
+            self.read(remainder)
+        else:
+            raise StreamError("seeking backwards is not allowed")
+        return self.pos
+
+    def read(self, size=None):
+        """Return the next size number of bytes from the stream.
+           If size is not defined, return all bytes of the stream
+           up to EOF.
+        """
+        if size is None:
+            t = []
+            while True:
+                buf = self._read(self.bufsize)
+                if not buf:
+                    break
+                t.append(buf)
+            buf = "".join(t)
+        else:
+            buf = self._read(size)
+        self.pos += len(buf)
+        return buf
+
+    def _read(self, size):
+        """Return size bytes from the stream.
+        """
+        if self.comptype == "tar":
+            return self.__read(size)
+
+        c = len(self.dbuf)
+        while c < size:
+            buf = self.__read(self.bufsize)
+            if not buf:
+                break
+            try:
+                buf = self.cmp.decompress(buf)
+            except IOError:
+                raise ReadError("invalid compressed data")
+            self.dbuf += buf
+            c += len(buf)
+        buf = self.dbuf[:size]
+        self.dbuf = self.dbuf[size:]
+        return buf
+
+    def __read(self, size):
+        """Return size bytes from stream. If internal buffer is empty,
+           read another block from the stream.
+ """ + c = len(self.buf) + while c < size: + buf = self.fileobj.read(self.bufsize) + if not buf: + break + self.buf += buf + c += len(buf) + buf = self.buf[:size] + self.buf = self.buf[size:] + return buf +# class _Stream + +class _StreamProxy(object): + """Small proxy class that enables transparent compression + detection for the Stream interface (mode 'r|*'). + """ + + def __init__(self, fileobj): + self.fileobj = fileobj + self.buf = self.fileobj.read(BLOCKSIZE) + + def read(self, size): + self.read = self.fileobj.read + return self.buf + + def getcomptype(self): + if self.buf.startswith(b"\037\213\010"): + return "gz" + if self.buf.startswith(b"BZh91"): + return "bz2" + return "tar" + + def close(self): + self.fileobj.close() +# class StreamProxy + +class _BZ2Proxy(object): + """Small proxy class that enables external file object + support for "r:bz2" and "w:bz2" modes. This is actually + a workaround for a limitation in bz2 module's BZ2File + class which (unlike gzip.GzipFile) has no support for + a file object argument. + """ + + blocksize = 16 * 1024 + + def __init__(self, fileobj, mode): + self.fileobj = fileobj + self.mode = mode + self.name = getattr(self.fileobj, "name", None) + self.init() + + def init(self): + import bz2 + self.pos = 0 + if self.mode == "r": + self.bz2obj = bz2.BZ2Decompressor() + self.fileobj.seek(0) + self.buf = b"" + else: + self.bz2obj = bz2.BZ2Compressor() + + def read(self, size): + x = len(self.buf) + while x < size: + raw = self.fileobj.read(self.blocksize) + if not raw: + break + data = self.bz2obj.decompress(raw) + self.buf += data + x += len(data) + + buf = self.buf[:size] + self.buf = self.buf[size:] + self.pos += len(buf) + return buf + + def seek(self, pos): + if pos < self.pos: + self.init() + self.read(pos - self.pos) + + def tell(self): + return self.pos + + def write(self, data): + self.pos += len(data) + raw = self.bz2obj.compress(data) + self.fileobj.write(raw) + + def close(self): + if self.mode == "w": + raw = self.bz2obj.flush() + self.fileobj.write(raw) +# class _BZ2Proxy + +#------------------------ +# Extraction file object +#------------------------ +class _FileInFile(object): + """A thin wrapper around an existing file object that + provides a part of its data as an individual file + object. + """ + + def __init__(self, fileobj, offset, size, blockinfo=None): + self.fileobj = fileobj + self.offset = offset + self.size = size + self.position = 0 + + if blockinfo is None: + blockinfo = [(0, size)] + + # Construct a map with data and zero blocks. + self.map_index = 0 + self.map = [] + lastpos = 0 + realpos = self.offset + for offset, size in blockinfo: + if offset > lastpos: + self.map.append((False, lastpos, offset, None)) + self.map.append((True, offset, offset + size, realpos)) + realpos += size + lastpos = offset + size + if lastpos < self.size: + self.map.append((False, lastpos, self.size, None)) + + def seekable(self): + if not hasattr(self.fileobj, "seekable"): + # XXX gzip.GzipFile and bz2.BZ2File + return True + return self.fileobj.seekable() + + def tell(self): + """Return the current file position. + """ + return self.position + + def seek(self, position): + """Seek to a position in the file. + """ + self.position = position + + def read(self, size=None): + """Read data from the file. 
+ """ + if size is None: + size = self.size - self.position + else: + size = min(size, self.size - self.position) + + buf = b"" + while size > 0: + while True: + data, start, stop, offset = self.map[self.map_index] + if start <= self.position < stop: + break + else: + self.map_index += 1 + if self.map_index == len(self.map): + self.map_index = 0 + length = min(size, stop - self.position) + if data: + self.fileobj.seek(offset + (self.position - start)) + buf += self.fileobj.read(length) + else: + buf += NUL * length + size -= length + self.position += length + return buf +#class _FileInFile + + +class ExFileObject(object): + """File-like object for reading an archive member. + Is returned by TarFile.extractfile(). + """ + blocksize = 1024 + + def __init__(self, tarfile, tarinfo): + self.fileobj = _FileInFile(tarfile.fileobj, + tarinfo.offset_data, + tarinfo.size, + tarinfo.sparse) + self.name = tarinfo.name + self.mode = "r" + self.closed = False + self.size = tarinfo.size + + self.position = 0 + self.buffer = b"" + + def readable(self): + return True + + def writable(self): + return False + + def seekable(self): + return self.fileobj.seekable() + + def read(self, size=None): + """Read at most size bytes from the file. If size is not + present or None, read all data until EOF is reached. + """ + if self.closed: + raise ValueError("I/O operation on closed file") + + buf = b"" + if self.buffer: + if size is None: + buf = self.buffer + self.buffer = b"" + else: + buf = self.buffer[:size] + self.buffer = self.buffer[size:] + + if size is None: + buf += self.fileobj.read() + else: + buf += self.fileobj.read(size - len(buf)) + + self.position += len(buf) + return buf + + # XXX TextIOWrapper uses the read1() method. + read1 = read + + def readline(self, size=-1): + """Read one entire line from the file. If size is present + and non-negative, return a string with at most that + size, which may be an incomplete line. + """ + if self.closed: + raise ValueError("I/O operation on closed file") + + pos = self.buffer.find(b"\n") + 1 + if pos == 0: + # no newline found. + while True: + buf = self.fileobj.read(self.blocksize) + self.buffer += buf + if not buf or b"\n" in buf: + pos = self.buffer.find(b"\n") + 1 + if pos == 0: + # no newline found. + pos = len(self.buffer) + break + + if size != -1: + pos = min(size, pos) + + buf = self.buffer[:pos] + self.buffer = self.buffer[pos:] + self.position += len(buf) + return buf + + def readlines(self): + """Return a list with all remaining lines. + """ + result = [] + while True: + line = self.readline() + if not line: break + result.append(line) + return result + + def tell(self): + """Return the current file position. + """ + if self.closed: + raise ValueError("I/O operation on closed file") + + return self.position + + def seek(self, pos, whence=os.SEEK_SET): + """Seek to a position in the file. + """ + if self.closed: + raise ValueError("I/O operation on closed file") + + if whence == os.SEEK_SET: + self.position = min(max(pos, 0), self.size) + elif whence == os.SEEK_CUR: + if pos < 0: + self.position = max(self.position + pos, 0) + else: + self.position = min(self.position + pos, self.size) + elif whence == os.SEEK_END: + self.position = max(min(self.size + pos, self.size), 0) + else: + raise ValueError("Invalid argument") + + self.buffer = b"" + self.fileobj.seek(self.position) + + def close(self): + """Close the file object. + """ + self.closed = True + + def __iter__(self): + """Get an iterator over the file's lines. 
+ """ + while True: + line = self.readline() + if not line: + break + yield line +#class ExFileObject + +#------------------ +# Exported Classes +#------------------ +class TarInfo(object): + """Informational class which holds the details about an + archive member given by a tar header block. + TarInfo objects are returned by TarFile.getmember(), + TarFile.getmembers() and TarFile.gettarinfo() and are + usually created internally. + """ + + __slots__ = ("name", "mode", "uid", "gid", "size", "mtime", + "chksum", "type", "linkname", "uname", "gname", + "devmajor", "devminor", + "offset", "offset_data", "pax_headers", "sparse", + "tarfile", "_sparse_structs", "_link_target") + + def __init__(self, name=""): + """Construct a TarInfo object. name is the optional name + of the member. + """ + self.name = name # member name + self.mode = 0o644 # file permissions + self.uid = 0 # user id + self.gid = 0 # group id + self.size = 0 # file size + self.mtime = 0 # modification time + self.chksum = 0 # header checksum + self.type = REGTYPE # member type + self.linkname = "" # link name + self.uname = "" # user name + self.gname = "" # group name + self.devmajor = 0 # device major number + self.devminor = 0 # device minor number + + self.offset = 0 # the tar header starts here + self.offset_data = 0 # the file's data starts here + + self.sparse = None # sparse member information + self.pax_headers = {} # pax header information + + # In pax headers the "name" and "linkname" field are called + # "path" and "linkpath". + def _getpath(self): + return self.name + def _setpath(self, name): + self.name = name + path = property(_getpath, _setpath) + + def _getlinkpath(self): + return self.linkname + def _setlinkpath(self, linkname): + self.linkname = linkname + linkpath = property(_getlinkpath, _setlinkpath) + + def __repr__(self): + return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) + + def get_info(self): + """Return the TarInfo's attributes as a dictionary. + """ + info = { + "name": self.name, + "mode": self.mode & 0o7777, + "uid": self.uid, + "gid": self.gid, + "size": self.size, + "mtime": self.mtime, + "chksum": self.chksum, + "type": self.type, + "linkname": self.linkname, + "uname": self.uname, + "gname": self.gname, + "devmajor": self.devmajor, + "devminor": self.devminor + } + + if info["type"] == DIRTYPE and not info["name"].endswith("/"): + info["name"] += "/" + + return info + + def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"): + """Return a tar header as a string of 512 byte blocks. + """ + info = self.get_info() + + if format == USTAR_FORMAT: + return self.create_ustar_header(info, encoding, errors) + elif format == GNU_FORMAT: + return self.create_gnu_header(info, encoding, errors) + elif format == PAX_FORMAT: + return self.create_pax_header(info, encoding) + else: + raise ValueError("invalid format") + + def create_ustar_header(self, info, encoding, errors): + """Return the object as a ustar header block. + """ + info["magic"] = POSIX_MAGIC + + if len(info["linkname"]) > LENGTH_LINK: + raise ValueError("linkname is too long") + + if len(info["name"]) > LENGTH_NAME: + info["prefix"], info["name"] = self._posix_split_name(info["name"]) + + return self._create_header(info, USTAR_FORMAT, encoding, errors) + + def create_gnu_header(self, info, encoding, errors): + """Return the object as a GNU header block sequence. 
+ """ + info["magic"] = GNU_MAGIC + + buf = b"" + if len(info["linkname"]) > LENGTH_LINK: + buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors) + + if len(info["name"]) > LENGTH_NAME: + buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors) + + return buf + self._create_header(info, GNU_FORMAT, encoding, errors) + + def create_pax_header(self, info, encoding): + """Return the object as a ustar header block. If it cannot be + represented this way, prepend a pax extended header sequence + with supplement information. + """ + info["magic"] = POSIX_MAGIC + pax_headers = self.pax_headers.copy() + + # Test string fields for values that exceed the field length or cannot + # be represented in ASCII encoding. + for name, hname, length in ( + ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK), + ("uname", "uname", 32), ("gname", "gname", 32)): + + if hname in pax_headers: + # The pax header has priority. + continue + + # Try to encode the string as ASCII. + try: + info[name].encode("ascii", "strict") + except UnicodeEncodeError: + pax_headers[hname] = info[name] + continue + + if len(info[name]) > length: + pax_headers[hname] = info[name] + + # Test number fields for values that exceed the field limit or values + # that like to be stored as float. + for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)): + if name in pax_headers: + # The pax header has priority. Avoid overflow. + info[name] = 0 + continue + + val = info[name] + if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float): + pax_headers[name] = str(val) + info[name] = 0 + + # Create a pax extended header if necessary. + if pax_headers: + buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding) + else: + buf = b"" + + return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace") + + @classmethod + def create_pax_global_header(cls, pax_headers): + """Return the object as a pax global header block sequence. + """ + return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf8") + + def _posix_split_name(self, name): + """Split a name longer than 100 chars into a prefix + and a name part. + """ + prefix = name[:LENGTH_PREFIX + 1] + while prefix and prefix[-1] != "/": + prefix = prefix[:-1] + + name = name[len(prefix):] + prefix = prefix[:-1] + + if not prefix or len(name) > LENGTH_NAME: + raise ValueError("name is too long") + return prefix, name + + @staticmethod + def _create_header(info, format, encoding, errors): + """Return a header block. info is a dictionary with file + information, format must be one of the *_FORMAT constants. 
+ """ + parts = [ + stn(info.get("name", ""), 100, encoding, errors), + itn(info.get("mode", 0) & 0o7777, 8, format), + itn(info.get("uid", 0), 8, format), + itn(info.get("gid", 0), 8, format), + itn(info.get("size", 0), 12, format), + itn(info.get("mtime", 0), 12, format), + b" ", # checksum field + info.get("type", REGTYPE), + stn(info.get("linkname", ""), 100, encoding, errors), + info.get("magic", POSIX_MAGIC), + stn(info.get("uname", ""), 32, encoding, errors), + stn(info.get("gname", ""), 32, encoding, errors), + itn(info.get("devmajor", 0), 8, format), + itn(info.get("devminor", 0), 8, format), + stn(info.get("prefix", ""), 155, encoding, errors) + ] + + buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts)) + chksum = calc_chksums(buf[-BLOCKSIZE:])[0] + buf = buf[:-364] + ("%06o\0" % chksum).encode("ascii") + buf[-357:] + return buf + + @staticmethod + def _create_payload(payload): + """Return the string payload filled with zero bytes + up to the next 512 byte border. + """ + blocks, remainder = divmod(len(payload), BLOCKSIZE) + if remainder > 0: + payload += (BLOCKSIZE - remainder) * NUL + return payload + + @classmethod + def _create_gnu_long_header(cls, name, type, encoding, errors): + """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence + for name. + """ + name = name.encode(encoding, errors) + NUL + + info = {} + info["name"] = "././@LongLink" + info["type"] = type + info["size"] = len(name) + info["magic"] = GNU_MAGIC + + # create extended header + name blocks. + return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \ + cls._create_payload(name) + + @classmethod + def _create_pax_generic_header(cls, pax_headers, type, encoding): + """Return a POSIX.1-2008 extended or global header sequence + that contains a list of keyword, value pairs. The values + must be strings. + """ + # Check if one of the fields contains surrogate characters and thereby + # forces hdrcharset=BINARY, see _proc_pax() for more information. + binary = False + for keyword, value in pax_headers.items(): + try: + value.encode("utf8", "strict") + except UnicodeEncodeError: + binary = True + break + + records = b"" + if binary: + # Put the hdrcharset field at the beginning of the header. + records += b"21 hdrcharset=BINARY\n" + + for keyword, value in pax_headers.items(): + keyword = keyword.encode("utf8") + if binary: + # Try to restore the original byte representation of `value'. + # Needless to say, that the encoding must match the string. + value = value.encode(encoding, "surrogateescape") + else: + value = value.encode("utf8") + + l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n' + n = p = 0 + while True: + n = l + len(str(p)) + if n == p: + break + p = n + records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n" + + # We use a hardcoded "././@PaxHeader" name like star does + # instead of the one that POSIX recommends. + info = {} + info["name"] = "././@PaxHeader" + info["type"] = type + info["size"] = len(records) + info["magic"] = POSIX_MAGIC + + # Create pax header + record blocks. + return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \ + cls._create_payload(records) + + @classmethod + def frombuf(cls, buf, encoding, errors): + """Construct a TarInfo object from a 512 byte bytes object. 
+ """ + if len(buf) == 0: + raise EmptyHeaderError("empty header") + if len(buf) != BLOCKSIZE: + raise TruncatedHeaderError("truncated header") + if buf.count(NUL) == BLOCKSIZE: + raise EOFHeaderError("end of file header") + + chksum = nti(buf[148:156]) + if chksum not in calc_chksums(buf): + raise InvalidHeaderError("bad checksum") + + obj = cls() + obj.name = nts(buf[0:100], encoding, errors) + obj.mode = nti(buf[100:108]) + obj.uid = nti(buf[108:116]) + obj.gid = nti(buf[116:124]) + obj.size = nti(buf[124:136]) + obj.mtime = nti(buf[136:148]) + obj.chksum = chksum + obj.type = buf[156:157] + obj.linkname = nts(buf[157:257], encoding, errors) + obj.uname = nts(buf[265:297], encoding, errors) + obj.gname = nts(buf[297:329], encoding, errors) + obj.devmajor = nti(buf[329:337]) + obj.devminor = nti(buf[337:345]) + prefix = nts(buf[345:500], encoding, errors) + + # Old V7 tar format represents a directory as a regular + # file with a trailing slash. + if obj.type == AREGTYPE and obj.name.endswith("/"): + obj.type = DIRTYPE + + # The old GNU sparse format occupies some of the unused + # space in the buffer for up to 4 sparse structures. + # Save the them for later processing in _proc_sparse(). + if obj.type == GNUTYPE_SPARSE: + pos = 386 + structs = [] + for i in range(4): + try: + offset = nti(buf[pos:pos + 12]) + numbytes = nti(buf[pos + 12:pos + 24]) + except ValueError: + break + structs.append((offset, numbytes)) + pos += 24 + isextended = bool(buf[482]) + origsize = nti(buf[483:495]) + obj._sparse_structs = (structs, isextended, origsize) + + # Remove redundant slashes from directories. + if obj.isdir(): + obj.name = obj.name.rstrip("/") + + # Reconstruct a ustar longname. + if prefix and obj.type not in GNU_TYPES: + obj.name = prefix + "/" + obj.name + return obj + + @classmethod + def fromtarfile(cls, tarfile): + """Return the next TarInfo object from TarFile object + tarfile. + """ + buf = tarfile.fileobj.read(BLOCKSIZE) + obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors) + obj.offset = tarfile.fileobj.tell() - BLOCKSIZE + return obj._proc_member(tarfile) + + #-------------------------------------------------------------------------- + # The following are methods that are called depending on the type of a + # member. The entry point is _proc_member() which can be overridden in a + # subclass to add custom _proc_*() methods. A _proc_*() method MUST + # implement the following + # operations: + # 1. Set self.offset_data to the position where the data blocks begin, + # if there is data that follows. + # 2. Set tarfile.offset to the position where the next member's header will + # begin. + # 3. Return self or another valid TarInfo object. + def _proc_member(self, tarfile): + """Choose the right processing method depending on + the type and call it. + """ + if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK): + return self._proc_gnulong(tarfile) + elif self.type == GNUTYPE_SPARSE: + return self._proc_sparse(tarfile) + elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE): + return self._proc_pax(tarfile) + else: + return self._proc_builtin(tarfile) + + def _proc_builtin(self, tarfile): + """Process a builtin type or an unknown type which + will be treated as a regular file. + """ + self.offset_data = tarfile.fileobj.tell() + offset = self.offset_data + if self.isreg() or self.type not in SUPPORTED_TYPES: + # Skip the following data blocks. + offset += self._block(self.size) + tarfile.offset = offset + + # Patch the TarInfo object with saved global + # header information. 
+ self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors) + + return self + + def _proc_gnulong(self, tarfile): + """Process the blocks that hold a GNU longname + or longlink member. + """ + buf = tarfile.fileobj.read(self._block(self.size)) + + # Fetch the next header and process it. + try: + next = self.fromtarfile(tarfile) + except HeaderError: + raise SubsequentHeaderError("missing or bad subsequent header") + + # Patch the TarInfo object from the next header with + # the longname information. + next.offset = self.offset + if self.type == GNUTYPE_LONGNAME: + next.name = nts(buf, tarfile.encoding, tarfile.errors) + elif self.type == GNUTYPE_LONGLINK: + next.linkname = nts(buf, tarfile.encoding, tarfile.errors) + + return next + + def _proc_sparse(self, tarfile): + """Process a GNU sparse header plus extra headers. + """ + # We already collected some sparse structures in frombuf(). + structs, isextended, origsize = self._sparse_structs + del self._sparse_structs + + # Collect sparse structures from extended header blocks. + while isextended: + buf = tarfile.fileobj.read(BLOCKSIZE) + pos = 0 + for i in range(21): + try: + offset = nti(buf[pos:pos + 12]) + numbytes = nti(buf[pos + 12:pos + 24]) + except ValueError: + break + if offset and numbytes: + structs.append((offset, numbytes)) + pos += 24 + isextended = bool(buf[504]) + self.sparse = structs + + self.offset_data = tarfile.fileobj.tell() + tarfile.offset = self.offset_data + self._block(self.size) + self.size = origsize + return self + + def _proc_pax(self, tarfile): + """Process an extended or global header as described in + POSIX.1-2008. + """ + # Read the header information. + buf = tarfile.fileobj.read(self._block(self.size)) + + # A pax header stores supplemental information for either + # the following file (extended) or all following files + # (global). + if self.type == XGLTYPE: + pax_headers = tarfile.pax_headers + else: + pax_headers = tarfile.pax_headers.copy() + + # Check if the pax header contains a hdrcharset field. This tells us + # the encoding of the path, linkpath, uname and gname fields. Normally, + # these fields are UTF-8 encoded but since POSIX.1-2008 tar + # implementations are allowed to store them as raw binary strings if + # the translation to UTF-8 fails. + match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf) + if match is not None: + pax_headers["hdrcharset"] = match.group(1).decode("utf8") + + # For the time being, we don't care about anything other than "BINARY". + # The only other value that is currently allowed by the standard is + # "ISO-IR 10646 2000 UTF-8" in other words UTF-8. + hdrcharset = pax_headers.get("hdrcharset") + if hdrcharset == "BINARY": + encoding = tarfile.encoding + else: + encoding = "utf8" + + # Parse pax header information. A record looks like that: + # "%d %s=%s\n" % (length, keyword, value). length is the size + # of the complete record including the length field itself and + # the newline. keyword and value are both UTF-8 encoded strings. + regex = re.compile(br"(\d+) ([^=]+)=") + pos = 0 + while True: + match = regex.match(buf, pos) + if not match: + break + + length, keyword = match.groups() + length = int(length) + value = buf[match.end(2) + 1:match.start(1) + length - 1] + + # Normally, we could just use "utf8" as the encoding and "strict" + # as the error handler, but we better not take the risk. 
For + # example, GNU tar <= 1.23 is known to store filenames it cannot + # translate to UTF-8 as raw strings (unfortunately without a + # hdrcharset=BINARY header). + # We first try the strict standard encoding, and if that fails we + # fall back on the user's encoding and error handler. + keyword = self._decode_pax_field(keyword, "utf8", "utf8", + tarfile.errors) + if keyword in PAX_NAME_FIELDS: + value = self._decode_pax_field(value, encoding, tarfile.encoding, + tarfile.errors) + else: + value = self._decode_pax_field(value, "utf8", "utf8", + tarfile.errors) + + pax_headers[keyword] = value + pos += length + + # Fetch the next header. + try: + next = self.fromtarfile(tarfile) + except HeaderError: + raise SubsequentHeaderError("missing or bad subsequent header") + + # Process GNU sparse information. + if "GNU.sparse.map" in pax_headers: + # GNU extended sparse format version 0.1. + self._proc_gnusparse_01(next, pax_headers) + + elif "GNU.sparse.size" in pax_headers: + # GNU extended sparse format version 0.0. + self._proc_gnusparse_00(next, pax_headers, buf) + + elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0": + # GNU extended sparse format version 1.0. + self._proc_gnusparse_10(next, pax_headers, tarfile) + + if self.type in (XHDTYPE, SOLARIS_XHDTYPE): + # Patch the TarInfo object with the extended header info. + next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors) + next.offset = self.offset + + if "size" in pax_headers: + # If the extended header replaces the size field, + # we need to recalculate the offset where the next + # header starts. + offset = next.offset_data + if next.isreg() or next.type not in SUPPORTED_TYPES: + offset += next._block(next.size) + tarfile.offset = offset + + return next + + def _proc_gnusparse_00(self, next, pax_headers, buf): + """Process a GNU tar extended sparse header, version 0.0. + """ + offsets = [] + for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf): + offsets.append(int(match.group(1))) + numbytes = [] + for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf): + numbytes.append(int(match.group(1))) + next.sparse = list(zip(offsets, numbytes)) + + def _proc_gnusparse_01(self, next, pax_headers): + """Process a GNU tar extended sparse header, version 0.1. + """ + sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")] + next.sparse = list(zip(sparse[::2], sparse[1::2])) + + def _proc_gnusparse_10(self, next, pax_headers, tarfile): + """Process a GNU tar extended sparse header, version 1.0. + """ + fields = None + sparse = [] + buf = tarfile.fileobj.read(BLOCKSIZE) + fields, buf = buf.split(b"\n", 1) + fields = int(fields) + while len(sparse) < fields * 2: + if b"\n" not in buf: + buf += tarfile.fileobj.read(BLOCKSIZE) + number, buf = buf.split(b"\n", 1) + sparse.append(int(number)) + next.offset_data = tarfile.fileobj.tell() + next.sparse = list(zip(sparse[::2], sparse[1::2])) + + def _apply_pax_info(self, pax_headers, encoding, errors): + """Replace fields with supplemental information from a previous + pax extended or global header. 
+ """ + for keyword, value in pax_headers.items(): + if keyword == "GNU.sparse.name": + setattr(self, "path", value) + elif keyword == "GNU.sparse.size": + setattr(self, "size", int(value)) + elif keyword == "GNU.sparse.realsize": + setattr(self, "size", int(value)) + elif keyword in PAX_FIELDS: + if keyword in PAX_NUMBER_FIELDS: + try: + value = PAX_NUMBER_FIELDS[keyword](value) + except ValueError: + value = 0 + if keyword == "path": + value = value.rstrip("/") + setattr(self, keyword, value) + + self.pax_headers = pax_headers.copy() + + def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors): + """Decode a single field from a pax record. + """ + try: + return value.decode(encoding, "strict") + except UnicodeDecodeError: + return value.decode(fallback_encoding, fallback_errors) + + def _block(self, count): + """Round up a byte count by BLOCKSIZE and return it, + e.g. _block(834) => 1024. + """ + blocks, remainder = divmod(count, BLOCKSIZE) + if remainder: + blocks += 1 + return blocks * BLOCKSIZE + + def isreg(self): + return self.type in REGULAR_TYPES + def isfile(self): + return self.isreg() + def isdir(self): + return self.type == DIRTYPE + def issym(self): + return self.type == SYMTYPE + def islnk(self): + return self.type == LNKTYPE + def ischr(self): + return self.type == CHRTYPE + def isblk(self): + return self.type == BLKTYPE + def isfifo(self): + return self.type == FIFOTYPE + def issparse(self): + return self.sparse is not None + def isdev(self): + return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE) +# class TarInfo + +class TarFile(object): + """The TarFile Class provides an interface to tar archives. + """ + + debug = 0 # May be set from 0 (no msgs) to 3 (all msgs) + + dereference = False # If true, add content of linked file to the + # tar file, else the link. + + ignore_zeros = False # If true, skips empty or invalid blocks and + # continues processing. + + errorlevel = 1 # If 0, fatal errors only appear in debug + # messages (if debug >= 0). If > 0, errors + # are passed to the caller as exceptions. + + format = DEFAULT_FORMAT # The format to use when creating an archive. + + encoding = ENCODING # Encoding for 8-bit character strings. + + errors = None # Error handler for unicode conversion. + + tarinfo = TarInfo # The default TarInfo class to use. + + fileobject = ExFileObject # The default ExFileObject class to use. + + def __init__(self, name=None, mode="r", fileobj=None, format=None, + tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, + errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None): + """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to + read from an existing archive, 'a' to append data to an existing + file or 'w' to create a new file overwriting an existing one. `mode' + defaults to 'r'. + If `fileobj' is given, it is used for reading or writing data. If it + can be determined, `mode' is overridden by `fileobj's mode. + `fileobj' is not closed, when TarFile is closed. + """ + if len(mode) > 1 or mode not in "raw": + raise ValueError("mode must be 'r', 'a' or 'w'") + self.mode = mode + self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode] + + if not fileobj: + if self.mode == "a" and not os.path.exists(name): + # Create nonexistent files in append mode. 
+ self.mode = "w" + self._mode = "wb" + fileobj = bltn_open(name, self._mode) + self._extfileobj = False + else: + if name is None and hasattr(fileobj, "name"): + name = fileobj.name + if hasattr(fileobj, "mode"): + self._mode = fileobj.mode + self._extfileobj = True + self.name = os.path.abspath(name) if name else None + self.fileobj = fileobj + + # Init attributes. + if format is not None: + self.format = format + if tarinfo is not None: + self.tarinfo = tarinfo + if dereference is not None: + self.dereference = dereference + if ignore_zeros is not None: + self.ignore_zeros = ignore_zeros + if encoding is not None: + self.encoding = encoding + self.errors = errors + + if pax_headers is not None and self.format == PAX_FORMAT: + self.pax_headers = pax_headers + else: + self.pax_headers = {} + + if debug is not None: + self.debug = debug + if errorlevel is not None: + self.errorlevel = errorlevel + + # Init datastructures. + self.closed = False + self.members = [] # list of members as TarInfo objects + self._loaded = False # flag if all members have been read + self.offset = self.fileobj.tell() + # current position in the archive file + self.inodes = {} # dictionary caching the inodes of + # archive members already added + + try: + if self.mode == "r": + self.firstmember = None + self.firstmember = self.next() + + if self.mode == "a": + # Move to the end of the archive, + # before the first empty block. + while True: + self.fileobj.seek(self.offset) + try: + tarinfo = self.tarinfo.fromtarfile(self) + self.members.append(tarinfo) + except EOFHeaderError: + self.fileobj.seek(self.offset) + break + except HeaderError as e: + raise ReadError(str(e)) + + if self.mode in "aw": + self._loaded = True + + if self.pax_headers: + buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy()) + self.fileobj.write(buf) + self.offset += len(buf) + except: + if not self._extfileobj: + self.fileobj.close() + self.closed = True + raise + + #-------------------------------------------------------------------------- + # Below are the classmethods which act as alternate constructors to the + # TarFile class. The open() method is the only one that is needed for + # public use; it is the "super"-constructor and is able to select an + # adequate "sub"-constructor for a particular compression using the mapping + # from OPEN_METH. + # + # This concept allows one to subclass TarFile without losing the comfort of + # the super-constructor. A sub-constructor is registered and made available + # by adding it to the mapping in OPEN_METH. + + @classmethod + def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs): + """Open a tar archive for reading, writing or appending. Return + an appropriate TarFile class. 
+ + mode: + 'r' or 'r:*' open for reading with transparent compression + 'r:' open for reading exclusively uncompressed + 'r:gz' open for reading with gzip compression + 'r:bz2' open for reading with bzip2 compression + 'a' or 'a:' open for appending, creating the file if necessary + 'w' or 'w:' open for writing without compression + 'w:gz' open for writing with gzip compression + 'w:bz2' open for writing with bzip2 compression + + 'r|*' open a stream of tar blocks with transparent compression + 'r|' open an uncompressed stream of tar blocks for reading + 'r|gz' open a gzip compressed stream of tar blocks + 'r|bz2' open a bzip2 compressed stream of tar blocks + 'w|' open an uncompressed stream for writing + 'w|gz' open a gzip compressed stream for writing + 'w|bz2' open a bzip2 compressed stream for writing + """ + + if not name and not fileobj: + raise ValueError("nothing to open") + + if mode in ("r", "r:*"): + # Find out which *open() is appropriate for opening the file. + for comptype in cls.OPEN_METH: + func = getattr(cls, cls.OPEN_METH[comptype]) + if fileobj is not None: + saved_pos = fileobj.tell() + try: + return func(name, "r", fileobj, **kwargs) + except (ReadError, CompressionError) as e: + if fileobj is not None: + fileobj.seek(saved_pos) + continue + raise ReadError("file could not be opened successfully") + + elif ":" in mode: + filemode, comptype = mode.split(":", 1) + filemode = filemode or "r" + comptype = comptype or "tar" + + # Select the *open() function according to + # given compression. + if comptype in cls.OPEN_METH: + func = getattr(cls, cls.OPEN_METH[comptype]) + else: + raise CompressionError("unknown compression type %r" % comptype) + return func(name, filemode, fileobj, **kwargs) + + elif "|" in mode: + filemode, comptype = mode.split("|", 1) + filemode = filemode or "r" + comptype = comptype or "tar" + + if filemode not in "rw": + raise ValueError("mode must be 'r' or 'w'") + + stream = _Stream(name, filemode, comptype, fileobj, bufsize) + try: + t = cls(name, filemode, stream, **kwargs) + except: + stream.close() + raise + t._extfileobj = False + return t + + elif mode in "aw": + return cls.taropen(name, mode, fileobj, **kwargs) + + raise ValueError("undiscernible mode") + + @classmethod + def taropen(cls, name, mode="r", fileobj=None, **kwargs): + """Open uncompressed tar archive name for reading or writing. + """ + if len(mode) > 1 or mode not in "raw": + raise ValueError("mode must be 'r', 'a' or 'w'") + return cls(name, mode, fileobj, **kwargs) + + @classmethod + def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): + """Open gzip compressed tar archive name for reading or writing. + Appending is not allowed. + """ + if len(mode) > 1 or mode not in "rw": + raise ValueError("mode must be 'r' or 'w'") + + try: + import gzip + gzip.GzipFile + except (ImportError, AttributeError): + raise CompressionError("gzip module is not available") + + extfileobj = fileobj is not None + try: + fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj) + t = cls.taropen(name, mode, fileobj, **kwargs) + except IOError: + if not extfileobj and fileobj is not None: + fileobj.close() + if fileobj is None: + raise + raise ReadError("not a gzip file") + except: + if not extfileobj and fileobj is not None: + fileobj.close() + raise + t._extfileobj = extfileobj + return t + + @classmethod + def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): + """Open bzip2 compressed tar archive name for reading or writing. 
+ Appending is not allowed. + """ + if len(mode) > 1 or mode not in "rw": + raise ValueError("mode must be 'r' or 'w'.") + + try: + import bz2 + except ImportError: + raise CompressionError("bz2 module is not available") + + if fileobj is not None: + fileobj = _BZ2Proxy(fileobj, mode) + else: + fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel) + + try: + t = cls.taropen(name, mode, fileobj, **kwargs) + except (IOError, EOFError): + fileobj.close() + raise ReadError("not a bzip2 file") + t._extfileobj = False + return t + + # All *open() methods are registered here. + OPEN_METH = { + "tar": "taropen", # uncompressed tar + "gz": "gzopen", # gzip compressed tar + "bz2": "bz2open" # bzip2 compressed tar + } + + #-------------------------------------------------------------------------- + # The public methods which TarFile provides: + + def close(self): + """Close the TarFile. In write-mode, two finishing zero blocks are + appended to the archive. + """ + if self.closed: + return + + if self.mode in "aw": + self.fileobj.write(NUL * (BLOCKSIZE * 2)) + self.offset += (BLOCKSIZE * 2) + # fill up the end with zero-blocks + # (like option -b20 for tar does) + blocks, remainder = divmod(self.offset, RECORDSIZE) + if remainder > 0: + self.fileobj.write(NUL * (RECORDSIZE - remainder)) + + if not self._extfileobj: + self.fileobj.close() + self.closed = True + + def getmember(self, name): + """Return a TarInfo object for member `name'. If `name' can not be + found in the archive, KeyError is raised. If a member occurs more + than once in the archive, its last occurrence is assumed to be the + most up-to-date version. + """ + tarinfo = self._getmember(name) + if tarinfo is None: + raise KeyError("filename %r not found" % name) + return tarinfo + + def getmembers(self): + """Return the members of the archive as a list of TarInfo objects. The + list has the same order as the members in the archive. + """ + self._check() + if not self._loaded: # if we want to obtain a list of + self._load() # all members, we first have to + # scan the whole archive. + return self.members + + def getnames(self): + """Return the members of the archive as a list of their names. It has + the same order as the list returned by getmembers(). + """ + return [tarinfo.name for tarinfo in self.getmembers()] + + def gettarinfo(self, name=None, arcname=None, fileobj=None): + """Create a TarInfo object for either the file `name' or the file + object `fileobj' (using os.fstat on its file descriptor). You can + modify some of the TarInfo's attributes before you add it using + addfile(). If given, `arcname' specifies an alternative name for the + file in the archive. + """ + self._check("aw") + + # When fileobj is given, replace name by + # fileobj's real name. + if fileobj is not None: + name = fileobj.name + + # Building the name of the member in the archive. + # Backward slashes are converted to forward slashes, + # Absolute paths are turned to relative paths. + if arcname is None: + arcname = name + drv, arcname = os.path.splitdrive(arcname) + arcname = arcname.replace(os.sep, "/") + arcname = arcname.lstrip("/") + + # Now, fill the TarInfo object with + # information specific for the file. + tarinfo = self.tarinfo() + tarinfo.tarfile = self + + # Use os.stat or os.lstat, depending on platform + # and if symlinks shall be resolved. 
+ if fileobj is None: + if hasattr(os, "lstat") and not self.dereference: + statres = os.lstat(name) + else: + statres = os.stat(name) + else: + statres = os.fstat(fileobj.fileno()) + linkname = "" + + stmd = statres.st_mode + if stat.S_ISREG(stmd): + inode = (statres.st_ino, statres.st_dev) + if not self.dereference and statres.st_nlink > 1 and \ + inode in self.inodes and arcname != self.inodes[inode]: + # Is it a hardlink to an already + # archived file? + type = LNKTYPE + linkname = self.inodes[inode] + else: + # The inode is added only if its valid. + # For win32 it is always 0. + type = REGTYPE + if inode[0]: + self.inodes[inode] = arcname + elif stat.S_ISDIR(stmd): + type = DIRTYPE + elif stat.S_ISFIFO(stmd): + type = FIFOTYPE + elif stat.S_ISLNK(stmd): + type = SYMTYPE + linkname = os.readlink(name) + elif stat.S_ISCHR(stmd): + type = CHRTYPE + elif stat.S_ISBLK(stmd): + type = BLKTYPE + else: + return None + + # Fill the TarInfo object with all + # information we can get. + tarinfo.name = arcname + tarinfo.mode = stmd + tarinfo.uid = statres.st_uid + tarinfo.gid = statres.st_gid + if type == REGTYPE: + tarinfo.size = statres.st_size + else: + tarinfo.size = 0 + tarinfo.mtime = statres.st_mtime + tarinfo.type = type + tarinfo.linkname = linkname + if pwd: + try: + tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] + except KeyError: + pass + if grp: + try: + tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] + except KeyError: + pass + + if type in (CHRTYPE, BLKTYPE): + if hasattr(os, "major") and hasattr(os, "minor"): + tarinfo.devmajor = os.major(statres.st_rdev) + tarinfo.devminor = os.minor(statres.st_rdev) + return tarinfo + + def list(self, verbose=True): + """Print a table of contents to sys.stdout. If `verbose' is False, only + the names of the members are printed. If it is True, an `ls -l'-like + output is produced. + """ + self._check() + + for tarinfo in self: + if verbose: + print(filemode(tarinfo.mode), end=' ') + print("%s/%s" % (tarinfo.uname or tarinfo.uid, + tarinfo.gname or tarinfo.gid), end=' ') + if tarinfo.ischr() or tarinfo.isblk(): + print("%10s" % ("%d,%d" \ + % (tarinfo.devmajor, tarinfo.devminor)), end=' ') + else: + print("%10d" % tarinfo.size, end=' ') + print("%d-%02d-%02d %02d:%02d:%02d" \ + % time.localtime(tarinfo.mtime)[:6], end=' ') + + print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ') + + if verbose: + if tarinfo.issym(): + print("->", tarinfo.linkname, end=' ') + if tarinfo.islnk(): + print("link to", tarinfo.linkname, end=' ') + print() + + def add(self, name, arcname=None, recursive=True, exclude=None, filter=None): + """Add the file `name' to the archive. `name' may be any type of file + (directory, fifo, symbolic link, etc.). If given, `arcname' + specifies an alternative name for the file in the archive. + Directories are added recursively by default. This can be avoided by + setting `recursive' to False. `exclude' is a function that should + return True for each filename to be excluded. `filter' is a function + that expects a TarInfo object argument and returns the changed + TarInfo object, if it returns None the TarInfo object will be + excluded from the archive. + """ + self._check("aw") + + if arcname is None: + arcname = name + + # Exclude pathnames. + if exclude is not None: + import warnings + warnings.warn("use the filter argument instead", + DeprecationWarning, 2) + if exclude(name): + self._dbg(2, "tarfile: Excluded %r" % name) + return + + # Skip if somebody tries to archive the archive... 
+ if self.name is not None and os.path.abspath(name) == self.name: + self._dbg(2, "tarfile: Skipped %r" % name) + return + + self._dbg(1, name) + + # Create a TarInfo object from the file. + tarinfo = self.gettarinfo(name, arcname) + + if tarinfo is None: + self._dbg(1, "tarfile: Unsupported type %r" % name) + return + + # Change or exclude the TarInfo object. + if filter is not None: + tarinfo = filter(tarinfo) + if tarinfo is None: + self._dbg(2, "tarfile: Excluded %r" % name) + return + + # Append the tar header and data to the archive. + if tarinfo.isreg(): + f = bltn_open(name, "rb") + self.addfile(tarinfo, f) + f.close() + + elif tarinfo.isdir(): + self.addfile(tarinfo) + if recursive: + for f in os.listdir(name): + self.add(os.path.join(name, f), os.path.join(arcname, f), + recursive, exclude, filter=filter) + + else: + self.addfile(tarinfo) + + def addfile(self, tarinfo, fileobj=None): + """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is + given, tarinfo.size bytes are read from it and added to the archive. + You can create TarInfo objects using gettarinfo(). + On Windows platforms, `fileobj' should always be opened with mode + 'rb' to avoid irritation about the file size. + """ + self._check("aw") + + tarinfo = copy.copy(tarinfo) + + buf = tarinfo.tobuf(self.format, self.encoding, self.errors) + self.fileobj.write(buf) + self.offset += len(buf) + + # If there's data to follow, append it. + if fileobj is not None: + copyfileobj(fileobj, self.fileobj, tarinfo.size) + blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) + if remainder > 0: + self.fileobj.write(NUL * (BLOCKSIZE - remainder)) + blocks += 1 + self.offset += blocks * BLOCKSIZE + + self.members.append(tarinfo) + + def extractall(self, path=".", members=None): + """Extract all members from the archive to the current working + directory and set owner, modification time and permissions on + directories afterwards. `path' specifies a different directory + to extract to. `members' is optional and must be a subset of the + list returned by getmembers(). + """ + directories = [] + + if members is None: + members = self + + for tarinfo in members: + if tarinfo.isdir(): + # Extract directories with a safe mode. + directories.append(tarinfo) + tarinfo = copy.copy(tarinfo) + tarinfo.mode = 0o700 + # Do not set_attrs directories, as we will do that further down + self.extract(tarinfo, path, set_attrs=not tarinfo.isdir()) + + # Reverse sort directories. + directories.sort(key=lambda a: a.name) + directories.reverse() + + # Set correct owner, mtime and filemode on directories. + for tarinfo in directories: + dirpath = os.path.join(path, tarinfo.name) + try: + self.chown(tarinfo, dirpath) + self.utime(tarinfo, dirpath) + self.chmod(tarinfo, dirpath) + except ExtractError as e: + if self.errorlevel > 1: + raise + else: + self._dbg(1, "tarfile: %s" % e) + + def extract(self, member, path="", set_attrs=True): + """Extract a member from the archive to the current working directory, + using its full name. Its file information is extracted as accurately + as possible. `member' may be a filename or a TarInfo object. You can + specify a different directory using `path'. File attributes (owner, + mtime, mode) are set unless `set_attrs' is False. + """ + self._check("r") + + if isinstance(member, str): + tarinfo = self.getmember(member) + else: + tarinfo = member + + # Prepare the link target for makelink(). 
+ if tarinfo.islnk(): + tarinfo._link_target = os.path.join(path, tarinfo.linkname) + + try: + self._extract_member(tarinfo, os.path.join(path, tarinfo.name), + set_attrs=set_attrs) + except EnvironmentError as e: + if self.errorlevel > 0: + raise + else: + if e.filename is None: + self._dbg(1, "tarfile: %s" % e.strerror) + else: + self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) + except ExtractError as e: + if self.errorlevel > 1: + raise + else: + self._dbg(1, "tarfile: %s" % e) + + def extractfile(self, member): + """Extract a member from the archive as a file object. `member' may be + a filename or a TarInfo object. If `member' is a regular file, a + file-like object is returned. If `member' is a link, a file-like + object is constructed from the link's target. If `member' is none of + the above, None is returned. + The file-like object is read-only and provides the following + methods: read(), readline(), readlines(), seek() and tell() + """ + self._check("r") + + if isinstance(member, str): + tarinfo = self.getmember(member) + else: + tarinfo = member + + if tarinfo.isreg(): + return self.fileobject(self, tarinfo) + + elif tarinfo.type not in SUPPORTED_TYPES: + # If a member's type is unknown, it is treated as a + # regular file. + return self.fileobject(self, tarinfo) + + elif tarinfo.islnk() or tarinfo.issym(): + if isinstance(self.fileobj, _Stream): + # A small but ugly workaround for the case that someone tries + # to extract a (sym)link as a file-object from a non-seekable + # stream of tar blocks. + raise StreamError("cannot extract (sym)link as file object") + else: + # A (sym)link's file object is its target's file object. + return self.extractfile(self._find_link_target(tarinfo)) + else: + # If there's no data associated with the member (directory, chrdev, + # blkdev, etc.), return None instead of a file object. + return None + + def _extract_member(self, tarinfo, targetpath, set_attrs=True): + """Extract the TarInfo object tarinfo to a physical + file called targetpath. + """ + # Fetch the TarInfo object for the given name + # and build the destination pathname, replacing + # forward slashes to platform specific separators. + targetpath = targetpath.rstrip("/") + targetpath = targetpath.replace("/", os.sep) + + # Create all upper directories. + upperdirs = os.path.dirname(targetpath) + if upperdirs and not os.path.exists(upperdirs): + # Create directories that are not part of the archive with + # default permissions. + os.makedirs(upperdirs) + + if tarinfo.islnk() or tarinfo.issym(): + self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname)) + else: + self._dbg(1, tarinfo.name) + + if tarinfo.isreg(): + self.makefile(tarinfo, targetpath) + elif tarinfo.isdir(): + self.makedir(tarinfo, targetpath) + elif tarinfo.isfifo(): + self.makefifo(tarinfo, targetpath) + elif tarinfo.ischr() or tarinfo.isblk(): + self.makedev(tarinfo, targetpath) + elif tarinfo.islnk() or tarinfo.issym(): + self.makelink(tarinfo, targetpath) + elif tarinfo.type not in SUPPORTED_TYPES: + self.makeunknown(tarinfo, targetpath) + else: + self.makefile(tarinfo, targetpath) + + if set_attrs: + self.chown(tarinfo, targetpath) + if not tarinfo.issym(): + self.chmod(tarinfo, targetpath) + self.utime(tarinfo, targetpath) + + #-------------------------------------------------------------------------- + # Below are the different file methods. They are called via + # _extract_member() when extract() is called. They can be replaced in a + # subclass to implement other functionality. 
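
As the divider comment above notes, the make*() file methods are meant to be replaceable in a subclass. The following is a minimal, hypothetical sketch of that pattern (not part of the vendored file): it uses the standard-library tarfile module, which exposes the same TarFile/open/extractall interface as this backport, and the archive name "example.tar.gz" and output directory "out" are assumptions for illustration.

import tarfile

class LoggingTarFile(tarfile.TarFile):
    def makefile(self, tarinfo, targetpath):
        # Log every regular file before delegating to the stock implementation.
        print("extracting %s -> %s" % (tarinfo.name, targetpath))
        return tarfile.TarFile.makefile(self, tarinfo, targetpath)

# Possible usage (assumed archive name):
# with LoggingTarFile.open("example.tar.gz", "r:gz") as tf:
#     tf.extractall(path="out")
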
+ + def makedir(self, tarinfo, targetpath): + """Make a directory called targetpath. + """ + try: + # Use a safe mode for the directory, the real mode is set + # later in _extract_member(). + os.mkdir(targetpath, 0o700) + except EnvironmentError as e: + if e.errno != errno.EEXIST: + raise + + def makefile(self, tarinfo, targetpath): + """Make a file called targetpath. + """ + source = self.fileobj + source.seek(tarinfo.offset_data) + target = bltn_open(targetpath, "wb") + if tarinfo.sparse is not None: + for offset, size in tarinfo.sparse: + target.seek(offset) + copyfileobj(source, target, size) + else: + copyfileobj(source, target, tarinfo.size) + target.seek(tarinfo.size) + target.truncate() + target.close() + + def makeunknown(self, tarinfo, targetpath): + """Make a file from a TarInfo object with an unknown type + at targetpath. + """ + self.makefile(tarinfo, targetpath) + self._dbg(1, "tarfile: Unknown file type %r, " \ + "extracted as regular file." % tarinfo.type) + + def makefifo(self, tarinfo, targetpath): + """Make a fifo called targetpath. + """ + if hasattr(os, "mkfifo"): + os.mkfifo(targetpath) + else: + raise ExtractError("fifo not supported by system") + + def makedev(self, tarinfo, targetpath): + """Make a character or block device called targetpath. + """ + if not hasattr(os, "mknod") or not hasattr(os, "makedev"): + raise ExtractError("special devices not supported by system") + + mode = tarinfo.mode + if tarinfo.isblk(): + mode |= stat.S_IFBLK + else: + mode |= stat.S_IFCHR + + os.mknod(targetpath, mode, + os.makedev(tarinfo.devmajor, tarinfo.devminor)) + + def makelink(self, tarinfo, targetpath): + """Make a (symbolic) link called targetpath. If it cannot be created + (platform limitation), we try to make a copy of the referenced file + instead of a link. + """ + try: + # For systems that support symbolic and hard links. + if tarinfo.issym(): + os.symlink(tarinfo.linkname, targetpath) + else: + # See extract(). + if os.path.exists(tarinfo._link_target): + os.link(tarinfo._link_target, targetpath) + else: + self._extract_member(self._find_link_target(tarinfo), + targetpath) + except symlink_exception: + if tarinfo.issym(): + linkpath = os.path.join(os.path.dirname(tarinfo.name), + tarinfo.linkname) + else: + linkpath = tarinfo.linkname + else: + try: + self._extract_member(self._find_link_target(tarinfo), + targetpath) + except KeyError: + raise ExtractError("unable to resolve link inside archive") + + def chown(self, tarinfo, targetpath): + """Set owner of targetpath according to tarinfo. + """ + if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: + # We have to be root to do so. + try: + g = grp.getgrnam(tarinfo.gname)[2] + except KeyError: + g = tarinfo.gid + try: + u = pwd.getpwnam(tarinfo.uname)[2] + except KeyError: + u = tarinfo.uid + try: + if tarinfo.issym() and hasattr(os, "lchown"): + os.lchown(targetpath, u, g) + else: + if sys.platform != "os2emx": + os.chown(targetpath, u, g) + except EnvironmentError as e: + raise ExtractError("could not change owner") + + def chmod(self, tarinfo, targetpath): + """Set file permissions of targetpath according to tarinfo. + """ + if hasattr(os, 'chmod'): + try: + os.chmod(targetpath, tarinfo.mode) + except EnvironmentError as e: + raise ExtractError("could not change mode") + + def utime(self, tarinfo, targetpath): + """Set modification time of targetpath according to tarinfo. 
+ """ + if not hasattr(os, 'utime'): + return + try: + os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) + except EnvironmentError as e: + raise ExtractError("could not change modification time") + + #-------------------------------------------------------------------------- + def next(self): + """Return the next member of the archive as a TarInfo object, when + TarFile is opened for reading. Return None if there is no more + available. + """ + self._check("ra") + if self.firstmember is not None: + m = self.firstmember + self.firstmember = None + return m + + # Read the next block. + self.fileobj.seek(self.offset) + tarinfo = None + while True: + try: + tarinfo = self.tarinfo.fromtarfile(self) + except EOFHeaderError as e: + if self.ignore_zeros: + self._dbg(2, "0x%X: %s" % (self.offset, e)) + self.offset += BLOCKSIZE + continue + except InvalidHeaderError as e: + if self.ignore_zeros: + self._dbg(2, "0x%X: %s" % (self.offset, e)) + self.offset += BLOCKSIZE + continue + elif self.offset == 0: + raise ReadError(str(e)) + except EmptyHeaderError: + if self.offset == 0: + raise ReadError("empty file") + except TruncatedHeaderError as e: + if self.offset == 0: + raise ReadError(str(e)) + except SubsequentHeaderError as e: + raise ReadError(str(e)) + break + + if tarinfo is not None: + self.members.append(tarinfo) + else: + self._loaded = True + + return tarinfo + + #-------------------------------------------------------------------------- + # Little helper methods: + + def _getmember(self, name, tarinfo=None, normalize=False): + """Find an archive member by name from bottom to top. + If tarinfo is given, it is used as the starting point. + """ + # Ensure that all members have been loaded. + members = self.getmembers() + + # Limit the member search list up to tarinfo. + if tarinfo is not None: + members = members[:members.index(tarinfo)] + + if normalize: + name = os.path.normpath(name) + + for member in reversed(members): + if normalize: + member_name = os.path.normpath(member.name) + else: + member_name = member.name + + if name == member_name: + return member + + def _load(self): + """Read through the entire archive file and look for readable + members. + """ + while True: + tarinfo = self.next() + if tarinfo is None: + break + self._loaded = True + + def _check(self, mode=None): + """Check if TarFile is still open, and if the operation's mode + corresponds to TarFile's mode. + """ + if self.closed: + raise IOError("%s is closed" % self.__class__.__name__) + if mode is not None and self.mode not in mode: + raise IOError("bad operation for mode %r" % self.mode) + + def _find_link_target(self, tarinfo): + """Find the target member of a symlink or hardlink member in the + archive. + """ + if tarinfo.issym(): + # Always search the entire archive. + linkname = os.path.dirname(tarinfo.name) + "/" + tarinfo.linkname + limit = None + else: + # Search the archive before the link, because a hard link is + # just a reference to an already archived file. + linkname = tarinfo.linkname + limit = tarinfo + + member = self._getmember(linkname, tarinfo=limit, normalize=True) + if member is None: + raise KeyError("linkname %r not found" % linkname) + return member + + def __iter__(self): + """Provide an iterator object. + """ + if self._loaded: + return iter(self.members) + else: + return TarIter(self) + + def _dbg(self, level, msg): + """Write debugging output to sys.stderr. 
+ """ + if level <= self.debug: + print(msg, file=sys.stderr) + + def __enter__(self): + self._check() + return self + + def __exit__(self, type, value, traceback): + if type is None: + self.close() + else: + # An exception occurred. We must not call close() because + # it would try to write end-of-archive blocks and padding. + if not self._extfileobj: + self.fileobj.close() + self.closed = True +# class TarFile + +class TarIter(object): + """Iterator Class. + + for tarinfo in TarFile(...): + suite... + """ + + def __init__(self, tarfile): + """Construct a TarIter object. + """ + self.tarfile = tarfile + self.index = 0 + def __iter__(self): + """Return iterator object. + """ + return self + + def __next__(self): + """Return the next item using TarFile's next() method. + When all members have been read, set TarFile as _loaded. + """ + # Fix for SF #1100429: Under rare circumstances it can + # happen that getmembers() is called during iteration, + # which will cause TarIter to stop prematurely. + if not self.tarfile._loaded: + tarinfo = self.tarfile.next() + if not tarinfo: + self.tarfile._loaded = True + raise StopIteration + else: + try: + tarinfo = self.tarfile.members[self.index] + except IndexError: + raise StopIteration + self.index += 1 + return tarinfo + + next = __next__ # for Python 2.x + +#-------------------- +# exported functions +#-------------------- +def is_tarfile(name): + """Return True if name points to a tar archive that we + are able to handle, else return False. + """ + try: + t = open(name) + t.close() + return True + except TarError: + return False + +bltn_open = open +open = TarFile.open diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/compat.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/compat.py new file mode 100644 index 0000000..ff328c8 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/compat.py @@ -0,0 +1,1120 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2013-2017 Vinay Sajip. +# Licensed to the Python Software Foundation under a contributor agreement. +# See LICENSE.txt and CONTRIBUTORS.txt. 
+# +from __future__ import absolute_import + +import os +import re +import sys + +try: + import ssl +except ImportError: # pragma: no cover + ssl = None + +if sys.version_info[0] < 3: # pragma: no cover + from StringIO import StringIO + string_types = basestring, + text_type = unicode + from types import FileType as file_type + import __builtin__ as builtins + import ConfigParser as configparser + from ._backport import shutil + from urlparse import urlparse, urlunparse, urljoin, urlsplit, urlunsplit + from urllib import (urlretrieve, quote as _quote, unquote, url2pathname, + pathname2url, ContentTooShortError, splittype) + + def quote(s): + if isinstance(s, unicode): + s = s.encode('utf-8') + return _quote(s) + + import urllib2 + from urllib2 import (Request, urlopen, URLError, HTTPError, + HTTPBasicAuthHandler, HTTPPasswordMgr, + HTTPHandler, HTTPRedirectHandler, + build_opener) + if ssl: + from urllib2 import HTTPSHandler + import httplib + import xmlrpclib + import Queue as queue + from HTMLParser import HTMLParser + import htmlentitydefs + raw_input = raw_input + from itertools import ifilter as filter + from itertools import ifilterfalse as filterfalse + + _userprog = None + def splituser(host): + """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" + global _userprog + if _userprog is None: + import re + _userprog = re.compile('^(.*)@(.*)$') + + match = _userprog.match(host) + if match: return match.group(1, 2) + return None, host + +else: # pragma: no cover + from io import StringIO + string_types = str, + text_type = str + from io import TextIOWrapper as file_type + import builtins + import configparser + import shutil + from urllib.parse import (urlparse, urlunparse, urljoin, splituser, quote, + unquote, urlsplit, urlunsplit, splittype) + from urllib.request import (urlopen, urlretrieve, Request, url2pathname, + pathname2url, + HTTPBasicAuthHandler, HTTPPasswordMgr, + HTTPHandler, HTTPRedirectHandler, + build_opener) + if ssl: + from urllib.request import HTTPSHandler + from urllib.error import HTTPError, URLError, ContentTooShortError + import http.client as httplib + import urllib.request as urllib2 + import xmlrpc.client as xmlrpclib + import queue + from html.parser import HTMLParser + import html.entities as htmlentitydefs + raw_input = input + from itertools import filterfalse + filter = filter + +try: + from ssl import match_hostname, CertificateError +except ImportError: # pragma: no cover + class CertificateError(ValueError): + pass + + + def _dnsname_match(dn, hostname, max_wildcards=1): + """Matching according to RFC 6125, section 6.4.3 + + http://tools.ietf.org/html/rfc6125#section-6.4.3 + """ + pats = [] + if not dn: + return False + + parts = dn.split('.') + leftmost, remainder = parts[0], parts[1:] + + wildcards = leftmost.count('*') + if wildcards > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survey of established + # policy among SSL implementations showed it to be a + # reasonable choice. + raise CertificateError( + "too many wildcards in certificate DNS name: " + repr(dn)) + + # speed up common case w/o wildcards + if not wildcards: + return dn.lower() == hostname.lower() + + # RFC 6125, section 6.4.3, subitem 1. + # The client SHOULD NOT attempt to match a presented identifier in which + # the wildcard character comprises a label other than the left-most label. + if leftmost == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. 
+ pats.append('[^.]+') + elif leftmost.startswith('xn--') or hostname.startswith('xn--'): + # RFC 6125, section 6.4.3, subitem 3. + # The client SHOULD NOT attempt to match a presented identifier + # where the wildcard character is embedded within an A-label or + # U-label of an internationalized domain name. + pats.append(re.escape(leftmost)) + else: + # Otherwise, '*' matches any dotless string, e.g. www* + pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + + # add the remaining fragments, ignore any wildcards + for frag in remainder: + pats.append(re.escape(frag)) + + pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + return pat.match(hostname) + + + def match_hostname(cert, hostname): + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. + + CertificateError is raised on failure. On success, the function + returns nothing. + """ + if not cert: + raise ValueError("empty or no certificate, match_hostname needs a " + "SSL socket or SSL context with either " + "CERT_OPTIONAL or CERT_REQUIRED") + dnsnames = [] + san = cert.get('subjectAltName', ()) + for key, value in san: + if key == 'DNS': + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if not dnsnames: + # The subject is only checked when there is no dNSName entry + # in subjectAltName + for sub in cert.get('subject', ()): + for key, value in sub: + # XXX according to RFC 2818, the most specific Common Name + # must be used. + if key == 'commonName': + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if len(dnsnames) > 1: + raise CertificateError("hostname %r " + "doesn't match either of %s" + % (hostname, ', '.join(map(repr, dnsnames)))) + elif len(dnsnames) == 1: + raise CertificateError("hostname %r " + "doesn't match %r" + % (hostname, dnsnames[0])) + else: + raise CertificateError("no appropriate commonName or " + "subjectAltName fields were found") + + +try: + from types import SimpleNamespace as Container +except ImportError: # pragma: no cover + class Container(object): + """ + A generic container for when multiple values need to be returned + """ + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + +try: + from shutil import which +except ImportError: # pragma: no cover + # Implementation from Python 3.3 + def which(cmd, mode=os.F_OK | os.X_OK, path=None): + """Given a command, mode, and a PATH string, return the path which + conforms to the given mode on the PATH, or None if there is no such + file. + + `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result + of os.environ.get("PATH"), or can be overridden with a custom search + path. + + """ + # Check that a given file can be accessed with the correct mode. + # Additionally check that `file` is not a directory, as on Windows + # directories pass the os.access check. + def _access_check(fn, mode): + return (os.path.exists(fn) and os.access(fn, mode) + and not os.path.isdir(fn)) + + # If we're given a path with a directory part, look it up directly rather + # than referring to PATH directories. This includes checking relative to the + # current directory, e.g. 
./script + if os.path.dirname(cmd): + if _access_check(cmd, mode): + return cmd + return None + + if path is None: + path = os.environ.get("PATH", os.defpath) + if not path: + return None + path = path.split(os.pathsep) + + if sys.platform == "win32": + # The current directory takes precedence on Windows. + if not os.curdir in path: + path.insert(0, os.curdir) + + # PATHEXT is necessary to check on Windows. + pathext = os.environ.get("PATHEXT", "").split(os.pathsep) + # See if the given file matches any of the expected path extensions. + # This will allow us to short circuit when given "python.exe". + # If it does match, only test that one, otherwise we have to try + # others. + if any(cmd.lower().endswith(ext.lower()) for ext in pathext): + files = [cmd] + else: + files = [cmd + ext for ext in pathext] + else: + # On other platforms you don't have things like PATHEXT to tell you + # what file suffixes are executable, so just pass on cmd as-is. + files = [cmd] + + seen = set() + for dir in path: + normdir = os.path.normcase(dir) + if not normdir in seen: + seen.add(normdir) + for thefile in files: + name = os.path.join(dir, thefile) + if _access_check(name, mode): + return name + return None + + +# ZipFile is a context manager in 2.7, but not in 2.6 + +from zipfile import ZipFile as BaseZipFile + +if hasattr(BaseZipFile, '__enter__'): # pragma: no cover + ZipFile = BaseZipFile +else: # pragma: no cover + from zipfile import ZipExtFile as BaseZipExtFile + + class ZipExtFile(BaseZipExtFile): + def __init__(self, base): + self.__dict__.update(base.__dict__) + + def __enter__(self): + return self + + def __exit__(self, *exc_info): + self.close() + # return None, so if an exception occurred, it will propagate + + class ZipFile(BaseZipFile): + def __enter__(self): + return self + + def __exit__(self, *exc_info): + self.close() + # return None, so if an exception occurred, it will propagate + + def open(self, *args, **kwargs): + base = BaseZipFile.open(self, *args, **kwargs) + return ZipExtFile(base) + +try: + from platform import python_implementation +except ImportError: # pragma: no cover + def python_implementation(): + """Return a string identifying the Python implementation.""" + if 'PyPy' in sys.version: + return 'PyPy' + if os.name == 'java': + return 'Jython' + if sys.version.startswith('IronPython'): + return 'IronPython' + return 'CPython' + +try: + import sysconfig +except ImportError: # pragma: no cover + from ._backport import sysconfig + +try: + callable = callable +except NameError: # pragma: no cover + from collections import Callable + + def callable(obj): + return isinstance(obj, Callable) + + +try: + fsencode = os.fsencode + fsdecode = os.fsdecode +except AttributeError: # pragma: no cover + # Issue #99: on some systems (e.g. containerised), + # sys.getfilesystemencoding() returns None, and we need a real value, + # so fall back to utf-8. From the CPython 2.7 docs relating to Unix and + # sys.getfilesystemencoding(): the return value is "the user’s preference + # according to the result of nl_langinfo(CODESET), or None if the + # nl_langinfo(CODESET) failed." 
+ _fsencoding = sys.getfilesystemencoding() or 'utf-8' + if _fsencoding == 'mbcs': + _fserrors = 'strict' + else: + _fserrors = 'surrogateescape' + + def fsencode(filename): + if isinstance(filename, bytes): + return filename + elif isinstance(filename, text_type): + return filename.encode(_fsencoding, _fserrors) + else: + raise TypeError("expect bytes or str, not %s" % + type(filename).__name__) + + def fsdecode(filename): + if isinstance(filename, text_type): + return filename + elif isinstance(filename, bytes): + return filename.decode(_fsencoding, _fserrors) + else: + raise TypeError("expect bytes or str, not %s" % + type(filename).__name__) + +try: + from tokenize import detect_encoding +except ImportError: # pragma: no cover + from codecs import BOM_UTF8, lookup + import re + + cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)") + + def _get_normal_name(orig_enc): + """Imitates get_normal_name in tokenizer.c.""" + # Only care about the first 12 characters. + enc = orig_enc[:12].lower().replace("_", "-") + if enc == "utf-8" or enc.startswith("utf-8-"): + return "utf-8" + if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \ + enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")): + return "iso-8859-1" + return orig_enc + + def detect_encoding(readline): + """ + The detect_encoding() function is used to detect the encoding that should + be used to decode a Python source file. It requires one argument, readline, + in the same way as the tokenize() generator. + + It will call readline a maximum of twice, and return the encoding used + (as a string) and a list of any lines (left as bytes) it has read in. + + It detects the encoding from the presence of a utf-8 bom or an encoding + cookie as specified in pep-0263. If both a bom and a cookie are present, + but disagree, a SyntaxError will be raised. If the encoding cookie is an + invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found, + 'utf-8-sig' is returned. + + If no encoding is specified, then the default of 'utf-8' will be returned. + """ + try: + filename = readline.__self__.name + except AttributeError: + filename = None + bom_found = False + encoding = None + default = 'utf-8' + def read_or_stop(): + try: + return readline() + except StopIteration: + return b'' + + def find_cookie(line): + try: + # Decode as UTF-8. Either the line is an encoding declaration, + # in which case it should be pure ASCII, or it must be UTF-8 + # per default encoding. 
+ line_string = line.decode('utf-8') + except UnicodeDecodeError: + msg = "invalid or missing encoding declaration" + if filename is not None: + msg = '{} for {!r}'.format(msg, filename) + raise SyntaxError(msg) + + matches = cookie_re.findall(line_string) + if not matches: + return None + encoding = _get_normal_name(matches[0]) + try: + codec = lookup(encoding) + except LookupError: + # This behaviour mimics the Python interpreter + if filename is None: + msg = "unknown encoding: " + encoding + else: + msg = "unknown encoding for {!r}: {}".format(filename, + encoding) + raise SyntaxError(msg) + + if bom_found: + if codec.name != 'utf-8': + # This behaviour mimics the Python interpreter + if filename is None: + msg = 'encoding problem: utf-8' + else: + msg = 'encoding problem for {!r}: utf-8'.format(filename) + raise SyntaxError(msg) + encoding += '-sig' + return encoding + + first = read_or_stop() + if first.startswith(BOM_UTF8): + bom_found = True + first = first[3:] + default = 'utf-8-sig' + if not first: + return default, [] + + encoding = find_cookie(first) + if encoding: + return encoding, [first] + + second = read_or_stop() + if not second: + return default, [first] + + encoding = find_cookie(second) + if encoding: + return encoding, [first, second] + + return default, [first, second] + +# For converting & <-> & etc. +try: + from html import escape +except ImportError: + from cgi import escape +if sys.version_info[:2] < (3, 4): + unescape = HTMLParser().unescape +else: + from html import unescape + +try: + from collections import ChainMap +except ImportError: # pragma: no cover + from collections import MutableMapping + + try: + from reprlib import recursive_repr as _recursive_repr + except ImportError: + def _recursive_repr(fillvalue='...'): + ''' + Decorator to make a repr function return fillvalue for a recursive + call + ''' + + def decorating_function(user_function): + repr_running = set() + + def wrapper(self): + key = id(self), get_ident() + if key in repr_running: + return fillvalue + repr_running.add(key) + try: + result = user_function(self) + finally: + repr_running.discard(key) + return result + + # Can't use functools.wraps() here because of bootstrap issues + wrapper.__module__ = getattr(user_function, '__module__') + wrapper.__doc__ = getattr(user_function, '__doc__') + wrapper.__name__ = getattr(user_function, '__name__') + wrapper.__annotations__ = getattr(user_function, '__annotations__', {}) + return wrapper + + return decorating_function + + class ChainMap(MutableMapping): + ''' A ChainMap groups multiple dicts (or other mappings) together + to create a single, updateable view. + + The underlying mappings are stored in a list. That list is public and can + accessed or updated using the *maps* attribute. There is no other state. + + Lookups search the underlying mappings successively until a key is found. + In contrast, writes, updates, and deletions only operate on the first + mapping. + + ''' + + def __init__(self, *maps): + '''Initialize a ChainMap by setting *maps* to the given mappings. + If no mappings are provided, a single empty dictionary is used. 
+ + ''' + self.maps = list(maps) or [{}] # always at least one map + + def __missing__(self, key): + raise KeyError(key) + + def __getitem__(self, key): + for mapping in self.maps: + try: + return mapping[key] # can't use 'key in mapping' with defaultdict + except KeyError: + pass + return self.__missing__(key) # support subclasses that define __missing__ + + def get(self, key, default=None): + return self[key] if key in self else default + + def __len__(self): + return len(set().union(*self.maps)) # reuses stored hash values if possible + + def __iter__(self): + return iter(set().union(*self.maps)) + + def __contains__(self, key): + return any(key in m for m in self.maps) + + def __bool__(self): + return any(self.maps) + + @_recursive_repr() + def __repr__(self): + return '{0.__class__.__name__}({1})'.format( + self, ', '.join(map(repr, self.maps))) + + @classmethod + def fromkeys(cls, iterable, *args): + 'Create a ChainMap with a single dict created from the iterable.' + return cls(dict.fromkeys(iterable, *args)) + + def copy(self): + 'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]' + return self.__class__(self.maps[0].copy(), *self.maps[1:]) + + __copy__ = copy + + def new_child(self): # like Django's Context.push() + 'New ChainMap with a new dict followed by all previous maps.' + return self.__class__({}, *self.maps) + + @property + def parents(self): # like Django's Context.pop() + 'New ChainMap from maps[1:].' + return self.__class__(*self.maps[1:]) + + def __setitem__(self, key, value): + self.maps[0][key] = value + + def __delitem__(self, key): + try: + del self.maps[0][key] + except KeyError: + raise KeyError('Key not found in the first mapping: {!r}'.format(key)) + + def popitem(self): + 'Remove and return an item pair from maps[0]. Raise KeyError is maps[0] is empty.' + try: + return self.maps[0].popitem() + except KeyError: + raise KeyError('No keys found in the first mapping.') + + def pop(self, key, *args): + 'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].' + try: + return self.maps[0].pop(key, *args) + except KeyError: + raise KeyError('Key not found in the first mapping: {!r}'.format(key)) + + def clear(self): + 'Clear maps[0], leaving maps[1:] intact.' + self.maps[0].clear() + +try: + from importlib.util import cache_from_source # Python >= 3.4 +except ImportError: # pragma: no cover + try: + from imp import cache_from_source + except ImportError: # pragma: no cover + def cache_from_source(path, debug_override=None): + assert path.endswith('.py') + if debug_override is None: + debug_override = __debug__ + if debug_override: + suffix = 'c' + else: + suffix = 'o' + return path + suffix + +try: + from collections import OrderedDict +except ImportError: # pragma: no cover +## {{{ http://code.activestate.com/recipes/576693/ (r9) +# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy. +# Passes Python2.7's test suite and incorporates all the latest updates. + try: + from thread import get_ident as _get_ident + except ImportError: + from dummy_thread import get_ident as _get_ident + + try: + from _abcoll import KeysView, ValuesView, ItemsView + except ImportError: + pass + + + class OrderedDict(dict): + 'Dictionary that remembers insertion order' + # An inherited dict maps keys to values. + # The inherited dict provides __getitem__, __len__, __contains__, and get. + # The remaining methods are order-aware. + # Big-O running times for all methods are the same as for regular dictionaries. 
+ + # The internal self.__map dictionary maps keys to links in a doubly linked list. + # The circular doubly linked list starts and ends with a sentinel element. + # The sentinel element never gets deleted (this simplifies the algorithm). + # Each link is stored as a list of length three: [PREV, NEXT, KEY]. + + def __init__(self, *args, **kwds): + '''Initialize an ordered dictionary. Signature is the same as for + regular dictionaries, but keyword arguments are not recommended + because their insertion order is arbitrary. + + ''' + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__root + except AttributeError: + self.__root = root = [] # sentinel node + root[:] = [root, root, None] + self.__map = {} + self.__update(*args, **kwds) + + def __setitem__(self, key, value, dict_setitem=dict.__setitem__): + 'od.__setitem__(i, y) <==> od[i]=y' + # Setting a new item creates a new link which goes at the end of the linked + # list, and the inherited dictionary is updated with the new key/value pair. + if key not in self: + root = self.__root + last = root[0] + last[1] = root[0] = self.__map[key] = [last, root, key] + dict_setitem(self, key, value) + + def __delitem__(self, key, dict_delitem=dict.__delitem__): + 'od.__delitem__(y) <==> del od[y]' + # Deleting an existing item uses self.__map to find the link which is + # then removed by updating the links in the predecessor and successor nodes. + dict_delitem(self, key) + link_prev, link_next, key = self.__map.pop(key) + link_prev[1] = link_next + link_next[0] = link_prev + + def __iter__(self): + 'od.__iter__() <==> iter(od)' + root = self.__root + curr = root[1] + while curr is not root: + yield curr[2] + curr = curr[1] + + def __reversed__(self): + 'od.__reversed__() <==> reversed(od)' + root = self.__root + curr = root[0] + while curr is not root: + yield curr[2] + curr = curr[0] + + def clear(self): + 'od.clear() -> None. Remove all items from od.' + try: + for node in self.__map.itervalues(): + del node[:] + root = self.__root + root[:] = [root, root, None] + self.__map.clear() + except AttributeError: + pass + dict.clear(self) + + def popitem(self, last=True): + '''od.popitem() -> (k, v), return and remove a (key, value) pair. + Pairs are returned in LIFO order if last is true or FIFO order if false. + + ''' + if not self: + raise KeyError('dictionary is empty') + root = self.__root + if last: + link = root[0] + link_prev = link[0] + link_prev[1] = root + root[0] = link_prev + else: + link = root[1] + link_next = link[1] + root[1] = link_next + link_next[0] = root + key = link[2] + del self.__map[key] + value = dict.pop(self, key) + return key, value + + # -- the following methods do not depend on the internal structure -- + + def keys(self): + 'od.keys() -> list of keys in od' + return list(self) + + def values(self): + 'od.values() -> list of values in od' + return [self[key] for key in self] + + def items(self): + 'od.items() -> list of (key, value) pairs in od' + return [(key, self[key]) for key in self] + + def iterkeys(self): + 'od.iterkeys() -> an iterator over the keys in od' + return iter(self) + + def itervalues(self): + 'od.itervalues -> an iterator over the values in od' + for k in self: + yield self[k] + + def iteritems(self): + 'od.iteritems -> an iterator over the (key, value) items in od' + for k in self: + yield (k, self[k]) + + def update(*args, **kwds): + '''od.update(E, **F) -> None. Update od from dict/iterable E and F. 
+ + If E is a dict instance, does: for k in E: od[k] = E[k] + If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] + Or if E is an iterable of items, does: for k, v in E: od[k] = v + In either case, this is followed by: for k, v in F.items(): od[k] = v + + ''' + if len(args) > 2: + raise TypeError('update() takes at most 2 positional ' + 'arguments (%d given)' % (len(args),)) + elif not args: + raise TypeError('update() takes at least 1 argument (0 given)') + self = args[0] + # Make progressively weaker assumptions about "other" + other = () + if len(args) == 2: + other = args[1] + if isinstance(other, dict): + for key in other: + self[key] = other[key] + elif hasattr(other, 'keys'): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + + __update = update # let subclasses override update without breaking __init__ + + __marker = object() + + def pop(self, key, default=__marker): + '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + + ''' + if key in self: + result = self[key] + del self[key] + return result + if default is self.__marker: + raise KeyError(key) + return default + + def setdefault(self, key, default=None): + 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' + if key in self: + return self[key] + self[key] = default + return default + + def __repr__(self, _repr_running=None): + 'od.__repr__() <==> repr(od)' + if not _repr_running: _repr_running = {} + call_key = id(self), _get_ident() + if call_key in _repr_running: + return '...' + _repr_running[call_key] = 1 + try: + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, self.items()) + finally: + del _repr_running[call_key] + + def __reduce__(self): + 'Return state information for pickling' + items = [[k, self[k]] for k in self] + inst_dict = vars(self).copy() + for k in vars(OrderedDict()): + inst_dict.pop(k, None) + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def copy(self): + 'od.copy() -> a shallow copy of od' + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S + and values equal to v (which defaults to None). + + ''' + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive + while comparison to a regular mapping is order-insensitive. 
+ + ''' + if isinstance(other, OrderedDict): + return len(self)==len(other) and self.items() == other.items() + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other + + # -- the following methods are only used in Python 2.7 -- + + def viewkeys(self): + "od.viewkeys() -> a set-like object providing a view on od's keys" + return KeysView(self) + + def viewvalues(self): + "od.viewvalues() -> an object providing a view on od's values" + return ValuesView(self) + + def viewitems(self): + "od.viewitems() -> a set-like object providing a view on od's items" + return ItemsView(self) + +try: + from logging.config import BaseConfigurator, valid_ident +except ImportError: # pragma: no cover + IDENTIFIER = re.compile('^[a-z_][a-z0-9_]*$', re.I) + + + def valid_ident(s): + m = IDENTIFIER.match(s) + if not m: + raise ValueError('Not a valid Python identifier: %r' % s) + return True + + + # The ConvertingXXX classes are wrappers around standard Python containers, + # and they serve to convert any suitable values in the container. The + # conversion converts base dicts, lists and tuples to their wrapped + # equivalents, whereas strings which match a conversion format are converted + # appropriately. + # + # Each wrapper should have a configurator attribute holding the actual + # configurator to use for conversion. + + class ConvertingDict(dict): + """A converting dictionary wrapper.""" + + def __getitem__(self, key): + value = dict.__getitem__(self, key) + result = self.configurator.convert(value) + #If the converted value is different, save for next time + if value is not result: + self[key] = result + if type(result) in (ConvertingDict, ConvertingList, + ConvertingTuple): + result.parent = self + result.key = key + return result + + def get(self, key, default=None): + value = dict.get(self, key, default) + result = self.configurator.convert(value) + #If the converted value is different, save for next time + if value is not result: + self[key] = result + if type(result) in (ConvertingDict, ConvertingList, + ConvertingTuple): + result.parent = self + result.key = key + return result + + def pop(self, key, default=None): + value = dict.pop(self, key, default) + result = self.configurator.convert(value) + if value is not result: + if type(result) in (ConvertingDict, ConvertingList, + ConvertingTuple): + result.parent = self + result.key = key + return result + + class ConvertingList(list): + """A converting list wrapper.""" + def __getitem__(self, key): + value = list.__getitem__(self, key) + result = self.configurator.convert(value) + #If the converted value is different, save for next time + if value is not result: + self[key] = result + if type(result) in (ConvertingDict, ConvertingList, + ConvertingTuple): + result.parent = self + result.key = key + return result + + def pop(self, idx=-1): + value = list.pop(self, idx) + result = self.configurator.convert(value) + if value is not result: + if type(result) in (ConvertingDict, ConvertingList, + ConvertingTuple): + result.parent = self + return result + + class ConvertingTuple(tuple): + """A converting tuple wrapper.""" + def __getitem__(self, key): + value = tuple.__getitem__(self, key) + result = self.configurator.convert(value) + if value is not result: + if type(result) in (ConvertingDict, ConvertingList, + ConvertingTuple): + result.parent = self + result.key = key + return result + + class BaseConfigurator(object): + """ + The configurator base class which defines some useful defaults. 
+ """ + + CONVERT_PATTERN = re.compile(r'^(?P[a-z]+)://(?P.*)$') + + WORD_PATTERN = re.compile(r'^\s*(\w+)\s*') + DOT_PATTERN = re.compile(r'^\.\s*(\w+)\s*') + INDEX_PATTERN = re.compile(r'^\[\s*(\w+)\s*\]\s*') + DIGIT_PATTERN = re.compile(r'^\d+$') + + value_converters = { + 'ext' : 'ext_convert', + 'cfg' : 'cfg_convert', + } + + # We might want to use a different one, e.g. importlib + importer = staticmethod(__import__) + + def __init__(self, config): + self.config = ConvertingDict(config) + self.config.configurator = self + + def resolve(self, s): + """ + Resolve strings to objects using standard import and attribute + syntax. + """ + name = s.split('.') + used = name.pop(0) + try: + found = self.importer(used) + for frag in name: + used += '.' + frag + try: + found = getattr(found, frag) + except AttributeError: + self.importer(used) + found = getattr(found, frag) + return found + except ImportError: + e, tb = sys.exc_info()[1:] + v = ValueError('Cannot resolve %r: %s' % (s, e)) + v.__cause__, v.__traceback__ = e, tb + raise v + + def ext_convert(self, value): + """Default converter for the ext:// protocol.""" + return self.resolve(value) + + def cfg_convert(self, value): + """Default converter for the cfg:// protocol.""" + rest = value + m = self.WORD_PATTERN.match(rest) + if m is None: + raise ValueError("Unable to convert %r" % value) + else: + rest = rest[m.end():] + d = self.config[m.groups()[0]] + #print d, rest + while rest: + m = self.DOT_PATTERN.match(rest) + if m: + d = d[m.groups()[0]] + else: + m = self.INDEX_PATTERN.match(rest) + if m: + idx = m.groups()[0] + if not self.DIGIT_PATTERN.match(idx): + d = d[idx] + else: + try: + n = int(idx) # try as number first (most likely) + d = d[n] + except TypeError: + d = d[idx] + if m: + rest = rest[m.end():] + else: + raise ValueError('Unable to convert ' + '%r at %r' % (value, rest)) + #rest should be empty + return d + + def convert(self, value): + """ + Convert values to an appropriate type. dicts, lists and tuples are + replaced by their converting alternatives. Strings are checked to + see if they have a conversion format and are converted if they do. 
+ """ + if not isinstance(value, ConvertingDict) and isinstance(value, dict): + value = ConvertingDict(value) + value.configurator = self + elif not isinstance(value, ConvertingList) and isinstance(value, list): + value = ConvertingList(value) + value.configurator = self + elif not isinstance(value, ConvertingTuple) and\ + isinstance(value, tuple): + value = ConvertingTuple(value) + value.configurator = self + elif isinstance(value, string_types): + m = self.CONVERT_PATTERN.match(value) + if m: + d = m.groupdict() + prefix = d['prefix'] + converter = self.value_converters.get(prefix, None) + if converter: + suffix = d['suffix'] + converter = getattr(self, converter) + value = converter(suffix) + return value + + def configure_custom(self, config): + """Configure an object with a user-supplied factory.""" + c = config.pop('()') + if not callable(c): + c = self.resolve(c) + props = config.pop('.', None) + # Check for valid identifiers + kwargs = dict([(k, config[k]) for k in config if valid_ident(k)]) + result = c(**kwargs) + if props: + for name, value in props.items(): + setattr(result, name, value) + return result + + def as_tuple(self, value): + """Utility function which converts lists to tuples.""" + if isinstance(value, list): + value = tuple(value) + return value diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/database.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/database.py new file mode 100644 index 0000000..c16c0c8 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/database.py @@ -0,0 +1,1339 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2017 The Python Software Foundation. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +"""PEP 376 implementation.""" + +from __future__ import unicode_literals + +import base64 +import codecs +import contextlib +import hashlib +import logging +import os +import posixpath +import sys +import zipimport + +from . import DistlibException, resources +from .compat import StringIO +from .version import get_scheme, UnsupportedVersionError +from .metadata import (Metadata, METADATA_FILENAME, WHEEL_METADATA_FILENAME, + LEGACY_METADATA_FILENAME) +from .util import (parse_requirement, cached_property, parse_name_and_version, + read_exports, write_exports, CSVReader, CSVWriter) + + +__all__ = ['Distribution', 'BaseInstalledDistribution', + 'InstalledDistribution', 'EggInfoDistribution', + 'DistributionPath'] + + +logger = logging.getLogger(__name__) + +EXPORTS_FILENAME = 'pydist-exports.json' +COMMANDS_FILENAME = 'pydist-commands.json' + +DIST_FILES = ('INSTALLER', METADATA_FILENAME, 'RECORD', 'REQUESTED', + 'RESOURCES', EXPORTS_FILENAME, 'SHARED') + +DISTINFO_EXT = '.dist-info' + + +class _Cache(object): + """ + A simple cache mapping names and .dist-info paths to distributions + """ + def __init__(self): + """ + Initialise an instance. There is normally one for each DistributionPath. + """ + self.name = {} + self.path = {} + self.generated = False + + def clear(self): + """ + Clear the cache, setting it to its initial state. + """ + self.name.clear() + self.path.clear() + self.generated = False + + def add(self, dist): + """ + Add a distribution to the cache. + :param dist: The distribution to add. + """ + if dist.path not in self.path: + self.path[dist.path] = dist + self.name.setdefault(dist.key, []).append(dist) + + +class DistributionPath(object): + """ + Represents a set of distributions installed on a path (typically sys.path). 
+ """ + def __init__(self, path=None, include_egg=False): + """ + Create an instance from a path, optionally including legacy (distutils/ + setuptools/distribute) distributions. + :param path: The path to use, as a list of directories. If not specified, + sys.path is used. + :param include_egg: If True, this instance will look for and return legacy + distributions as well as those based on PEP 376. + """ + if path is None: + path = sys.path + self.path = path + self._include_dist = True + self._include_egg = include_egg + + self._cache = _Cache() + self._cache_egg = _Cache() + self._cache_enabled = True + self._scheme = get_scheme('default') + + def _get_cache_enabled(self): + return self._cache_enabled + + def _set_cache_enabled(self, value): + self._cache_enabled = value + + cache_enabled = property(_get_cache_enabled, _set_cache_enabled) + + def clear_cache(self): + """ + Clears the internal cache. + """ + self._cache.clear() + self._cache_egg.clear() + + + def _yield_distributions(self): + """ + Yield .dist-info and/or .egg(-info) distributions. + """ + # We need to check if we've seen some resources already, because on + # some Linux systems (e.g. some Debian/Ubuntu variants) there are + # symlinks which alias other files in the environment. + seen = set() + for path in self.path: + finder = resources.finder_for_path(path) + if finder is None: + continue + r = finder.find('') + if not r or not r.is_container: + continue + rset = sorted(r.resources) + for entry in rset: + r = finder.find(entry) + if not r or r.path in seen: + continue + if self._include_dist and entry.endswith(DISTINFO_EXT): + possible_filenames = [METADATA_FILENAME, + WHEEL_METADATA_FILENAME, + LEGACY_METADATA_FILENAME] + for metadata_filename in possible_filenames: + metadata_path = posixpath.join(entry, metadata_filename) + pydist = finder.find(metadata_path) + if pydist: + break + else: + continue + + with contextlib.closing(pydist.as_stream()) as stream: + metadata = Metadata(fileobj=stream, scheme='legacy') + logger.debug('Found %s', r.path) + seen.add(r.path) + yield new_dist_class(r.path, metadata=metadata, + env=self) + elif self._include_egg and entry.endswith(('.egg-info', + '.egg')): + logger.debug('Found %s', r.path) + seen.add(r.path) + yield old_dist_class(r.path, self) + + def _generate_cache(self): + """ + Scan the path for distributions and populate the cache with + those that are found. + """ + gen_dist = not self._cache.generated + gen_egg = self._include_egg and not self._cache_egg.generated + if gen_dist or gen_egg: + for dist in self._yield_distributions(): + if isinstance(dist, InstalledDistribution): + self._cache.add(dist) + else: + self._cache_egg.add(dist) + + if gen_dist: + self._cache.generated = True + if gen_egg: + self._cache_egg.generated = True + + @classmethod + def distinfo_dirname(cls, name, version): + """ + The *name* and *version* parameters are converted into their + filename-escaped form, i.e. any ``'-'`` characters are replaced + with ``'_'`` other than the one in ``'dist-info'`` and the one + separating the name from the version number. + + :parameter name: is converted to a standard distribution name by replacing + any runs of non- alphanumeric characters with a single + ``'-'``. + :type name: string + :parameter version: is converted to a standard version string. Spaces + become dots, and all other non-alphanumeric characters + (except dots) become dashes, with runs of multiple + dashes condensed to a single dash. 
+ :type version: string + :returns: directory name + :rtype: string""" + name = name.replace('-', '_') + return '-'.join([name, version]) + DISTINFO_EXT + + def get_distributions(self): + """ + Provides an iterator that looks for distributions and returns + :class:`InstalledDistribution` or + :class:`EggInfoDistribution` instances for each one of them. + + :rtype: iterator of :class:`InstalledDistribution` and + :class:`EggInfoDistribution` instances + """ + if not self._cache_enabled: + for dist in self._yield_distributions(): + yield dist + else: + self._generate_cache() + + for dist in self._cache.path.values(): + yield dist + + if self._include_egg: + for dist in self._cache_egg.path.values(): + yield dist + + def get_distribution(self, name): + """ + Looks for a named distribution on the path. + + This function only returns the first result found, as no more than one + value is expected. If nothing is found, ``None`` is returned. + + :rtype: :class:`InstalledDistribution`, :class:`EggInfoDistribution` + or ``None`` + """ + result = None + name = name.lower() + if not self._cache_enabled: + for dist in self._yield_distributions(): + if dist.key == name: + result = dist + break + else: + self._generate_cache() + + if name in self._cache.name: + result = self._cache.name[name][0] + elif self._include_egg and name in self._cache_egg.name: + result = self._cache_egg.name[name][0] + return result + + def provides_distribution(self, name, version=None): + """ + Iterates over all distributions to find which distributions provide *name*. + If a *version* is provided, it will be used to filter the results. + + This function only returns the first result found, since no more than + one values are expected. If the directory is not found, returns ``None``. + + :parameter version: a version specifier that indicates the version + required, conforming to the format in ``PEP-345`` + + :type name: string + :type version: string + """ + matcher = None + if version is not None: + try: + matcher = self._scheme.matcher('%s (%s)' % (name, version)) + except ValueError: + raise DistlibException('invalid name or version: %r, %r' % + (name, version)) + + for dist in self.get_distributions(): + # We hit a problem on Travis where enum34 was installed and doesn't + # have a provides attribute ... + if not hasattr(dist, 'provides'): + logger.debug('No "provides": %s', dist) + else: + provided = dist.provides + + for p in provided: + p_name, p_ver = parse_name_and_version(p) + if matcher is None: + if p_name == name: + yield dist + break + else: + if p_name == name and matcher.match(p_ver): + yield dist + break + + def get_file_path(self, name, relative_path): + """ + Return the path to a resource file. + """ + dist = self.get_distribution(name) + if dist is None: + raise LookupError('no distribution named %r found' % name) + return dist.get_resource_path(relative_path) + + def get_exported_entries(self, category, name=None): + """ + Return all of the exported entries in a particular category. + + :param category: The category to search for entries. + :param name: If specified, only entries with that name are returned. + """ + for dist in self.get_distributions(): + r = dist.exports + if category in r: + d = r[category] + if name is not None: + if name in d: + yield d[name] + else: + for v in d.values(): + yield v + + +class Distribution(object): + """ + A base class for distributions, whether installed or from indexes. + Either way, it must have some metadata, so that's all that's needed + for construction. 
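# A short usage sketch for the DistributionPath API defined above. It scans
# sys.path (the default) for .dist-info distributions; pass include_egg=True
# to also pick up legacy .egg/.egg-info metadata. The vendored import path
# below is an assumption based on where this file lives inside pip.
from pip._vendor.distlib.database import DistributionPath

dist_path = DistributionPath(include_egg=True)

for dist in dist_path.get_distributions():
    print(dist.name, dist.version)

pip_dist = dist_path.get_distribution('pip')      # case-insensitive lookup
if pip_dist is not None:
    print(pip_dist.name_and_version)              # prints something like 'pip (x.y.z)'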
+ """ + + build_time_dependency = False + """ + Set to True if it's known to be only a build-time dependency (i.e. + not needed after installation). + """ + + requested = False + """A boolean that indicates whether the ``REQUESTED`` metadata file is + present (in other words, whether the package was installed by user + request or it was installed as a dependency).""" + + def __init__(self, metadata): + """ + Initialise an instance. + :param metadata: The instance of :class:`Metadata` describing this + distribution. + """ + self.metadata = metadata + self.name = metadata.name + self.key = self.name.lower() # for case-insensitive comparisons + self.version = metadata.version + self.locator = None + self.digest = None + self.extras = None # additional features requested + self.context = None # environment marker overrides + self.download_urls = set() + self.digests = {} + + @property + def source_url(self): + """ + The source archive download URL for this distribution. + """ + return self.metadata.source_url + + download_url = source_url # Backward compatibility + + @property + def name_and_version(self): + """ + A utility property which displays the name and version in parentheses. + """ + return '%s (%s)' % (self.name, self.version) + + @property + def provides(self): + """ + A set of distribution names and versions provided by this distribution. + :return: A set of "name (version)" strings. + """ + plist = self.metadata.provides + s = '%s (%s)' % (self.name, self.version) + if s not in plist: + plist.append(s) + return plist + + def _get_requirements(self, req_attr): + md = self.metadata + logger.debug('Getting requirements from metadata %r', md.todict()) + reqts = getattr(md, req_attr) + return set(md.get_requirements(reqts, extras=self.extras, + env=self.context)) + + @property + def run_requires(self): + return self._get_requirements('run_requires') + + @property + def meta_requires(self): + return self._get_requirements('meta_requires') + + @property + def build_requires(self): + return self._get_requirements('build_requires') + + @property + def test_requires(self): + return self._get_requirements('test_requires') + + @property + def dev_requires(self): + return self._get_requirements('dev_requires') + + def matches_requirement(self, req): + """ + Say if this instance matches (fulfills) a requirement. + :param req: The requirement to match. + :rtype req: str + :return: True if it matches, else False. + """ + # Requirement may contain extras - parse to lose those + # from what's passed to the matcher + r = parse_requirement(req) + scheme = get_scheme(self.metadata.scheme) + try: + matcher = scheme.matcher(r.requirement) + except UnsupportedVersionError: + # XXX compat-mode if cannot read the version + logger.warning('could not read version %r - using name only', + req) + name = req.split()[0] + matcher = scheme.matcher(name) + + name = matcher.key # case-insensitive + + result = False + for p in self.provides: + p_name, p_ver = parse_name_and_version(p) + if p_name != name: + continue + try: + result = matcher.match(p_ver) + break + except UnsupportedVersionError: + pass + return result + + def __repr__(self): + """ + Return a textual representation of this instance, + """ + if self.source_url: + suffix = ' [%s]' % self.source_url + else: + suffix = '' + return '' % (self.name, self.version, suffix) + + def __eq__(self, other): + """ + See if this distribution is the same as another. + :param other: The distribution to compare with. To be equal to one + another. 
distributions must have the same type, name, + version and source_url. + :return: True if it is the same, else False. + """ + if type(other) is not type(self): + result = False + else: + result = (self.name == other.name and + self.version == other.version and + self.source_url == other.source_url) + return result + + def __hash__(self): + """ + Compute hash in a way which matches the equality test. + """ + return hash(self.name) + hash(self.version) + hash(self.source_url) + + +class BaseInstalledDistribution(Distribution): + """ + This is the base class for installed distributions (whether PEP 376 or + legacy). + """ + + hasher = None + + def __init__(self, metadata, path, env=None): + """ + Initialise an instance. + :param metadata: An instance of :class:`Metadata` which describes the + distribution. This will normally have been initialised + from a metadata file in the ``path``. + :param path: The path of the ``.dist-info`` or ``.egg-info`` + directory for the distribution. + :param env: This is normally the :class:`DistributionPath` + instance where this distribution was found. + """ + super(BaseInstalledDistribution, self).__init__(metadata) + self.path = path + self.dist_path = env + + def get_hash(self, data, hasher=None): + """ + Get the hash of some data, using a particular hash algorithm, if + specified. + + :param data: The data to be hashed. + :type data: bytes + :param hasher: The name of a hash implementation, supported by hashlib, + or ``None``. Examples of valid values are ``'sha1'``, + ``'sha224'``, ``'sha384'``, '``sha256'``, ``'md5'`` and + ``'sha512'``. If no hasher is specified, the ``hasher`` + attribute of the :class:`InstalledDistribution` instance + is used. If the hasher is determined to be ``None``, MD5 + is used as the hashing algorithm. + :returns: The hash of the data. If a hasher was explicitly specified, + the returned hash will be prefixed with the specified hasher + followed by '='. + :rtype: str + """ + if hasher is None: + hasher = self.hasher + if hasher is None: + hasher = hashlib.md5 + prefix = '' + else: + hasher = getattr(hashlib, hasher) + prefix = '%s=' % self.hasher + digest = hasher(data).digest() + digest = base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii') + return '%s%s' % (prefix, digest) + + +class InstalledDistribution(BaseInstalledDistribution): + """ + Created with the *path* of the ``.dist-info`` directory provided to the + constructor. It reads the metadata contained in ``pydist.json`` when it is + instantiated., or uses a passed in Metadata instance (useful for when + dry-run mode is being used). 
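# Sketch of the RECORD-style hash format produced by get_hash() above: the
# urlsafe base64 encoding of the raw digest, '=' padding stripped, optionally
# prefixed with the hasher name. Only hashlib and base64 are needed, so this
# is independent of distlib; record_hash is a hypothetical helper name.
import base64
import hashlib

def record_hash(data, algorithm='sha256'):
    digest = getattr(hashlib, algorithm)(data).digest()
    encoded = base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii')
    return '%s=%s' % (algorithm, encoded)

print(record_hash(b'print("hello")\n'))
# -> 'sha256=...' in the same shape as the entries written to a RECORD file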
+ """ + + hasher = 'sha256' + + def __init__(self, path, metadata=None, env=None): + self.modules = [] + self.finder = finder = resources.finder_for_path(path) + if finder is None: + raise ValueError('finder unavailable for %s' % path) + if env and env._cache_enabled and path in env._cache.path: + metadata = env._cache.path[path].metadata + elif metadata is None: + r = finder.find(METADATA_FILENAME) + # Temporary - for Wheel 0.23 support + if r is None: + r = finder.find(WHEEL_METADATA_FILENAME) + # Temporary - for legacy support + if r is None: + r = finder.find('METADATA') + if r is None: + raise ValueError('no %s found in %s' % (METADATA_FILENAME, + path)) + with contextlib.closing(r.as_stream()) as stream: + metadata = Metadata(fileobj=stream, scheme='legacy') + + super(InstalledDistribution, self).__init__(metadata, path, env) + + if env and env._cache_enabled: + env._cache.add(self) + + r = finder.find('REQUESTED') + self.requested = r is not None + p = os.path.join(path, 'top_level.txt') + if os.path.exists(p): + with open(p, 'rb') as f: + data = f.read().decode('utf-8') + self.modules = data.splitlines() + + def __repr__(self): + return '' % ( + self.name, self.version, self.path) + + def __str__(self): + return "%s %s" % (self.name, self.version) + + def _get_records(self): + """ + Get the list of installed files for the distribution + :return: A list of tuples of path, hash and size. Note that hash and + size might be ``None`` for some entries. The path is exactly + as stored in the file (which is as in PEP 376). + """ + results = [] + r = self.get_distinfo_resource('RECORD') + with contextlib.closing(r.as_stream()) as stream: + with CSVReader(stream=stream) as record_reader: + # Base location is parent dir of .dist-info dir + #base_location = os.path.dirname(self.path) + #base_location = os.path.abspath(base_location) + for row in record_reader: + missing = [None for i in range(len(row), 3)] + path, checksum, size = row + missing + #if not os.path.isabs(path): + # path = path.replace('/', os.sep) + # path = os.path.join(base_location, path) + results.append((path, checksum, size)) + return results + + @cached_property + def exports(self): + """ + Return the information exported by this distribution. + :return: A dictionary of exports, mapping an export category to a dict + of :class:`ExportEntry` instances describing the individual + export entries, and keyed by name. + """ + result = {} + r = self.get_distinfo_resource(EXPORTS_FILENAME) + if r: + result = self.read_exports() + return result + + def read_exports(self): + """ + Read exports data from a file in .ini format. + + :return: A dictionary of exports, mapping an export category to a list + of :class:`ExportEntry` instances describing the individual + export entries. + """ + result = {} + r = self.get_distinfo_resource(EXPORTS_FILENAME) + if r: + with contextlib.closing(r.as_stream()) as stream: + result = read_exports(stream) + return result + + def write_exports(self, exports): + """ + Write a dictionary of exports to a file in .ini format. + :param exports: A dictionary of exports, mapping an export category to + a list of :class:`ExportEntry` instances describing the + individual export entries. + """ + rf = self.get_distinfo_file(EXPORTS_FILENAME) + with open(rf, 'w') as f: + write_exports(exports, f) + + def get_resource_path(self, relative_path): + """ + NOTE: This API may change in the future. + + Return the absolute path to a resource file with the given relative + path. 
+ + :param relative_path: The path, relative to .dist-info, of the resource + of interest. + :return: The absolute path where the resource is to be found. + """ + r = self.get_distinfo_resource('RESOURCES') + with contextlib.closing(r.as_stream()) as stream: + with CSVReader(stream=stream) as resources_reader: + for relative, destination in resources_reader: + if relative == relative_path: + return destination + raise KeyError('no resource file with relative path %r ' + 'is installed' % relative_path) + + def list_installed_files(self): + """ + Iterates over the ``RECORD`` entries and returns a tuple + ``(path, hash, size)`` for each line. + + :returns: iterator of (path, hash, size) + """ + for result in self._get_records(): + yield result + + def write_installed_files(self, paths, prefix, dry_run=False): + """ + Writes the ``RECORD`` file, using the ``paths`` iterable passed in. Any + existing ``RECORD`` file is silently overwritten. + + prefix is used to determine when to write absolute paths. + """ + prefix = os.path.join(prefix, '') + base = os.path.dirname(self.path) + base_under_prefix = base.startswith(prefix) + base = os.path.join(base, '') + record_path = self.get_distinfo_file('RECORD') + logger.info('creating %s', record_path) + if dry_run: + return None + with CSVWriter(record_path) as writer: + for path in paths: + if os.path.isdir(path) or path.endswith(('.pyc', '.pyo')): + # do not put size and hash, as in PEP-376 + hash_value = size = '' + else: + size = '%d' % os.path.getsize(path) + with open(path, 'rb') as fp: + hash_value = self.get_hash(fp.read()) + if path.startswith(base) or (base_under_prefix and + path.startswith(prefix)): + path = os.path.relpath(path, base) + writer.writerow((path, hash_value, size)) + + # add the RECORD file itself + if record_path.startswith(base): + record_path = os.path.relpath(record_path, base) + writer.writerow((record_path, '', '')) + return record_path + + def check_installed_files(self): + """ + Checks that the hashes and sizes of the files in ``RECORD`` are + matched by the files themselves. Returns a (possibly empty) list of + mismatches. Each entry in the mismatch list will be a tuple consisting + of the path, 'exists', 'size' or 'hash' according to what didn't match + (existence is checked first, then size, then hash), the expected + value and the actual value. + """ + mismatches = [] + base = os.path.dirname(self.path) + record_path = self.get_distinfo_file('RECORD') + for path, hash_value, size in self.list_installed_files(): + if not os.path.isabs(path): + path = os.path.join(base, path) + if path == record_path: + continue + if not os.path.exists(path): + mismatches.append((path, 'exists', True, False)) + elif os.path.isfile(path): + actual_size = str(os.path.getsize(path)) + if size and actual_size != size: + mismatches.append((path, 'size', size, actual_size)) + elif hash_value: + if '=' in hash_value: + hasher = hash_value.split('=', 1)[0] + else: + hasher = None + + with open(path, 'rb') as f: + actual_hash = self.get_hash(f.read(), hasher) + if actual_hash != hash_value: + mismatches.append((path, 'hash', hash_value, actual_hash)) + return mismatches + + @cached_property + def shared_locations(self): + """ + A dictionary of shared locations whose keys are in the set 'prefix', + 'purelib', 'platlib', 'scripts', 'headers', 'data' and 'namespace'. + The corresponding value is the absolute path of that category for + this distribution, and takes into account any paths selected by the + user at installation time (e.g. 
via command-line arguments). In the + case of the 'namespace' key, this would be a list of absolute paths + for the roots of namespace packages in this distribution. + + The first time this property is accessed, the relevant information is + read from the SHARED file in the .dist-info directory. + """ + result = {} + shared_path = os.path.join(self.path, 'SHARED') + if os.path.isfile(shared_path): + with codecs.open(shared_path, 'r', encoding='utf-8') as f: + lines = f.read().splitlines() + for line in lines: + key, value = line.split('=', 1) + if key == 'namespace': + result.setdefault(key, []).append(value) + else: + result[key] = value + return result + + def write_shared_locations(self, paths, dry_run=False): + """ + Write shared location information to the SHARED file in .dist-info. + :param paths: A dictionary as described in the documentation for + :meth:`shared_locations`. + :param dry_run: If True, the action is logged but no file is actually + written. + :return: The path of the file written to. + """ + shared_path = os.path.join(self.path, 'SHARED') + logger.info('creating %s', shared_path) + if dry_run: + return None + lines = [] + for key in ('prefix', 'lib', 'headers', 'scripts', 'data'): + path = paths[key] + if os.path.isdir(paths[key]): + lines.append('%s=%s' % (key, path)) + for ns in paths.get('namespace', ()): + lines.append('namespace=%s' % ns) + + with codecs.open(shared_path, 'w', encoding='utf-8') as f: + f.write('\n'.join(lines)) + return shared_path + + def get_distinfo_resource(self, path): + if path not in DIST_FILES: + raise DistlibException('invalid path for a dist-info file: ' + '%r at %r' % (path, self.path)) + finder = resources.finder_for_path(self.path) + if finder is None: + raise DistlibException('Unable to get a finder for %s' % self.path) + return finder.find(path) + + def get_distinfo_file(self, path): + """ + Returns a path located under the ``.dist-info`` directory. Returns a + string representing the path. + + :parameter path: a ``'/'``-separated path relative to the + ``.dist-info`` directory or an absolute path; + If *path* is an absolute path and doesn't start + with the ``.dist-info`` directory path, + a :class:`DistlibException` is raised + :type path: str + :rtype: str + """ + # Check if it is an absolute path # XXX use relpath, add tests + if path.find(os.sep) >= 0: + # it's an absolute path? + distinfo_dirname, path = path.split(os.sep)[-2:] + if distinfo_dirname != self.path.split(os.sep)[-1]: + raise DistlibException( + 'dist-info file %r does not belong to the %r %s ' + 'distribution' % (path, self.name, self.version)) + + # The file must be relative + if path not in DIST_FILES: + raise DistlibException('invalid path for a dist-info file: ' + '%r at %r' % (path, self.path)) + + return os.path.join(self.path, path) + + def list_distinfo_files(self): + """ + Iterates over the ``RECORD`` entries and returns paths for each line if + the path is pointing to a file located in the ``.dist-info`` directory + or one of its subdirectories. 
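# Sketch: auditing an installed distribution with the RECORD helpers defined
# above. check_installed_files() returns (path, 'exists'|'size'|'hash',
# expected, actual) tuples for anything that no longer matches RECORD. The
# vendored import path is, again, an assumption.
from pip._vendor.distlib.database import DistributionPath

dist = DistributionPath().get_distribution('pip')
if dist is not None:
    for path, hash_value, size in dist.list_installed_files():
        print(path, hash_value, size)
    problems = dist.check_installed_files()
    if not problems:
        print('all RECORD entries match')
    else:
        for path, kind, expected, actual in problems:
            print('mismatch:', path, kind, expected, actual)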
+ + :returns: iterator of paths + """ + base = os.path.dirname(self.path) + for path, checksum, size in self._get_records(): + # XXX add separator or use real relpath algo + if not os.path.isabs(path): + path = os.path.join(base, path) + if path.startswith(self.path): + yield path + + def __eq__(self, other): + return (isinstance(other, InstalledDistribution) and + self.path == other.path) + + # See http://docs.python.org/reference/datamodel#object.__hash__ + __hash__ = object.__hash__ + + +class EggInfoDistribution(BaseInstalledDistribution): + """Created with the *path* of the ``.egg-info`` directory or file provided + to the constructor. It reads the metadata contained in the file itself, or + if the given path happens to be a directory, the metadata is read from the + file ``PKG-INFO`` under that directory.""" + + requested = True # as we have no way of knowing, assume it was + shared_locations = {} + + def __init__(self, path, env=None): + def set_name_and_version(s, n, v): + s.name = n + s.key = n.lower() # for case-insensitive comparisons + s.version = v + + self.path = path + self.dist_path = env + if env and env._cache_enabled and path in env._cache_egg.path: + metadata = env._cache_egg.path[path].metadata + set_name_and_version(self, metadata.name, metadata.version) + else: + metadata = self._get_metadata(path) + + # Need to be set before caching + set_name_and_version(self, metadata.name, metadata.version) + + if env and env._cache_enabled: + env._cache_egg.add(self) + super(EggInfoDistribution, self).__init__(metadata, path, env) + + def _get_metadata(self, path): + requires = None + + def parse_requires_data(data): + """Create a list of dependencies from a requires.txt file. + + *data*: the contents of a setuptools-produced requires.txt file. + """ + reqs = [] + lines = data.splitlines() + for line in lines: + line = line.strip() + if line.startswith('['): + logger.warning('Unexpected line: quitting requirement scan: %r', + line) + break + r = parse_requirement(line) + if not r: + logger.warning('Not recognised as a requirement: %r', line) + continue + if r.extras: + logger.warning('extra requirements in requires.txt are ' + 'not supported') + if not r.constraints: + reqs.append(r.name) + else: + cons = ', '.join('%s%s' % c for c in r.constraints) + reqs.append('%s (%s)' % (r.name, cons)) + return reqs + + def parse_requires_path(req_path): + """Create a list of dependencies from a requires.txt file. + + *req_path*: the path to a setuptools-produced requires.txt file. 
+ """ + + reqs = [] + try: + with codecs.open(req_path, 'r', 'utf-8') as fp: + reqs = parse_requires_data(fp.read()) + except IOError: + pass + return reqs + + tl_path = tl_data = None + if path.endswith('.egg'): + if os.path.isdir(path): + p = os.path.join(path, 'EGG-INFO') + meta_path = os.path.join(p, 'PKG-INFO') + metadata = Metadata(path=meta_path, scheme='legacy') + req_path = os.path.join(p, 'requires.txt') + tl_path = os.path.join(p, 'top_level.txt') + requires = parse_requires_path(req_path) + else: + # FIXME handle the case where zipfile is not available + zipf = zipimport.zipimporter(path) + fileobj = StringIO( + zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8')) + metadata = Metadata(fileobj=fileobj, scheme='legacy') + try: + data = zipf.get_data('EGG-INFO/requires.txt') + tl_data = zipf.get_data('EGG-INFO/top_level.txt').decode('utf-8') + requires = parse_requires_data(data.decode('utf-8')) + except IOError: + requires = None + elif path.endswith('.egg-info'): + if os.path.isdir(path): + req_path = os.path.join(path, 'requires.txt') + requires = parse_requires_path(req_path) + path = os.path.join(path, 'PKG-INFO') + tl_path = os.path.join(path, 'top_level.txt') + metadata = Metadata(path=path, scheme='legacy') + else: + raise DistlibException('path must end with .egg-info or .egg, ' + 'got %r' % path) + + if requires: + metadata.add_requirements(requires) + # look for top-level modules in top_level.txt, if present + if tl_data is None: + if tl_path is not None and os.path.exists(tl_path): + with open(tl_path, 'rb') as f: + tl_data = f.read().decode('utf-8') + if not tl_data: + tl_data = [] + else: + tl_data = tl_data.splitlines() + self.modules = tl_data + return metadata + + def __repr__(self): + return '' % ( + self.name, self.version, self.path) + + def __str__(self): + return "%s %s" % (self.name, self.version) + + def check_installed_files(self): + """ + Checks that the hashes and sizes of the files in ``RECORD`` are + matched by the files themselves. Returns a (possibly empty) list of + mismatches. Each entry in the mismatch list will be a tuple consisting + of the path, 'exists', 'size' or 'hash' according to what didn't match + (existence is checked first, then size, then hash), the expected + value and the actual value. + """ + mismatches = [] + record_path = os.path.join(self.path, 'installed-files.txt') + if os.path.exists(record_path): + for path, _, _ in self.list_installed_files(): + if path == record_path: + continue + if not os.path.exists(path): + mismatches.append((path, 'exists', True, False)) + return mismatches + + def list_installed_files(self): + """ + Iterates over the ``installed-files.txt`` entries and returns a tuple + ``(path, hash, size)`` for each line. 
+ + :returns: a list of (path, hash, size) + """ + + def _md5(path): + f = open(path, 'rb') + try: + content = f.read() + finally: + f.close() + return hashlib.md5(content).hexdigest() + + def _size(path): + return os.stat(path).st_size + + record_path = os.path.join(self.path, 'installed-files.txt') + result = [] + if os.path.exists(record_path): + with codecs.open(record_path, 'r', encoding='utf-8') as f: + for line in f: + line = line.strip() + p = os.path.normpath(os.path.join(self.path, line)) + # "./" is present as a marker between installed files + # and installation metadata files + if not os.path.exists(p): + logger.warning('Non-existent file: %s', p) + if p.endswith(('.pyc', '.pyo')): + continue + #otherwise fall through and fail + if not os.path.isdir(p): + result.append((p, _md5(p), _size(p))) + result.append((record_path, None, None)) + return result + + def list_distinfo_files(self, absolute=False): + """ + Iterates over the ``installed-files.txt`` entries and returns paths for + each line if the path is pointing to a file located in the + ``.egg-info`` directory or one of its subdirectories. + + :parameter absolute: If *absolute* is ``True``, each returned path is + transformed into a local absolute path. Otherwise the + raw value from ``installed-files.txt`` is returned. + :type absolute: boolean + :returns: iterator of paths + """ + record_path = os.path.join(self.path, 'installed-files.txt') + if os.path.exists(record_path): + skip = True + with codecs.open(record_path, 'r', encoding='utf-8') as f: + for line in f: + line = line.strip() + if line == './': + skip = False + continue + if not skip: + p = os.path.normpath(os.path.join(self.path, line)) + if p.startswith(self.path): + if absolute: + yield p + else: + yield line + + def __eq__(self, other): + return (isinstance(other, EggInfoDistribution) and + self.path == other.path) + + # See http://docs.python.org/reference/datamodel#object.__hash__ + __hash__ = object.__hash__ + +new_dist_class = InstalledDistribution +old_dist_class = EggInfoDistribution + + +class DependencyGraph(object): + """ + Represents a dependency graph between distributions. + + The dependency relationships are stored in an ``adjacency_list`` that maps + distributions to a list of ``(other, label)`` tuples where ``other`` + is a distribution and the edge is labeled with ``label`` (i.e. the version + specifier, if such was provided). Also, for more efficient traversal, for + every distribution ``x``, a list of predecessors is kept in + ``reverse_list[x]``. An edge from distribution ``a`` to + distribution ``b`` means that ``a`` depends on ``b``. If any missing + dependencies are found, they are stored in ``missing``, which is a + dictionary that maps distributions to a list of requirements that were not + provided by any other distributions. + """ + + def __init__(self): + self.adjacency_list = {} + self.reverse_list = {} + self.missing = {} + + def add_distribution(self, distribution): + """Add the *distribution* to the graph. + + :type distribution: :class:`distutils2.database.InstalledDistribution` + or :class:`distutils2.database.EggInfoDistribution` + """ + self.adjacency_list[distribution] = [] + self.reverse_list[distribution] = [] + #self.missing[distribution] = [] + + def add_edge(self, x, y, label=None): + """Add an edge from distribution *x* to distribution *y* with the given + *label*. 
+ + :type x: :class:`distutils2.database.InstalledDistribution` or + :class:`distutils2.database.EggInfoDistribution` + :type y: :class:`distutils2.database.InstalledDistribution` or + :class:`distutils2.database.EggInfoDistribution` + :type label: ``str`` or ``None`` + """ + self.adjacency_list[x].append((y, label)) + # multiple edges are allowed, so be careful + if x not in self.reverse_list[y]: + self.reverse_list[y].append(x) + + def add_missing(self, distribution, requirement): + """ + Add a missing *requirement* for the given *distribution*. + + :type distribution: :class:`distutils2.database.InstalledDistribution` + or :class:`distutils2.database.EggInfoDistribution` + :type requirement: ``str`` + """ + logger.debug('%s missing %r', distribution, requirement) + self.missing.setdefault(distribution, []).append(requirement) + + def _repr_dist(self, dist): + return '%s %s' % (dist.name, dist.version) + + def repr_node(self, dist, level=1): + """Prints only a subgraph""" + output = [self._repr_dist(dist)] + for other, label in self.adjacency_list[dist]: + dist = self._repr_dist(other) + if label is not None: + dist = '%s [%s]' % (dist, label) + output.append(' ' * level + str(dist)) + suboutput = self.repr_node(other, level + 1) + subs = suboutput.split('\n') + output.extend(subs[1:]) + return '\n'.join(output) + + def to_dot(self, f, skip_disconnected=True): + """Writes a DOT output for the graph to the provided file *f*. + + If *skip_disconnected* is set to ``True``, then all distributions + that are not dependent on any other distribution are skipped. + + :type f: has to support ``file``-like operations + :type skip_disconnected: ``bool`` + """ + disconnected = [] + + f.write("digraph dependencies {\n") + for dist, adjs in self.adjacency_list.items(): + if len(adjs) == 0 and not skip_disconnected: + disconnected.append(dist) + for other, label in adjs: + if not label is None: + f.write('"%s" -> "%s" [label="%s"]\n' % + (dist.name, other.name, label)) + else: + f.write('"%s" -> "%s"\n' % (dist.name, other.name)) + if not skip_disconnected and len(disconnected) > 0: + f.write('subgraph disconnected {\n') + f.write('label = "Disconnected"\n') + f.write('bgcolor = red\n') + + for dist in disconnected: + f.write('"%s"' % dist.name) + f.write('\n') + f.write('}\n') + f.write('}\n') + + def topological_sort(self): + """ + Perform a topological sort of the graph. + :return: A tuple, the first element of which is a topologically sorted + list of distributions, and the second element of which is a + list of distributions that cannot be sorted because they have + circular dependencies and so form a cycle. + """ + result = [] + # Make a shallow copy of the adjacency list + alist = {} + for k, v in self.adjacency_list.items(): + alist[k] = v[:] + while True: + # See what we can remove in this run + to_remove = [] + for k, v in list(alist.items())[:]: + if not v: + to_remove.append(k) + del alist[k] + if not to_remove: + # What's left in alist (if anything) is a cycle. 
+ break + # Remove from the adjacency list of others + for k, v in alist.items(): + alist[k] = [(d, r) for d, r in v if d not in to_remove] + logger.debug('Moving to result: %s', + ['%s (%s)' % (d.name, d.version) for d in to_remove]) + result.extend(to_remove) + return result, list(alist.keys()) + + def __repr__(self): + """Representation of the graph""" + output = [] + for dist, adjs in self.adjacency_list.items(): + output.append(self.repr_node(dist)) + return '\n'.join(output) + + +def make_graph(dists, scheme='default'): + """Makes a dependency graph from the given distributions. + + :parameter dists: a list of distributions + :type dists: list of :class:`distutils2.database.InstalledDistribution` and + :class:`distutils2.database.EggInfoDistribution` instances + :rtype: a :class:`DependencyGraph` instance + """ + scheme = get_scheme(scheme) + graph = DependencyGraph() + provided = {} # maps names to lists of (version, dist) tuples + + # first, build the graph and find out what's provided + for dist in dists: + graph.add_distribution(dist) + + for p in dist.provides: + name, version = parse_name_and_version(p) + logger.debug('Add to provided: %s, %s, %s', name, version, dist) + provided.setdefault(name, []).append((version, dist)) + + # now make the edges + for dist in dists: + requires = (dist.run_requires | dist.meta_requires | + dist.build_requires | dist.dev_requires) + for req in requires: + try: + matcher = scheme.matcher(req) + except UnsupportedVersionError: + # XXX compat-mode if cannot read the version + logger.warning('could not read version %r - using name only', + req) + name = req.split()[0] + matcher = scheme.matcher(name) + + name = matcher.key # case-insensitive + + matched = False + if name in provided: + for version, provider in provided[name]: + try: + match = matcher.match(version) + except UnsupportedVersionError: + match = False + + if match: + graph.add_edge(dist, provider, req) + matched = True + break + if not matched: + graph.add_missing(dist, req) + return graph + + +def get_dependent_dists(dists, dist): + """Recursively generate a list of distributions from *dists* that are + dependent on *dist*. + + :param dists: a list of distributions + :param dist: a distribution, member of *dists* for which we are interested + """ + if dist not in dists: + raise DistlibException('given distribution %r is not a member ' + 'of the list' % dist.name) + graph = make_graph(dists) + + dep = [dist] # dependent distributions + todo = graph.reverse_list[dist] # list of nodes we should inspect + + while todo: + d = todo.pop() + dep.append(d) + for succ in graph.reverse_list[d]: + if succ not in dep: + todo.append(succ) + + dep.pop(0) # remove dist from dep, was there to prevent infinite loops + return dep + + +def get_required_dists(dists, dist): + """Recursively generate a list of distributions from *dists* that are + required by *dist*. + + :param dists: a list of distributions + :param dist: a distribution, member of *dists* for which we are interested + """ + if dist not in dists: + raise DistlibException('given distribution %r is not a member ' + 'of the list' % dist.name) + graph = make_graph(dists) + + req = [] # required distributions + todo = graph.adjacency_list[dist] # list of nodes we should inspect + + while todo: + d = todo.pop()[0] + req.append(d) + for pred in graph.adjacency_list[d]: + if pred not in req: + todo.append(pred) + + return req + + +def make_dist(name, version, **kwargs): + """ + A convenience method for making a dist given just a name and version. 
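# Sketch tying together the graph helpers above: build a DependencyGraph for
# everything on sys.path, topologically sort it (dependencies first), and list
# what one distribution requires. Note that the lookup reuses an object from
# `dists`, since the graph is keyed by identity hash.
from pip._vendor.distlib.database import (DistributionPath, make_graph,
                                          get_required_dists)

dists = list(DistributionPath().get_distributions())
graph = make_graph(dists)

ordered, cyclic = graph.topological_sort()
print('install order (dependencies first):', [d.name for d in ordered])
if cyclic:
    print('unresolvable cycle between:', [d.name for d in cyclic])

target = next((d for d in dists if d.key == 'pip'), None)
if target is not None:
    required = get_required_dists(dists, target)
    print('pip requires:', [d.name for d in required])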
+ """ + summary = kwargs.pop('summary', 'Placeholder for summary') + md = Metadata(**kwargs) + md.name = name + md.version = version + md.summary = summary or 'Placeholder for summary' + return Distribution(md) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/index.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/index.py new file mode 100644 index 0000000..7a87cdc --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/index.py @@ -0,0 +1,516 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2013 Vinay Sajip. +# Licensed to the Python Software Foundation under a contributor agreement. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +import hashlib +import logging +import os +import shutil +import subprocess +import tempfile +try: + from threading import Thread +except ImportError: + from dummy_threading import Thread + +from . import DistlibException +from .compat import (HTTPBasicAuthHandler, Request, HTTPPasswordMgr, + urlparse, build_opener, string_types) +from .util import cached_property, zip_dir, ServerProxy + +logger = logging.getLogger(__name__) + +DEFAULT_INDEX = 'https://pypi.org/pypi' +DEFAULT_REALM = 'pypi' + +class PackageIndex(object): + """ + This class represents a package index compatible with PyPI, the Python + Package Index. + """ + + boundary = b'----------ThIs_Is_tHe_distlib_index_bouNdaRY_$' + + def __init__(self, url=None): + """ + Initialise an instance. + + :param url: The URL of the index. If not specified, the URL for PyPI is + used. + """ + self.url = url or DEFAULT_INDEX + self.read_configuration() + scheme, netloc, path, params, query, frag = urlparse(self.url) + if params or query or frag or scheme not in ('http', 'https'): + raise DistlibException('invalid repository: %s' % self.url) + self.password_handler = None + self.ssl_verifier = None + self.gpg = None + self.gpg_home = None + with open(os.devnull, 'w') as sink: + # Use gpg by default rather than gpg2, as gpg2 insists on + # prompting for passwords + for s in ('gpg', 'gpg2'): + try: + rc = subprocess.check_call([s, '--version'], stdout=sink, + stderr=sink) + if rc == 0: + self.gpg = s + break + except OSError: + pass + + def _get_pypirc_command(self): + """ + Get the distutils command for interacting with PyPI configurations. + :return: the command. + """ + from distutils.core import Distribution + from distutils.config import PyPIRCCommand + d = Distribution() + return PyPIRCCommand(d) + + def read_configuration(self): + """ + Read the PyPI access configuration as supported by distutils, getting + PyPI to do the actual work. This populates ``username``, ``password``, + ``realm`` and ``url`` attributes from the configuration. + """ + # get distutils to do the work + c = self._get_pypirc_command() + c.repository = self.url + cfg = c._read_pypirc() + self.username = cfg.get('username') + self.password = cfg.get('password') + self.realm = cfg.get('realm', 'pypi') + self.url = cfg.get('repository', self.url) + + def save_configuration(self): + """ + Save the PyPI access configuration. You must have set ``username`` and + ``password`` attributes before calling this method. + + Again, distutils is used to do the actual work. + """ + self.check_credentials() + # get distutils to do the work + c = self._get_pypirc_command() + c._store_pypirc(self.username, self.password) + + def check_credentials(self): + """ + Check that ``username`` and ``password`` have been set, and raise an + exception if not. 
+ """ + if self.username is None or self.password is None: + raise DistlibException('username and password must be set') + pm = HTTPPasswordMgr() + _, netloc, _, _, _, _ = urlparse(self.url) + pm.add_password(self.realm, netloc, self.username, self.password) + self.password_handler = HTTPBasicAuthHandler(pm) + + def register(self, metadata): + """ + Register a distribution on PyPI, using the provided metadata. + + :param metadata: A :class:`Metadata` instance defining at least a name + and version number for the distribution to be + registered. + :return: The HTTP response received from PyPI upon submission of the + request. + """ + self.check_credentials() + metadata.validate() + d = metadata.todict() + d[':action'] = 'verify' + request = self.encode_request(d.items(), []) + response = self.send_request(request) + d[':action'] = 'submit' + request = self.encode_request(d.items(), []) + return self.send_request(request) + + def _reader(self, name, stream, outbuf): + """ + Thread runner for reading lines of from a subprocess into a buffer. + + :param name: The logical name of the stream (used for logging only). + :param stream: The stream to read from. This will typically a pipe + connected to the output stream of a subprocess. + :param outbuf: The list to append the read lines to. + """ + while True: + s = stream.readline() + if not s: + break + s = s.decode('utf-8').rstrip() + outbuf.append(s) + logger.debug('%s: %s' % (name, s)) + stream.close() + + def get_sign_command(self, filename, signer, sign_password, + keystore=None): + """ + Return a suitable command for signing a file. + + :param filename: The pathname to the file to be signed. + :param signer: The identifier of the signer of the file. + :param sign_password: The passphrase for the signer's + private key used for signing. + :param keystore: The path to a directory which contains the keys + used in verification. If not specified, the + instance's ``gpg_home`` attribute is used instead. + :return: The signing command as a list suitable to be + passed to :class:`subprocess.Popen`. + """ + cmd = [self.gpg, '--status-fd', '2', '--no-tty'] + if keystore is None: + keystore = self.gpg_home + if keystore: + cmd.extend(['--homedir', keystore]) + if sign_password is not None: + cmd.extend(['--batch', '--passphrase-fd', '0']) + td = tempfile.mkdtemp() + sf = os.path.join(td, os.path.basename(filename) + '.asc') + cmd.extend(['--detach-sign', '--armor', '--local-user', + signer, '--output', sf, filename]) + logger.debug('invoking: %s', ' '.join(cmd)) + return cmd, sf + + def run_command(self, cmd, input_data=None): + """ + Run a command in a child process , passing it any input data specified. + + :param cmd: The command to run. + :param input_data: If specified, this must be a byte string containing + data to be sent to the child process. + :return: A tuple consisting of the subprocess' exit code, a list of + lines read from the subprocess' ``stdout``, and a list of + lines read from the subprocess' ``stderr``. 
+ """ + kwargs = { + 'stdout': subprocess.PIPE, + 'stderr': subprocess.PIPE, + } + if input_data is not None: + kwargs['stdin'] = subprocess.PIPE + stdout = [] + stderr = [] + p = subprocess.Popen(cmd, **kwargs) + # We don't use communicate() here because we may need to + # get clever with interacting with the command + t1 = Thread(target=self._reader, args=('stdout', p.stdout, stdout)) + t1.start() + t2 = Thread(target=self._reader, args=('stderr', p.stderr, stderr)) + t2.start() + if input_data is not None: + p.stdin.write(input_data) + p.stdin.close() + + p.wait() + t1.join() + t2.join() + return p.returncode, stdout, stderr + + def sign_file(self, filename, signer, sign_password, keystore=None): + """ + Sign a file. + + :param filename: The pathname to the file to be signed. + :param signer: The identifier of the signer of the file. + :param sign_password: The passphrase for the signer's + private key used for signing. + :param keystore: The path to a directory which contains the keys + used in signing. If not specified, the instance's + ``gpg_home`` attribute is used instead. + :return: The absolute pathname of the file where the signature is + stored. + """ + cmd, sig_file = self.get_sign_command(filename, signer, sign_password, + keystore) + rc, stdout, stderr = self.run_command(cmd, + sign_password.encode('utf-8')) + if rc != 0: + raise DistlibException('sign command failed with error ' + 'code %s' % rc) + return sig_file + + def upload_file(self, metadata, filename, signer=None, sign_password=None, + filetype='sdist', pyversion='source', keystore=None): + """ + Upload a release file to the index. + + :param metadata: A :class:`Metadata` instance defining at least a name + and version number for the file to be uploaded. + :param filename: The pathname of the file to be uploaded. + :param signer: The identifier of the signer of the file. + :param sign_password: The passphrase for the signer's + private key used for signing. + :param filetype: The type of the file being uploaded. This is the + distutils command which produced that file, e.g. + ``sdist`` or ``bdist_wheel``. + :param pyversion: The version of Python which the release relates + to. For code compatible with any Python, this would + be ``source``, otherwise it would be e.g. ``3.2``. + :param keystore: The path to a directory which contains the keys + used in signing. If not specified, the instance's + ``gpg_home`` attribute is used instead. + :return: The HTTP response received from PyPI upon submission of the + request. 
+ """ + self.check_credentials() + if not os.path.exists(filename): + raise DistlibException('not found: %s' % filename) + metadata.validate() + d = metadata.todict() + sig_file = None + if signer: + if not self.gpg: + logger.warning('no signing program available - not signed') + else: + sig_file = self.sign_file(filename, signer, sign_password, + keystore) + with open(filename, 'rb') as f: + file_data = f.read() + md5_digest = hashlib.md5(file_data).hexdigest() + sha256_digest = hashlib.sha256(file_data).hexdigest() + d.update({ + ':action': 'file_upload', + 'protocol_version': '1', + 'filetype': filetype, + 'pyversion': pyversion, + 'md5_digest': md5_digest, + 'sha256_digest': sha256_digest, + }) + files = [('content', os.path.basename(filename), file_data)] + if sig_file: + with open(sig_file, 'rb') as f: + sig_data = f.read() + files.append(('gpg_signature', os.path.basename(sig_file), + sig_data)) + shutil.rmtree(os.path.dirname(sig_file)) + request = self.encode_request(d.items(), files) + return self.send_request(request) + + def upload_documentation(self, metadata, doc_dir): + """ + Upload documentation to the index. + + :param metadata: A :class:`Metadata` instance defining at least a name + and version number for the documentation to be + uploaded. + :param doc_dir: The pathname of the directory which contains the + documentation. This should be the directory that + contains the ``index.html`` for the documentation. + :return: The HTTP response received from PyPI upon submission of the + request. + """ + self.check_credentials() + if not os.path.isdir(doc_dir): + raise DistlibException('not a directory: %r' % doc_dir) + fn = os.path.join(doc_dir, 'index.html') + if not os.path.exists(fn): + raise DistlibException('not found: %r' % fn) + metadata.validate() + name, version = metadata.name, metadata.version + zip_data = zip_dir(doc_dir).getvalue() + fields = [(':action', 'doc_upload'), + ('name', name), ('version', version)] + files = [('content', name, zip_data)] + request = self.encode_request(fields, files) + return self.send_request(request) + + def get_verify_command(self, signature_filename, data_filename, + keystore=None): + """ + Return a suitable command for verifying a file. + + :param signature_filename: The pathname to the file containing the + signature. + :param data_filename: The pathname to the file containing the + signed data. + :param keystore: The path to a directory which contains the keys + used in verification. If not specified, the + instance's ``gpg_home`` attribute is used instead. + :return: The verifying command as a list suitable to be + passed to :class:`subprocess.Popen`. + """ + cmd = [self.gpg, '--status-fd', '2', '--no-tty'] + if keystore is None: + keystore = self.gpg_home + if keystore: + cmd.extend(['--homedir', keystore]) + cmd.extend(['--verify', signature_filename, data_filename]) + logger.debug('invoking: %s', ' '.join(cmd)) + return cmd + + def verify_signature(self, signature_filename, data_filename, + keystore=None): + """ + Verify a signature for a file. + + :param signature_filename: The pathname to the file containing the + signature. + :param data_filename: The pathname to the file containing the + signed data. + :param keystore: The path to a directory which contains the keys + used in verification. If not specified, the + instance's ``gpg_home`` attribute is used instead. + :return: True if the signature was verified, else False. 
+ """ + if not self.gpg: + raise DistlibException('verification unavailable because gpg ' + 'unavailable') + cmd = self.get_verify_command(signature_filename, data_filename, + keystore) + rc, stdout, stderr = self.run_command(cmd) + if rc not in (0, 1): + raise DistlibException('verify command failed with error ' + 'code %s' % rc) + return rc == 0 + + def download_file(self, url, destfile, digest=None, reporthook=None): + """ + This is a convenience method for downloading a file from an URL. + Normally, this will be a file from the index, though currently + no check is made for this (i.e. a file can be downloaded from + anywhere). + + The method is just like the :func:`urlretrieve` function in the + standard library, except that it allows digest computation to be + done during download and checking that the downloaded data + matched any expected value. + + :param url: The URL of the file to be downloaded (assumed to be + available via an HTTP GET request). + :param destfile: The pathname where the downloaded file is to be + saved. + :param digest: If specified, this must be a (hasher, value) + tuple, where hasher is the algorithm used (e.g. + ``'md5'``) and ``value`` is the expected value. + :param reporthook: The same as for :func:`urlretrieve` in the + standard library. + """ + if digest is None: + digester = None + logger.debug('No digest specified') + else: + if isinstance(digest, (list, tuple)): + hasher, digest = digest + else: + hasher = 'md5' + digester = getattr(hashlib, hasher)() + logger.debug('Digest specified: %s' % digest) + # The following code is equivalent to urlretrieve. + # We need to do it this way so that we can compute the + # digest of the file as we go. + with open(destfile, 'wb') as dfp: + # addinfourl is not a context manager on 2.x + # so we have to use try/finally + sfp = self.send_request(Request(url)) + try: + headers = sfp.info() + blocksize = 8192 + size = -1 + read = 0 + blocknum = 0 + if "content-length" in headers: + size = int(headers["Content-Length"]) + if reporthook: + reporthook(blocknum, blocksize, size) + while True: + block = sfp.read(blocksize) + if not block: + break + read += len(block) + dfp.write(block) + if digester: + digester.update(block) + blocknum += 1 + if reporthook: + reporthook(blocknum, blocksize, size) + finally: + sfp.close() + + # check that we got the whole file, if we can + if size >= 0 and read < size: + raise DistlibException( + 'retrieval incomplete: got only %d out of %d bytes' + % (read, size)) + # if we have a digest, it must match. + if digester: + actual = digester.hexdigest() + if digest != actual: + raise DistlibException('%s digest mismatch for %s: expected ' + '%s, got %s' % (hasher, destfile, + digest, actual)) + logger.debug('Digest verified: %s', digest) + + def send_request(self, req): + """ + Send a standard library :class:`Request` to PyPI and return its + response. + + :param req: The request to send. + :return: The HTTP response from PyPI (a standard library HTTPResponse). + """ + handlers = [] + if self.password_handler: + handlers.append(self.password_handler) + if self.ssl_verifier: + handlers.append(self.ssl_verifier) + opener = build_opener(*handlers) + return opener.open(req) + + def encode_request(self, fields, files): + """ + Encode fields and files for posting to an HTTP server. + + :param fields: The fields to send as a list of (fieldname, value) + tuples. + :param files: The files to send as a list of (fieldname, filename, + file_bytes) tuple. 
+ """ + # Adapted from packaging, which in turn was adapted from + # http://code.activestate.com/recipes/146306 + + parts = [] + boundary = self.boundary + for k, values in fields: + if not isinstance(values, (list, tuple)): + values = [values] + + for v in values: + parts.extend(( + b'--' + boundary, + ('Content-Disposition: form-data; name="%s"' % + k).encode('utf-8'), + b'', + v.encode('utf-8'))) + for key, filename, value in files: + parts.extend(( + b'--' + boundary, + ('Content-Disposition: form-data; name="%s"; filename="%s"' % + (key, filename)).encode('utf-8'), + b'', + value)) + + parts.extend((b'--' + boundary + b'--', b'')) + + body = b'\r\n'.join(parts) + ct = b'multipart/form-data; boundary=' + boundary + headers = { + 'Content-type': ct, + 'Content-length': str(len(body)) + } + return Request(self.url, body, headers) + + def search(self, terms, operator=None): + if isinstance(terms, string_types): + terms = {'name': terms} + rpc_proxy = ServerProxy(self.url, timeout=3.0) + try: + return rpc_proxy.search(terms, operator or 'and') + finally: + rpc_proxy('close')() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/locators.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/locators.py new file mode 100644 index 0000000..12a1d06 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/locators.py @@ -0,0 +1,1302 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2015 Vinay Sajip. +# Licensed to the Python Software Foundation under a contributor agreement. +# See LICENSE.txt and CONTRIBUTORS.txt. +# + +import gzip +from io import BytesIO +import json +import logging +import os +import posixpath +import re +try: + import threading +except ImportError: # pragma: no cover + import dummy_threading as threading +import zlib + +from . import DistlibException +from .compat import (urljoin, urlparse, urlunparse, url2pathname, pathname2url, + queue, quote, unescape, string_types, build_opener, + HTTPRedirectHandler as BaseRedirectHandler, text_type, + Request, HTTPError, URLError) +from .database import Distribution, DistributionPath, make_dist +from .metadata import Metadata, MetadataInvalidError +from .util import (cached_property, parse_credentials, ensure_slash, + split_filename, get_project_data, parse_requirement, + parse_name_and_version, ServerProxy, normalize_name) +from .version import get_scheme, UnsupportedVersionError +from .wheel import Wheel, is_compatible + +logger = logging.getLogger(__name__) + +HASHER_HASH = re.compile(r'^(\w+)=([a-f0-9]+)') +CHARSET = re.compile(r';\s*charset\s*=\s*(.*)\s*$', re.I) +HTML_CONTENT_TYPE = re.compile('text/html|application/x(ht)?ml') +DEFAULT_INDEX = 'https://pypi.org/pypi' + +def get_all_distribution_names(url=None): + """ + Return all distribution names known by an index. + :param url: The URL of the index. + :return: A list of all known distribution names. + """ + if url is None: + url = DEFAULT_INDEX + client = ServerProxy(url, timeout=3.0) + try: + return client.list_packages() + finally: + client('close')() + +class RedirectHandler(BaseRedirectHandler): + """ + A class to work around a bug in some Python 3.2.x releases. + """ + # There's a bug in the base version for some 3.2.x + # (e.g. 3.2.2 on Ubuntu Oneiric). If a Location header + # returns e.g. /abc, it bails because it says the scheme '' + # is bogus, when actually it should use the request's + # URL for the scheme. See Python issue #13696. 
+ def http_error_302(self, req, fp, code, msg, headers): + # Some servers (incorrectly) return multiple Location headers + # (so probably same goes for URI). Use first header. + newurl = None + for key in ('location', 'uri'): + if key in headers: + newurl = headers[key] + break + if newurl is None: # pragma: no cover + return + urlparts = urlparse(newurl) + if urlparts.scheme == '': + newurl = urljoin(req.get_full_url(), newurl) + if hasattr(headers, 'replace_header'): + headers.replace_header(key, newurl) + else: + headers[key] = newurl + return BaseRedirectHandler.http_error_302(self, req, fp, code, msg, + headers) + + http_error_301 = http_error_303 = http_error_307 = http_error_302 + +class Locator(object): + """ + A base class for locators - things that locate distributions. + """ + source_extensions = ('.tar.gz', '.tar.bz2', '.tar', '.zip', '.tgz', '.tbz') + binary_extensions = ('.egg', '.exe', '.whl') + excluded_extensions = ('.pdf',) + + # A list of tags indicating which wheels you want to match. The default + # value of None matches against the tags compatible with the running + # Python. If you want to match other values, set wheel_tags on a locator + # instance to a list of tuples (pyver, abi, arch) which you want to match. + wheel_tags = None + + downloadable_extensions = source_extensions + ('.whl',) + + def __init__(self, scheme='default'): + """ + Initialise an instance. + :param scheme: Because locators look for most recent versions, they + need to know the version scheme to use. This specifies + the current PEP-recommended scheme - use ``'legacy'`` + if you need to support existing distributions on PyPI. + """ + self._cache = {} + self.scheme = scheme + # Because of bugs in some of the handlers on some of the platforms, + # we use our own opener rather than just using urlopen. + self.opener = build_opener(RedirectHandler()) + # If get_project() is called from locate(), the matcher instance + # is set from the requirement passed to locate(). See issue #18 for + # why this can be useful to know. + self.matcher = None + self.errors = queue.Queue() + + def get_errors(self): + """ + Return any errors which have occurred. + """ + result = [] + while not self.errors.empty(): # pragma: no cover + try: + e = self.errors.get(False) + result.append(e) + except self.errors.Empty: + continue + self.errors.task_done() + return result + + def clear_errors(self): + """ + Clear any errors which may have been logged. + """ + # Just get the errors and throw them away + self.get_errors() + + def clear_cache(self): + self._cache.clear() + + def _get_scheme(self): + return self._scheme + + def _set_scheme(self, value): + self._scheme = value + + scheme = property(_get_scheme, _set_scheme) + + def _get_project(self, name): + """ + For a given project, get a dictionary mapping available versions to Distribution + instances. + + This should be implemented in subclasses. + + If called from a locate() request, self.matcher will be set to a + matcher for the requirement to satisfy, otherwise it will be None. + """ + raise NotImplementedError('Please implement in the subclass') + + def get_distribution_names(self): + """ + Return all the distribution names known to this locator. + """ + raise NotImplementedError('Please implement in the subclass') + + def get_project(self, name): + """ + For a given project, get a dictionary mapping available versions to Distribution + instances. + + This calls _get_project to do all the work, and just implements a caching layer on top. 
+ """ + if self._cache is None: # pragma: no cover + result = self._get_project(name) + elif name in self._cache: + result = self._cache[name] + else: + self.clear_errors() + result = self._get_project(name) + self._cache[name] = result + return result + + def score_url(self, url): + """ + Give an url a score which can be used to choose preferred URLs + for a given project release. + """ + t = urlparse(url) + basename = posixpath.basename(t.path) + compatible = True + is_wheel = basename.endswith('.whl') + is_downloadable = basename.endswith(self.downloadable_extensions) + if is_wheel: + compatible = is_compatible(Wheel(basename), self.wheel_tags) + return (t.scheme == 'https', 'pypi.org' in t.netloc, + is_downloadable, is_wheel, compatible, basename) + + def prefer_url(self, url1, url2): + """ + Choose one of two URLs where both are candidates for distribution + archives for the same version of a distribution (for example, + .tar.gz vs. zip). + + The current implementation favours https:// URLs over http://, archives + from PyPI over those from other locations, wheel compatibility (if a + wheel) and then the archive name. + """ + result = url2 + if url1: + s1 = self.score_url(url1) + s2 = self.score_url(url2) + if s1 > s2: + result = url1 + if result != url2: + logger.debug('Not replacing %r with %r', url1, url2) + else: + logger.debug('Replacing %r with %r', url1, url2) + return result + + def split_filename(self, filename, project_name): + """ + Attempt to split a filename in project name, version and Python version. + """ + return split_filename(filename, project_name) + + def convert_url_to_download_info(self, url, project_name): + """ + See if a URL is a candidate for a download URL for a project (the URL + has typically been scraped from an HTML page). + + If it is, a dictionary is returned with keys "name", "version", + "filename" and "url"; otherwise, None is returned. 
+ """ + def same_project(name1, name2): + return normalize_name(name1) == normalize_name(name2) + + result = None + scheme, netloc, path, params, query, frag = urlparse(url) + if frag.lower().startswith('egg='): # pragma: no cover + logger.debug('%s: version hint in fragment: %r', + project_name, frag) + m = HASHER_HASH.match(frag) + if m: + algo, digest = m.groups() + else: + algo, digest = None, None + origpath = path + if path and path[-1] == '/': # pragma: no cover + path = path[:-1] + if path.endswith('.whl'): + try: + wheel = Wheel(path) + if not is_compatible(wheel, self.wheel_tags): + logger.debug('Wheel not compatible: %s', path) + else: + if project_name is None: + include = True + else: + include = same_project(wheel.name, project_name) + if include: + result = { + 'name': wheel.name, + 'version': wheel.version, + 'filename': wheel.filename, + 'url': urlunparse((scheme, netloc, origpath, + params, query, '')), + 'python-version': ', '.join( + ['.'.join(list(v[2:])) for v in wheel.pyver]), + } + except Exception as e: # pragma: no cover + logger.warning('invalid path for wheel: %s', path) + elif not path.endswith(self.downloadable_extensions): # pragma: no cover + logger.debug('Not downloadable: %s', path) + else: # downloadable extension + path = filename = posixpath.basename(path) + for ext in self.downloadable_extensions: + if path.endswith(ext): + path = path[:-len(ext)] + t = self.split_filename(path, project_name) + if not t: # pragma: no cover + logger.debug('No match for project/version: %s', path) + else: + name, version, pyver = t + if not project_name or same_project(project_name, name): + result = { + 'name': name, + 'version': version, + 'filename': filename, + 'url': urlunparse((scheme, netloc, origpath, + params, query, '')), + #'packagetype': 'sdist', + } + if pyver: # pragma: no cover + result['python-version'] = pyver + break + if result and algo: + result['%s_digest' % algo] = digest + return result + + def _get_digest(self, info): + """ + Get a digest from a dictionary by looking at a "digests" dictionary + or keys of the form 'algo_digest'. + + Returns a 2-tuple (algo, digest) if found, else None. Currently + looks only for SHA256, then MD5. + """ + result = None + if 'digests' in info: + digests = info['digests'] + for algo in ('sha256', 'md5'): + if algo in digests: + result = (algo, digests[algo]) + break + if not result: + for algo in ('sha256', 'md5'): + key = '%s_digest' % algo + if key in info: + result = (algo, info[key]) + break + return result + + def _update_version_data(self, result, info): + """ + Update a result dictionary (the final result from _get_project) with a + dictionary for a specific version, which typically holds information + gleaned from a filename or URL for an archive for the distribution. + """ + name = info.pop('name') + version = info.pop('version') + if version in result: + dist = result[version] + md = dist.metadata + else: + dist = make_dist(name, version, scheme=self.scheme) + md = dist.metadata + dist.digest = digest = self._get_digest(info) + url = info['url'] + result['digests'][url] = digest + if md.source_url != info['url']: + md.source_url = self.prefer_url(md.source_url, url) + result['urls'].setdefault(version, set()).add(url) + dist.locator = self + result[version] = dist + + def locate(self, requirement, prereleases=False): + """ + Find the most recent distribution which matches the given + requirement. 
+ + :param requirement: A requirement of the form 'foo (1.0)' or perhaps + 'foo (>= 1.0, < 2.0, != 1.3)' + :param prereleases: If ``True``, allow pre-release versions + to be located. Otherwise, pre-release versions + are not returned. + :return: A :class:`Distribution` instance, or ``None`` if no such + distribution could be located. + """ + result = None + r = parse_requirement(requirement) + if r is None: # pragma: no cover + raise DistlibException('Not a valid requirement: %r' % requirement) + scheme = get_scheme(self.scheme) + self.matcher = matcher = scheme.matcher(r.requirement) + logger.debug('matcher: %s (%s)', matcher, type(matcher).__name__) + versions = self.get_project(r.name) + if len(versions) > 2: # urls and digests keys are present + # sometimes, versions are invalid + slist = [] + vcls = matcher.version_class + for k in versions: + if k in ('urls', 'digests'): + continue + try: + if not matcher.match(k): + logger.debug('%s did not match %r', matcher, k) + else: + if prereleases or not vcls(k).is_prerelease: + slist.append(k) + else: + logger.debug('skipping pre-release ' + 'version %s of %s', k, matcher.name) + except Exception: # pragma: no cover + logger.warning('error matching %s with %r', matcher, k) + pass # slist.append(k) + if len(slist) > 1: + slist = sorted(slist, key=scheme.key) + if slist: + logger.debug('sorted list: %s', slist) + version = slist[-1] + result = versions[version] + if result: + if r.extras: + result.extras = r.extras + result.download_urls = versions.get('urls', {}).get(version, set()) + d = {} + sd = versions.get('digests', {}) + for url in result.download_urls: + if url in sd: # pragma: no cover + d[url] = sd[url] + result.digests = d + self.matcher = None + return result + + +class PyPIRPCLocator(Locator): + """ + This locator uses XML-RPC to locate distributions. It therefore + cannot be used with simple mirrors (that only mirror file content). + """ + def __init__(self, url, **kwargs): + """ + Initialise an instance. + + :param url: The URL to use for XML-RPC. + :param kwargs: Passed to the superclass constructor. + """ + super(PyPIRPCLocator, self).__init__(**kwargs) + self.base_url = url + self.client = ServerProxy(url, timeout=3.0) + + def get_distribution_names(self): + """ + Return all the distribution names known to this locator. + """ + return set(self.client.list_packages()) + + def _get_project(self, name): + result = {'urls': {}, 'digests': {}} + versions = self.client.package_releases(name, True) + for v in versions: + urls = self.client.release_urls(name, v) + data = self.client.release_data(name, v) + metadata = Metadata(scheme=self.scheme) + metadata.name = data['name'] + metadata.version = data['version'] + metadata.license = data.get('license') + metadata.keywords = data.get('keywords', []) + metadata.summary = data.get('summary') + dist = Distribution(metadata) + if urls: + info = urls[0] + metadata.source_url = info['url'] + dist.digest = self._get_digest(info) + dist.locator = self + result[v] = dist + for info in urls: + url = info['url'] + digest = self._get_digest(info) + result['urls'].setdefault(v, set()).add(url) + result['digests'][url] = digest + return result + +class PyPIJSONLocator(Locator): + """ + This locator uses PyPI's JSON interface. It's very limited in functionality + and probably not worth using. 
+ """ + def __init__(self, url, **kwargs): + super(PyPIJSONLocator, self).__init__(**kwargs) + self.base_url = ensure_slash(url) + + def get_distribution_names(self): + """ + Return all the distribution names known to this locator. + """ + raise NotImplementedError('Not available from this locator') + + def _get_project(self, name): + result = {'urls': {}, 'digests': {}} + url = urljoin(self.base_url, '%s/json' % quote(name)) + try: + resp = self.opener.open(url) + data = resp.read().decode() # for now + d = json.loads(data) + md = Metadata(scheme=self.scheme) + data = d['info'] + md.name = data['name'] + md.version = data['version'] + md.license = data.get('license') + md.keywords = data.get('keywords', []) + md.summary = data.get('summary') + dist = Distribution(md) + dist.locator = self + urls = d['urls'] + result[md.version] = dist + for info in d['urls']: + url = info['url'] + dist.download_urls.add(url) + dist.digests[url] = self._get_digest(info) + result['urls'].setdefault(md.version, set()).add(url) + result['digests'][url] = self._get_digest(info) + # Now get other releases + for version, infos in d['releases'].items(): + if version == md.version: + continue # already done + omd = Metadata(scheme=self.scheme) + omd.name = md.name + omd.version = version + odist = Distribution(omd) + odist.locator = self + result[version] = odist + for info in infos: + url = info['url'] + odist.download_urls.add(url) + odist.digests[url] = self._get_digest(info) + result['urls'].setdefault(version, set()).add(url) + result['digests'][url] = self._get_digest(info) +# for info in urls: +# md.source_url = info['url'] +# dist.digest = self._get_digest(info) +# dist.locator = self +# for info in urls: +# url = info['url'] +# result['urls'].setdefault(md.version, set()).add(url) +# result['digests'][url] = self._get_digest(info) + except Exception as e: + self.errors.put(text_type(e)) + logger.exception('JSON fetch failed: %s', e) + return result + + +class Page(object): + """ + This class represents a scraped HTML page. + """ + # The following slightly hairy-looking regex just looks for the contents of + # an anchor link, which has an attribute "href" either immediately preceded + # or immediately followed by a "rel" attribute. The attribute values can be + # declared with double quotes, single quotes or no quotes - which leads to + # the length of the expression. + _href = re.compile(""" +(rel\\s*=\\s*(?:"(?P[^"]*)"|'(?P[^']*)'|(?P[^>\\s\n]*))\\s+)? +href\\s*=\\s*(?:"(?P[^"]*)"|'(?P[^']*)'|(?P[^>\\s\n]*)) +(\\s+rel\\s*=\\s*(?:"(?P[^"]*)"|'(?P[^']*)'|(?P[^>\\s\n]*)))? +""", re.I | re.S | re.X) + _base = re.compile(r"""]+)""", re.I | re.S) + + def __init__(self, data, url): + """ + Initialise an instance with the Unicode page contents and the URL they + came from. + """ + self.data = data + self.base_url = self.url = url + m = self._base.search(self.data) + if m: + self.base_url = m.group(1) + + _clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I) + + @cached_property + def links(self): + """ + Return the URLs of all the links on a page together with information + about their "rel" attribute, for determining which ones to treat as + downloads and which ones to queue for further scraping. + """ + def clean(url): + "Tidy up an URL." 
+ scheme, netloc, path, params, query, frag = urlparse(url) + return urlunparse((scheme, netloc, quote(path), + params, query, frag)) + + result = set() + for match in self._href.finditer(self.data): + d = match.groupdict('') + rel = (d['rel1'] or d['rel2'] or d['rel3'] or + d['rel4'] or d['rel5'] or d['rel6']) + url = d['url1'] or d['url2'] or d['url3'] + url = urljoin(self.base_url, url) + url = unescape(url) + url = self._clean_re.sub(lambda m: '%%%2x' % ord(m.group(0)), url) + result.add((url, rel)) + # We sort the result, hoping to bring the most recent versions + # to the front + result = sorted(result, key=lambda t: t[0], reverse=True) + return result + + +class SimpleScrapingLocator(Locator): + """ + A locator which scrapes HTML pages to locate downloads for a distribution. + This runs multiple threads to do the I/O; performance is at least as good + as pip's PackageFinder, which works in an analogous fashion. + """ + + # These are used to deal with various Content-Encoding schemes. + decoders = { + 'deflate': zlib.decompress, + 'gzip': lambda b: gzip.GzipFile(fileobj=BytesIO(d)).read(), + 'none': lambda b: b, + } + + def __init__(self, url, timeout=None, num_workers=10, **kwargs): + """ + Initialise an instance. + :param url: The root URL to use for scraping. + :param timeout: The timeout, in seconds, to be applied to requests. + This defaults to ``None`` (no timeout specified). + :param num_workers: The number of worker threads you want to do I/O, + This defaults to 10. + :param kwargs: Passed to the superclass. + """ + super(SimpleScrapingLocator, self).__init__(**kwargs) + self.base_url = ensure_slash(url) + self.timeout = timeout + self._page_cache = {} + self._seen = set() + self._to_fetch = queue.Queue() + self._bad_hosts = set() + self.skip_externals = False + self.num_workers = num_workers + self._lock = threading.RLock() + # See issue #45: we need to be resilient when the locator is used + # in a thread, e.g. with concurrent.futures. We can't use self._lock + # as it is for coordinating our internal threads - the ones created + # in _prepare_threads. + self._gplock = threading.RLock() + self.platform_check = False # See issue #112 + + def _prepare_threads(self): + """ + Threads are created only when get_project is called, and terminate + before it returns. They are there primarily to parallelise I/O (i.e. + fetching web pages). + """ + self._threads = [] + for i in range(self.num_workers): + t = threading.Thread(target=self._fetch) + t.setDaemon(True) + t.start() + self._threads.append(t) + + def _wait_threads(self): + """ + Tell all the threads to terminate (by sending a sentinel value) and + wait for them to do so. + """ + # Note that you need two loops, since you can't say which + # thread will get each sentinel + for t in self._threads: + self._to_fetch.put(None) # sentinel + for t in self._threads: + t.join() + self._threads = [] + + def _get_project(self, name): + result = {'urls': {}, 'digests': {}} + with self._gplock: + self.result = result + self.project_name = name + url = urljoin(self.base_url, '%s/' % quote(name)) + self._seen.clear() + self._page_cache.clear() + self._prepare_threads() + try: + logger.debug('Queueing %s', url) + self._to_fetch.put(url) + self._to_fetch.join() + finally: + self._wait_threads() + del self.result + return result + + platform_dependent = re.compile(r'\b(linux_(i\d86|x86_64|arm\w+)|' + r'win(32|_amd64)|macosx_?\d+)\b', re.I) + + def _is_platform_dependent(self, url): + """ + Does an URL refer to a platform-specific download? 
+ """ + return self.platform_dependent.search(url) + + def _process_download(self, url): + """ + See if an URL is a suitable download for a project. + + If it is, register information in the result dictionary (for + _get_project) about the specific version it's for. + + Note that the return value isn't actually used other than as a boolean + value. + """ + if self.platform_check and self._is_platform_dependent(url): + info = None + else: + info = self.convert_url_to_download_info(url, self.project_name) + logger.debug('process_download: %s -> %s', url, info) + if info: + with self._lock: # needed because self.result is shared + self._update_version_data(self.result, info) + return info + + def _should_queue(self, link, referrer, rel): + """ + Determine whether a link URL from a referring page and with a + particular "rel" attribute should be queued for scraping. + """ + scheme, netloc, path, _, _, _ = urlparse(link) + if path.endswith(self.source_extensions + self.binary_extensions + + self.excluded_extensions): + result = False + elif self.skip_externals and not link.startswith(self.base_url): + result = False + elif not referrer.startswith(self.base_url): + result = False + elif rel not in ('homepage', 'download'): + result = False + elif scheme not in ('http', 'https', 'ftp'): + result = False + elif self._is_platform_dependent(link): + result = False + else: + host = netloc.split(':', 1)[0] + if host.lower() == 'localhost': + result = False + else: + result = True + logger.debug('should_queue: %s (%s) from %s -> %s', link, rel, + referrer, result) + return result + + def _fetch(self): + """ + Get a URL to fetch from the work queue, get the HTML page, examine its + links for download candidates and candidates for further scraping. + + This is a handy method to run in a thread. + """ + while True: + url = self._to_fetch.get() + try: + if url: + page = self.get_page(url) + if page is None: # e.g. after an error + continue + for link, rel in page.links: + if link not in self._seen: + try: + self._seen.add(link) + if (not self._process_download(link) and + self._should_queue(link, url, rel)): + logger.debug('Queueing %s from %s', link, url) + self._to_fetch.put(link) + except MetadataInvalidError: # e.g. invalid versions + pass + except Exception as e: # pragma: no cover + self.errors.put(text_type(e)) + finally: + # always do this, to avoid hangs :-) + self._to_fetch.task_done() + if not url: + #logger.debug('Sentinel seen, quitting.') + break + + def get_page(self, url): + """ + Get the HTML for an URL, possibly from an in-memory cache. + + XXX TODO Note: this cache is never actually cleared. It's assumed that + the data won't get stale over the lifetime of a locator instance (not + necessarily true for the default_locator). 
+ """ + # http://peak.telecommunity.com/DevCenter/EasyInstall#package-index-api + scheme, netloc, path, _, _, _ = urlparse(url) + if scheme == 'file' and os.path.isdir(url2pathname(path)): + url = urljoin(ensure_slash(url), 'index.html') + + if url in self._page_cache: + result = self._page_cache[url] + logger.debug('Returning %s from cache: %s', url, result) + else: + host = netloc.split(':', 1)[0] + result = None + if host in self._bad_hosts: + logger.debug('Skipping %s due to bad host %s', url, host) + else: + req = Request(url, headers={'Accept-encoding': 'identity'}) + try: + logger.debug('Fetching %s', url) + resp = self.opener.open(req, timeout=self.timeout) + logger.debug('Fetched %s', url) + headers = resp.info() + content_type = headers.get('Content-Type', '') + if HTML_CONTENT_TYPE.match(content_type): + final_url = resp.geturl() + data = resp.read() + encoding = headers.get('Content-Encoding') + if encoding: + decoder = self.decoders[encoding] # fail if not found + data = decoder(data) + encoding = 'utf-8' + m = CHARSET.search(content_type) + if m: + encoding = m.group(1) + try: + data = data.decode(encoding) + except UnicodeError: # pragma: no cover + data = data.decode('latin-1') # fallback + result = Page(data, final_url) + self._page_cache[final_url] = result + except HTTPError as e: + if e.code != 404: + logger.exception('Fetch failed: %s: %s', url, e) + except URLError as e: # pragma: no cover + logger.exception('Fetch failed: %s: %s', url, e) + with self._lock: + self._bad_hosts.add(host) + except Exception as e: # pragma: no cover + logger.exception('Fetch failed: %s: %s', url, e) + finally: + self._page_cache[url] = result # even if None (failure) + return result + + _distname_re = re.compile(']*>([^<]+)<') + + def get_distribution_names(self): + """ + Return all the distribution names known to this locator. + """ + result = set() + page = self.get_page(self.base_url) + if not page: + raise DistlibException('Unable to get %s' % self.base_url) + for match in self._distname_re.finditer(page.data): + result.add(match.group(1)) + return result + +class DirectoryLocator(Locator): + """ + This class locates distributions in a directory tree. + """ + + def __init__(self, path, **kwargs): + """ + Initialise an instance. + :param path: The root of the directory tree to search. + :param kwargs: Passed to the superclass constructor, + except for: + * recursive - if True (the default), subdirectories are + recursed into. If False, only the top-level directory + is searched, + """ + self.recursive = kwargs.pop('recursive', True) + super(DirectoryLocator, self).__init__(**kwargs) + path = os.path.abspath(path) + if not os.path.isdir(path): # pragma: no cover + raise DistlibException('Not a directory: %r' % path) + self.base_dir = path + + def should_include(self, filename, parent): + """ + Should a filename be considered as a candidate for a distribution + archive? As well as the filename, the directory which contains it + is provided, though not used by the current implementation. 
+ """ + return filename.endswith(self.downloadable_extensions) + + def _get_project(self, name): + result = {'urls': {}, 'digests': {}} + for root, dirs, files in os.walk(self.base_dir): + for fn in files: + if self.should_include(fn, root): + fn = os.path.join(root, fn) + url = urlunparse(('file', '', + pathname2url(os.path.abspath(fn)), + '', '', '')) + info = self.convert_url_to_download_info(url, name) + if info: + self._update_version_data(result, info) + if not self.recursive: + break + return result + + def get_distribution_names(self): + """ + Return all the distribution names known to this locator. + """ + result = set() + for root, dirs, files in os.walk(self.base_dir): + for fn in files: + if self.should_include(fn, root): + fn = os.path.join(root, fn) + url = urlunparse(('file', '', + pathname2url(os.path.abspath(fn)), + '', '', '')) + info = self.convert_url_to_download_info(url, None) + if info: + result.add(info['name']) + if not self.recursive: + break + return result + +class JSONLocator(Locator): + """ + This locator uses special extended metadata (not available on PyPI) and is + the basis of performant dependency resolution in distlib. Other locators + require archive downloads before dependencies can be determined! As you + might imagine, that can be slow. + """ + def get_distribution_names(self): + """ + Return all the distribution names known to this locator. + """ + raise NotImplementedError('Not available from this locator') + + def _get_project(self, name): + result = {'urls': {}, 'digests': {}} + data = get_project_data(name) + if data: + for info in data.get('files', []): + if info['ptype'] != 'sdist' or info['pyversion'] != 'source': + continue + # We don't store summary in project metadata as it makes + # the data bigger for no benefit during dependency + # resolution + dist = make_dist(data['name'], info['version'], + summary=data.get('summary', + 'Placeholder for summary'), + scheme=self.scheme) + md = dist.metadata + md.source_url = info['url'] + # TODO SHA256 digest + if 'digest' in info and info['digest']: + dist.digest = ('md5', info['digest']) + md.dependencies = info.get('requirements', {}) + dist.exports = info.get('exports', {}) + result[dist.version] = dist + result['urls'].setdefault(dist.version, set()).add(info['url']) + return result + +class DistPathLocator(Locator): + """ + This locator finds installed distributions in a path. It can be useful for + adding to an :class:`AggregatingLocator`. + """ + def __init__(self, distpath, **kwargs): + """ + Initialise an instance. + + :param distpath: A :class:`DistributionPath` instance to search. + """ + super(DistPathLocator, self).__init__(**kwargs) + assert isinstance(distpath, DistributionPath) + self.distpath = distpath + + def _get_project(self, name): + dist = self.distpath.get_distribution(name) + if dist is None: + result = {'urls': {}, 'digests': {}} + else: + result = { + dist.version: dist, + 'urls': {dist.version: set([dist.source_url])}, + 'digests': {dist.version: set([None])} + } + return result + + +class AggregatingLocator(Locator): + """ + This class allows you to chain and/or merge a list of locators. + """ + def __init__(self, *locators, **kwargs): + """ + Initialise an instance. + + :param locators: The list of locators to search. + :param kwargs: Passed to the superclass constructor, + except for: + * merge - if False (the default), the first successful + search from any of the locators is returned. If True, + the results from all locators are merged (this can be + slow). 
+ """ + self.merge = kwargs.pop('merge', False) + self.locators = locators + super(AggregatingLocator, self).__init__(**kwargs) + + def clear_cache(self): + super(AggregatingLocator, self).clear_cache() + for locator in self.locators: + locator.clear_cache() + + def _set_scheme(self, value): + self._scheme = value + for locator in self.locators: + locator.scheme = value + + scheme = property(Locator.scheme.fget, _set_scheme) + + def _get_project(self, name): + result = {} + for locator in self.locators: + d = locator.get_project(name) + if d: + if self.merge: + files = result.get('urls', {}) + digests = result.get('digests', {}) + # next line could overwrite result['urls'], result['digests'] + result.update(d) + df = result.get('urls') + if files and df: + for k, v in files.items(): + if k in df: + df[k] |= v + else: + df[k] = v + dd = result.get('digests') + if digests and dd: + dd.update(digests) + else: + # See issue #18. If any dists are found and we're looking + # for specific constraints, we only return something if + # a match is found. For example, if a DirectoryLocator + # returns just foo (1.0) while we're looking for + # foo (>= 2.0), we'll pretend there was nothing there so + # that subsequent locators can be queried. Otherwise we + # would just return foo (1.0) which would then lead to a + # failure to find foo (>= 2.0), because other locators + # weren't searched. Note that this only matters when + # merge=False. + if self.matcher is None: + found = True + else: + found = False + for k in d: + if self.matcher.match(k): + found = True + break + if found: + result = d + break + return result + + def get_distribution_names(self): + """ + Return all the distribution names known to this locator. + """ + result = set() + for locator in self.locators: + try: + result |= locator.get_distribution_names() + except NotImplementedError: + pass + return result + + +# We use a legacy scheme simply because most of the dists on PyPI use legacy +# versions which don't conform to PEP 426 / PEP 440. +default_locator = AggregatingLocator( + JSONLocator(), + SimpleScrapingLocator('https://pypi.org/simple/', + timeout=3.0), + scheme='legacy') + +locate = default_locator.locate + +NAME_VERSION_RE = re.compile(r'(?P[\w-]+)\s*' + r'\(\s*(==\s*)?(?P[^)]+)\)$') + +class DependencyFinder(object): + """ + Locate dependencies for distributions. + """ + + def __init__(self, locator=None): + """ + Initialise an instance, using the specified locator + to locate distributions. + """ + self.locator = locator or default_locator + self.scheme = get_scheme(self.locator.scheme) + + def add_distribution(self, dist): + """ + Add a distribution to the finder. This will update internal information + about who provides what. + :param dist: The distribution to add. + """ + logger.debug('adding distribution %s', dist) + name = dist.key + self.dists_by_name[name] = dist + self.dists[(name, dist.version)] = dist + for p in dist.provides: + name, version = parse_name_and_version(p) + logger.debug('Add to provided: %s, %s, %s', name, version, dist) + self.provided.setdefault(name, set()).add((version, dist)) + + def remove_distribution(self, dist): + """ + Remove a distribution from the finder. This will update internal + information about who provides what. + :param dist: The distribution to remove. 
+ """ + logger.debug('removing distribution %s', dist) + name = dist.key + del self.dists_by_name[name] + del self.dists[(name, dist.version)] + for p in dist.provides: + name, version = parse_name_and_version(p) + logger.debug('Remove from provided: %s, %s, %s', name, version, dist) + s = self.provided[name] + s.remove((version, dist)) + if not s: + del self.provided[name] + + def get_matcher(self, reqt): + """ + Get a version matcher for a requirement. + :param reqt: The requirement + :type reqt: str + :return: A version matcher (an instance of + :class:`distlib.version.Matcher`). + """ + try: + matcher = self.scheme.matcher(reqt) + except UnsupportedVersionError: # pragma: no cover + # XXX compat-mode if cannot read the version + name = reqt.split()[0] + matcher = self.scheme.matcher(name) + return matcher + + def find_providers(self, reqt): + """ + Find the distributions which can fulfill a requirement. + + :param reqt: The requirement. + :type reqt: str + :return: A set of distribution which can fulfill the requirement. + """ + matcher = self.get_matcher(reqt) + name = matcher.key # case-insensitive + result = set() + provided = self.provided + if name in provided: + for version, provider in provided[name]: + try: + match = matcher.match(version) + except UnsupportedVersionError: + match = False + + if match: + result.add(provider) + break + return result + + def try_to_replace(self, provider, other, problems): + """ + Attempt to replace one provider with another. This is typically used + when resolving dependencies from multiple sources, e.g. A requires + (B >= 1.0) while C requires (B >= 1.1). + + For successful replacement, ``provider`` must meet all the requirements + which ``other`` fulfills. + + :param provider: The provider we are trying to replace with. + :param other: The provider we're trying to replace. + :param problems: If False is returned, this will contain what + problems prevented replacement. This is currently + a tuple of the literal string 'cantreplace', + ``provider``, ``other`` and the set of requirements + that ``provider`` couldn't fulfill. + :return: True if we can replace ``other`` with ``provider``, else + False. + """ + rlist = self.reqts[other] + unmatched = set() + for s in rlist: + matcher = self.get_matcher(s) + if not matcher.match(provider.version): + unmatched.add(s) + if unmatched: + # can't replace other with provider + problems.add(('cantreplace', provider, other, + frozenset(unmatched))) + result = False + else: + # can replace other with provider + self.remove_distribution(other) + del self.reqts[other] + for s in rlist: + self.reqts.setdefault(provider, set()).add(s) + self.add_distribution(provider) + result = True + return result + + def find(self, requirement, meta_extras=None, prereleases=False): + """ + Find a distribution and all distributions it depends on. + + :param requirement: The requirement specifying the distribution to + find, or a Distribution instance. + :param meta_extras: A list of meta extras such as :test:, :build: and + so on. + :param prereleases: If ``True``, allow pre-release versions to be + returned - otherwise, don't return prereleases + unless they're all that's available. + + Return a set of :class:`Distribution` instances and a set of + problems. 
+ + The distributions returned should be such that they have the + :attr:`required` attribute set to ``True`` if they were + from the ``requirement`` passed to ``find()``, and they have the + :attr:`build_time_dependency` attribute set to ``True`` unless they + are post-installation dependencies of the ``requirement``. + + The problems should be a tuple consisting of the string + ``'unsatisfied'`` and the requirement which couldn't be satisfied + by any distribution known to the locator. + """ + + self.provided = {} + self.dists = {} + self.dists_by_name = {} + self.reqts = {} + + meta_extras = set(meta_extras or []) + if ':*:' in meta_extras: + meta_extras.remove(':*:') + # :meta: and :run: are implicitly included + meta_extras |= set([':test:', ':build:', ':dev:']) + + if isinstance(requirement, Distribution): + dist = odist = requirement + logger.debug('passed %s as requirement', odist) + else: + dist = odist = self.locator.locate(requirement, + prereleases=prereleases) + if dist is None: + raise DistlibException('Unable to locate %r' % requirement) + logger.debug('located %s', odist) + dist.requested = True + problems = set() + todo = set([dist]) + install_dists = set([odist]) + while todo: + dist = todo.pop() + name = dist.key # case-insensitive + if name not in self.dists_by_name: + self.add_distribution(dist) + else: + #import pdb; pdb.set_trace() + other = self.dists_by_name[name] + if other != dist: + self.try_to_replace(dist, other, problems) + + ireqts = dist.run_requires | dist.meta_requires + sreqts = dist.build_requires + ereqts = set() + if meta_extras and dist in install_dists: + for key in ('test', 'build', 'dev'): + e = ':%s:' % key + if e in meta_extras: + ereqts |= getattr(dist, '%s_requires' % key) + all_reqts = ireqts | sreqts | ereqts + for r in all_reqts: + providers = self.find_providers(r) + if not providers: + logger.debug('No providers found for %r', r) + provider = self.locator.locate(r, prereleases=prereleases) + # If no provider is found and we didn't consider + # prereleases, consider them now. + if provider is None and not prereleases: + provider = self.locator.locate(r, prereleases=True) + if provider is None: + logger.debug('Cannot satisfy %r', r) + problems.add(('unsatisfied', r)) + else: + n, v = provider.key, provider.version + if (n, v) not in self.dists: + todo.add(provider) + providers.add(provider) + if r in ireqts and dist in install_dists: + install_dists.add(provider) + logger.debug('Adding %s to install_dists', + provider.name_and_version) + for p in providers: + name = p.key + if name not in self.dists_by_name: + self.reqts.setdefault(p, set()).add(r) + else: + other = self.dists_by_name[name] + if other != p: + # see if other can be replaced by p + self.try_to_replace(p, other, problems) + + dists = set(self.dists.values()) + for dist in dists: + dist.build_time_dependency = dist not in install_dists + if dist.build_time_dependency: + logger.debug('%s is a build-time dependency only.', + dist.name_and_version) + logger.debug('find done for %s', odist) + return dists, problems diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/manifest.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/manifest.py new file mode 100644 index 0000000..ca0fe44 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/manifest.py @@ -0,0 +1,393 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Python Software Foundation. +# See LICENSE.txt and CONTRIBUTORS.txt. 
+# +""" +Class representing the list of files in a distribution. + +Equivalent to distutils.filelist, but fixes some problems. +""" +import fnmatch +import logging +import os +import re +import sys + +from . import DistlibException +from .compat import fsdecode +from .util import convert_path + + +__all__ = ['Manifest'] + +logger = logging.getLogger(__name__) + +# a \ followed by some spaces + EOL +_COLLAPSE_PATTERN = re.compile('\\\\w*\n', re.M) +_COMMENTED_LINE = re.compile('#.*?(?=\n)|\n(?=$)', re.M | re.S) + +# +# Due to the different results returned by fnmatch.translate, we need +# to do slightly different processing for Python 2.7 and 3.2 ... this needed +# to be brought in for Python 3.6 onwards. +# +_PYTHON_VERSION = sys.version_info[:2] + +class Manifest(object): + """A list of files built by on exploring the filesystem and filtered by + applying various patterns to what we find there. + """ + + def __init__(self, base=None): + """ + Initialise an instance. + + :param base: The base directory to explore under. + """ + self.base = os.path.abspath(os.path.normpath(base or os.getcwd())) + self.prefix = self.base + os.sep + self.allfiles = None + self.files = set() + + # + # Public API + # + + def findall(self): + """Find all files under the base and set ``allfiles`` to the absolute + pathnames of files found. + """ + from stat import S_ISREG, S_ISDIR, S_ISLNK + + self.allfiles = allfiles = [] + root = self.base + stack = [root] + pop = stack.pop + push = stack.append + + while stack: + root = pop() + names = os.listdir(root) + + for name in names: + fullname = os.path.join(root, name) + + # Avoid excess stat calls -- just one will do, thank you! + stat = os.stat(fullname) + mode = stat.st_mode + if S_ISREG(mode): + allfiles.append(fsdecode(fullname)) + elif S_ISDIR(mode) and not S_ISLNK(mode): + push(fullname) + + def add(self, item): + """ + Add a file to the manifest. + + :param item: The pathname to add. This can be relative to the base. + """ + if not item.startswith(self.prefix): + item = os.path.join(self.base, item) + self.files.add(os.path.normpath(item)) + + def add_many(self, items): + """ + Add a list of files to the manifest. + + :param items: The pathnames to add. These can be relative to the base. + """ + for item in items: + self.add(item) + + def sorted(self, wantdirs=False): + """ + Return sorted files in directory order + """ + + def add_dir(dirs, d): + dirs.add(d) + logger.debug('add_dir added %s', d) + if d != self.base: + parent, _ = os.path.split(d) + assert parent not in ('', '/') + add_dir(dirs, parent) + + result = set(self.files) # make a copy! + if wantdirs: + dirs = set() + for f in result: + add_dir(dirs, os.path.dirname(f)) + result |= dirs + return [os.path.join(*path_tuple) for path_tuple in + sorted(os.path.split(path) for path in result)] + + def clear(self): + """Clear all collected files.""" + self.files = set() + self.allfiles = [] + + def process_directive(self, directive): + """ + Process a directive which either adds some files from ``allfiles`` to + ``files``, or removes some files from ``files``. + + :param directive: The directive to process. This should be in a format + compatible with distutils ``MANIFEST.in`` files: + + http://docs.python.org/distutils/sourcedist.html#commands + """ + # Parse the line: split it up, make sure the right number of words + # is there, and return the relevant words. 'action' is always + # defined: it's the first word of the line. 
Which of the other + # three are defined depends on the action; it'll be either + # patterns, (dir and patterns), or (dirpattern). + action, patterns, thedir, dirpattern = self._parse_directive(directive) + + # OK, now we know that the action is valid and we have the + # right number of words on the line for that action -- so we + # can proceed with minimal error-checking. + if action == 'include': + for pattern in patterns: + if not self._include_pattern(pattern, anchor=True): + logger.warning('no files found matching %r', pattern) + + elif action == 'exclude': + for pattern in patterns: + found = self._exclude_pattern(pattern, anchor=True) + #if not found: + # logger.warning('no previously-included files ' + # 'found matching %r', pattern) + + elif action == 'global-include': + for pattern in patterns: + if not self._include_pattern(pattern, anchor=False): + logger.warning('no files found matching %r ' + 'anywhere in distribution', pattern) + + elif action == 'global-exclude': + for pattern in patterns: + found = self._exclude_pattern(pattern, anchor=False) + #if not found: + # logger.warning('no previously-included files ' + # 'matching %r found anywhere in ' + # 'distribution', pattern) + + elif action == 'recursive-include': + for pattern in patterns: + if not self._include_pattern(pattern, prefix=thedir): + logger.warning('no files found matching %r ' + 'under directory %r', pattern, thedir) + + elif action == 'recursive-exclude': + for pattern in patterns: + found = self._exclude_pattern(pattern, prefix=thedir) + #if not found: + # logger.warning('no previously-included files ' + # 'matching %r found under directory %r', + # pattern, thedir) + + elif action == 'graft': + if not self._include_pattern(None, prefix=dirpattern): + logger.warning('no directories found matching %r', + dirpattern) + + elif action == 'prune': + if not self._exclude_pattern(None, prefix=dirpattern): + logger.warning('no previously-included directories found ' + 'matching %r', dirpattern) + else: # pragma: no cover + # This should never happen, as it should be caught in + # _parse_template_line + raise DistlibException( + 'invalid action %r' % action) + + # + # Private API + # + + def _parse_directive(self, directive): + """ + Validate a directive. + :param directive: The directive to validate. + :return: A tuple of action, patterns, thedir, dir_patterns + """ + words = directive.split() + if len(words) == 1 and words[0] not in ('include', 'exclude', + 'global-include', + 'global-exclude', + 'recursive-include', + 'recursive-exclude', + 'graft', 'prune'): + # no action given, let's use the default 'include' + words.insert(0, 'include') + + action = words[0] + patterns = thedir = dir_pattern = None + + if action in ('include', 'exclude', + 'global-include', 'global-exclude'): + if len(words) < 2: + raise DistlibException( + '%r expects ...' % action) + + patterns = [convert_path(word) for word in words[1:]] + + elif action in ('recursive-include', 'recursive-exclude'): + if len(words) < 3: + raise DistlibException( + '%r expects ...' 
% action) + + thedir = convert_path(words[1]) + patterns = [convert_path(word) for word in words[2:]] + + elif action in ('graft', 'prune'): + if len(words) != 2: + raise DistlibException( + '%r expects a single ' % action) + + dir_pattern = convert_path(words[1]) + + else: + raise DistlibException('unknown action %r' % action) + + return action, patterns, thedir, dir_pattern + + def _include_pattern(self, pattern, anchor=True, prefix=None, + is_regex=False): + """Select strings (presumably filenames) from 'self.files' that + match 'pattern', a Unix-style wildcard (glob) pattern. + + Patterns are not quite the same as implemented by the 'fnmatch' + module: '*' and '?' match non-special characters, where "special" + is platform-dependent: slash on Unix; colon, slash, and backslash on + DOS/Windows; and colon on Mac OS. + + If 'anchor' is true (the default), then the pattern match is more + stringent: "*.py" will match "foo.py" but not "foo/bar.py". If + 'anchor' is false, both of these will match. + + If 'prefix' is supplied, then only filenames starting with 'prefix' + (itself a pattern) and ending with 'pattern', with anything in between + them, will match. 'anchor' is ignored in this case. + + If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and + 'pattern' is assumed to be either a string containing a regex or a + regex object -- no translation is done, the regex is just compiled + and used as-is. + + Selected strings will be added to self.files. + + Return True if files are found. + """ + # XXX docstring lying about what the special chars are? + found = False + pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex) + + # delayed loading of allfiles list + if self.allfiles is None: + self.findall() + + for name in self.allfiles: + if pattern_re.search(name): + self.files.add(name) + found = True + return found + + def _exclude_pattern(self, pattern, anchor=True, prefix=None, + is_regex=False): + """Remove strings (presumably filenames) from 'files' that match + 'pattern'. + + Other parameters are the same as for 'include_pattern()', above. + The list 'self.files' is modified in place. Return True if files are + found. + + This API is public to allow e.g. exclusion of SCM subdirs, e.g. when + packaging source distributions + """ + found = False + pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex) + for f in list(self.files): + if pattern_re.search(f): + self.files.remove(f) + found = True + return found + + def _translate_pattern(self, pattern, anchor=True, prefix=None, + is_regex=False): + """Translate a shell-like wildcard pattern to a compiled regular + expression. + + Return the compiled regex. If 'is_regex' true, + then 'pattern' is directly compiled to a regex (if it's a string) + or just returned as-is (assumes it's a regex object). 
+ """ + if is_regex: + if isinstance(pattern, str): + return re.compile(pattern) + else: + return pattern + + if _PYTHON_VERSION > (3, 2): + # ditch start and end characters + start, _, end = self._glob_to_re('_').partition('_') + + if pattern: + pattern_re = self._glob_to_re(pattern) + if _PYTHON_VERSION > (3, 2): + assert pattern_re.startswith(start) and pattern_re.endswith(end) + else: + pattern_re = '' + + base = re.escape(os.path.join(self.base, '')) + if prefix is not None: + # ditch end of pattern character + if _PYTHON_VERSION <= (3, 2): + empty_pattern = self._glob_to_re('') + prefix_re = self._glob_to_re(prefix)[:-len(empty_pattern)] + else: + prefix_re = self._glob_to_re(prefix) + assert prefix_re.startswith(start) and prefix_re.endswith(end) + prefix_re = prefix_re[len(start): len(prefix_re) - len(end)] + sep = os.sep + if os.sep == '\\': + sep = r'\\' + if _PYTHON_VERSION <= (3, 2): + pattern_re = '^' + base + sep.join((prefix_re, + '.*' + pattern_re)) + else: + pattern_re = pattern_re[len(start): len(pattern_re) - len(end)] + pattern_re = r'%s%s%s%s.*%s%s' % (start, base, prefix_re, sep, + pattern_re, end) + else: # no prefix -- respect anchor flag + if anchor: + if _PYTHON_VERSION <= (3, 2): + pattern_re = '^' + base + pattern_re + else: + pattern_re = r'%s%s%s' % (start, base, pattern_re[len(start):]) + + return re.compile(pattern_re) + + def _glob_to_re(self, pattern): + """Translate a shell-like glob pattern to a regular expression. + + Return a string containing the regex. Differs from + 'fnmatch.translate()' in that '*' does not match "special characters" + (which are platform-specific). + """ + pattern_re = fnmatch.translate(pattern) + + # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which + # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix, + # and by extension they shouldn't match such "special characters" under + # any OS. So change all non-escaped dots in the RE to match any + # character except the special characters (currently: just os.sep). + sep = os.sep + if os.sep == '\\': + # we're using a regex to manipulate a regex, so we need + # to escape the backslash twice + sep = r'\\\\' + escaped = r'\1[^%s]' % sep + pattern_re = re.sub(r'((? y, + '!=': lambda x, y: x != y, + '<': lambda x, y: x < y, + '<=': lambda x, y: x == y or x < y, + '>': lambda x, y: x > y, + '>=': lambda x, y: x == y or x > y, + 'and': lambda x, y: x and y, + 'or': lambda x, y: x or y, + 'in': lambda x, y: x in y, + 'not in': lambda x, y: x not in y, + } + + def evaluate(self, expr, context): + """ + Evaluate a marker expression returned by the :func:`parse_requirement` + function in the specified context. 
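+
+        For example (illustrative), evaluating the expression
+        ``{'op': '==', 'lhs': 'os_name', 'rhs': '"posix"'}`` in the context
+        ``{'os_name': 'posix'}`` yields ``True``: the quoted right-hand side
+        is treated as a literal, while the unquoted left-hand side is looked
+        up in the context.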
+ """ + if isinstance(expr, string_types): + if expr[0] in '\'"': + result = expr[1:-1] + else: + if expr not in context: + raise SyntaxError('unknown variable: %s' % expr) + result = context[expr] + else: + assert isinstance(expr, dict) + op = expr['op'] + if op not in self.operations: + raise NotImplementedError('op not implemented: %s' % op) + elhs = expr['lhs'] + erhs = expr['rhs'] + if _is_literal(expr['lhs']) and _is_literal(expr['rhs']): + raise SyntaxError('invalid comparison: %s %s %s' % (elhs, op, erhs)) + + lhs = self.evaluate(elhs, context) + rhs = self.evaluate(erhs, context) + result = self.operations[op](lhs, rhs) + return result + +def default_context(): + def format_full_version(info): + version = '%s.%s.%s' % (info.major, info.minor, info.micro) + kind = info.releaselevel + if kind != 'final': + version += kind[0] + str(info.serial) + return version + + if hasattr(sys, 'implementation'): + implementation_version = format_full_version(sys.implementation.version) + implementation_name = sys.implementation.name + else: + implementation_version = '0' + implementation_name = '' + + result = { + 'implementation_name': implementation_name, + 'implementation_version': implementation_version, + 'os_name': os.name, + 'platform_machine': platform.machine(), + 'platform_python_implementation': platform.python_implementation(), + 'platform_release': platform.release(), + 'platform_system': platform.system(), + 'platform_version': platform.version(), + 'platform_in_venv': str(in_venv()), + 'python_full_version': platform.python_version(), + 'python_version': platform.python_version()[:3], + 'sys_platform': sys.platform, + } + return result + +DEFAULT_CONTEXT = default_context() +del default_context + +evaluator = Evaluator() + +def interpret(marker, execution_context=None): + """ + Interpret a marker and return a result depending on environment. + + :param marker: The marker to interpret. + :type marker: str + :param execution_context: The context used for name lookup. + :type execution_context: mapping + """ + try: + expr, rest = parse_marker(marker) + except Exception as e: + raise SyntaxError('Unable to interpret marker syntax: %s: %s' % (marker, e)) + if rest and rest[0] != '#': + raise SyntaxError('unexpected trailing data in marker: %s: %s' % (marker, rest)) + context = dict(DEFAULT_CONTEXT) + if execution_context: + context.update(execution_context) + return evaluator.evaluate(expr, context) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/metadata.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/metadata.py new file mode 100644 index 0000000..2d61378 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/metadata.py @@ -0,0 +1,1096 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012 The Python Software Foundation. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +"""Implementation of the Metadata for Python packages PEPs. + +Supports all metadata formats (1.0, 1.1, 1.2, and 2.0 experimental). +""" +from __future__ import unicode_literals + +import codecs +from email import message_from_file +import json +import logging +import re + + +from . 
import DistlibException, __version__ +from .compat import StringIO, string_types, text_type +from .markers import interpret +from .util import extract_by_key, get_extras +from .version import get_scheme, PEP440_VERSION_RE + +logger = logging.getLogger(__name__) + + +class MetadataMissingError(DistlibException): + """A required metadata is missing""" + + +class MetadataConflictError(DistlibException): + """Attempt to read or write metadata fields that are conflictual.""" + + +class MetadataUnrecognizedVersionError(DistlibException): + """Unknown metadata version number.""" + + +class MetadataInvalidError(DistlibException): + """A metadata value is invalid""" + +# public API of this module +__all__ = ['Metadata', 'PKG_INFO_ENCODING', 'PKG_INFO_PREFERRED_VERSION'] + +# Encoding used for the PKG-INFO files +PKG_INFO_ENCODING = 'utf-8' + +# preferred version. Hopefully will be changed +# to 1.2 once PEP 345 is supported everywhere +PKG_INFO_PREFERRED_VERSION = '1.1' + +_LINE_PREFIX_1_2 = re.compile('\n \\|') +_LINE_PREFIX_PRE_1_2 = re.compile('\n ') +_241_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform', + 'Summary', 'Description', + 'Keywords', 'Home-page', 'Author', 'Author-email', + 'License') + +_314_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform', + 'Supported-Platform', 'Summary', 'Description', + 'Keywords', 'Home-page', 'Author', 'Author-email', + 'License', 'Classifier', 'Download-URL', 'Obsoletes', + 'Provides', 'Requires') + +_314_MARKERS = ('Obsoletes', 'Provides', 'Requires', 'Classifier', + 'Download-URL') + +_345_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform', + 'Supported-Platform', 'Summary', 'Description', + 'Keywords', 'Home-page', 'Author', 'Author-email', + 'Maintainer', 'Maintainer-email', 'License', + 'Classifier', 'Download-URL', 'Obsoletes-Dist', + 'Project-URL', 'Provides-Dist', 'Requires-Dist', + 'Requires-Python', 'Requires-External') + +_345_MARKERS = ('Provides-Dist', 'Requires-Dist', 'Requires-Python', + 'Obsoletes-Dist', 'Requires-External', 'Maintainer', + 'Maintainer-email', 'Project-URL') + +_426_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform', + 'Supported-Platform', 'Summary', 'Description', + 'Keywords', 'Home-page', 'Author', 'Author-email', + 'Maintainer', 'Maintainer-email', 'License', + 'Classifier', 'Download-URL', 'Obsoletes-Dist', + 'Project-URL', 'Provides-Dist', 'Requires-Dist', + 'Requires-Python', 'Requires-External', 'Private-Version', + 'Obsoleted-By', 'Setup-Requires-Dist', 'Extension', + 'Provides-Extra') + +_426_MARKERS = ('Private-Version', 'Provides-Extra', 'Obsoleted-By', + 'Setup-Requires-Dist', 'Extension') + +# See issue #106: Sometimes 'Requires' and 'Provides' occur wrongly in +# the metadata. Include them in the tuple literal below to allow them +# (for now). 
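+# For illustration: metadata that combines 'Description-Content-Type' with a
+# stray 'Requires' field can then still be classified as 1.3/2.1 by
+# _best_version() below, rather than matching no known metadata version at all.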
+_566_FIELDS = _426_FIELDS + ('Description-Content-Type', + 'Requires', 'Provides') + +_566_MARKERS = ('Description-Content-Type',) + +_ALL_FIELDS = set() +_ALL_FIELDS.update(_241_FIELDS) +_ALL_FIELDS.update(_314_FIELDS) +_ALL_FIELDS.update(_345_FIELDS) +_ALL_FIELDS.update(_426_FIELDS) +_ALL_FIELDS.update(_566_FIELDS) + +EXTRA_RE = re.compile(r'''extra\s*==\s*("([^"]+)"|'([^']+)')''') + + +def _version2fieldlist(version): + if version == '1.0': + return _241_FIELDS + elif version == '1.1': + return _314_FIELDS + elif version == '1.2': + return _345_FIELDS + elif version in ('1.3', '2.1'): + return _345_FIELDS + _566_FIELDS + elif version == '2.0': + return _426_FIELDS + raise MetadataUnrecognizedVersionError(version) + + +def _best_version(fields): + """Detect the best version depending on the fields used.""" + def _has_marker(keys, markers): + for marker in markers: + if marker in keys: + return True + return False + + keys = [] + for key, value in fields.items(): + if value in ([], 'UNKNOWN', None): + continue + keys.append(key) + + possible_versions = ['1.0', '1.1', '1.2', '1.3', '2.0', '2.1'] + + # first let's try to see if a field is not part of one of the version + for key in keys: + if key not in _241_FIELDS and '1.0' in possible_versions: + possible_versions.remove('1.0') + logger.debug('Removed 1.0 due to %s', key) + if key not in _314_FIELDS and '1.1' in possible_versions: + possible_versions.remove('1.1') + logger.debug('Removed 1.1 due to %s', key) + if key not in _345_FIELDS and '1.2' in possible_versions: + possible_versions.remove('1.2') + logger.debug('Removed 1.2 due to %s', key) + if key not in _566_FIELDS and '1.3' in possible_versions: + possible_versions.remove('1.3') + logger.debug('Removed 1.3 due to %s', key) + if key not in _566_FIELDS and '2.1' in possible_versions: + if key != 'Description': # In 2.1, description allowed after headers + possible_versions.remove('2.1') + logger.debug('Removed 2.1 due to %s', key) + if key not in _426_FIELDS and '2.0' in possible_versions: + possible_versions.remove('2.0') + logger.debug('Removed 2.0 due to %s', key) + + # possible_version contains qualified versions + if len(possible_versions) == 1: + return possible_versions[0] # found ! 
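+    # Illustrative example: a field set containing only Name, Version and
+    # Requires-Python keeps just '1.2', '1.3', '2.0' and '2.1' alive in the
+    # loop above, because 'Requires-Python' is absent from the 1.0 and 1.1
+    # field tuples.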
+ elif len(possible_versions) == 0: + logger.debug('Out of options - unknown metadata set: %s', fields) + raise MetadataConflictError('Unknown metadata set') + + # let's see if one unique marker is found + is_1_1 = '1.1' in possible_versions and _has_marker(keys, _314_MARKERS) + is_1_2 = '1.2' in possible_versions and _has_marker(keys, _345_MARKERS) + is_2_1 = '2.1' in possible_versions and _has_marker(keys, _566_MARKERS) + is_2_0 = '2.0' in possible_versions and _has_marker(keys, _426_MARKERS) + if int(is_1_1) + int(is_1_2) + int(is_2_1) + int(is_2_0) > 1: + raise MetadataConflictError('You used incompatible 1.1/1.2/2.0/2.1 fields') + + # we have the choice, 1.0, or 1.2, or 2.0 + # - 1.0 has a broken Summary field but works with all tools + # - 1.1 is to avoid + # - 1.2 fixes Summary but has little adoption + # - 2.0 adds more features and is very new + if not is_1_1 and not is_1_2 and not is_2_1 and not is_2_0: + # we couldn't find any specific marker + if PKG_INFO_PREFERRED_VERSION in possible_versions: + return PKG_INFO_PREFERRED_VERSION + if is_1_1: + return '1.1' + if is_1_2: + return '1.2' + if is_2_1: + return '2.1' + + return '2.0' + +_ATTR2FIELD = { + 'metadata_version': 'Metadata-Version', + 'name': 'Name', + 'version': 'Version', + 'platform': 'Platform', + 'supported_platform': 'Supported-Platform', + 'summary': 'Summary', + 'description': 'Description', + 'keywords': 'Keywords', + 'home_page': 'Home-page', + 'author': 'Author', + 'author_email': 'Author-email', + 'maintainer': 'Maintainer', + 'maintainer_email': 'Maintainer-email', + 'license': 'License', + 'classifier': 'Classifier', + 'download_url': 'Download-URL', + 'obsoletes_dist': 'Obsoletes-Dist', + 'provides_dist': 'Provides-Dist', + 'requires_dist': 'Requires-Dist', + 'setup_requires_dist': 'Setup-Requires-Dist', + 'requires_python': 'Requires-Python', + 'requires_external': 'Requires-External', + 'requires': 'Requires', + 'provides': 'Provides', + 'obsoletes': 'Obsoletes', + 'project_url': 'Project-URL', + 'private_version': 'Private-Version', + 'obsoleted_by': 'Obsoleted-By', + 'extension': 'Extension', + 'provides_extra': 'Provides-Extra', +} + +_PREDICATE_FIELDS = ('Requires-Dist', 'Obsoletes-Dist', 'Provides-Dist') +_VERSIONS_FIELDS = ('Requires-Python',) +_VERSION_FIELDS = ('Version',) +_LISTFIELDS = ('Platform', 'Classifier', 'Obsoletes', + 'Requires', 'Provides', 'Obsoletes-Dist', + 'Provides-Dist', 'Requires-Dist', 'Requires-External', + 'Project-URL', 'Supported-Platform', 'Setup-Requires-Dist', + 'Provides-Extra', 'Extension') +_LISTTUPLEFIELDS = ('Project-URL',) + +_ELEMENTSFIELD = ('Keywords',) + +_UNICODEFIELDS = ('Author', 'Maintainer', 'Summary', 'Description') + +_MISSING = object() + +_FILESAFE = re.compile('[^A-Za-z0-9.]+') + + +def _get_name_and_version(name, version, for_filename=False): + """Return the distribution name with version. + + If for_filename is true, return a filename-escaped form.""" + if for_filename: + # For both name and version any runs of non-alphanumeric or '.' + # characters are replaced with a single '-'. Additionally any + # spaces in the version string become '.' + name = _FILESAFE.sub('-', name) + version = _FILESAFE.sub('-', version.replace(' ', '.')) + return '%s-%s' % (name, version) + + +class LegacyMetadata(object): + """The legacy metadata of a release. + + Supports versions 1.0, 1.1 and 1.2 (auto-detected). 
You can + instantiate the class with one of these arguments (or none): + - *path*, the path to a metadata file + - *fileobj* give a file-like object with metadata as content + - *mapping* is a dict-like object + - *scheme* is a version scheme name + """ + # TODO document the mapping API and UNKNOWN default key + + def __init__(self, path=None, fileobj=None, mapping=None, + scheme='default'): + if [path, fileobj, mapping].count(None) < 2: + raise TypeError('path, fileobj and mapping are exclusive') + self._fields = {} + self.requires_files = [] + self._dependencies = None + self.scheme = scheme + if path is not None: + self.read(path) + elif fileobj is not None: + self.read_file(fileobj) + elif mapping is not None: + self.update(mapping) + self.set_metadata_version() + + def set_metadata_version(self): + self._fields['Metadata-Version'] = _best_version(self._fields) + + def _write_field(self, fileobj, name, value): + fileobj.write('%s: %s\n' % (name, value)) + + def __getitem__(self, name): + return self.get(name) + + def __setitem__(self, name, value): + return self.set(name, value) + + def __delitem__(self, name): + field_name = self._convert_name(name) + try: + del self._fields[field_name] + except KeyError: + raise KeyError(name) + + def __contains__(self, name): + return (name in self._fields or + self._convert_name(name) in self._fields) + + def _convert_name(self, name): + if name in _ALL_FIELDS: + return name + name = name.replace('-', '_').lower() + return _ATTR2FIELD.get(name, name) + + def _default_value(self, name): + if name in _LISTFIELDS or name in _ELEMENTSFIELD: + return [] + return 'UNKNOWN' + + def _remove_line_prefix(self, value): + if self.metadata_version in ('1.0', '1.1'): + return _LINE_PREFIX_PRE_1_2.sub('\n', value) + else: + return _LINE_PREFIX_1_2.sub('\n', value) + + def __getattr__(self, name): + if name in _ATTR2FIELD: + return self[name] + raise AttributeError(name) + + # + # Public API + # + +# dependencies = property(_get_dependencies, _set_dependencies) + + def get_fullname(self, filesafe=False): + """Return the distribution name with version. 
+ + If filesafe is true, return a filename-escaped form.""" + return _get_name_and_version(self['Name'], self['Version'], filesafe) + + def is_field(self, name): + """return True if name is a valid metadata key""" + name = self._convert_name(name) + return name in _ALL_FIELDS + + def is_multi_field(self, name): + name = self._convert_name(name) + return name in _LISTFIELDS + + def read(self, filepath): + """Read the metadata values from a file path.""" + fp = codecs.open(filepath, 'r', encoding='utf-8') + try: + self.read_file(fp) + finally: + fp.close() + + def read_file(self, fileob): + """Read the metadata values from a file object.""" + msg = message_from_file(fileob) + self._fields['Metadata-Version'] = msg['metadata-version'] + + # When reading, get all the fields we can + for field in _ALL_FIELDS: + if field not in msg: + continue + if field in _LISTFIELDS: + # we can have multiple lines + values = msg.get_all(field) + if field in _LISTTUPLEFIELDS and values is not None: + values = [tuple(value.split(',')) for value in values] + self.set(field, values) + else: + # single line + value = msg[field] + if value is not None and value != 'UNKNOWN': + self.set(field, value) + # logger.debug('Attempting to set metadata for %s', self) + # self.set_metadata_version() + + def write(self, filepath, skip_unknown=False): + """Write the metadata fields to filepath.""" + fp = codecs.open(filepath, 'w', encoding='utf-8') + try: + self.write_file(fp, skip_unknown) + finally: + fp.close() + + def write_file(self, fileobject, skip_unknown=False): + """Write the PKG-INFO format data to a file object.""" + self.set_metadata_version() + + for field in _version2fieldlist(self['Metadata-Version']): + values = self.get(field) + if skip_unknown and values in ('UNKNOWN', [], ['UNKNOWN']): + continue + if field in _ELEMENTSFIELD: + self._write_field(fileobject, field, ','.join(values)) + continue + if field not in _LISTFIELDS: + if field == 'Description': + if self.metadata_version in ('1.0', '1.1'): + values = values.replace('\n', '\n ') + else: + values = values.replace('\n', '\n |') + values = [values] + + if field in _LISTTUPLEFIELDS: + values = [','.join(value) for value in values] + + for value in values: + self._write_field(fileobject, field, value) + + def update(self, other=None, **kwargs): + """Set metadata values from the given iterable `other` and kwargs. + + Behavior is like `dict.update`: If `other` has a ``keys`` method, + they are looped over and ``self[key]`` is assigned ``other[key]``. + Else, ``other`` is an iterable of ``(key, value)`` iterables. + + Keys that don't match a metadata field or that have an empty value are + dropped. 
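+
+        For example (illustrative), ``update(name='foo', version='1.0')`` fills
+        the ``Name`` and ``Version`` fields, whereas an unknown keyword such as
+        ``colour='blue'`` is silently dropped.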
+ """ + def _set(key, value): + if key in _ATTR2FIELD and value: + self.set(self._convert_name(key), value) + + if not other: + # other is None or empty container + pass + elif hasattr(other, 'keys'): + for k in other.keys(): + _set(k, other[k]) + else: + for k, v in other: + _set(k, v) + + if kwargs: + for k, v in kwargs.items(): + _set(k, v) + + def set(self, name, value): + """Control then set a metadata field.""" + name = self._convert_name(name) + + if ((name in _ELEMENTSFIELD or name == 'Platform') and + not isinstance(value, (list, tuple))): + if isinstance(value, string_types): + value = [v.strip() for v in value.split(',')] + else: + value = [] + elif (name in _LISTFIELDS and + not isinstance(value, (list, tuple))): + if isinstance(value, string_types): + value = [value] + else: + value = [] + + if logger.isEnabledFor(logging.WARNING): + project_name = self['Name'] + + scheme = get_scheme(self.scheme) + if name in _PREDICATE_FIELDS and value is not None: + for v in value: + # check that the values are valid + if not scheme.is_valid_matcher(v.split(';')[0]): + logger.warning( + "'%s': '%s' is not valid (field '%s')", + project_name, v, name) + # FIXME this rejects UNKNOWN, is that right? + elif name in _VERSIONS_FIELDS and value is not None: + if not scheme.is_valid_constraint_list(value): + logger.warning("'%s': '%s' is not a valid version (field '%s')", + project_name, value, name) + elif name in _VERSION_FIELDS and value is not None: + if not scheme.is_valid_version(value): + logger.warning("'%s': '%s' is not a valid version (field '%s')", + project_name, value, name) + + if name in _UNICODEFIELDS: + if name == 'Description': + value = self._remove_line_prefix(value) + + self._fields[name] = value + + def get(self, name, default=_MISSING): + """Get a metadata field.""" + name = self._convert_name(name) + if name not in self._fields: + if default is _MISSING: + default = self._default_value(name) + return default + if name in _UNICODEFIELDS: + value = self._fields[name] + return value + elif name in _LISTFIELDS: + value = self._fields[name] + if value is None: + return [] + res = [] + for val in value: + if name not in _LISTTUPLEFIELDS: + res.append(val) + else: + # That's for Project-URL + res.append((val[0], val[1])) + return res + + elif name in _ELEMENTSFIELD: + value = self._fields[name] + if isinstance(value, string_types): + return value.split(',') + return self._fields[name] + + def check(self, strict=False): + """Check if the metadata is compliant. 
If strict is True then raise if + no Name or Version are provided""" + self.set_metadata_version() + + # XXX should check the versions (if the file was loaded) + missing, warnings = [], [] + + for attr in ('Name', 'Version'): # required by PEP 345 + if attr not in self: + missing.append(attr) + + if strict and missing != []: + msg = 'missing required metadata: %s' % ', '.join(missing) + raise MetadataMissingError(msg) + + for attr in ('Home-page', 'Author'): + if attr not in self: + missing.append(attr) + + # checking metadata 1.2 (XXX needs to check 1.1, 1.0) + if self['Metadata-Version'] != '1.2': + return missing, warnings + + scheme = get_scheme(self.scheme) + + def are_valid_constraints(value): + for v in value: + if not scheme.is_valid_matcher(v.split(';')[0]): + return False + return True + + for fields, controller in ((_PREDICATE_FIELDS, are_valid_constraints), + (_VERSIONS_FIELDS, + scheme.is_valid_constraint_list), + (_VERSION_FIELDS, + scheme.is_valid_version)): + for field in fields: + value = self.get(field, None) + if value is not None and not controller(value): + warnings.append("Wrong value for '%s': %s" % (field, value)) + + return missing, warnings + + def todict(self, skip_missing=False): + """Return fields as a dict. + + Field names will be converted to use the underscore-lowercase style + instead of hyphen-mixed case (i.e. home_page instead of Home-page). + """ + self.set_metadata_version() + + mapping_1_0 = ( + ('metadata_version', 'Metadata-Version'), + ('name', 'Name'), + ('version', 'Version'), + ('summary', 'Summary'), + ('home_page', 'Home-page'), + ('author', 'Author'), + ('author_email', 'Author-email'), + ('license', 'License'), + ('description', 'Description'), + ('keywords', 'Keywords'), + ('platform', 'Platform'), + ('classifiers', 'Classifier'), + ('download_url', 'Download-URL'), + ) + + data = {} + for key, field_name in mapping_1_0: + if not skip_missing or field_name in self._fields: + data[key] = self[field_name] + + if self['Metadata-Version'] == '1.2': + mapping_1_2 = ( + ('requires_dist', 'Requires-Dist'), + ('requires_python', 'Requires-Python'), + ('requires_external', 'Requires-External'), + ('provides_dist', 'Provides-Dist'), + ('obsoletes_dist', 'Obsoletes-Dist'), + ('project_url', 'Project-URL'), + ('maintainer', 'Maintainer'), + ('maintainer_email', 'Maintainer-email'), + ) + for key, field_name in mapping_1_2: + if not skip_missing or field_name in self._fields: + if key != 'project_url': + data[key] = self[field_name] + else: + data[key] = [','.join(u) for u in self[field_name]] + + elif self['Metadata-Version'] == '1.1': + mapping_1_1 = ( + ('provides', 'Provides'), + ('requires', 'Requires'), + ('obsoletes', 'Obsoletes'), + ) + for key, field_name in mapping_1_1: + if not skip_missing or field_name in self._fields: + data[key] = self[field_name] + + return data + + def add_requirements(self, requirements): + if self['Metadata-Version'] == '1.1': + # we can't have 1.1 metadata *and* Setuptools requires + for field in ('Obsoletes', 'Requires', 'Provides'): + if field in self: + del self[field] + self['Requires-Dist'] += requirements + + # Mapping API + # TODO could add iter* variants + + def keys(self): + return list(_version2fieldlist(self['Metadata-Version'])) + + def __iter__(self): + for key in self.keys(): + yield key + + def values(self): + return [self[key] for key in self.keys()] + + def items(self): + return [(key, self[key]) for key in self.keys()] + + def __repr__(self): + return '<%s %s %s>' % (self.__class__.__name__, 
self.name, + self.version) + + +METADATA_FILENAME = 'pydist.json' +WHEEL_METADATA_FILENAME = 'metadata.json' +LEGACY_METADATA_FILENAME = 'METADATA' + + +class Metadata(object): + """ + The metadata of a release. This implementation uses 2.0 (JSON) + metadata where possible. If not possible, it wraps a LegacyMetadata + instance which handles the key-value metadata format. + """ + + METADATA_VERSION_MATCHER = re.compile(r'^\d+(\.\d+)*$') + + NAME_MATCHER = re.compile('^[0-9A-Z]([0-9A-Z_.-]*[0-9A-Z])?$', re.I) + + VERSION_MATCHER = PEP440_VERSION_RE + + SUMMARY_MATCHER = re.compile('.{1,2047}') + + METADATA_VERSION = '2.0' + + GENERATOR = 'distlib (%s)' % __version__ + + MANDATORY_KEYS = { + 'name': (), + 'version': (), + 'summary': ('legacy',), + } + + INDEX_KEYS = ('name version license summary description author ' + 'author_email keywords platform home_page classifiers ' + 'download_url') + + DEPENDENCY_KEYS = ('extras run_requires test_requires build_requires ' + 'dev_requires provides meta_requires obsoleted_by ' + 'supports_environments') + + SYNTAX_VALIDATORS = { + 'metadata_version': (METADATA_VERSION_MATCHER, ()), + 'name': (NAME_MATCHER, ('legacy',)), + 'version': (VERSION_MATCHER, ('legacy',)), + 'summary': (SUMMARY_MATCHER, ('legacy',)), + } + + __slots__ = ('_legacy', '_data', 'scheme') + + def __init__(self, path=None, fileobj=None, mapping=None, + scheme='default'): + if [path, fileobj, mapping].count(None) < 2: + raise TypeError('path, fileobj and mapping are exclusive') + self._legacy = None + self._data = None + self.scheme = scheme + #import pdb; pdb.set_trace() + if mapping is not None: + try: + self._validate_mapping(mapping, scheme) + self._data = mapping + except MetadataUnrecognizedVersionError: + self._legacy = LegacyMetadata(mapping=mapping, scheme=scheme) + self.validate() + else: + data = None + if path: + with open(path, 'rb') as f: + data = f.read() + elif fileobj: + data = fileobj.read() + if data is None: + # Initialised with no args - to be added + self._data = { + 'metadata_version': self.METADATA_VERSION, + 'generator': self.GENERATOR, + } + else: + if not isinstance(data, text_type): + data = data.decode('utf-8') + try: + self._data = json.loads(data) + self._validate_mapping(self._data, scheme) + except ValueError: + # Note: MetadataUnrecognizedVersionError does not + # inherit from ValueError (it's a DistlibException, + # which should not inherit from ValueError). 
+ # The ValueError comes from the json.load - if that + # succeeds and we get a validation error, we want + # that to propagate + self._legacy = LegacyMetadata(fileobj=StringIO(data), + scheme=scheme) + self.validate() + + common_keys = set(('name', 'version', 'license', 'keywords', 'summary')) + + none_list = (None, list) + none_dict = (None, dict) + + mapped_keys = { + 'run_requires': ('Requires-Dist', list), + 'build_requires': ('Setup-Requires-Dist', list), + 'dev_requires': none_list, + 'test_requires': none_list, + 'meta_requires': none_list, + 'extras': ('Provides-Extra', list), + 'modules': none_list, + 'namespaces': none_list, + 'exports': none_dict, + 'commands': none_dict, + 'classifiers': ('Classifier', list), + 'source_url': ('Download-URL', None), + 'metadata_version': ('Metadata-Version', None), + } + + del none_list, none_dict + + def __getattribute__(self, key): + common = object.__getattribute__(self, 'common_keys') + mapped = object.__getattribute__(self, 'mapped_keys') + if key in mapped: + lk, maker = mapped[key] + if self._legacy: + if lk is None: + result = None if maker is None else maker() + else: + result = self._legacy.get(lk) + else: + value = None if maker is None else maker() + if key not in ('commands', 'exports', 'modules', 'namespaces', + 'classifiers'): + result = self._data.get(key, value) + else: + # special cases for PEP 459 + sentinel = object() + result = sentinel + d = self._data.get('extensions') + if d: + if key == 'commands': + result = d.get('python.commands', value) + elif key == 'classifiers': + d = d.get('python.details') + if d: + result = d.get(key, value) + else: + d = d.get('python.exports') + if not d: + d = self._data.get('python.exports') + if d: + result = d.get(key, value) + if result is sentinel: + result = value + elif key not in common: + result = object.__getattribute__(self, key) + elif self._legacy: + result = self._legacy.get(key) + else: + result = self._data.get(key) + return result + + def _validate_value(self, key, value, scheme=None): + if key in self.SYNTAX_VALIDATORS: + pattern, exclusions = self.SYNTAX_VALIDATORS[key] + if (scheme or self.scheme) not in exclusions: + m = pattern.match(value) + if not m: + raise MetadataInvalidError("'%s' is an invalid value for " + "the '%s' property" % (value, + key)) + + def __setattr__(self, key, value): + self._validate_value(key, value) + common = object.__getattribute__(self, 'common_keys') + mapped = object.__getattribute__(self, 'mapped_keys') + if key in mapped: + lk, _ = mapped[key] + if self._legacy: + if lk is None: + raise NotImplementedError + self._legacy[lk] = value + elif key not in ('commands', 'exports', 'modules', 'namespaces', + 'classifiers'): + self._data[key] = value + else: + # special cases for PEP 459 + d = self._data.setdefault('extensions', {}) + if key == 'commands': + d['python.commands'] = value + elif key == 'classifiers': + d = d.setdefault('python.details', {}) + d[key] = value + else: + d = d.setdefault('python.exports', {}) + d[key] = value + elif key not in common: + object.__setattr__(self, key, value) + else: + if key == 'keywords': + if isinstance(value, string_types): + value = value.strip() + if value: + value = value.split() + else: + value = [] + if self._legacy: + self._legacy[key] = value + else: + self._data[key] = value + + @property + def name_and_version(self): + return _get_name_and_version(self.name, self.version, True) + + @property + def provides(self): + if self._legacy: + result = self._legacy['Provides-Dist'] + else: + result = 
self._data.setdefault('provides', []) + s = '%s (%s)' % (self.name, self.version) + if s not in result: + result.append(s) + return result + + @provides.setter + def provides(self, value): + if self._legacy: + self._legacy['Provides-Dist'] = value + else: + self._data['provides'] = value + + def get_requirements(self, reqts, extras=None, env=None): + """ + Base method to get dependencies, given a set of extras + to satisfy and an optional environment context. + :param reqts: A list of sometimes-wanted dependencies, + perhaps dependent on extras and environment. + :param extras: A list of optional components being requested. + :param env: An optional environment for marker evaluation. + """ + if self._legacy: + result = reqts + else: + result = [] + extras = get_extras(extras or [], self.extras) + for d in reqts: + if 'extra' not in d and 'environment' not in d: + # unconditional + include = True + else: + if 'extra' not in d: + # Not extra-dependent - only environment-dependent + include = True + else: + include = d.get('extra') in extras + if include: + # Not excluded because of extras, check environment + marker = d.get('environment') + if marker: + include = interpret(marker, env) + if include: + result.extend(d['requires']) + for key in ('build', 'dev', 'test'): + e = ':%s:' % key + if e in extras: + extras.remove(e) + # A recursive call, but it should terminate since 'test' + # has been removed from the extras + reqts = self._data.get('%s_requires' % key, []) + result.extend(self.get_requirements(reqts, extras=extras, + env=env)) + return result + + @property + def dictionary(self): + if self._legacy: + return self._from_legacy() + return self._data + + @property + def dependencies(self): + if self._legacy: + raise NotImplementedError + else: + return extract_by_key(self._data, self.DEPENDENCY_KEYS) + + @dependencies.setter + def dependencies(self, value): + if self._legacy: + raise NotImplementedError + else: + self._data.update(value) + + def _validate_mapping(self, mapping, scheme): + if mapping.get('metadata_version') != self.METADATA_VERSION: + raise MetadataUnrecognizedVersionError() + missing = [] + for key, exclusions in self.MANDATORY_KEYS.items(): + if key not in mapping: + if scheme not in exclusions: + missing.append(key) + if missing: + msg = 'Missing metadata items: %s' % ', '.join(missing) + raise MetadataMissingError(msg) + for k, v in mapping.items(): + self._validate_value(k, v, scheme) + + def validate(self): + if self._legacy: + missing, warnings = self._legacy.check(True) + if missing or warnings: + logger.warning('Metadata: missing: %s, warnings: %s', + missing, warnings) + else: + self._validate_mapping(self._data, self.scheme) + + def todict(self): + if self._legacy: + return self._legacy.todict(True) + else: + result = extract_by_key(self._data, self.INDEX_KEYS) + return result + + def _from_legacy(self): + assert self._legacy and not self._data + result = { + 'metadata_version': self.METADATA_VERSION, + 'generator': self.GENERATOR, + } + lmd = self._legacy.todict(True) # skip missing ones + for k in ('name', 'version', 'license', 'summary', 'description', + 'classifier'): + if k in lmd: + if k == 'classifier': + nk = 'classifiers' + else: + nk = k + result[nk] = lmd[k] + kw = lmd.get('Keywords', []) + if kw == ['']: + kw = [] + result['keywords'] = kw + keys = (('requires_dist', 'run_requires'), + ('setup_requires_dist', 'build_requires')) + for ok, nk in keys: + if ok in lmd and lmd[ok]: + result[nk] = [{'requires': lmd[ok]}] + result['provides'] = 
self.provides + author = {} + maintainer = {} + return result + + LEGACY_MAPPING = { + 'name': 'Name', + 'version': 'Version', + 'license': 'License', + 'summary': 'Summary', + 'description': 'Description', + 'classifiers': 'Classifier', + } + + def _to_legacy(self): + def process_entries(entries): + reqts = set() + for e in entries: + extra = e.get('extra') + env = e.get('environment') + rlist = e['requires'] + for r in rlist: + if not env and not extra: + reqts.add(r) + else: + marker = '' + if extra: + marker = 'extra == "%s"' % extra + if env: + if marker: + marker = '(%s) and %s' % (env, marker) + else: + marker = env + reqts.add(';'.join((r, marker))) + return reqts + + assert self._data and not self._legacy + result = LegacyMetadata() + nmd = self._data + for nk, ok in self.LEGACY_MAPPING.items(): + if nk in nmd: + result[ok] = nmd[nk] + r1 = process_entries(self.run_requires + self.meta_requires) + r2 = process_entries(self.build_requires + self.dev_requires) + if self.extras: + result['Provides-Extra'] = sorted(self.extras) + result['Requires-Dist'] = sorted(r1) + result['Setup-Requires-Dist'] = sorted(r2) + # TODO: other fields such as contacts + return result + + def write(self, path=None, fileobj=None, legacy=False, skip_unknown=True): + if [path, fileobj].count(None) != 1: + raise ValueError('Exactly one of path and fileobj is needed') + self.validate() + if legacy: + if self._legacy: + legacy_md = self._legacy + else: + legacy_md = self._to_legacy() + if path: + legacy_md.write(path, skip_unknown=skip_unknown) + else: + legacy_md.write_file(fileobj, skip_unknown=skip_unknown) + else: + if self._legacy: + d = self._from_legacy() + else: + d = self._data + if fileobj: + json.dump(d, fileobj, ensure_ascii=True, indent=2, + sort_keys=True) + else: + with codecs.open(path, 'w', 'utf-8') as f: + json.dump(d, f, ensure_ascii=True, indent=2, + sort_keys=True) + + def add_requirements(self, requirements): + if self._legacy: + self._legacy.add_requirements(requirements) + else: + run_requires = self._data.setdefault('run_requires', []) + always = None + for entry in run_requires: + if 'environment' not in entry and 'extra' not in entry: + always = entry + break + if always is None: + always = { 'requires': requirements } + run_requires.insert(0, always) + else: + rset = set(always['requires']) | set(requirements) + always['requires'] = sorted(rset) + + def __repr__(self): + name = self.name or '(no name)' + version = self.version or 'no version' + return '<%s %s %s (%s)>' % (self.__class__.__name__, + self.metadata_version, name, version) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/resources.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/resources.py new file mode 100644 index 0000000..1884016 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/resources.py @@ -0,0 +1,355 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2013-2017 Vinay Sajip. +# Licensed to the Python Software Foundation under a contributor agreement. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +from __future__ import unicode_literals + +import bisect +import io +import logging +import os +import pkgutil +import shutil +import sys +import types +import zipimport + +from . 
import DistlibException +from .util import cached_property, get_cache_base, path_to_cache_dir, Cache + +logger = logging.getLogger(__name__) + + +cache = None # created when needed + + +class ResourceCache(Cache): + def __init__(self, base=None): + if base is None: + # Use native string to avoid issues on 2.x: see Python #20140. + base = os.path.join(get_cache_base(), str('resource-cache')) + super(ResourceCache, self).__init__(base) + + def is_stale(self, resource, path): + """ + Is the cache stale for the given resource? + + :param resource: The :class:`Resource` being cached. + :param path: The path of the resource in the cache. + :return: True if the cache is stale. + """ + # Cache invalidation is a hard problem :-) + return True + + def get(self, resource): + """ + Get a resource into the cache, + + :param resource: A :class:`Resource` instance. + :return: The pathname of the resource in the cache. + """ + prefix, path = resource.finder.get_cache_info(resource) + if prefix is None: + result = path + else: + result = os.path.join(self.base, self.prefix_to_dir(prefix), path) + dirname = os.path.dirname(result) + if not os.path.isdir(dirname): + os.makedirs(dirname) + if not os.path.exists(result): + stale = True + else: + stale = self.is_stale(resource, path) + if stale: + # write the bytes of the resource to the cache location + with open(result, 'wb') as f: + f.write(resource.bytes) + return result + + +class ResourceBase(object): + def __init__(self, finder, name): + self.finder = finder + self.name = name + + +class Resource(ResourceBase): + """ + A class representing an in-package resource, such as a data file. This is + not normally instantiated by user code, but rather by a + :class:`ResourceFinder` which manages the resource. + """ + is_container = False # Backwards compatibility + + def as_stream(self): + """ + Get the resource as a stream. + + This is not a property to make it obvious that it returns a new stream + each time. + """ + return self.finder.get_stream(self) + + @cached_property + def file_path(self): + global cache + if cache is None: + cache = ResourceCache() + return cache.get(self) + + @cached_property + def bytes(self): + return self.finder.get_bytes(self) + + @cached_property + def size(self): + return self.finder.get_size(self) + + +class ResourceContainer(ResourceBase): + is_container = True # Backwards compatibility + + @cached_property + def resources(self): + return self.finder.get_resources(self) + + +class ResourceFinder(object): + """ + Resource finder for file system resources. 
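+
+    Instances are normally obtained via the :func:`finder` function defined
+    later in this module; for example (illustrative, with placeholder names),
+    ``finder('mypkg').find('data.txt')`` returns a :class:`Resource` whose
+    ``bytes`` property holds the file's contents.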
+ """ + + if sys.platform.startswith('java'): + skipped_extensions = ('.pyc', '.pyo', '.class') + else: + skipped_extensions = ('.pyc', '.pyo') + + def __init__(self, module): + self.module = module + self.loader = getattr(module, '__loader__', None) + self.base = os.path.dirname(getattr(module, '__file__', '')) + + def _adjust_path(self, path): + return os.path.realpath(path) + + def _make_path(self, resource_name): + # Issue #50: need to preserve type of path on Python 2.x + # like os.path._get_sep + if isinstance(resource_name, bytes): # should only happen on 2.x + sep = b'/' + else: + sep = '/' + parts = resource_name.split(sep) + parts.insert(0, self.base) + result = os.path.join(*parts) + return self._adjust_path(result) + + def _find(self, path): + return os.path.exists(path) + + def get_cache_info(self, resource): + return None, resource.path + + def find(self, resource_name): + path = self._make_path(resource_name) + if not self._find(path): + result = None + else: + if self._is_directory(path): + result = ResourceContainer(self, resource_name) + else: + result = Resource(self, resource_name) + result.path = path + return result + + def get_stream(self, resource): + return open(resource.path, 'rb') + + def get_bytes(self, resource): + with open(resource.path, 'rb') as f: + return f.read() + + def get_size(self, resource): + return os.path.getsize(resource.path) + + def get_resources(self, resource): + def allowed(f): + return (f != '__pycache__' and not + f.endswith(self.skipped_extensions)) + return set([f for f in os.listdir(resource.path) if allowed(f)]) + + def is_container(self, resource): + return self._is_directory(resource.path) + + _is_directory = staticmethod(os.path.isdir) + + def iterator(self, resource_name): + resource = self.find(resource_name) + if resource is not None: + todo = [resource] + while todo: + resource = todo.pop(0) + yield resource + if resource.is_container: + rname = resource.name + for name in resource.resources: + if not rname: + new_name = name + else: + new_name = '/'.join([rname, name]) + child = self.find(new_name) + if child.is_container: + todo.append(child) + else: + yield child + + +class ZipResourceFinder(ResourceFinder): + """ + Resource finder for resources in .zip files. 
+ """ + def __init__(self, module): + super(ZipResourceFinder, self).__init__(module) + archive = self.loader.archive + self.prefix_len = 1 + len(archive) + # PyPy doesn't have a _files attr on zipimporter, and you can't set one + if hasattr(self.loader, '_files'): + self._files = self.loader._files + else: + self._files = zipimport._zip_directory_cache[archive] + self.index = sorted(self._files) + + def _adjust_path(self, path): + return path + + def _find(self, path): + path = path[self.prefix_len:] + if path in self._files: + result = True + else: + if path and path[-1] != os.sep: + path = path + os.sep + i = bisect.bisect(self.index, path) + try: + result = self.index[i].startswith(path) + except IndexError: + result = False + if not result: + logger.debug('_find failed: %r %r', path, self.loader.prefix) + else: + logger.debug('_find worked: %r %r', path, self.loader.prefix) + return result + + def get_cache_info(self, resource): + prefix = self.loader.archive + path = resource.path[1 + len(prefix):] + return prefix, path + + def get_bytes(self, resource): + return self.loader.get_data(resource.path) + + def get_stream(self, resource): + return io.BytesIO(self.get_bytes(resource)) + + def get_size(self, resource): + path = resource.path[self.prefix_len:] + return self._files[path][3] + + def get_resources(self, resource): + path = resource.path[self.prefix_len:] + if path and path[-1] != os.sep: + path += os.sep + plen = len(path) + result = set() + i = bisect.bisect(self.index, path) + while i < len(self.index): + if not self.index[i].startswith(path): + break + s = self.index[i][plen:] + result.add(s.split(os.sep, 1)[0]) # only immediate children + i += 1 + return result + + def _is_directory(self, path): + path = path[self.prefix_len:] + if path and path[-1] != os.sep: + path += os.sep + i = bisect.bisect(self.index, path) + try: + result = self.index[i].startswith(path) + except IndexError: + result = False + return result + +_finder_registry = { + type(None): ResourceFinder, + zipimport.zipimporter: ZipResourceFinder +} + +try: + # In Python 3.6, _frozen_importlib -> _frozen_importlib_external + try: + import _frozen_importlib_external as _fi + except ImportError: + import _frozen_importlib as _fi + _finder_registry[_fi.SourceFileLoader] = ResourceFinder + _finder_registry[_fi.FileFinder] = ResourceFinder + del _fi +except (ImportError, AttributeError): + pass + + +def register_finder(loader, finder_maker): + _finder_registry[type(loader)] = finder_maker + +_finder_cache = {} + + +def finder(package): + """ + Return a resource finder for a package. + :param package: The name of the package. + :return: A :class:`ResourceFinder` instance for the package. + """ + if package in _finder_cache: + result = _finder_cache[package] + else: + if package not in sys.modules: + __import__(package) + module = sys.modules[package] + path = getattr(module, '__path__', None) + if path is None: + raise DistlibException('You cannot get a finder for a module, ' + 'only for a package') + loader = getattr(module, '__loader__', None) + finder_maker = _finder_registry.get(type(loader)) + if finder_maker is None: + raise DistlibException('Unable to locate finder for %r' % package) + result = finder_maker(module) + _finder_cache[package] = result + return result + + +_dummy_module = types.ModuleType(str('__dummy__')) + + +def finder_for_path(path): + """ + Return a resource finder for a path, which should represent a container. + + :param path: The path. 
+ :return: A :class:`ResourceFinder` instance for the path. + """ + result = None + # calls any path hooks, gets importer into cache + pkgutil.get_importer(path) + loader = sys.path_importer_cache.get(path) + finder = _finder_registry.get(type(loader)) + if finder: + module = _dummy_module + module.__file__ = os.path.join(path, '') + module.__loader__ = loader + result = finder(module) + return result diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/scripts.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/scripts.py new file mode 100644 index 0000000..5185974 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/scripts.py @@ -0,0 +1,416 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2013-2015 Vinay Sajip. +# Licensed to the Python Software Foundation under a contributor agreement. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +from io import BytesIO +import logging +import os +import re +import struct +import sys + +from .compat import sysconfig, detect_encoding, ZipFile +from .resources import finder +from .util import (FileOperator, get_export_entry, convert_path, + get_executable, in_venv) + +logger = logging.getLogger(__name__) + +_DEFAULT_MANIFEST = ''' + + + + + + + + + + + + +'''.strip() + +# check if Python is called on the first line with this expression +FIRST_LINE_RE = re.compile(b'^#!.*pythonw?[0-9.]*([ \t].*)?$') +SCRIPT_TEMPLATE = r'''# -*- coding: utf-8 -*- +import re +import sys +from %(module)s import %(import_name)s +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(%(func)s()) +''' + + +def _enquote_executable(executable): + if ' ' in executable: + # make sure we quote only the executable in case of env + # for example /usr/bin/env "/dir with spaces/bin/jython" + # instead of "/usr/bin/env /dir with spaces/bin/jython" + # otherwise whole + if executable.startswith('/usr/bin/env '): + env, _executable = executable.split(' ', 1) + if ' ' in _executable and not _executable.startswith('"'): + executable = '%s "%s"' % (env, _executable) + else: + if not executable.startswith('"'): + executable = '"%s"' % executable + return executable + + +class ScriptMaker(object): + """ + A class to copy or create scripts from source scripts or callable + specifications. + """ + script_template = SCRIPT_TEMPLATE + + executable = None # for shebangs + + def __init__(self, source_dir, target_dir, add_launchers=True, + dry_run=False, fileop=None): + self.source_dir = source_dir + self.target_dir = target_dir + self.add_launchers = add_launchers + self.force = False + self.clobber = False + # It only makes sense to set mode bits on POSIX. + self.set_mode = (os.name == 'posix') or (os.name == 'java' and + os._name == 'posix') + self.variants = set(('', 'X.Y')) + self._fileop = fileop or FileOperator(dry_run) + + self._is_nt = os.name == 'nt' or ( + os.name == 'java' and os._name == 'nt') + + def _get_alternate_executable(self, executable, options): + if options.get('gui', False) and self._is_nt: # pragma: no cover + dn, fn = os.path.split(executable) + fn = fn.replace('python', 'pythonw') + executable = os.path.join(dn, fn) + return executable + + if sys.platform.startswith('java'): # pragma: no cover + def _is_shell(self, executable): + """ + Determine if the specified executable is a script + (contains a #! line) + """ + try: + with open(executable) as fp: + return fp.read(2) == '#!' 
+ except (OSError, IOError): + logger.warning('Failed to open %s', executable) + return False + + def _fix_jython_executable(self, executable): + if self._is_shell(executable): + # Workaround for Jython is not needed on Linux systems. + import java + + if java.lang.System.getProperty('os.name') == 'Linux': + return executable + elif executable.lower().endswith('jython.exe'): + # Use wrapper exe for Jython on Windows + return executable + return '/usr/bin/env %s' % executable + + def _build_shebang(self, executable, post_interp): + """ + Build a shebang line. In the simple case (on Windows, or a shebang line + which is not too long or contains spaces) use a simple formulation for + the shebang. Otherwise, use /bin/sh as the executable, with a contrived + shebang which allows the script to run either under Python or sh, using + suitable quoting. Thanks to Harald Nordgren for his input. + + See also: http://www.in-ulm.de/~mascheck/various/shebang/#length + https://hg.mozilla.org/mozilla-central/file/tip/mach + """ + if os.name != 'posix': + simple_shebang = True + else: + # Add 3 for '#!' prefix and newline suffix. + shebang_length = len(executable) + len(post_interp) + 3 + if sys.platform == 'darwin': + max_shebang_length = 512 + else: + max_shebang_length = 127 + simple_shebang = ((b' ' not in executable) and + (shebang_length <= max_shebang_length)) + + if simple_shebang: + result = b'#!' + executable + post_interp + b'\n' + else: + result = b'#!/bin/sh\n' + result += b"'''exec' " + executable + post_interp + b' "$0" "$@"\n' + result += b"' '''" + return result + + def _get_shebang(self, encoding, post_interp=b'', options=None): + enquote = True + if self.executable: + executable = self.executable + enquote = False # assume this will be taken care of + elif not sysconfig.is_python_build(): + executable = get_executable() + elif in_venv(): # pragma: no cover + executable = os.path.join(sysconfig.get_path('scripts'), + 'python%s' % sysconfig.get_config_var('EXE')) + else: # pragma: no cover + executable = os.path.join( + sysconfig.get_config_var('BINDIR'), + 'python%s%s' % (sysconfig.get_config_var('VERSION'), + sysconfig.get_config_var('EXE'))) + if options: + executable = self._get_alternate_executable(executable, options) + + if sys.platform.startswith('java'): # pragma: no cover + executable = self._fix_jython_executable(executable) + + # Normalise case for Windows - COMMENTED OUT + # executable = os.path.normcase(executable) + # N.B. The normalising operation above has been commented out: See + # issue #124. Although paths in Windows are generally case-insensitive, + # they aren't always. For example, a path containing a ẞ (which is a + # LATIN CAPITAL LETTER SHARP S - U+1E9E) is normcased to ß (which is a + # LATIN SMALL LETTER SHARP S' - U+00DF). The two are not considered by + # Windows as equivalent in path names. + + # If the user didn't specify an executable, it may be necessary to + # cater for executable paths with spaces (not uncommon on Windows) + if enquote: + executable = _enquote_executable(executable) + # Issue #51: don't use fsencode, since we later try to + # check that the shebang is decodable using utf-8. 
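+            # Illustrative example: with executable b'/usr/bin/python3' and an
+            # empty post_interp, the _build_shebang() call below yields
+            # b'#!/usr/bin/python3\n'.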
+ executable = executable.encode('utf-8') + # in case of IronPython, play safe and enable frames support + if (sys.platform == 'cli' and '-X:Frames' not in post_interp + and '-X:FullFrames' not in post_interp): # pragma: no cover + post_interp += b' -X:Frames' + shebang = self._build_shebang(executable, post_interp) + # Python parser starts to read a script using UTF-8 until + # it gets a #coding:xxx cookie. The shebang has to be the + # first line of a file, the #coding:xxx cookie cannot be + # written before. So the shebang has to be decodable from + # UTF-8. + try: + shebang.decode('utf-8') + except UnicodeDecodeError: # pragma: no cover + raise ValueError( + 'The shebang (%r) is not decodable from utf-8' % shebang) + # If the script is encoded to a custom encoding (use a + # #coding:xxx cookie), the shebang has to be decodable from + # the script encoding too. + if encoding != 'utf-8': + try: + shebang.decode(encoding) + except UnicodeDecodeError: # pragma: no cover + raise ValueError( + 'The shebang (%r) is not decodable ' + 'from the script encoding (%r)' % (shebang, encoding)) + return shebang + + def _get_script_text(self, entry): + return self.script_template % dict(module=entry.prefix, + import_name=entry.suffix.split('.')[0], + func=entry.suffix) + + manifest = _DEFAULT_MANIFEST + + def get_manifest(self, exename): + base = os.path.basename(exename) + return self.manifest % base + + def _write_script(self, names, shebang, script_bytes, filenames, ext): + use_launcher = self.add_launchers and self._is_nt + linesep = os.linesep.encode('utf-8') + if not shebang.endswith(linesep): + shebang += linesep + if not use_launcher: + script_bytes = shebang + script_bytes + else: # pragma: no cover + if ext == 'py': + launcher = self._get_launcher('t') + else: + launcher = self._get_launcher('w') + stream = BytesIO() + with ZipFile(stream, 'w') as zf: + zf.writestr('__main__.py', script_bytes) + zip_data = stream.getvalue() + script_bytes = launcher + shebang + zip_data + for name in names: + outname = os.path.join(self.target_dir, name) + if use_launcher: # pragma: no cover + n, e = os.path.splitext(outname) + if e.startswith('.py'): + outname = n + outname = '%s.exe' % outname + try: + self._fileop.write_binary_file(outname, script_bytes) + except Exception: + # Failed writing an executable - it might be in use. + logger.warning('Failed to write executable - trying to ' + 'use .deleteme logic') + dfname = '%s.deleteme' % outname + if os.path.exists(dfname): + os.remove(dfname) # Not allowed to fail here + os.rename(outname, dfname) # nor here + self._fileop.write_binary_file(outname, script_bytes) + logger.debug('Able to replace executable using ' + '.deleteme logic') + try: + os.remove(dfname) + except Exception: + pass # still in use - ignore error + else: + if self._is_nt and not outname.endswith('.' 
+ ext): # pragma: no cover + outname = '%s.%s' % (outname, ext) + if os.path.exists(outname) and not self.clobber: + logger.warning('Skipping existing file %s', outname) + continue + self._fileop.write_binary_file(outname, script_bytes) + if self.set_mode: + self._fileop.set_executable_mode([outname]) + filenames.append(outname) + + def _make_script(self, entry, filenames, options=None): + post_interp = b'' + if options: + args = options.get('interpreter_args', []) + if args: + args = ' %s' % ' '.join(args) + post_interp = args.encode('utf-8') + shebang = self._get_shebang('utf-8', post_interp, options=options) + script = self._get_script_text(entry).encode('utf-8') + name = entry.name + scriptnames = set() + if '' in self.variants: + scriptnames.add(name) + if 'X' in self.variants: + scriptnames.add('%s%s' % (name, sys.version_info[0])) + if 'X.Y' in self.variants: + scriptnames.add('%s-%s.%s' % (name, sys.version_info[0], + sys.version_info[1])) + if options and options.get('gui', False): + ext = 'pyw' + else: + ext = 'py' + self._write_script(scriptnames, shebang, script, filenames, ext) + + def _copy_script(self, script, filenames): + adjust = False + script = os.path.join(self.source_dir, convert_path(script)) + outname = os.path.join(self.target_dir, os.path.basename(script)) + if not self.force and not self._fileop.newer(script, outname): + logger.debug('not copying %s (up-to-date)', script) + return + + # Always open the file, but ignore failures in dry-run mode -- + # that way, we'll get accurate feedback if we can read the + # script. + try: + f = open(script, 'rb') + except IOError: # pragma: no cover + if not self.dry_run: + raise + f = None + else: + first_line = f.readline() + if not first_line: # pragma: no cover + logger.warning('%s: %s is an empty file (skipping)', + self.get_command_name(), script) + return + + match = FIRST_LINE_RE.match(first_line.replace(b'\r\n', b'\n')) + if match: + adjust = True + post_interp = match.group(1) or b'' + + if not adjust: + if f: + f.close() + self._fileop.copy_file(script, outname) + if self.set_mode: + self._fileop.set_executable_mode([outname]) + filenames.append(outname) + else: + logger.info('copying and adjusting %s -> %s', script, + self.target_dir) + if not self._fileop.dry_run: + encoding, lines = detect_encoding(f.readline) + f.seek(0) + shebang = self._get_shebang(encoding, post_interp) + if b'pythonw' in first_line: # pragma: no cover + ext = 'pyw' + else: + ext = 'py' + n = os.path.basename(outname) + self._write_script([n], shebang, f.read(), filenames, ext) + if f: + f.close() + + @property + def dry_run(self): + return self._fileop.dry_run + + @dry_run.setter + def dry_run(self, value): + self._fileop.dry_run = value + + if os.name == 'nt' or (os.name == 'java' and os._name == 'nt'): # pragma: no cover + # Executable launcher support. + # Launchers are from https://bitbucket.org/vinay.sajip/simple_launcher/ + + def _get_launcher(self, kind): + if struct.calcsize('P') == 8: # 64-bit + bits = '64' + else: + bits = '32' + name = '%s%s.exe' % (kind, bits) + # Issue 31: don't hardcode an absolute package name, but + # determine it relative to the current package + distlib_package = __name__.rsplit('.', 1)[0] + resource = finder(distlib_package).find(name) + if not resource: + msg = ('Unable to find resource %s in package %s' % (name, + distlib_package)) + raise ValueError(msg) + return resource.bytes + + # Public API follows + + def make(self, specification, options=None): + """ + Make a script. 
+ + :param specification: The specification, which is either a valid export + entry specification (to make a script from a + callable) or a filename (to make a script by + copying from a source location). + :param options: A dictionary of options controlling script generation. + :return: A list of all absolute pathnames written to. + """ + filenames = [] + entry = get_export_entry(specification) + if entry is None: + self._copy_script(specification, filenames) + else: + self._make_script(entry, filenames, options=options) + return filenames + + def make_multiple(self, specifications, options=None): + """ + Take a list of specifications and make scripts from them, + :param specifications: A list of specifications. + :return: A list of all absolute pathnames written to, + """ + filenames = [] + for specification in specifications: + filenames.extend(self.make(specification, options)) + return filenames diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/t32.exe b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/t32.exe new file mode 100644 index 0000000..8932a18 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/t32.exe differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/t64.exe b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/t64.exe new file mode 100644 index 0000000..325b805 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/t64.exe differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/util.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/util.py new file mode 100644 index 0000000..01324ea --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/util.py @@ -0,0 +1,1761 @@ +# +# Copyright (C) 2012-2017 The Python Software Foundation. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +import codecs +from collections import deque +import contextlib +import csv +from glob import iglob as std_iglob +import io +import json +import logging +import os +import py_compile +import re +import socket +try: + import ssl +except ImportError: # pragma: no cover + ssl = None +import subprocess +import sys +import tarfile +import tempfile +import textwrap + +try: + import threading +except ImportError: # pragma: no cover + import dummy_threading as threading +import time + +from . import DistlibException +from .compat import (string_types, text_type, shutil, raw_input, StringIO, + cache_from_source, urlopen, urljoin, httplib, xmlrpclib, + splittype, HTTPHandler, BaseConfigurator, valid_ident, + Container, configparser, URLError, ZipFile, fsdecode, + unquote, urlparse) + +logger = logging.getLogger(__name__) + +# +# Requirement parsing code as per PEP 508 +# + +IDENTIFIER = re.compile(r'^([\w\.-]+)\s*') +VERSION_IDENTIFIER = re.compile(r'^([\w\.*+-]+)\s*') +COMPARE_OP = re.compile(r'^(<=?|>=?|={2,3}|[~!]=)\s*') +MARKER_OP = re.compile(r'^((<=?)|(>=?)|={2,3}|[~!]=|in|not\s+in)\s*') +OR = re.compile(r'^or\b\s*') +AND = re.compile(r'^and\b\s*') +NON_SPACE = re.compile(r'(\S+)\s*') +STRING_CHUNK = re.compile(r'([\s\w\.{}()*+#:;,/?!~`@$%^&=|<>\[\]-]+)') + + +def parse_marker(marker_string): + """ + Parse a marker string and return a dictionary containing a marker expression. + + The dictionary will contain keys "op", "lhs" and "rhs" for non-terminals in + the expression grammar, or strings. A string contained in quotes is to be + interpreted as a literal string, and a string not contained in quotes is a + variable (such as os_name). 
+ """ + def marker_var(remaining): + # either identifier, or literal string + m = IDENTIFIER.match(remaining) + if m: + result = m.groups()[0] + remaining = remaining[m.end():] + elif not remaining: + raise SyntaxError('unexpected end of input') + else: + q = remaining[0] + if q not in '\'"': + raise SyntaxError('invalid expression: %s' % remaining) + oq = '\'"'.replace(q, '') + remaining = remaining[1:] + parts = [q] + while remaining: + # either a string chunk, or oq, or q to terminate + if remaining[0] == q: + break + elif remaining[0] == oq: + parts.append(oq) + remaining = remaining[1:] + else: + m = STRING_CHUNK.match(remaining) + if not m: + raise SyntaxError('error in string literal: %s' % remaining) + parts.append(m.groups()[0]) + remaining = remaining[m.end():] + else: + s = ''.join(parts) + raise SyntaxError('unterminated string: %s' % s) + parts.append(q) + result = ''.join(parts) + remaining = remaining[1:].lstrip() # skip past closing quote + return result, remaining + + def marker_expr(remaining): + if remaining and remaining[0] == '(': + result, remaining = marker(remaining[1:].lstrip()) + if remaining[0] != ')': + raise SyntaxError('unterminated parenthesis: %s' % remaining) + remaining = remaining[1:].lstrip() + else: + lhs, remaining = marker_var(remaining) + while remaining: + m = MARKER_OP.match(remaining) + if not m: + break + op = m.groups()[0] + remaining = remaining[m.end():] + rhs, remaining = marker_var(remaining) + lhs = {'op': op, 'lhs': lhs, 'rhs': rhs} + result = lhs + return result, remaining + + def marker_and(remaining): + lhs, remaining = marker_expr(remaining) + while remaining: + m = AND.match(remaining) + if not m: + break + remaining = remaining[m.end():] + rhs, remaining = marker_expr(remaining) + lhs = {'op': 'and', 'lhs': lhs, 'rhs': rhs} + return lhs, remaining + + def marker(remaining): + lhs, remaining = marker_and(remaining) + while remaining: + m = OR.match(remaining) + if not m: + break + remaining = remaining[m.end():] + rhs, remaining = marker_and(remaining) + lhs = {'op': 'or', 'lhs': lhs, 'rhs': rhs} + return lhs, remaining + + return marker(marker_string) + + +def parse_requirement(req): + """ + Parse a requirement passed in as a string. Return a Container + whose attributes contain the various parts of the requirement. + """ + remaining = req.strip() + if not remaining or remaining.startswith('#'): + return None + m = IDENTIFIER.match(remaining) + if not m: + raise SyntaxError('name expected: %s' % remaining) + distname = m.groups()[0] + remaining = remaining[m.end():] + extras = mark_expr = versions = uri = None + if remaining and remaining[0] == '[': + i = remaining.find(']', 1) + if i < 0: + raise SyntaxError('unterminated extra: %s' % remaining) + s = remaining[1:i] + remaining = remaining[i + 1:].lstrip() + extras = [] + while s: + m = IDENTIFIER.match(s) + if not m: + raise SyntaxError('malformed extra: %s' % s) + extras.append(m.groups()[0]) + s = s[m.end():] + if not s: + break + if s[0] != ',': + raise SyntaxError('comma expected in extras: %s' % s) + s = s[1:].lstrip() + if not extras: + extras = None + if remaining: + if remaining[0] == '@': + # it's a URI + remaining = remaining[1:].lstrip() + m = NON_SPACE.match(remaining) + if not m: + raise SyntaxError('invalid URI: %s' % remaining) + uri = m.groups()[0] + t = urlparse(uri) + # there are issues with Python and URL parsing, so this test + # is a bit crude. See bpo-20271, bpo-23505. 
Python doesn't + # always parse invalid URLs correctly - it should raise + # exceptions for malformed URLs + if not (t.scheme and t.netloc): + raise SyntaxError('Invalid URL: %s' % uri) + remaining = remaining[m.end():].lstrip() + else: + + def get_versions(ver_remaining): + """ + Return a list of operator, version tuples if any are + specified, else None. + """ + m = COMPARE_OP.match(ver_remaining) + versions = None + if m: + versions = [] + while True: + op = m.groups()[0] + ver_remaining = ver_remaining[m.end():] + m = VERSION_IDENTIFIER.match(ver_remaining) + if not m: + raise SyntaxError('invalid version: %s' % ver_remaining) + v = m.groups()[0] + versions.append((op, v)) + ver_remaining = ver_remaining[m.end():] + if not ver_remaining or ver_remaining[0] != ',': + break + ver_remaining = ver_remaining[1:].lstrip() + m = COMPARE_OP.match(ver_remaining) + if not m: + raise SyntaxError('invalid constraint: %s' % ver_remaining) + if not versions: + versions = None + return versions, ver_remaining + + if remaining[0] != '(': + versions, remaining = get_versions(remaining) + else: + i = remaining.find(')', 1) + if i < 0: + raise SyntaxError('unterminated parenthesis: %s' % remaining) + s = remaining[1:i] + remaining = remaining[i + 1:].lstrip() + # As a special diversion from PEP 508, allow a version number + # a.b.c in parentheses as a synonym for ~= a.b.c (because this + # is allowed in earlier PEPs) + if COMPARE_OP.match(s): + versions, _ = get_versions(s) + else: + m = VERSION_IDENTIFIER.match(s) + if not m: + raise SyntaxError('invalid constraint: %s' % s) + v = m.groups()[0] + s = s[m.end():].lstrip() + if s: + raise SyntaxError('invalid constraint: %s' % s) + versions = [('~=', v)] + + if remaining: + if remaining[0] != ';': + raise SyntaxError('invalid requirement: %s' % remaining) + remaining = remaining[1:].lstrip() + + mark_expr, remaining = parse_marker(remaining) + + if remaining and remaining[0] != '#': + raise SyntaxError('unexpected trailing data: %s' % remaining) + + if not versions: + rs = distname + else: + rs = '%s %s' % (distname, ', '.join(['%s %s' % con for con in versions])) + return Container(name=distname, extras=extras, constraints=versions, + marker=mark_expr, url=uri, requirement=rs) + + +def get_resources_dests(resources_root, rules): + """Find destinations for resources files""" + + def get_rel_path(root, path): + # normalizes and returns a lstripped-/-separated path + root = root.replace(os.path.sep, '/') + path = path.replace(os.path.sep, '/') + assert path.startswith(root) + return path[len(root):].lstrip('/') + + destinations = {} + for base, suffix, dest in rules: + prefix = os.path.join(resources_root, base) + for abs_base in iglob(prefix): + abs_glob = os.path.join(abs_base, suffix) + for abs_path in iglob(abs_glob): + resource_file = get_rel_path(resources_root, abs_path) + if dest is None: # remove the entry if it was here + destinations.pop(resource_file, None) + else: + rel_path = get_rel_path(abs_base, abs_path) + rel_dest = dest.replace(os.path.sep, '/').rstrip('/') + destinations[resource_file] = rel_dest + '/' + rel_path + return destinations + + +def in_venv(): + if hasattr(sys, 'real_prefix'): + # virtualenv venvs + result = True + else: + # PEP 405 venvs + result = sys.prefix != getattr(sys, 'base_prefix', sys.prefix) + return result + + +def get_executable(): +# The __PYVENV_LAUNCHER__ dance is apparently no longer needed, as +# changes to the stub launcher mean that sys.executable always points +# to the stub on OS X +# if sys.platform == 
'darwin' and ('__PYVENV_LAUNCHER__' +# in os.environ): +# result = os.environ['__PYVENV_LAUNCHER__'] +# else: +# result = sys.executable +# return result + result = os.path.normcase(sys.executable) + if not isinstance(result, text_type): + result = fsdecode(result) + return result + + +def proceed(prompt, allowed_chars, error_prompt=None, default=None): + p = prompt + while True: + s = raw_input(p) + p = prompt + if not s and default: + s = default + if s: + c = s[0].lower() + if c in allowed_chars: + break + if error_prompt: + p = '%c: %s\n%s' % (c, error_prompt, prompt) + return c + + +def extract_by_key(d, keys): + if isinstance(keys, string_types): + keys = keys.split() + result = {} + for key in keys: + if key in d: + result[key] = d[key] + return result + +def read_exports(stream): + if sys.version_info[0] >= 3: + # needs to be a text stream + stream = codecs.getreader('utf-8')(stream) + # Try to load as JSON, falling back on legacy format + data = stream.read() + stream = StringIO(data) + try: + jdata = json.load(stream) + result = jdata['extensions']['python.exports']['exports'] + for group, entries in result.items(): + for k, v in entries.items(): + s = '%s = %s' % (k, v) + entry = get_export_entry(s) + assert entry is not None + entries[k] = entry + return result + except Exception: + stream.seek(0, 0) + + def read_stream(cp, stream): + if hasattr(cp, 'read_file'): + cp.read_file(stream) + else: + cp.readfp(stream) + + cp = configparser.ConfigParser() + try: + read_stream(cp, stream) + except configparser.MissingSectionHeaderError: + stream.close() + data = textwrap.dedent(data) + stream = StringIO(data) + read_stream(cp, stream) + + result = {} + for key in cp.sections(): + result[key] = entries = {} + for name, value in cp.items(key): + s = '%s = %s' % (name, value) + entry = get_export_entry(s) + assert entry is not None + #entry.dist = self + entries[name] = entry + return result + + +def write_exports(exports, stream): + if sys.version_info[0] >= 3: + # needs to be a text stream + stream = codecs.getwriter('utf-8')(stream) + cp = configparser.ConfigParser() + for k, v in exports.items(): + # TODO check k, v for valid values + cp.add_section(k) + for entry in v.values(): + if entry.suffix is None: + s = entry.prefix + else: + s = '%s:%s' % (entry.prefix, entry.suffix) + if entry.flags: + s = '%s [%s]' % (s, ', '.join(entry.flags)) + cp.set(k, entry.name, s) + cp.write(stream) + + +@contextlib.contextmanager +def tempdir(): + td = tempfile.mkdtemp() + try: + yield td + finally: + shutil.rmtree(td) + +@contextlib.contextmanager +def chdir(d): + cwd = os.getcwd() + try: + os.chdir(d) + yield + finally: + os.chdir(cwd) + + +@contextlib.contextmanager +def socket_timeout(seconds=15): + cto = socket.getdefaulttimeout() + try: + socket.setdefaulttimeout(seconds) + yield + finally: + socket.setdefaulttimeout(cto) + + +class cached_property(object): + def __init__(self, func): + self.func = func + #for attr in ('__name__', '__module__', '__doc__'): + # setattr(self, attr, getattr(func, attr, None)) + + def __get__(self, obj, cls=None): + if obj is None: + return self + value = self.func(obj) + object.__setattr__(obj, self.func.__name__, value) + #obj.__dict__[self.func.__name__] = value = self.func(obj) + return value + +def convert_path(pathname): + """Return 'pathname' as a name that will work on the native filesystem. + + The path is split on '/' and put back together again using the current + directory separator. 
Needed because filenames in the setup script are + always supplied in Unix style, and have to be converted to the local + convention before we can actually use them in the filesystem. Raises + ValueError on non-Unix-ish systems if 'pathname' either starts or + ends with a slash. + """ + if os.sep == '/': + return pathname + if not pathname: + return pathname + if pathname[0] == '/': + raise ValueError("path '%s' cannot be absolute" % pathname) + if pathname[-1] == '/': + raise ValueError("path '%s' cannot end with '/'" % pathname) + + paths = pathname.split('/') + while os.curdir in paths: + paths.remove(os.curdir) + if not paths: + return os.curdir + return os.path.join(*paths) + + +class FileOperator(object): + def __init__(self, dry_run=False): + self.dry_run = dry_run + self.ensured = set() + self._init_record() + + def _init_record(self): + self.record = False + self.files_written = set() + self.dirs_created = set() + + def record_as_written(self, path): + if self.record: + self.files_written.add(path) + + def newer(self, source, target): + """Tell if the target is newer than the source. + + Returns true if 'source' exists and is more recently modified than + 'target', or if 'source' exists and 'target' doesn't. + + Returns false if both exist and 'target' is the same age or younger + than 'source'. Raise PackagingFileError if 'source' does not exist. + + Note that this test is not very accurate: files created in the same + second will have the same "age". + """ + if not os.path.exists(source): + raise DistlibException("file '%r' does not exist" % + os.path.abspath(source)) + if not os.path.exists(target): + return True + + return os.stat(source).st_mtime > os.stat(target).st_mtime + + def copy_file(self, infile, outfile, check=True): + """Copy a file respecting dry-run and force flags. + """ + self.ensure_dir(os.path.dirname(outfile)) + logger.info('Copying %s to %s', infile, outfile) + if not self.dry_run: + msg = None + if check: + if os.path.islink(outfile): + msg = '%s is a symlink' % outfile + elif os.path.exists(outfile) and not os.path.isfile(outfile): + msg = '%s is a non-regular file' % outfile + if msg: + raise ValueError(msg + ' which would be overwritten') + shutil.copyfile(infile, outfile) + self.record_as_written(outfile) + + def copy_stream(self, instream, outfile, encoding=None): + assert not os.path.isdir(outfile) + self.ensure_dir(os.path.dirname(outfile)) + logger.info('Copying stream %s to %s', instream, outfile) + if not self.dry_run: + if encoding is None: + outstream = open(outfile, 'wb') + else: + outstream = codecs.open(outfile, 'w', encoding=encoding) + try: + shutil.copyfileobj(instream, outstream) + finally: + outstream.close() + self.record_as_written(outfile) + + def write_binary_file(self, path, data): + self.ensure_dir(os.path.dirname(path)) + if not self.dry_run: + if os.path.exists(path): + os.remove(path) + with open(path, 'wb') as f: + f.write(data) + self.record_as_written(path) + + def write_text_file(self, path, data, encoding): + self.write_binary_file(path, data.encode(encoding)) + + def set_mode(self, bits, mask, files): + if os.name == 'posix' or (os.name == 'java' and os._name == 'posix'): + # Set the executable bits (owner, group, and world) on + # all the files specified. 
+ for f in files: + if self.dry_run: + logger.info("changing mode of %s", f) + else: + mode = (os.stat(f).st_mode | bits) & mask + logger.info("changing mode of %s to %o", f, mode) + os.chmod(f, mode) + + set_executable_mode = lambda s, f: s.set_mode(0o555, 0o7777, f) + + def ensure_dir(self, path): + path = os.path.abspath(path) + if path not in self.ensured and not os.path.exists(path): + self.ensured.add(path) + d, f = os.path.split(path) + self.ensure_dir(d) + logger.info('Creating %s' % path) + if not self.dry_run: + os.mkdir(path) + if self.record: + self.dirs_created.add(path) + + def byte_compile(self, path, optimize=False, force=False, prefix=None, hashed_invalidation=False): + dpath = cache_from_source(path, not optimize) + logger.info('Byte-compiling %s to %s', path, dpath) + if not self.dry_run: + if force or self.newer(path, dpath): + if not prefix: + diagpath = None + else: + assert path.startswith(prefix) + diagpath = path[len(prefix):] + compile_kwargs = {} + if hashed_invalidation and hasattr(py_compile, 'PycInvalidationMode'): + compile_kwargs['invalidation_mode'] = py_compile.PycInvalidationMode.CHECKED_HASH + py_compile.compile(path, dpath, diagpath, True, **compile_kwargs) # raise error + self.record_as_written(dpath) + return dpath + + def ensure_removed(self, path): + if os.path.exists(path): + if os.path.isdir(path) and not os.path.islink(path): + logger.debug('Removing directory tree at %s', path) + if not self.dry_run: + shutil.rmtree(path) + if self.record: + if path in self.dirs_created: + self.dirs_created.remove(path) + else: + if os.path.islink(path): + s = 'link' + else: + s = 'file' + logger.debug('Removing %s %s', s, path) + if not self.dry_run: + os.remove(path) + if self.record: + if path in self.files_written: + self.files_written.remove(path) + + def is_writable(self, path): + result = False + while not result: + if os.path.exists(path): + result = os.access(path, os.W_OK) + break + parent = os.path.dirname(path) + if parent == path: + break + path = parent + return result + + def commit(self): + """ + Commit recorded changes, turn off recording, return + changes. 
+ """ + assert self.record + result = self.files_written, self.dirs_created + self._init_record() + return result + + def rollback(self): + if not self.dry_run: + for f in list(self.files_written): + if os.path.exists(f): + os.remove(f) + # dirs should all be empty now, except perhaps for + # __pycache__ subdirs + # reverse so that subdirs appear before their parents + dirs = sorted(self.dirs_created, reverse=True) + for d in dirs: + flist = os.listdir(d) + if flist: + assert flist == ['__pycache__'] + sd = os.path.join(d, flist[0]) + os.rmdir(sd) + os.rmdir(d) # should fail if non-empty + self._init_record() + +def resolve(module_name, dotted_path): + if module_name in sys.modules: + mod = sys.modules[module_name] + else: + mod = __import__(module_name) + if dotted_path is None: + result = mod + else: + parts = dotted_path.split('.') + result = getattr(mod, parts.pop(0)) + for p in parts: + result = getattr(result, p) + return result + + +class ExportEntry(object): + def __init__(self, name, prefix, suffix, flags): + self.name = name + self.prefix = prefix + self.suffix = suffix + self.flags = flags + + @cached_property + def value(self): + return resolve(self.prefix, self.suffix) + + def __repr__(self): # pragma: no cover + return '' % (self.name, self.prefix, + self.suffix, self.flags) + + def __eq__(self, other): + if not isinstance(other, ExportEntry): + result = False + else: + result = (self.name == other.name and + self.prefix == other.prefix and + self.suffix == other.suffix and + self.flags == other.flags) + return result + + __hash__ = object.__hash__ + + +ENTRY_RE = re.compile(r'''(?P(\w|[-.+])+) + \s*=\s*(?P(\w+)([:\.]\w+)*) + \s*(\[\s*(?P[\w-]+(=\w+)?(,\s*\w+(=\w+)?)*)\s*\])? + ''', re.VERBOSE) + +def get_export_entry(specification): + m = ENTRY_RE.search(specification) + if not m: + result = None + if '[' in specification or ']' in specification: + raise DistlibException("Invalid specification " + "'%s'" % specification) + else: + d = m.groupdict() + name = d['name'] + path = d['callable'] + colons = path.count(':') + if colons == 0: + prefix, suffix = path, None + else: + if colons != 1: + raise DistlibException("Invalid specification " + "'%s'" % specification) + prefix, suffix = path.split(':') + flags = d['flags'] + if flags is None: + if '[' in specification or ']' in specification: + raise DistlibException("Invalid specification " + "'%s'" % specification) + flags = [] + else: + flags = [f.strip() for f in flags.split(',')] + result = ExportEntry(name, prefix, suffix, flags) + return result + + +def get_cache_base(suffix=None): + """ + Return the default base location for distlib caches. If the directory does + not exist, it is created. Use the suffix provided for the base directory, + and default to '.distlib' if it isn't provided. + + On Windows, if LOCALAPPDATA is defined in the environment, then it is + assumed to be a directory, and will be the parent directory of the result. + On POSIX, and on Windows if LOCALAPPDATA is not defined, the user's home + directory - using os.expanduser('~') - will be the parent directory of + the result. + + The result is just the directory '.distlib' in the parent directory as + determined above, or with the name specified with ``suffix``. 
+ """ + if suffix is None: + suffix = '.distlib' + if os.name == 'nt' and 'LOCALAPPDATA' in os.environ: + result = os.path.expandvars('$localappdata') + else: + # Assume posix, or old Windows + result = os.path.expanduser('~') + # we use 'isdir' instead of 'exists', because we want to + # fail if there's a file with that name + if os.path.isdir(result): + usable = os.access(result, os.W_OK) + if not usable: + logger.warning('Directory exists but is not writable: %s', result) + else: + try: + os.makedirs(result) + usable = True + except OSError: + logger.warning('Unable to create %s', result, exc_info=True) + usable = False + if not usable: + result = tempfile.mkdtemp() + logger.warning('Default location unusable, using %s', result) + return os.path.join(result, suffix) + + +def path_to_cache_dir(path): + """ + Convert an absolute path to a directory name for use in a cache. + + The algorithm used is: + + #. On Windows, any ``':'`` in the drive is replaced with ``'---'``. + #. Any occurrence of ``os.sep`` is replaced with ``'--'``. + #. ``'.cache'`` is appended. + """ + d, p = os.path.splitdrive(os.path.abspath(path)) + if d: + d = d.replace(':', '---') + p = p.replace(os.sep, '--') + return d + p + '.cache' + + +def ensure_slash(s): + if not s.endswith('/'): + return s + '/' + return s + + +def parse_credentials(netloc): + username = password = None + if '@' in netloc: + prefix, netloc = netloc.rsplit('@', 1) + if ':' not in prefix: + username = prefix + else: + username, password = prefix.split(':', 1) + if username: + username = unquote(username) + if password: + password = unquote(password) + return username, password, netloc + + +def get_process_umask(): + result = os.umask(0o22) + os.umask(result) + return result + +def is_string_sequence(seq): + result = True + i = None + for i, s in enumerate(seq): + if not isinstance(s, string_types): + result = False + break + assert i is not None + return result + +PROJECT_NAME_AND_VERSION = re.compile('([a-z0-9_]+([.-][a-z_][a-z0-9_]*)*)-' + '([a-z0-9_.+-]+)', re.I) +PYTHON_VERSION = re.compile(r'-py(\d\.?\d?)') + + +def split_filename(filename, project_name=None): + """ + Extract name, version, python version from a filename (no extension) + + Return name, version, pyver or None + """ + result = None + pyver = None + filename = unquote(filename).replace(' ', '-') + m = PYTHON_VERSION.search(filename) + if m: + pyver = m.group(1) + filename = filename[:m.start()] + if project_name and len(filename) > len(project_name) + 1: + m = re.match(re.escape(project_name) + r'\b', filename) + if m: + n = m.end() + result = filename[:n], filename[n + 1:], pyver + if result is None: + m = PROJECT_NAME_AND_VERSION.match(filename) + if m: + result = m.group(1), m.group(3), pyver + return result + +# Allow spaces in name because of legacy dists like "Twisted Core" +NAME_VERSION_RE = re.compile(r'(?P[\w .-]+)\s*' + r'\(\s*(?P[^\s)]+)\)$') + +def parse_name_and_version(p): + """ + A utility method used to get name and version from a string. + + From e.g. a Provides-Dist value. + + :param p: A value in a form 'foo (1.0)' + :return: The name and version as a tuple. 
+ """ + m = NAME_VERSION_RE.match(p) + if not m: + raise DistlibException('Ill-formed name/version string: \'%s\'' % p) + d = m.groupdict() + return d['name'].strip().lower(), d['ver'] + +def get_extras(requested, available): + result = set() + requested = set(requested or []) + available = set(available or []) + if '*' in requested: + requested.remove('*') + result |= available + for r in requested: + if r == '-': + result.add(r) + elif r.startswith('-'): + unwanted = r[1:] + if unwanted not in available: + logger.warning('undeclared extra: %s' % unwanted) + if unwanted in result: + result.remove(unwanted) + else: + if r not in available: + logger.warning('undeclared extra: %s' % r) + result.add(r) + return result +# +# Extended metadata functionality +# + +def _get_external_data(url): + result = {} + try: + # urlopen might fail if it runs into redirections, + # because of Python issue #13696. Fixed in locators + # using a custom redirect handler. + resp = urlopen(url) + headers = resp.info() + ct = headers.get('Content-Type') + if not ct.startswith('application/json'): + logger.debug('Unexpected response for JSON request: %s', ct) + else: + reader = codecs.getreader('utf-8')(resp) + #data = reader.read().decode('utf-8') + #result = json.loads(data) + result = json.load(reader) + except Exception as e: + logger.exception('Failed to get external data for %s: %s', url, e) + return result + +_external_data_base_url = 'https://www.red-dove.com/pypi/projects/' + +def get_project_data(name): + url = '%s/%s/project.json' % (name[0].upper(), name) + url = urljoin(_external_data_base_url, url) + result = _get_external_data(url) + return result + +def get_package_data(name, version): + url = '%s/%s/package-%s.json' % (name[0].upper(), name, version) + url = urljoin(_external_data_base_url, url) + return _get_external_data(url) + + +class Cache(object): + """ + A class implementing a cache for resources that need to live in the file system + e.g. shared libraries. This class was moved from resources to here because it + could be used by other modules, e.g. the wheel module. + """ + + def __init__(self, base): + """ + Initialise an instance. + + :param base: The base directory where the cache should be located. + """ + # we use 'isdir' instead of 'exists', because we want to + # fail if there's a file with that name + if not os.path.isdir(base): # pragma: no cover + os.makedirs(base) + if (os.stat(base).st_mode & 0o77) != 0: + logger.warning('Directory \'%s\' is not private', base) + self.base = os.path.abspath(os.path.normpath(base)) + + def prefix_to_dir(self, prefix): + """ + Converts a resource prefix to a directory name in the cache. + """ + return path_to_cache_dir(prefix) + + def clear(self): + """ + Clear the cache. + """ + not_removed = [] + for fn in os.listdir(self.base): + fn = os.path.join(self.base, fn) + try: + if os.path.islink(fn) or os.path.isfile(fn): + os.remove(fn) + elif os.path.isdir(fn): + shutil.rmtree(fn) + except Exception: + not_removed.append(fn) + return not_removed + + +class EventMixin(object): + """ + A very simple publish/subscribe system. + """ + def __init__(self): + self._subscribers = {} + + def add(self, event, subscriber, append=True): + """ + Add a subscriber for an event. + + :param event: The name of an event. + :param subscriber: The subscriber to be added (and called when the + event is published). + :param append: Whether to append or prepend the subscriber to an + existing subscriber list for the event. 
+ """ + subs = self._subscribers + if event not in subs: + subs[event] = deque([subscriber]) + else: + sq = subs[event] + if append: + sq.append(subscriber) + else: + sq.appendleft(subscriber) + + def remove(self, event, subscriber): + """ + Remove a subscriber for an event. + + :param event: The name of an event. + :param subscriber: The subscriber to be removed. + """ + subs = self._subscribers + if event not in subs: + raise ValueError('No subscribers: %r' % event) + subs[event].remove(subscriber) + + def get_subscribers(self, event): + """ + Return an iterator for the subscribers for an event. + :param event: The event to return subscribers for. + """ + return iter(self._subscribers.get(event, ())) + + def publish(self, event, *args, **kwargs): + """ + Publish a event and return a list of values returned by its + subscribers. + + :param event: The event to publish. + :param args: The positional arguments to pass to the event's + subscribers. + :param kwargs: The keyword arguments to pass to the event's + subscribers. + """ + result = [] + for subscriber in self.get_subscribers(event): + try: + value = subscriber(event, *args, **kwargs) + except Exception: + logger.exception('Exception during event publication') + value = None + result.append(value) + logger.debug('publish %s: args = %s, kwargs = %s, result = %s', + event, args, kwargs, result) + return result + +# +# Simple sequencing +# +class Sequencer(object): + def __init__(self): + self._preds = {} + self._succs = {} + self._nodes = set() # nodes with no preds/succs + + def add_node(self, node): + self._nodes.add(node) + + def remove_node(self, node, edges=False): + if node in self._nodes: + self._nodes.remove(node) + if edges: + for p in set(self._preds.get(node, ())): + self.remove(p, node) + for s in set(self._succs.get(node, ())): + self.remove(node, s) + # Remove empties + for k, v in list(self._preds.items()): + if not v: + del self._preds[k] + for k, v in list(self._succs.items()): + if not v: + del self._succs[k] + + def add(self, pred, succ): + assert pred != succ + self._preds.setdefault(succ, set()).add(pred) + self._succs.setdefault(pred, set()).add(succ) + + def remove(self, pred, succ): + assert pred != succ + try: + preds = self._preds[succ] + succs = self._succs[pred] + except KeyError: # pragma: no cover + raise ValueError('%r not a successor of anything' % succ) + try: + preds.remove(pred) + succs.remove(succ) + except KeyError: # pragma: no cover + raise ValueError('%r not a successor of %r' % (succ, pred)) + + def is_step(self, step): + return (step in self._preds or step in self._succs or + step in self._nodes) + + def get_steps(self, final): + if not self.is_step(final): + raise ValueError('Unknown: %r' % final) + result = [] + todo = [] + seen = set() + todo.append(final) + while todo: + step = todo.pop(0) + if step in seen: + # if a step was already seen, + # move it to the end (so it will appear earlier + # when reversed on return) ... 
but not for the + # final step, as that would be confusing for + # users + if step != final: + result.remove(step) + result.append(step) + else: + seen.add(step) + result.append(step) + preds = self._preds.get(step, ()) + todo.extend(preds) + return reversed(result) + + @property + def strong_connections(self): + #http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm + index_counter = [0] + stack = [] + lowlinks = {} + index = {} + result = [] + + graph = self._succs + + def strongconnect(node): + # set the depth index for this node to the smallest unused index + index[node] = index_counter[0] + lowlinks[node] = index_counter[0] + index_counter[0] += 1 + stack.append(node) + + # Consider successors + try: + successors = graph[node] + except Exception: + successors = [] + for successor in successors: + if successor not in lowlinks: + # Successor has not yet been visited + strongconnect(successor) + lowlinks[node] = min(lowlinks[node],lowlinks[successor]) + elif successor in stack: + # the successor is in the stack and hence in the current + # strongly connected component (SCC) + lowlinks[node] = min(lowlinks[node],index[successor]) + + # If `node` is a root node, pop the stack and generate an SCC + if lowlinks[node] == index[node]: + connected_component = [] + + while True: + successor = stack.pop() + connected_component.append(successor) + if successor == node: break + component = tuple(connected_component) + # storing the result + result.append(component) + + for node in graph: + if node not in lowlinks: + strongconnect(node) + + return result + + @property + def dot(self): + result = ['digraph G {'] + for succ in self._preds: + preds = self._preds[succ] + for pred in preds: + result.append(' %s -> %s;' % (pred, succ)) + for node in self._nodes: + result.append(' %s;' % node) + result.append('}') + return '\n'.join(result) + +# +# Unarchiving functionality for zip, tar, tgz, tbz, whl +# + +ARCHIVE_EXTENSIONS = ('.tar.gz', '.tar.bz2', '.tar', '.zip', + '.tgz', '.tbz', '.whl') + +def unarchive(archive_filename, dest_dir, format=None, check=True): + + def check_path(path): + if not isinstance(path, text_type): + path = path.decode('utf-8') + p = os.path.abspath(os.path.join(dest_dir, path)) + if not p.startswith(dest_dir) or p[plen] != os.sep: + raise ValueError('path outside destination: %r' % p) + + dest_dir = os.path.abspath(dest_dir) + plen = len(dest_dir) + archive = None + if format is None: + if archive_filename.endswith(('.zip', '.whl')): + format = 'zip' + elif archive_filename.endswith(('.tar.gz', '.tgz')): + format = 'tgz' + mode = 'r:gz' + elif archive_filename.endswith(('.tar.bz2', '.tbz')): + format = 'tbz' + mode = 'r:bz2' + elif archive_filename.endswith('.tar'): + format = 'tar' + mode = 'r' + else: # pragma: no cover + raise ValueError('Unknown format for %r' % archive_filename) + try: + if format == 'zip': + archive = ZipFile(archive_filename, 'r') + if check: + names = archive.namelist() + for name in names: + check_path(name) + else: + archive = tarfile.open(archive_filename, mode) + if check: + names = archive.getnames() + for name in names: + check_path(name) + if format != 'zip' and sys.version_info[0] < 3: + # See Python issue 17153. If the dest path contains Unicode, + # tarfile extraction fails on Python 2.x if a member path name + # contains non-ASCII characters - it leads to an implicit + # bytes -> unicode conversion using ASCII to decode. 
+ for tarinfo in archive.getmembers(): + if not isinstance(tarinfo.name, text_type): + tarinfo.name = tarinfo.name.decode('utf-8') + archive.extractall(dest_dir) + + finally: + if archive: + archive.close() + + +def zip_dir(directory): + """zip a directory tree into a BytesIO object""" + result = io.BytesIO() + dlen = len(directory) + with ZipFile(result, "w") as zf: + for root, dirs, files in os.walk(directory): + for name in files: + full = os.path.join(root, name) + rel = root[dlen:] + dest = os.path.join(rel, name) + zf.write(full, dest) + return result + +# +# Simple progress bar +# + +UNITS = ('', 'K', 'M', 'G','T','P') + + +class Progress(object): + unknown = 'UNKNOWN' + + def __init__(self, minval=0, maxval=100): + assert maxval is None or maxval >= minval + self.min = self.cur = minval + self.max = maxval + self.started = None + self.elapsed = 0 + self.done = False + + def update(self, curval): + assert self.min <= curval + assert self.max is None or curval <= self.max + self.cur = curval + now = time.time() + if self.started is None: + self.started = now + else: + self.elapsed = now - self.started + + def increment(self, incr): + assert incr >= 0 + self.update(self.cur + incr) + + def start(self): + self.update(self.min) + return self + + def stop(self): + if self.max is not None: + self.update(self.max) + self.done = True + + @property + def maximum(self): + return self.unknown if self.max is None else self.max + + @property + def percentage(self): + if self.done: + result = '100 %' + elif self.max is None: + result = ' ?? %' + else: + v = 100.0 * (self.cur - self.min) / (self.max - self.min) + result = '%3d %%' % v + return result + + def format_duration(self, duration): + if (duration <= 0) and self.max is None or self.cur == self.min: + result = '??:??:??' 
+ #elif duration < 1: + # result = '--:--:--' + else: + result = time.strftime('%H:%M:%S', time.gmtime(duration)) + return result + + @property + def ETA(self): + if self.done: + prefix = 'Done' + t = self.elapsed + #import pdb; pdb.set_trace() + else: + prefix = 'ETA ' + if self.max is None: + t = -1 + elif self.elapsed == 0 or (self.cur == self.min): + t = 0 + else: + #import pdb; pdb.set_trace() + t = float(self.max - self.min) + t /= self.cur - self.min + t = (t - 1) * self.elapsed + return '%s: %s' % (prefix, self.format_duration(t)) + + @property + def speed(self): + if self.elapsed == 0: + result = 0.0 + else: + result = (self.cur - self.min) / self.elapsed + for unit in UNITS: + if result < 1000: + break + result /= 1000.0 + return '%d %sB/s' % (result, unit) + +# +# Glob functionality +# + +RICH_GLOB = re.compile(r'\{([^}]*)\}') +_CHECK_RECURSIVE_GLOB = re.compile(r'[^/\\,{]\*\*|\*\*[^/\\,}]') +_CHECK_MISMATCH_SET = re.compile(r'^[^{]*\}|\{[^}]*$') + + +def iglob(path_glob): + """Extended globbing function that supports ** and {opt1,opt2,opt3}.""" + if _CHECK_RECURSIVE_GLOB.search(path_glob): + msg = """invalid glob %r: recursive glob "**" must be used alone""" + raise ValueError(msg % path_glob) + if _CHECK_MISMATCH_SET.search(path_glob): + msg = """invalid glob %r: mismatching set marker '{' or '}'""" + raise ValueError(msg % path_glob) + return _iglob(path_glob) + + +def _iglob(path_glob): + rich_path_glob = RICH_GLOB.split(path_glob, 1) + if len(rich_path_glob) > 1: + assert len(rich_path_glob) == 3, rich_path_glob + prefix, set, suffix = rich_path_glob + for item in set.split(','): + for path in _iglob(''.join((prefix, item, suffix))): + yield path + else: + if '**' not in path_glob: + for item in std_iglob(path_glob): + yield item + else: + prefix, radical = path_glob.split('**', 1) + if prefix == '': + prefix = '.' 
+ if radical == '': + radical = '*' + else: + # we support both + radical = radical.lstrip('/') + radical = radical.lstrip('\\') + for path, dir, files in os.walk(prefix): + path = os.path.normpath(path) + for fn in _iglob(os.path.join(path, radical)): + yield fn + +if ssl: + from .compat import (HTTPSHandler as BaseHTTPSHandler, match_hostname, + CertificateError) + + +# +# HTTPSConnection which verifies certificates/matches domains +# + + class HTTPSConnection(httplib.HTTPSConnection): + ca_certs = None # set this to the path to the certs file (.pem) + check_domain = True # only used if ca_certs is not None + + # noinspection PyPropertyAccess + def connect(self): + sock = socket.create_connection((self.host, self.port), self.timeout) + if getattr(self, '_tunnel_host', False): + self.sock = sock + self._tunnel() + + if not hasattr(ssl, 'SSLContext'): + # For 2.x + if self.ca_certs: + cert_reqs = ssl.CERT_REQUIRED + else: + cert_reqs = ssl.CERT_NONE + self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, + cert_reqs=cert_reqs, + ssl_version=ssl.PROTOCOL_SSLv23, + ca_certs=self.ca_certs) + else: # pragma: no cover + context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + if hasattr(ssl, 'OP_NO_SSLv2'): + context.options |= ssl.OP_NO_SSLv2 + if self.cert_file: + context.load_cert_chain(self.cert_file, self.key_file) + kwargs = {} + if self.ca_certs: + context.verify_mode = ssl.CERT_REQUIRED + context.load_verify_locations(cafile=self.ca_certs) + if getattr(ssl, 'HAS_SNI', False): + kwargs['server_hostname'] = self.host + self.sock = context.wrap_socket(sock, **kwargs) + if self.ca_certs and self.check_domain: + try: + match_hostname(self.sock.getpeercert(), self.host) + logger.debug('Host verified: %s', self.host) + except CertificateError: # pragma: no cover + self.sock.shutdown(socket.SHUT_RDWR) + self.sock.close() + raise + + class HTTPSHandler(BaseHTTPSHandler): + def __init__(self, ca_certs, check_domain=True): + BaseHTTPSHandler.__init__(self) + self.ca_certs = ca_certs + self.check_domain = check_domain + + def _conn_maker(self, *args, **kwargs): + """ + This is called to create a connection instance. Normally you'd + pass a connection class to do_open, but it doesn't actually check for + a class, and just expects a callable. As long as we behave just as a + constructor would have, we should be OK. If it ever changes so that + we *must* pass a class, we'll create an UnsafeHTTPSConnection class + which just sets check_domain to False in the class definition, and + choose which one to pass to do_open. + """ + result = HTTPSConnection(*args, **kwargs) + if self.ca_certs: + result.ca_certs = self.ca_certs + result.check_domain = self.check_domain + return result + + def https_open(self, req): + try: + return self.do_open(self._conn_maker, req) + except URLError as e: + if 'certificate verify failed' in str(e.reason): + raise CertificateError('Unable to verify server certificate ' + 'for %s' % req.host) + else: + raise + + # + # To prevent against mixing HTTP traffic with HTTPS (examples: A Man-In-The- + # Middle proxy using HTTP listens on port 443, or an index mistakenly serves + # HTML containing a http://xyz link when it should be https://xyz), + # you can use the following handler class, which does not allow HTTP traffic. + # + # It works by inheriting from HTTPHandler - so build_opener won't add a + # handler for HTTP itself. 
+ # + class HTTPSOnlyHandler(HTTPSHandler, HTTPHandler): + def http_open(self, req): + raise URLError('Unexpected HTTP request on what should be a secure ' + 'connection: %s' % req) + +# +# XML-RPC with timeouts +# + +_ver_info = sys.version_info[:2] + +if _ver_info == (2, 6): + class HTTP(httplib.HTTP): + def __init__(self, host='', port=None, **kwargs): + if port == 0: # 0 means use port 0, not the default port + port = None + self._setup(self._connection_class(host, port, **kwargs)) + + + if ssl: + class HTTPS(httplib.HTTPS): + def __init__(self, host='', port=None, **kwargs): + if port == 0: # 0 means use port 0, not the default port + port = None + self._setup(self._connection_class(host, port, **kwargs)) + + +class Transport(xmlrpclib.Transport): + def __init__(self, timeout, use_datetime=0): + self.timeout = timeout + xmlrpclib.Transport.__init__(self, use_datetime) + + def make_connection(self, host): + h, eh, x509 = self.get_host_info(host) + if _ver_info == (2, 6): + result = HTTP(h, timeout=self.timeout) + else: + if not self._connection or host != self._connection[0]: + self._extra_headers = eh + self._connection = host, httplib.HTTPConnection(h) + result = self._connection[1] + return result + +if ssl: + class SafeTransport(xmlrpclib.SafeTransport): + def __init__(self, timeout, use_datetime=0): + self.timeout = timeout + xmlrpclib.SafeTransport.__init__(self, use_datetime) + + def make_connection(self, host): + h, eh, kwargs = self.get_host_info(host) + if not kwargs: + kwargs = {} + kwargs['timeout'] = self.timeout + if _ver_info == (2, 6): + result = HTTPS(host, None, **kwargs) + else: + if not self._connection or host != self._connection[0]: + self._extra_headers = eh + self._connection = host, httplib.HTTPSConnection(h, None, + **kwargs) + result = self._connection[1] + return result + + +class ServerProxy(xmlrpclib.ServerProxy): + def __init__(self, uri, **kwargs): + self.timeout = timeout = kwargs.pop('timeout', None) + # The above classes only come into play if a timeout + # is specified + if timeout is not None: + scheme, _ = splittype(uri) + use_datetime = kwargs.get('use_datetime', 0) + if scheme == 'https': + tcls = SafeTransport + else: + tcls = Transport + kwargs['transport'] = t = tcls(timeout, use_datetime=use_datetime) + self.transport = t + xmlrpclib.ServerProxy.__init__(self, uri, **kwargs) + +# +# CSV functionality. This is provided because on 2.x, the csv module can't +# handle Unicode. However, we need to deal with Unicode in e.g. RECORD files. +# + +def _csv_open(fn, mode, **kwargs): + if sys.version_info[0] < 3: + mode += 'b' + else: + kwargs['newline'] = '' + # Python 3 determines encoding from locale. 
Force 'utf-8' + # file encoding to match other forced utf-8 encoding + kwargs['encoding'] = 'utf-8' + return open(fn, mode, **kwargs) + + +class CSVBase(object): + defaults = { + 'delimiter': str(','), # The strs are used because we need native + 'quotechar': str('"'), # str in the csv API (2.x won't take + 'lineterminator': str('\n') # Unicode) + } + + def __enter__(self): + return self + + def __exit__(self, *exc_info): + self.stream.close() + + +class CSVReader(CSVBase): + def __init__(self, **kwargs): + if 'stream' in kwargs: + stream = kwargs['stream'] + if sys.version_info[0] >= 3: + # needs to be a text stream + stream = codecs.getreader('utf-8')(stream) + self.stream = stream + else: + self.stream = _csv_open(kwargs['path'], 'r') + self.reader = csv.reader(self.stream, **self.defaults) + + def __iter__(self): + return self + + def next(self): + result = next(self.reader) + if sys.version_info[0] < 3: + for i, item in enumerate(result): + if not isinstance(item, text_type): + result[i] = item.decode('utf-8') + return result + + __next__ = next + +class CSVWriter(CSVBase): + def __init__(self, fn, **kwargs): + self.stream = _csv_open(fn, 'w') + self.writer = csv.writer(self.stream, **self.defaults) + + def writerow(self, row): + if sys.version_info[0] < 3: + r = [] + for item in row: + if isinstance(item, text_type): + item = item.encode('utf-8') + r.append(item) + row = r + self.writer.writerow(row) + +# +# Configurator functionality +# + +class Configurator(BaseConfigurator): + + value_converters = dict(BaseConfigurator.value_converters) + value_converters['inc'] = 'inc_convert' + + def __init__(self, config, base=None): + super(Configurator, self).__init__(config) + self.base = base or os.getcwd() + + def configure_custom(self, config): + def convert(o): + if isinstance(o, (list, tuple)): + result = type(o)([convert(i) for i in o]) + elif isinstance(o, dict): + if '()' in o: + result = self.configure_custom(o) + else: + result = {} + for k in o: + result[k] = convert(o[k]) + else: + result = self.convert(o) + return result + + c = config.pop('()') + if not callable(c): + c = self.resolve(c) + props = config.pop('.', None) + # Check for valid identifiers + args = config.pop('[]', ()) + if args: + args = tuple([convert(o) for o in args]) + items = [(k, convert(config[k])) for k in config if valid_ident(k)] + kwargs = dict(items) + result = c(*args, **kwargs) + if props: + for n, v in props.items(): + setattr(result, n, convert(v)) + return result + + def __getitem__(self, key): + result = self.config[key] + if isinstance(result, dict) and '()' in result: + self.config[key] = result = self.configure_custom(result) + return result + + def inc_convert(self, value): + """Default converter for the inc:// protocol.""" + if not os.path.isabs(value): + value = os.path.join(self.base, value) + with codecs.open(value, 'r', encoding='utf-8') as f: + result = json.load(f) + return result + + +class SubprocessMixin(object): + """ + Mixin for running subprocesses and capturing their output + """ + def __init__(self, verbose=False, progress=None): + self.verbose = verbose + self.progress = progress + + def reader(self, stream, context): + """ + Read lines from a subprocess' output stream and either pass to a progress + callable (if specified) or write progress information to sys.stderr. 
+ """ + progress = self.progress + verbose = self.verbose + while True: + s = stream.readline() + if not s: + break + if progress is not None: + progress(s, context) + else: + if not verbose: + sys.stderr.write('.') + else: + sys.stderr.write(s.decode('utf-8')) + sys.stderr.flush() + stream.close() + + def run_command(self, cmd, **kwargs): + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, **kwargs) + t1 = threading.Thread(target=self.reader, args=(p.stdout, 'stdout')) + t1.start() + t2 = threading.Thread(target=self.reader, args=(p.stderr, 'stderr')) + t2.start() + p.wait() + t1.join() + t2.join() + if self.progress is not None: + self.progress('done.', 'main') + elif self.verbose: + sys.stderr.write('done.\n') + return p + + +def normalize_name(name): + """Normalize a python package name a la PEP 503""" + # https://www.python.org/dev/peps/pep-0503/#normalized-names + return re.sub('[-_.]+', '-', name).lower() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/version.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/version.py new file mode 100644 index 0000000..3eebe18 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/version.py @@ -0,0 +1,736 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2017 The Python Software Foundation. +# See LICENSE.txt and CONTRIBUTORS.txt. +# +""" +Implementation of a flexible versioning scheme providing support for PEP-440, +setuptools-compatible and semantic versioning. +""" + +import logging +import re + +from .compat import string_types +from .util import parse_requirement + +__all__ = ['NormalizedVersion', 'NormalizedMatcher', + 'LegacyVersion', 'LegacyMatcher', + 'SemanticVersion', 'SemanticMatcher', + 'UnsupportedVersionError', 'get_scheme'] + +logger = logging.getLogger(__name__) + + +class UnsupportedVersionError(ValueError): + """This is an unsupported version.""" + pass + + +class Version(object): + def __init__(self, s): + self._string = s = s.strip() + self._parts = parts = self.parse(s) + assert isinstance(parts, tuple) + assert len(parts) > 0 + + def parse(self, s): + raise NotImplementedError('please implement in a subclass') + + def _check_compatible(self, other): + if type(self) != type(other): + raise TypeError('cannot compare %r and %r' % (self, other)) + + def __eq__(self, other): + self._check_compatible(other) + return self._parts == other._parts + + def __ne__(self, other): + return not self.__eq__(other) + + def __lt__(self, other): + self._check_compatible(other) + return self._parts < other._parts + + def __gt__(self, other): + return not (self.__lt__(other) or self.__eq__(other)) + + def __le__(self, other): + return self.__lt__(other) or self.__eq__(other) + + def __ge__(self, other): + return self.__gt__(other) or self.__eq__(other) + + # See http://docs.python.org/reference/datamodel#object.__hash__ + def __hash__(self): + return hash(self._parts) + + def __repr__(self): + return "%s('%s')" % (self.__class__.__name__, self._string) + + def __str__(self): + return self._string + + @property + def is_prerelease(self): + raise NotImplementedError('Please implement in subclasses.') + + +class Matcher(object): + version_class = None + + # value is either a callable or the name of a method + _operators = { + '<': lambda v, c, p: v < c, + '>': lambda v, c, p: v > c, + '<=': lambda v, c, p: v == c or v < c, + '>=': lambda v, c, p: v == c or v > c, + '==': lambda v, c, p: v == c, + '===': lambda v, c, p: v == c, + # by default, compatible => >=. 
+ '~=': lambda v, c, p: v == c or v > c, + '!=': lambda v, c, p: v != c, + } + + # this is a method only to support alternative implementations + # via overriding + def parse_requirement(self, s): + return parse_requirement(s) + + def __init__(self, s): + if self.version_class is None: + raise ValueError('Please specify a version class') + self._string = s = s.strip() + r = self.parse_requirement(s) + if not r: + raise ValueError('Not valid: %r' % s) + self.name = r.name + self.key = self.name.lower() # for case-insensitive comparisons + clist = [] + if r.constraints: + # import pdb; pdb.set_trace() + for op, s in r.constraints: + if s.endswith('.*'): + if op not in ('==', '!='): + raise ValueError('\'.*\' not allowed for ' + '%r constraints' % op) + # Could be a partial version (e.g. for '2.*') which + # won't parse as a version, so keep it as a string + vn, prefix = s[:-2], True + # Just to check that vn is a valid version + self.version_class(vn) + else: + # Should parse as a version, so we can create an + # instance for the comparison + vn, prefix = self.version_class(s), False + clist.append((op, vn, prefix)) + self._parts = tuple(clist) + + def match(self, version): + """ + Check if the provided version matches the constraints. + + :param version: The version to match against this instance. + :type version: String or :class:`Version` instance. + """ + if isinstance(version, string_types): + version = self.version_class(version) + for operator, constraint, prefix in self._parts: + f = self._operators.get(operator) + if isinstance(f, string_types): + f = getattr(self, f) + if not f: + msg = ('%r not implemented ' + 'for %s' % (operator, self.__class__.__name__)) + raise NotImplementedError(msg) + if not f(version, constraint, prefix): + return False + return True + + @property + def exact_version(self): + result = None + if len(self._parts) == 1 and self._parts[0][0] in ('==', '==='): + result = self._parts[0][1] + return result + + def _check_compatible(self, other): + if type(self) != type(other) or self.name != other.name: + raise TypeError('cannot compare %s and %s' % (self, other)) + + def __eq__(self, other): + self._check_compatible(other) + return self.key == other.key and self._parts == other._parts + + def __ne__(self, other): + return not self.__eq__(other) + + # See http://docs.python.org/reference/datamodel#object.__hash__ + def __hash__(self): + return hash(self.key) + hash(self._parts) + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self._string) + + def __str__(self): + return self._string + + +PEP440_VERSION_RE = re.compile(r'^v?(\d+!)?(\d+(\.\d+)*)((a|b|c|rc)(\d+))?' + r'(\.(post)(\d+))?(\.(dev)(\d+))?' 
+ r'(\+([a-zA-Z\d]+(\.[a-zA-Z\d]+)?))?$') + + +def _pep_440_key(s): + s = s.strip() + m = PEP440_VERSION_RE.match(s) + if not m: + raise UnsupportedVersionError('Not a valid version: %s' % s) + groups = m.groups() + nums = tuple(int(v) for v in groups[1].split('.')) + while len(nums) > 1 and nums[-1] == 0: + nums = nums[:-1] + + if not groups[0]: + epoch = 0 + else: + epoch = int(groups[0]) + pre = groups[4:6] + post = groups[7:9] + dev = groups[10:12] + local = groups[13] + if pre == (None, None): + pre = () + else: + pre = pre[0], int(pre[1]) + if post == (None, None): + post = () + else: + post = post[0], int(post[1]) + if dev == (None, None): + dev = () + else: + dev = dev[0], int(dev[1]) + if local is None: + local = () + else: + parts = [] + for part in local.split('.'): + # to ensure that numeric compares as > lexicographic, avoid + # comparing them directly, but encode a tuple which ensures + # correct sorting + if part.isdigit(): + part = (1, int(part)) + else: + part = (0, part) + parts.append(part) + local = tuple(parts) + if not pre: + # either before pre-release, or final release and after + if not post and dev: + # before pre-release + pre = ('a', -1) # to sort before a0 + else: + pre = ('z',) # to sort after all pre-releases + # now look at the state of post and dev. + if not post: + post = ('_',) # sort before 'a' + if not dev: + dev = ('final',) + + #print('%s -> %s' % (s, m.groups())) + return epoch, nums, pre, post, dev, local + + +_normalized_key = _pep_440_key + + +class NormalizedVersion(Version): + """A rational version. + + Good: + 1.2 # equivalent to "1.2.0" + 1.2.0 + 1.2a1 + 1.2.3a2 + 1.2.3b1 + 1.2.3c1 + 1.2.3.4 + TODO: fill this out + + Bad: + 1 # minimum two numbers + 1.2a # release level must have a release serial + 1.2.3b + """ + def parse(self, s): + result = _normalized_key(s) + # _normalized_key loses trailing zeroes in the release + # clause, since that's needed to ensure that X.Y == X.Y.0 == X.Y.0.0 + # However, PEP 440 prefix matching needs it: for example, + # (~= 1.4.5.0) matches differently to (~= 1.4.5.0.0). + m = PEP440_VERSION_RE.match(s) # must succeed + groups = m.groups() + self._release_clause = tuple(int(v) for v in groups[1].split('.')) + return result + + PREREL_TAGS = set(['a', 'b', 'c', 'rc', 'dev']) + + @property + def is_prerelease(self): + return any(t[0] in self.PREREL_TAGS for t in self._parts if t) + + +def _match_prefix(x, y): + x = str(x) + y = str(y) + if x == y: + return True + if not x.startswith(y): + return False + n = len(y) + return x[n] == '.' + + +class NormalizedMatcher(Matcher): + version_class = NormalizedVersion + + # value is either a callable or the name of a method + _operators = { + '~=': '_match_compatible', + '<': '_match_lt', + '>': '_match_gt', + '<=': '_match_le', + '>=': '_match_ge', + '==': '_match_eq', + '===': '_match_arbitrary', + '!=': '_match_ne', + } + + def _adjust_local(self, version, constraint, prefix): + if prefix: + strip_local = '+' not in constraint and version._parts[-1] + else: + # both constraint and version are + # NormalizedVersion instances. + # If constraint does not have a local component, + # ensure the version doesn't, either. 
+ strip_local = not constraint._parts[-1] and version._parts[-1] + if strip_local: + s = version._string.split('+', 1)[0] + version = self.version_class(s) + return version, constraint + + def _match_lt(self, version, constraint, prefix): + version, constraint = self._adjust_local(version, constraint, prefix) + if version >= constraint: + return False + release_clause = constraint._release_clause + pfx = '.'.join([str(i) for i in release_clause]) + return not _match_prefix(version, pfx) + + def _match_gt(self, version, constraint, prefix): + version, constraint = self._adjust_local(version, constraint, prefix) + if version <= constraint: + return False + release_clause = constraint._release_clause + pfx = '.'.join([str(i) for i in release_clause]) + return not _match_prefix(version, pfx) + + def _match_le(self, version, constraint, prefix): + version, constraint = self._adjust_local(version, constraint, prefix) + return version <= constraint + + def _match_ge(self, version, constraint, prefix): + version, constraint = self._adjust_local(version, constraint, prefix) + return version >= constraint + + def _match_eq(self, version, constraint, prefix): + version, constraint = self._adjust_local(version, constraint, prefix) + if not prefix: + result = (version == constraint) + else: + result = _match_prefix(version, constraint) + return result + + def _match_arbitrary(self, version, constraint, prefix): + return str(version) == str(constraint) + + def _match_ne(self, version, constraint, prefix): + version, constraint = self._adjust_local(version, constraint, prefix) + if not prefix: + result = (version != constraint) + else: + result = not _match_prefix(version, constraint) + return result + + def _match_compatible(self, version, constraint, prefix): + version, constraint = self._adjust_local(version, constraint, prefix) + if version == constraint: + return True + if version < constraint: + return False +# if not prefix: +# return True + release_clause = constraint._release_clause + if len(release_clause) > 1: + release_clause = release_clause[:-1] + pfx = '.'.join([str(i) for i in release_clause]) + return _match_prefix(version, pfx) + +_REPLACEMENTS = ( + (re.compile('[.+-]$'), ''), # remove trailing puncts + (re.compile(r'^[.](\d)'), r'0.\1'), # .N -> 0.N at start + (re.compile('^[.-]'), ''), # remove leading puncts + (re.compile(r'^\((.*)\)$'), r'\1'), # remove parentheses + (re.compile(r'^v(ersion)?\s*(\d+)'), r'\2'), # remove leading v(ersion) + (re.compile(r'^r(ev)?\s*(\d+)'), r'\2'), # remove leading v(ersion) + (re.compile('[.]{2,}'), '.'), # multiple runs of '.' + (re.compile(r'\b(alfa|apha)\b'), 'alpha'), # misspelt alpha + (re.compile(r'\b(pre-alpha|prealpha)\b'), + 'pre.alpha'), # standardise + (re.compile(r'\(beta\)$'), 'beta'), # remove parentheses +) + +_SUFFIX_REPLACEMENTS = ( + (re.compile('^[:~._+-]+'), ''), # remove leading puncts + (re.compile('[,*")([\\]]'), ''), # remove unwanted chars + (re.compile('[~:+_ -]'), '.'), # replace illegal chars + (re.compile('[.]{2,}'), '.'), # multiple runs of '.' + (re.compile(r'\.$'), ''), # trailing '.' +) + +_NUMERIC_PREFIX = re.compile(r'(\d+(\.\d+)*)') + + +def _suggest_semantic_version(s): + """ + Try to suggest a semantic form for a version for which + _suggest_normalized_version couldn't come up with anything. + """ + result = s.strip().lower() + for pat, repl in _REPLACEMENTS: + result = pat.sub(repl, result) + if not result: + result = '0.0.0' + + # Now look for numeric prefix, and separate it out from + # the rest. 
+ #import pdb; pdb.set_trace() + m = _NUMERIC_PREFIX.match(result) + if not m: + prefix = '0.0.0' + suffix = result + else: + prefix = m.groups()[0].split('.') + prefix = [int(i) for i in prefix] + while len(prefix) < 3: + prefix.append(0) + if len(prefix) == 3: + suffix = result[m.end():] + else: + suffix = '.'.join([str(i) for i in prefix[3:]]) + result[m.end():] + prefix = prefix[:3] + prefix = '.'.join([str(i) for i in prefix]) + suffix = suffix.strip() + if suffix: + #import pdb; pdb.set_trace() + # massage the suffix. + for pat, repl in _SUFFIX_REPLACEMENTS: + suffix = pat.sub(repl, suffix) + + if not suffix: + result = prefix + else: + sep = '-' if 'dev' in suffix else '+' + result = prefix + sep + suffix + if not is_semver(result): + result = None + return result + + +def _suggest_normalized_version(s): + """Suggest a normalized version close to the given version string. + + If you have a version string that isn't rational (i.e. NormalizedVersion + doesn't like it) then you might be able to get an equivalent (or close) + rational version from this function. + + This does a number of simple normalizations to the given string, based + on observation of versions currently in use on PyPI. Given a dump of + those version during PyCon 2009, 4287 of them: + - 2312 (53.93%) match NormalizedVersion without change + with the automatic suggestion + - 3474 (81.04%) match when using this suggestion method + + @param s {str} An irrational version string. + @returns A rational version string, or None, if couldn't determine one. + """ + try: + _normalized_key(s) + return s # already rational + except UnsupportedVersionError: + pass + + rs = s.lower() + + # part of this could use maketrans + for orig, repl in (('-alpha', 'a'), ('-beta', 'b'), ('alpha', 'a'), + ('beta', 'b'), ('rc', 'c'), ('-final', ''), + ('-pre', 'c'), + ('-release', ''), ('.release', ''), ('-stable', ''), + ('+', '.'), ('_', '.'), (' ', ''), ('.final', ''), + ('final', '')): + rs = rs.replace(orig, repl) + + # if something ends with dev or pre, we add a 0 + rs = re.sub(r"pre$", r"pre0", rs) + rs = re.sub(r"dev$", r"dev0", rs) + + # if we have something like "b-2" or "a.2" at the end of the + # version, that is probably beta, alpha, etc + # let's remove the dash or dot + rs = re.sub(r"([abc]|rc)[\-\.](\d+)$", r"\1\2", rs) + + # 1.0-dev-r371 -> 1.0.dev371 + # 0.1-dev-r79 -> 0.1.dev79 + rs = re.sub(r"[\-\.](dev)[\-\.]?r?(\d+)$", r".\1\2", rs) + + # Clean: 2.0.a.3, 2.0.b1, 0.9.0~c1 + rs = re.sub(r"[.~]?([abc])\.?", r"\1", rs) + + # Clean: v0.3, v1.0 + if rs.startswith('v'): + rs = rs[1:] + + # Clean leading '0's on numbers. + #TODO: unintended side-effect on, e.g., "2003.05.09" + # PyPI stats: 77 (~2%) better + rs = re.sub(r"\b0+(\d+)(?!\d)", r"\1", rs) + + # Clean a/b/c with no version. E.g. "1.0a" -> "1.0a0". Setuptools infers + # zero. 
+ # PyPI stats: 245 (7.56%) better + rs = re.sub(r"(\d+[abc])$", r"\g<1>0", rs) + + # the 'dev-rNNN' tag is a dev tag + rs = re.sub(r"\.?(dev-r|dev\.r)\.?(\d+)$", r".dev\2", rs) + + # clean the - when used as a pre delimiter + rs = re.sub(r"-(a|b|c)(\d+)$", r"\1\2", rs) + + # a terminal "dev" or "devel" can be changed into ".dev0" + rs = re.sub(r"[\.\-](dev|devel)$", r".dev0", rs) + + # a terminal "dev" can be changed into ".dev0" + rs = re.sub(r"(?![\.\-])dev$", r".dev0", rs) + + # a terminal "final" or "stable" can be removed + rs = re.sub(r"(final|stable)$", "", rs) + + # The 'r' and the '-' tags are post release tags + # 0.4a1.r10 -> 0.4a1.post10 + # 0.9.33-17222 -> 0.9.33.post17222 + # 0.9.33-r17222 -> 0.9.33.post17222 + rs = re.sub(r"\.?(r|-|-r)\.?(\d+)$", r".post\2", rs) + + # Clean 'r' instead of 'dev' usage: + # 0.9.33+r17222 -> 0.9.33.dev17222 + # 1.0dev123 -> 1.0.dev123 + # 1.0.git123 -> 1.0.dev123 + # 1.0.bzr123 -> 1.0.dev123 + # 0.1a0dev.123 -> 0.1a0.dev123 + # PyPI stats: ~150 (~4%) better + rs = re.sub(r"\.?(dev|git|bzr)\.?(\d+)$", r".dev\2", rs) + + # Clean '.pre' (normalized from '-pre' above) instead of 'c' usage: + # 0.2.pre1 -> 0.2c1 + # 0.2-c1 -> 0.2c1 + # 1.0preview123 -> 1.0c123 + # PyPI stats: ~21 (0.62%) better + rs = re.sub(r"\.?(pre|preview|-c)(\d+)$", r"c\g<2>", rs) + + # Tcl/Tk uses "px" for their post release markers + rs = re.sub(r"p(\d+)$", r".post\1", rs) + + try: + _normalized_key(rs) + except UnsupportedVersionError: + rs = None + return rs + +# +# Legacy version processing (distribute-compatible) +# + +_VERSION_PART = re.compile(r'([a-z]+|\d+|[\.-])', re.I) +_VERSION_REPLACE = { + 'pre': 'c', + 'preview': 'c', + '-': 'final-', + 'rc': 'c', + 'dev': '@', + '': None, + '.': None, +} + + +def _legacy_key(s): + def get_parts(s): + result = [] + for p in _VERSION_PART.split(s.lower()): + p = _VERSION_REPLACE.get(p, p) + if p: + if '0' <= p[:1] <= '9': + p = p.zfill(8) + else: + p = '*' + p + result.append(p) + result.append('*final') + return result + + result = [] + for p in get_parts(s): + if p.startswith('*'): + if p < '*final': + while result and result[-1] == '*final-': + result.pop() + while result and result[-1] == '00000000': + result.pop() + result.append(p) + return tuple(result) + + +class LegacyVersion(Version): + def parse(self, s): + return _legacy_key(s) + + @property + def is_prerelease(self): + result = False + for x in self._parts: + if (isinstance(x, string_types) and x.startswith('*') and + x < '*final'): + result = True + break + return result + + +class LegacyMatcher(Matcher): + version_class = LegacyVersion + + _operators = dict(Matcher._operators) + _operators['~='] = '_match_compatible' + + numeric_re = re.compile(r'^(\d+(\.\d+)*)') + + def _match_compatible(self, version, constraint, prefix): + if version < constraint: + return False + m = self.numeric_re.match(str(constraint)) + if not m: + logger.warning('Cannot compute compatible match for version %s ' + ' and constraint %s', version, constraint) + return True + s = m.groups()[0] + if '.' in s: + s = s.rsplit('.', 1)[0] + return _match_prefix(version, s) + +# +# Semantic versioning +# + +_SEMVER_RE = re.compile(r'^(\d+)\.(\d+)\.(\d+)' + r'(-[a-z0-9]+(\.[a-z0-9-]+)*)?' 
+ r'(\+[a-z0-9]+(\.[a-z0-9-]+)*)?$', re.I) + + +def is_semver(s): + return _SEMVER_RE.match(s) + + +def _semantic_key(s): + def make_tuple(s, absent): + if s is None: + result = (absent,) + else: + parts = s[1:].split('.') + # We can't compare ints and strings on Python 3, so fudge it + # by zero-filling numeric values so simulate a numeric comparison + result = tuple([p.zfill(8) if p.isdigit() else p for p in parts]) + return result + + m = is_semver(s) + if not m: + raise UnsupportedVersionError(s) + groups = m.groups() + major, minor, patch = [int(i) for i in groups[:3]] + # choose the '|' and '*' so that versions sort correctly + pre, build = make_tuple(groups[3], '|'), make_tuple(groups[5], '*') + return (major, minor, patch), pre, build + + +class SemanticVersion(Version): + def parse(self, s): + return _semantic_key(s) + + @property + def is_prerelease(self): + return self._parts[1][0] != '|' + + +class SemanticMatcher(Matcher): + version_class = SemanticVersion + + +class VersionScheme(object): + def __init__(self, key, matcher, suggester=None): + self.key = key + self.matcher = matcher + self.suggester = suggester + + def is_valid_version(self, s): + try: + self.matcher.version_class(s) + result = True + except UnsupportedVersionError: + result = False + return result + + def is_valid_matcher(self, s): + try: + self.matcher(s) + result = True + except UnsupportedVersionError: + result = False + return result + + def is_valid_constraint_list(self, s): + """ + Used for processing some metadata fields + """ + return self.is_valid_matcher('dummy_name (%s)' % s) + + def suggest(self, s): + if self.suggester is None: + result = None + else: + result = self.suggester(s) + return result + +_SCHEMES = { + 'normalized': VersionScheme(_normalized_key, NormalizedMatcher, + _suggest_normalized_version), + 'legacy': VersionScheme(_legacy_key, LegacyMatcher, lambda self, s: s), + 'semantic': VersionScheme(_semantic_key, SemanticMatcher, + _suggest_semantic_version), +} + +_SCHEMES['default'] = _SCHEMES['normalized'] + + +def get_scheme(name): + if name not in _SCHEMES: + raise ValueError('unknown scheme name: %r' % name) + return _SCHEMES[name] diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/w32.exe b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/w32.exe new file mode 100644 index 0000000..e6439e9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/w32.exe differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/w64.exe b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/w64.exe new file mode 100644 index 0000000..46139db Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/w64.exe differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/wheel.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/wheel.py new file mode 100644 index 0000000..bd17938 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distlib/wheel.py @@ -0,0 +1,1004 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2013-2017 Vinay Sajip. +# Licensed to the Python Software Foundation under a contributor agreement. +# See LICENSE.txt and CONTRIBUTORS.txt. 
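The version module added above is normally driven through get_scheme(). A minimal usage sketch, assuming the vendored import path used in this tree (pip._vendor.distlib.version) and the "name (constraints)" requirement syntax handled by distlib's parse_requirement helper:

    from pip._vendor.distlib.version import get_scheme, NormalizedVersion

    scheme = get_scheme('normalized')              # PEP 440 semantics
    assert scheme.is_valid_version('1.4.5')
    assert not scheme.is_valid_version('1.4.5junk')
    assert scheme.suggest('v1.0-final') == '1.0'   # _suggest_normalized_version at work

    matcher = scheme.matcher('requests (>=2.20, <3.0)')
    assert matcher.match('2.25.1')                 # strings are coerced to NormalizedVersion
    assert not matcher.match('3.0')

    v = NormalizedVersion('2.0rc1')
    assert v.is_prerelease and v < NormalizedVersion('2.0')

The 'legacy' and 'semantic' schemes from the same module are obtained the same way, for example get_scheme('semantic').is_valid_version('1.2.3-alpha.1').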
+# +from __future__ import unicode_literals + +import base64 +import codecs +import datetime +import distutils.util +from email import message_from_file +import hashlib +import imp +import json +import logging +import os +import posixpath +import re +import shutil +import sys +import tempfile +import zipfile + +from . import __version__, DistlibException +from .compat import sysconfig, ZipFile, fsdecode, text_type, filter +from .database import InstalledDistribution +from .metadata import Metadata, METADATA_FILENAME, WHEEL_METADATA_FILENAME +from .util import (FileOperator, convert_path, CSVReader, CSVWriter, Cache, + cached_property, get_cache_base, read_exports, tempdir) +from .version import NormalizedVersion, UnsupportedVersionError + +logger = logging.getLogger(__name__) + +cache = None # created when needed + +if hasattr(sys, 'pypy_version_info'): # pragma: no cover + IMP_PREFIX = 'pp' +elif sys.platform.startswith('java'): # pragma: no cover + IMP_PREFIX = 'jy' +elif sys.platform == 'cli': # pragma: no cover + IMP_PREFIX = 'ip' +else: + IMP_PREFIX = 'cp' + +VER_SUFFIX = sysconfig.get_config_var('py_version_nodot') +if not VER_SUFFIX: # pragma: no cover + VER_SUFFIX = '%s%s' % sys.version_info[:2] +PYVER = 'py' + VER_SUFFIX +IMPVER = IMP_PREFIX + VER_SUFFIX + +ARCH = distutils.util.get_platform().replace('-', '_').replace('.', '_') + +ABI = sysconfig.get_config_var('SOABI') +if ABI and ABI.startswith('cpython-'): + ABI = ABI.replace('cpython-', 'cp') +else: + def _derive_abi(): + parts = ['cp', VER_SUFFIX] + if sysconfig.get_config_var('Py_DEBUG'): + parts.append('d') + if sysconfig.get_config_var('WITH_PYMALLOC'): + parts.append('m') + if sysconfig.get_config_var('Py_UNICODE_SIZE') == 4: + parts.append('u') + return ''.join(parts) + ABI = _derive_abi() + del _derive_abi + +FILENAME_RE = re.compile(r''' +(?P[^-]+) +-(?P\d+[^-]*) +(-(?P\d+[^-]*))? +-(?P\w+\d+(\.\w+\d+)*) +-(?P\w+) +-(?P\w+(\.\w+)*) +\.whl$ +''', re.IGNORECASE | re.VERBOSE) + +NAME_VERSION_RE = re.compile(r''' +(?P[^-]+) +-(?P\d+[^-]*) +(-(?P\d+[^-]*))?$ +''', re.IGNORECASE | re.VERBOSE) + +SHEBANG_RE = re.compile(br'\s*#![^\r\n]*') +SHEBANG_DETAIL_RE = re.compile(br'^(\s*#!("[^"]+"|\S+))\s+(.*)$') +SHEBANG_PYTHON = b'#!python' +SHEBANG_PYTHONW = b'#!pythonw' + +if os.sep == '/': + to_posix = lambda o: o +else: + to_posix = lambda o: o.replace(os.sep, '/') + + +class Mounter(object): + def __init__(self): + self.impure_wheels = {} + self.libs = {} + + def add(self, pathname, extensions): + self.impure_wheels[pathname] = extensions + self.libs.update(extensions) + + def remove(self, pathname): + extensions = self.impure_wheels.pop(pathname) + for k, v in extensions: + if k in self.libs: + del self.libs[k] + + def find_module(self, fullname, path=None): + if fullname in self.libs: + result = self + else: + result = None + return result + + def load_module(self, fullname): + if fullname in sys.modules: + result = sys.modules[fullname] + else: + if fullname not in self.libs: + raise ImportError('unable to find extension for %s' % fullname) + result = imp.load_dynamic(fullname, self.libs[fullname]) + result.__loader__ = self + parts = fullname.rsplit('.', 1) + if len(parts) > 1: + result.__package__ = parts[0] + return result + +_hook = Mounter() + + +class Wheel(object): + """ + Class to build and install from Wheel files (PEP 427). + """ + + wheel_version = (1, 1) + hash_kind = 'sha256' + + def __init__(self, filename=None, sign=False, verify=False): + """ + Initialise an instance using a (valid) filename. 
+ """ + self.sign = sign + self.should_verify = verify + self.buildver = '' + self.pyver = [PYVER] + self.abi = ['none'] + self.arch = ['any'] + self.dirname = os.getcwd() + if filename is None: + self.name = 'dummy' + self.version = '0.1' + self._filename = self.filename + else: + m = NAME_VERSION_RE.match(filename) + if m: + info = m.groupdict('') + self.name = info['nm'] + # Reinstate the local version separator + self.version = info['vn'].replace('_', '-') + self.buildver = info['bn'] + self._filename = self.filename + else: + dirname, filename = os.path.split(filename) + m = FILENAME_RE.match(filename) + if not m: + raise DistlibException('Invalid name or ' + 'filename: %r' % filename) + if dirname: + self.dirname = os.path.abspath(dirname) + self._filename = filename + info = m.groupdict('') + self.name = info['nm'] + self.version = info['vn'] + self.buildver = info['bn'] + self.pyver = info['py'].split('.') + self.abi = info['bi'].split('.') + self.arch = info['ar'].split('.') + + @property + def filename(self): + """ + Build and return a filename from the various components. + """ + if self.buildver: + buildver = '-' + self.buildver + else: + buildver = '' + pyver = '.'.join(self.pyver) + abi = '.'.join(self.abi) + arch = '.'.join(self.arch) + # replace - with _ as a local version separator + version = self.version.replace('-', '_') + return '%s-%s%s-%s-%s-%s.whl' % (self.name, version, buildver, + pyver, abi, arch) + + @property + def exists(self): + path = os.path.join(self.dirname, self.filename) + return os.path.isfile(path) + + @property + def tags(self): + for pyver in self.pyver: + for abi in self.abi: + for arch in self.arch: + yield pyver, abi, arch + + @cached_property + def metadata(self): + pathname = os.path.join(self.dirname, self.filename) + name_ver = '%s-%s' % (self.name, self.version) + info_dir = '%s.dist-info' % name_ver + wrapper = codecs.getreader('utf-8') + with ZipFile(pathname, 'r') as zf: + wheel_metadata = self.get_wheel_metadata(zf) + wv = wheel_metadata['Wheel-Version'].split('.', 1) + file_version = tuple([int(i) for i in wv]) + if file_version < (1, 1): + fns = [WHEEL_METADATA_FILENAME, METADATA_FILENAME, 'METADATA'] + else: + fns = [WHEEL_METADATA_FILENAME, METADATA_FILENAME] + result = None + for fn in fns: + try: + metadata_filename = posixpath.join(info_dir, fn) + with zf.open(metadata_filename) as bf: + wf = wrapper(bf) + result = Metadata(fileobj=wf) + if result: + break + except KeyError: + pass + if not result: + raise ValueError('Invalid wheel, because metadata is ' + 'missing: looked in %s' % ', '.join(fns)) + return result + + def get_wheel_metadata(self, zf): + name_ver = '%s-%s' % (self.name, self.version) + info_dir = '%s.dist-info' % name_ver + metadata_filename = posixpath.join(info_dir, 'WHEEL') + with zf.open(metadata_filename) as bf: + wf = codecs.getreader('utf-8')(bf) + message = message_from_file(wf) + return dict(message) + + @cached_property + def info(self): + pathname = os.path.join(self.dirname, self.filename) + with ZipFile(pathname, 'r') as zf: + result = self.get_wheel_metadata(zf) + return result + + def process_shebang(self, data): + m = SHEBANG_RE.match(data) + if m: + end = m.end() + shebang, data_after_shebang = data[:end], data[end:] + # Preserve any arguments after the interpreter + if b'pythonw' in shebang.lower(): + shebang_python = SHEBANG_PYTHONW + else: + shebang_python = SHEBANG_PYTHON + m = SHEBANG_DETAIL_RE.match(shebang) + if m: + args = b' ' + m.groups()[-1] + else: + args = b'' + shebang = shebang_python 
+ args + data = shebang + data_after_shebang + else: + cr = data.find(b'\r') + lf = data.find(b'\n') + if cr < 0 or cr > lf: + term = b'\n' + else: + if data[cr:cr + 2] == b'\r\n': + term = b'\r\n' + else: + term = b'\r' + data = SHEBANG_PYTHON + term + data + return data + + def get_hash(self, data, hash_kind=None): + if hash_kind is None: + hash_kind = self.hash_kind + try: + hasher = getattr(hashlib, hash_kind) + except AttributeError: + raise DistlibException('Unsupported hash algorithm: %r' % hash_kind) + result = hasher(data).digest() + result = base64.urlsafe_b64encode(result).rstrip(b'=').decode('ascii') + return hash_kind, result + + def write_record(self, records, record_path, base): + records = list(records) # make a copy for sorting + p = to_posix(os.path.relpath(record_path, base)) + records.append((p, '', '')) + records.sort() + with CSVWriter(record_path) as writer: + for row in records: + writer.writerow(row) + + def write_records(self, info, libdir, archive_paths): + records = [] + distinfo, info_dir = info + hasher = getattr(hashlib, self.hash_kind) + for ap, p in archive_paths: + with open(p, 'rb') as f: + data = f.read() + digest = '%s=%s' % self.get_hash(data) + size = os.path.getsize(p) + records.append((ap, digest, size)) + + p = os.path.join(distinfo, 'RECORD') + self.write_record(records, p, libdir) + ap = to_posix(os.path.join(info_dir, 'RECORD')) + archive_paths.append((ap, p)) + + def build_zip(self, pathname, archive_paths): + with ZipFile(pathname, 'w', zipfile.ZIP_DEFLATED) as zf: + for ap, p in archive_paths: + logger.debug('Wrote %s to %s in wheel', p, ap) + zf.write(p, ap) + + def build(self, paths, tags=None, wheel_version=None): + """ + Build a wheel from files in specified paths, and use any specified tags + when determining the name of the wheel. + """ + if tags is None: + tags = {} + + libkey = list(filter(lambda o: o in paths, ('purelib', 'platlib')))[0] + if libkey == 'platlib': + is_pure = 'false' + default_pyver = [IMPVER] + default_abi = [ABI] + default_arch = [ARCH] + else: + is_pure = 'true' + default_pyver = [PYVER] + default_abi = ['none'] + default_arch = ['any'] + + self.pyver = tags.get('pyver', default_pyver) + self.abi = tags.get('abi', default_abi) + self.arch = tags.get('arch', default_arch) + + libdir = paths[libkey] + + name_ver = '%s-%s' % (self.name, self.version) + data_dir = '%s.data' % name_ver + info_dir = '%s.dist-info' % name_ver + + archive_paths = [] + + # First, stuff which is not in site-packages + for key in ('data', 'headers', 'scripts'): + if key not in paths: + continue + path = paths[key] + if os.path.isdir(path): + for root, dirs, files in os.walk(path): + for fn in files: + p = fsdecode(os.path.join(root, fn)) + rp = os.path.relpath(p, path) + ap = to_posix(os.path.join(data_dir, key, rp)) + archive_paths.append((ap, p)) + if key == 'scripts' and not p.endswith('.exe'): + with open(p, 'rb') as f: + data = f.read() + data = self.process_shebang(data) + with open(p, 'wb') as f: + f.write(data) + + # Now, stuff which is in site-packages, other than the + # distinfo stuff. 
+ path = libdir + distinfo = None + for root, dirs, files in os.walk(path): + if root == path: + # At the top level only, save distinfo for later + # and skip it for now + for i, dn in enumerate(dirs): + dn = fsdecode(dn) + if dn.endswith('.dist-info'): + distinfo = os.path.join(root, dn) + del dirs[i] + break + assert distinfo, '.dist-info directory expected, not found' + + for fn in files: + # comment out next suite to leave .pyc files in + if fsdecode(fn).endswith(('.pyc', '.pyo')): + continue + p = os.path.join(root, fn) + rp = to_posix(os.path.relpath(p, path)) + archive_paths.append((rp, p)) + + # Now distinfo. Assumed to be flat, i.e. os.listdir is enough. + files = os.listdir(distinfo) + for fn in files: + if fn not in ('RECORD', 'INSTALLER', 'SHARED', 'WHEEL'): + p = fsdecode(os.path.join(distinfo, fn)) + ap = to_posix(os.path.join(info_dir, fn)) + archive_paths.append((ap, p)) + + wheel_metadata = [ + 'Wheel-Version: %d.%d' % (wheel_version or self.wheel_version), + 'Generator: distlib %s' % __version__, + 'Root-Is-Purelib: %s' % is_pure, + ] + for pyver, abi, arch in self.tags: + wheel_metadata.append('Tag: %s-%s-%s' % (pyver, abi, arch)) + p = os.path.join(distinfo, 'WHEEL') + with open(p, 'w') as f: + f.write('\n'.join(wheel_metadata)) + ap = to_posix(os.path.join(info_dir, 'WHEEL')) + archive_paths.append((ap, p)) + + # Now, at last, RECORD. + # Paths in here are archive paths - nothing else makes sense. + self.write_records((distinfo, info_dir), libdir, archive_paths) + # Now, ready to build the zip file + pathname = os.path.join(self.dirname, self.filename) + self.build_zip(pathname, archive_paths) + return pathname + + def skip_entry(self, arcname): + """ + Determine whether an archive entry should be skipped when verifying + or installing. + """ + # The signature file won't be in RECORD, + # and we don't currently don't do anything with it + # We also skip directories, as they won't be in RECORD + # either. See: + # + # https://github.com/pypa/wheel/issues/294 + # https://github.com/pypa/wheel/issues/287 + # https://github.com/pypa/wheel/pull/289 + # + return arcname.endswith(('/', '/RECORD.jws')) + + def install(self, paths, maker, **kwargs): + """ + Install a wheel to the specified paths. If kwarg ``warner`` is + specified, it should be a callable, which will be called with two + tuples indicating the wheel version of this software and the wheel + version in the file, if there is a discrepancy in the versions. + This can be used to issue any warnings to raise any exceptions. + If kwarg ``lib_only`` is True, only the purelib/platlib files are + installed, and the headers, scripts, data and dist-info metadata are + not written. If kwarg ``bytecode_hashed_invalidation`` is True, written + bytecode will try to use file-hash based invalidation (PEP-552) on + supported interpreter versions (CPython 2.7+). + + The return value is a :class:`InstalledDistribution` instance unless + ``options.lib_only`` is True, in which case the return value is ``None``. 
+ """ + + dry_run = maker.dry_run + warner = kwargs.get('warner') + lib_only = kwargs.get('lib_only', False) + bc_hashed_invalidation = kwargs.get('bytecode_hashed_invalidation', False) + + pathname = os.path.join(self.dirname, self.filename) + name_ver = '%s-%s' % (self.name, self.version) + data_dir = '%s.data' % name_ver + info_dir = '%s.dist-info' % name_ver + + metadata_name = posixpath.join(info_dir, METADATA_FILENAME) + wheel_metadata_name = posixpath.join(info_dir, 'WHEEL') + record_name = posixpath.join(info_dir, 'RECORD') + + wrapper = codecs.getreader('utf-8') + + with ZipFile(pathname, 'r') as zf: + with zf.open(wheel_metadata_name) as bwf: + wf = wrapper(bwf) + message = message_from_file(wf) + wv = message['Wheel-Version'].split('.', 1) + file_version = tuple([int(i) for i in wv]) + if (file_version != self.wheel_version) and warner: + warner(self.wheel_version, file_version) + + if message['Root-Is-Purelib'] == 'true': + libdir = paths['purelib'] + else: + libdir = paths['platlib'] + + records = {} + with zf.open(record_name) as bf: + with CSVReader(stream=bf) as reader: + for row in reader: + p = row[0] + records[p] = row + + data_pfx = posixpath.join(data_dir, '') + info_pfx = posixpath.join(info_dir, '') + script_pfx = posixpath.join(data_dir, 'scripts', '') + + # make a new instance rather than a copy of maker's, + # as we mutate it + fileop = FileOperator(dry_run=dry_run) + fileop.record = True # so we can rollback if needed + + bc = not sys.dont_write_bytecode # Double negatives. Lovely! + + outfiles = [] # for RECORD writing + + # for script copying/shebang processing + workdir = tempfile.mkdtemp() + # set target dir later + # we default add_launchers to False, as the + # Python Launcher should be used instead + maker.source_dir = workdir + maker.target_dir = None + try: + for zinfo in zf.infolist(): + arcname = zinfo.filename + if isinstance(arcname, text_type): + u_arcname = arcname + else: + u_arcname = arcname.decode('utf-8') + if self.skip_entry(u_arcname): + continue + row = records[u_arcname] + if row[2] and str(zinfo.file_size) != row[2]: + raise DistlibException('size mismatch for ' + '%s' % u_arcname) + if row[1]: + kind, value = row[1].split('=', 1) + with zf.open(arcname) as bf: + data = bf.read() + _, digest = self.get_hash(data, kind) + if digest != value: + raise DistlibException('digest mismatch for ' + '%s' % arcname) + + if lib_only and u_arcname.startswith((info_pfx, data_pfx)): + logger.debug('lib_only: skipping %s', u_arcname) + continue + is_script = (u_arcname.startswith(script_pfx) + and not u_arcname.endswith('.exe')) + + if u_arcname.startswith(data_pfx): + _, where, rp = u_arcname.split('/', 2) + outfile = os.path.join(paths[where], convert_path(rp)) + else: + # meant for site-packages. 
+ if u_arcname in (wheel_metadata_name, record_name): + continue + outfile = os.path.join(libdir, convert_path(u_arcname)) + if not is_script: + with zf.open(arcname) as bf: + fileop.copy_stream(bf, outfile) + outfiles.append(outfile) + # Double check the digest of the written file + if not dry_run and row[1]: + with open(outfile, 'rb') as bf: + data = bf.read() + _, newdigest = self.get_hash(data, kind) + if newdigest != digest: + raise DistlibException('digest mismatch ' + 'on write for ' + '%s' % outfile) + if bc and outfile.endswith('.py'): + try: + pyc = fileop.byte_compile(outfile, + hashed_invalidation=bc_hashed_invalidation) + outfiles.append(pyc) + except Exception: + # Don't give up if byte-compilation fails, + # but log it and perhaps warn the user + logger.warning('Byte-compilation failed', + exc_info=True) + else: + fn = os.path.basename(convert_path(arcname)) + workname = os.path.join(workdir, fn) + with zf.open(arcname) as bf: + fileop.copy_stream(bf, workname) + + dn, fn = os.path.split(outfile) + maker.target_dir = dn + filenames = maker.make(fn) + fileop.set_executable_mode(filenames) + outfiles.extend(filenames) + + if lib_only: + logger.debug('lib_only: returning None') + dist = None + else: + # Generate scripts + + # Try to get pydist.json so we can see if there are + # any commands to generate. If this fails (e.g. because + # of a legacy wheel), log a warning but don't give up. + commands = None + file_version = self.info['Wheel-Version'] + if file_version == '1.0': + # Use legacy info + ep = posixpath.join(info_dir, 'entry_points.txt') + try: + with zf.open(ep) as bwf: + epdata = read_exports(bwf) + commands = {} + for key in ('console', 'gui'): + k = '%s_scripts' % key + if k in epdata: + commands['wrap_%s' % key] = d = {} + for v in epdata[k].values(): + s = '%s:%s' % (v.prefix, v.suffix) + if v.flags: + s += ' %s' % v.flags + d[v.name] = s + except Exception: + logger.warning('Unable to read legacy script ' + 'metadata, so cannot generate ' + 'scripts') + else: + try: + with zf.open(metadata_name) as bwf: + wf = wrapper(bwf) + commands = json.load(wf).get('extensions') + if commands: + commands = commands.get('python.commands') + except Exception: + logger.warning('Unable to read JSON metadata, so ' + 'cannot generate scripts') + if commands: + console_scripts = commands.get('wrap_console', {}) + gui_scripts = commands.get('wrap_gui', {}) + if console_scripts or gui_scripts: + script_dir = paths.get('scripts', '') + if not os.path.isdir(script_dir): + raise ValueError('Valid script path not ' + 'specified') + maker.target_dir = script_dir + for k, v in console_scripts.items(): + script = '%s = %s' % (k, v) + filenames = maker.make(script) + fileop.set_executable_mode(filenames) + + if gui_scripts: + options = {'gui': True } + for k, v in gui_scripts.items(): + script = '%s = %s' % (k, v) + filenames = maker.make(script, options) + fileop.set_executable_mode(filenames) + + p = os.path.join(libdir, info_dir) + dist = InstalledDistribution(p) + + # Write SHARED + paths = dict(paths) # don't change passed in dict + del paths['purelib'] + del paths['platlib'] + paths['lib'] = libdir + p = dist.write_shared_locations(paths, dry_run) + if p: + outfiles.append(p) + + # Write RECORD + dist.write_installed_files(outfiles, paths['prefix'], + dry_run) + return dist + except Exception: # pragma: no cover + logger.exception('installation failed.') + fileop.rollback() + raise + finally: + shutil.rmtree(workdir) + + def _get_dylib_cache(self): + global cache + if cache is None: + 
# Use native string to avoid issues on 2.x: see Python #20140. + base = os.path.join(get_cache_base(), str('dylib-cache'), + '%s.%s' % sys.version_info[:2]) + cache = Cache(base) + return cache + + def _get_extensions(self): + pathname = os.path.join(self.dirname, self.filename) + name_ver = '%s-%s' % (self.name, self.version) + info_dir = '%s.dist-info' % name_ver + arcname = posixpath.join(info_dir, 'EXTENSIONS') + wrapper = codecs.getreader('utf-8') + result = [] + with ZipFile(pathname, 'r') as zf: + try: + with zf.open(arcname) as bf: + wf = wrapper(bf) + extensions = json.load(wf) + cache = self._get_dylib_cache() + prefix = cache.prefix_to_dir(pathname) + cache_base = os.path.join(cache.base, prefix) + if not os.path.isdir(cache_base): + os.makedirs(cache_base) + for name, relpath in extensions.items(): + dest = os.path.join(cache_base, convert_path(relpath)) + if not os.path.exists(dest): + extract = True + else: + file_time = os.stat(dest).st_mtime + file_time = datetime.datetime.fromtimestamp(file_time) + info = zf.getinfo(relpath) + wheel_time = datetime.datetime(*info.date_time) + extract = wheel_time > file_time + if extract: + zf.extract(relpath, cache_base) + result.append((name, dest)) + except KeyError: + pass + return result + + def is_compatible(self): + """ + Determine if a wheel is compatible with the running system. + """ + return is_compatible(self) + + def is_mountable(self): + """ + Determine if a wheel is asserted as mountable by its metadata. + """ + return True # for now - metadata details TBD + + def mount(self, append=False): + pathname = os.path.abspath(os.path.join(self.dirname, self.filename)) + if not self.is_compatible(): + msg = 'Wheel %s not compatible with this Python.' % pathname + raise DistlibException(msg) + if not self.is_mountable(): + msg = 'Wheel %s is marked as not mountable.' 
% pathname + raise DistlibException(msg) + if pathname in sys.path: + logger.debug('%s already in path', pathname) + else: + if append: + sys.path.append(pathname) + else: + sys.path.insert(0, pathname) + extensions = self._get_extensions() + if extensions: + if _hook not in sys.meta_path: + sys.meta_path.append(_hook) + _hook.add(pathname, extensions) + + def unmount(self): + pathname = os.path.abspath(os.path.join(self.dirname, self.filename)) + if pathname not in sys.path: + logger.debug('%s not in path', pathname) + else: + sys.path.remove(pathname) + if pathname in _hook.impure_wheels: + _hook.remove(pathname) + if not _hook.impure_wheels: + if _hook in sys.meta_path: + sys.meta_path.remove(_hook) + + def verify(self): + pathname = os.path.join(self.dirname, self.filename) + name_ver = '%s-%s' % (self.name, self.version) + data_dir = '%s.data' % name_ver + info_dir = '%s.dist-info' % name_ver + + metadata_name = posixpath.join(info_dir, METADATA_FILENAME) + wheel_metadata_name = posixpath.join(info_dir, 'WHEEL') + record_name = posixpath.join(info_dir, 'RECORD') + + wrapper = codecs.getreader('utf-8') + + with ZipFile(pathname, 'r') as zf: + with zf.open(wheel_metadata_name) as bwf: + wf = wrapper(bwf) + message = message_from_file(wf) + wv = message['Wheel-Version'].split('.', 1) + file_version = tuple([int(i) for i in wv]) + # TODO version verification + + records = {} + with zf.open(record_name) as bf: + with CSVReader(stream=bf) as reader: + for row in reader: + p = row[0] + records[p] = row + + for zinfo in zf.infolist(): + arcname = zinfo.filename + if isinstance(arcname, text_type): + u_arcname = arcname + else: + u_arcname = arcname.decode('utf-8') + # See issue #115: some wheels have .. in their entries, but + # in the filename ... e.g. __main__..py ! So the check is + # updated to look for .. in the directory portions + p = u_arcname.split('/') + if '..' in p: + raise DistlibException('invalid entry in ' + 'wheel: %r' % u_arcname) + + if self.skip_entry(u_arcname): + continue + row = records[u_arcname] + if row[2] and str(zinfo.file_size) != row[2]: + raise DistlibException('size mismatch for ' + '%s' % u_arcname) + if row[1]: + kind, value = row[1].split('=', 1) + with zf.open(arcname) as bf: + data = bf.read() + _, digest = self.get_hash(data, kind) + if digest != value: + raise DistlibException('digest mismatch for ' + '%s' % arcname) + + def update(self, modifier, dest_dir=None, **kwargs): + """ + Update the contents of a wheel in a generic way. The modifier should + be a callable which expects a dictionary argument: its keys are + archive-entry paths, and its values are absolute filesystem paths + where the contents the corresponding archive entries can be found. The + modifier is free to change the contents of the files pointed to, add + new entries and remove entries, before returning. This method will + extract the entire contents of the wheel to a temporary location, call + the modifier, and then use the passed (and possibly updated) + dictionary to write a new wheel. If ``dest_dir`` is specified, the new + wheel is written there -- otherwise, the original wheel is overwritten. + + The modifier should return True if it updated the wheel, else False. + This method returns the same value the modifier returns. 
+ """ + + def get_version(path_map, info_dir): + version = path = None + key = '%s/%s' % (info_dir, METADATA_FILENAME) + if key not in path_map: + key = '%s/PKG-INFO' % info_dir + if key in path_map: + path = path_map[key] + version = Metadata(path=path).version + return version, path + + def update_version(version, path): + updated = None + try: + v = NormalizedVersion(version) + i = version.find('-') + if i < 0: + updated = '%s+1' % version + else: + parts = [int(s) for s in version[i + 1:].split('.')] + parts[-1] += 1 + updated = '%s+%s' % (version[:i], + '.'.join(str(i) for i in parts)) + except UnsupportedVersionError: + logger.debug('Cannot update non-compliant (PEP-440) ' + 'version %r', version) + if updated: + md = Metadata(path=path) + md.version = updated + legacy = not path.endswith(METADATA_FILENAME) + md.write(path=path, legacy=legacy) + logger.debug('Version updated from %r to %r', version, + updated) + + pathname = os.path.join(self.dirname, self.filename) + name_ver = '%s-%s' % (self.name, self.version) + info_dir = '%s.dist-info' % name_ver + record_name = posixpath.join(info_dir, 'RECORD') + with tempdir() as workdir: + with ZipFile(pathname, 'r') as zf: + path_map = {} + for zinfo in zf.infolist(): + arcname = zinfo.filename + if isinstance(arcname, text_type): + u_arcname = arcname + else: + u_arcname = arcname.decode('utf-8') + if u_arcname == record_name: + continue + if '..' in u_arcname: + raise DistlibException('invalid entry in ' + 'wheel: %r' % u_arcname) + zf.extract(zinfo, workdir) + path = os.path.join(workdir, convert_path(u_arcname)) + path_map[u_arcname] = path + + # Remember the version. + original_version, _ = get_version(path_map, info_dir) + # Files extracted. Call the modifier. + modified = modifier(path_map, **kwargs) + if modified: + # Something changed - need to build a new wheel. + current_version, path = get_version(path_map, info_dir) + if current_version and (current_version == original_version): + # Add or update local version to signify changes. + update_version(current_version, path) + # Decide where the new wheel goes. + if dest_dir is None: + fd, newpath = tempfile.mkstemp(suffix='.whl', + prefix='wheel-update-', + dir=workdir) + os.close(fd) + else: + if not os.path.isdir(dest_dir): + raise DistlibException('Not a directory: %r' % dest_dir) + newpath = os.path.join(dest_dir, self.filename) + archive_paths = list(path_map.items()) + distinfo = os.path.join(workdir, info_dir) + info = distinfo, info_dir + self.write_records(info, workdir, archive_paths) + self.build_zip(newpath, archive_paths) + if dest_dir is None: + shutil.copyfile(newpath, pathname) + return modified + +def compatible_tags(): + """ + Return (pyver, abi, arch) tuples compatible with this Python. 
+ """ + versions = [VER_SUFFIX] + major = VER_SUFFIX[0] + for minor in range(sys.version_info[1] - 1, - 1, -1): + versions.append(''.join([major, str(minor)])) + + abis = [] + for suffix, _, _ in imp.get_suffixes(): + if suffix.startswith('.abi'): + abis.append(suffix.split('.', 2)[1]) + abis.sort() + if ABI != 'none': + abis.insert(0, ABI) + abis.append('none') + result = [] + + arches = [ARCH] + if sys.platform == 'darwin': + m = re.match(r'(\w+)_(\d+)_(\d+)_(\w+)$', ARCH) + if m: + name, major, minor, arch = m.groups() + minor = int(minor) + matches = [arch] + if arch in ('i386', 'ppc'): + matches.append('fat') + if arch in ('i386', 'ppc', 'x86_64'): + matches.append('fat3') + if arch in ('ppc64', 'x86_64'): + matches.append('fat64') + if arch in ('i386', 'x86_64'): + matches.append('intel') + if arch in ('i386', 'x86_64', 'intel', 'ppc', 'ppc64'): + matches.append('universal') + while minor >= 0: + for match in matches: + s = '%s_%s_%s_%s' % (name, major, minor, match) + if s != ARCH: # already there + arches.append(s) + minor -= 1 + + # Most specific - our Python version, ABI and arch + for abi in abis: + for arch in arches: + result.append((''.join((IMP_PREFIX, versions[0])), abi, arch)) + + # where no ABI / arch dependency, but IMP_PREFIX dependency + for i, version in enumerate(versions): + result.append((''.join((IMP_PREFIX, version)), 'none', 'any')) + if i == 0: + result.append((''.join((IMP_PREFIX, version[0])), 'none', 'any')) + + # no IMP_PREFIX, ABI or arch dependency + for i, version in enumerate(versions): + result.append((''.join(('py', version)), 'none', 'any')) + if i == 0: + result.append((''.join(('py', version[0])), 'none', 'any')) + return set(result) + + +COMPATIBLE_TAGS = compatible_tags() + +del compatible_tags + + +def is_compatible(wheel, tags=None): + if not isinstance(wheel, Wheel): + wheel = Wheel(wheel) # assume it's a filename + result = False + if tags is None: + tags = COMPATIBLE_TAGS + for ver, abi, arch in tags: + if ver in wheel.pyver and abi in wheel.abi and arch in wheel.arch: + result = True + break + return result diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/distro.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distro.py new file mode 100644 index 0000000..3306163 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/distro.py @@ -0,0 +1,1216 @@ +# Copyright 2015,2016,2017 Nir Cohen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +The ``distro`` package (``distro`` stands for Linux Distribution) provides +information about the Linux distribution it runs on, such as a reliable +machine-readable distro ID, or version information. + +It is the recommended replacement for Python's original +:py:func:`platform.linux_distribution` function, but it provides much more +functionality. An alternative implementation became necessary because Python +3.5 deprecated this function, and Python 3.8 will remove it altogether. 
+Its predecessor function :py:func:`platform.dist` was already +deprecated since Python 2.6 and will also be removed in Python 3.8. +Still, there are many cases in which access to OS distribution information +is needed. See `Python issue 1322 `_ for +more information. +""" + +import os +import re +import sys +import json +import shlex +import logging +import argparse +import subprocess + + +_UNIXCONFDIR = os.environ.get('UNIXCONFDIR', '/etc') +_OS_RELEASE_BASENAME = 'os-release' + +#: Translation table for normalizing the "ID" attribute defined in os-release +#: files, for use by the :func:`distro.id` method. +#: +#: * Key: Value as defined in the os-release file, translated to lower case, +#: with blanks translated to underscores. +#: +#: * Value: Normalized value. +NORMALIZED_OS_ID = { + 'ol': 'oracle', # Oracle Enterprise Linux +} + +#: Translation table for normalizing the "Distributor ID" attribute returned by +#: the lsb_release command, for use by the :func:`distro.id` method. +#: +#: * Key: Value as returned by the lsb_release command, translated to lower +#: case, with blanks translated to underscores. +#: +#: * Value: Normalized value. +NORMALIZED_LSB_ID = { + 'enterpriseenterprise': 'oracle', # Oracle Enterprise Linux + 'redhatenterpriseworkstation': 'rhel', # RHEL 6, 7 Workstation + 'redhatenterpriseserver': 'rhel', # RHEL 6, 7 Server +} + +#: Translation table for normalizing the distro ID derived from the file name +#: of distro release files, for use by the :func:`distro.id` method. +#: +#: * Key: Value as derived from the file name of a distro release file, +#: translated to lower case, with blanks translated to underscores. +#: +#: * Value: Normalized value. +NORMALIZED_DISTRO_ID = { + 'redhat': 'rhel', # RHEL 6.x, 7.x +} + +# Pattern for content of distro release file (reversed) +_DISTRO_RELEASE_CONTENT_REVERSED_PATTERN = re.compile( + r'(?:[^)]*\)(.*)\()? *(?:STL )?([\d.+\-a-z]*\d) *(?:esaeler *)?(.+)') + +# Pattern for base file name of distro release file +_DISTRO_RELEASE_BASENAME_PATTERN = re.compile( + r'(\w+)[-_](release|version)$') + +# Base file names to be ignored when searching for distro release file +_DISTRO_RELEASE_IGNORE_BASENAMES = ( + 'debian_version', + 'lsb-release', + 'oem-release', + _OS_RELEASE_BASENAME, + 'system-release' +) + + +def linux_distribution(full_distribution_name=True): + """ + Return information about the current OS distribution as a tuple + ``(id_name, version, codename)`` with items as follows: + + * ``id_name``: If *full_distribution_name* is false, the result of + :func:`distro.id`. Otherwise, the result of :func:`distro.name`. + + * ``version``: The result of :func:`distro.version`. + + * ``codename``: The result of :func:`distro.codename`. + + The interface of this function is compatible with the original + :py:func:`platform.linux_distribution` function, supporting a subset of + its parameters. + + The data it returns may not exactly be the same, because it uses more data + sources than the original function, and that may lead to different data if + the OS distribution is not consistent across multiple data sources it + provides (there are indeed such distributions ...). + + Another reason for differences is the fact that the :func:`distro.id` + method normalizes the distro ID string to a reliable machine-readable value + for a number of popular OS distributions. + """ + return _distro.linux_distribution(full_distribution_name) + + +def id(): + """ + Return the distro ID of the current distribution, as a + machine-readable string. 
+ + For a number of OS distributions, the returned distro ID value is + *reliable*, in the sense that it is documented and that it does not change + across releases of the distribution. + + This package maintains the following reliable distro ID values: + + ============== ========================================= + Distro ID Distribution + ============== ========================================= + "ubuntu" Ubuntu + "debian" Debian + "rhel" RedHat Enterprise Linux + "centos" CentOS + "fedora" Fedora + "sles" SUSE Linux Enterprise Server + "opensuse" openSUSE + "amazon" Amazon Linux + "arch" Arch Linux + "cloudlinux" CloudLinux OS + "exherbo" Exherbo Linux + "gentoo" GenToo Linux + "ibm_powerkvm" IBM PowerKVM + "kvmibm" KVM for IBM z Systems + "linuxmint" Linux Mint + "mageia" Mageia + "mandriva" Mandriva Linux + "parallels" Parallels + "pidora" Pidora + "raspbian" Raspbian + "oracle" Oracle Linux (and Oracle Enterprise Linux) + "scientific" Scientific Linux + "slackware" Slackware + "xenserver" XenServer + "openbsd" OpenBSD + "netbsd" NetBSD + "freebsd" FreeBSD + ============== ========================================= + + If you have a need to get distros for reliable IDs added into this set, + or if you find that the :func:`distro.id` function returns a different + distro ID for one of the listed distros, please create an issue in the + `distro issue tracker`_. + + **Lookup hierarchy and transformations:** + + First, the ID is obtained from the following sources, in the specified + order. The first available and non-empty value is used: + + * the value of the "ID" attribute of the os-release file, + + * the value of the "Distributor ID" attribute returned by the lsb_release + command, + + * the first part of the file name of the distro release file, + + The so determined ID value then passes the following transformations, + before it is returned by this method: + + * it is translated to lower case, + + * blanks (which should not be there anyway) are translated to underscores, + + * a normalization of the ID is performed, based upon + `normalization tables`_. The purpose of this normalization is to ensure + that the ID is as reliable as possible, even across incompatible changes + in the OS distributions. A common reason for an incompatible change is + the addition of an os-release file, or the addition of the lsb_release + command, with ID values that differ from what was previously determined + from the distro release file name. + """ + return _distro.id() + + +def name(pretty=False): + """ + Return the name of the current OS distribution, as a human-readable + string. + + If *pretty* is false, the name is returned without version or codename. + (e.g. "CentOS Linux") + + If *pretty* is true, the version and codename are appended. + (e.g. "CentOS Linux 7.1.1503 (Core)") + + **Lookup hierarchy:** + + The name is obtained from the following sources, in the specified order. + The first available and non-empty value is used: + + * If *pretty* is false: + + - the value of the "NAME" attribute of the os-release file, + + - the value of the "Distributor ID" attribute returned by the lsb_release + command, + + - the value of the "" field of the distro release file. 
+ + * If *pretty* is true: + + - the value of the "PRETTY_NAME" attribute of the os-release file, + + - the value of the "Description" attribute returned by the lsb_release + command, + + - the value of the "" field of the distro release file, appended + with the value of the pretty version ("" and "" + fields) of the distro release file, if available. + """ + return _distro.name(pretty) + + +def version(pretty=False, best=False): + """ + Return the version of the current OS distribution, as a human-readable + string. + + If *pretty* is false, the version is returned without codename (e.g. + "7.0"). + + If *pretty* is true, the codename in parenthesis is appended, if the + codename is non-empty (e.g. "7.0 (Maipo)"). + + Some distributions provide version numbers with different precisions in + the different sources of distribution information. Examining the different + sources in a fixed priority order does not always yield the most precise + version (e.g. for Debian 8.2, or CentOS 7.1). + + The *best* parameter can be used to control the approach for the returned + version: + + If *best* is false, the first non-empty version number in priority order of + the examined sources is returned. + + If *best* is true, the most precise version number out of all examined + sources is returned. + + **Lookup hierarchy:** + + In all cases, the version number is obtained from the following sources. + If *best* is false, this order represents the priority order: + + * the value of the "VERSION_ID" attribute of the os-release file, + * the value of the "Release" attribute returned by the lsb_release + command, + * the version number parsed from the "" field of the first line + of the distro release file, + * the version number parsed from the "PRETTY_NAME" attribute of the + os-release file, if it follows the format of the distro release files. + * the version number parsed from the "Description" attribute returned by + the lsb_release command, if it follows the format of the distro release + files. + """ + return _distro.version(pretty, best) + + +def version_parts(best=False): + """ + Return the version of the current OS distribution as a tuple + ``(major, minor, build_number)`` with items as follows: + + * ``major``: The result of :func:`distro.major_version`. + + * ``minor``: The result of :func:`distro.minor_version`. + + * ``build_number``: The result of :func:`distro.build_number`. + + For a description of the *best* parameter, see the :func:`distro.version` + method. + """ + return _distro.version_parts(best) + + +def major_version(best=False): + """ + Return the major version of the current OS distribution, as a string, + if provided. + Otherwise, the empty string is returned. The major version is the first + part of the dot-separated version string. + + For a description of the *best* parameter, see the :func:`distro.version` + method. + """ + return _distro.major_version(best) + + +def minor_version(best=False): + """ + Return the minor version of the current OS distribution, as a string, + if provided. + Otherwise, the empty string is returned. The minor version is the second + part of the dot-separated version string. + + For a description of the *best* parameter, see the :func:`distro.version` + method. + """ + return _distro.minor_version(best) + + +def build_number(best=False): + """ + Return the build number of the current OS distribution, as a string, + if provided. + Otherwise, the empty string is returned. The build number is the third part + of the dot-separated version string. 
+ + For a description of the *best* parameter, see the :func:`distro.version` + method. + """ + return _distro.build_number(best) + + +def like(): + """ + Return a space-separated list of distro IDs of distributions that are + closely related to the current OS distribution in regards to packaging + and programming interfaces, for example distributions the current + distribution is a derivative from. + + **Lookup hierarchy:** + + This information item is only provided by the os-release file. + For details, see the description of the "ID_LIKE" attribute in the + `os-release man page + `_. + """ + return _distro.like() + + +def codename(): + """ + Return the codename for the release of the current OS distribution, + as a string. + + If the distribution does not have a codename, an empty string is returned. + + Note that the returned codename is not always really a codename. For + example, openSUSE returns "x86_64". This function does not handle such + cases in any special way and just returns the string it finds, if any. + + **Lookup hierarchy:** + + * the codename within the "VERSION" attribute of the os-release file, if + provided, + + * the value of the "Codename" attribute returned by the lsb_release + command, + + * the value of the "" field of the distro release file. + """ + return _distro.codename() + + +def info(pretty=False, best=False): + """ + Return certain machine-readable information items about the current OS + distribution in a dictionary, as shown in the following example: + + .. sourcecode:: python + + { + 'id': 'rhel', + 'version': '7.0', + 'version_parts': { + 'major': '7', + 'minor': '0', + 'build_number': '' + }, + 'like': 'fedora', + 'codename': 'Maipo' + } + + The dictionary structure and keys are always the same, regardless of which + information items are available in the underlying data sources. The values + for the various keys are as follows: + + * ``id``: The result of :func:`distro.id`. + + * ``version``: The result of :func:`distro.version`. + + * ``version_parts -> major``: The result of :func:`distro.major_version`. + + * ``version_parts -> minor``: The result of :func:`distro.minor_version`. + + * ``version_parts -> build_number``: The result of + :func:`distro.build_number`. + + * ``like``: The result of :func:`distro.like`. + + * ``codename``: The result of :func:`distro.codename`. + + For a description of the *pretty* and *best* parameters, see the + :func:`distro.version` method. + """ + return _distro.info(pretty, best) + + +def os_release_info(): + """ + Return a dictionary containing key-value pairs for the information items + from the os-release file data source of the current OS distribution. + + See `os-release file`_ for details about these information items. + """ + return _distro.os_release_info() + + +def lsb_release_info(): + """ + Return a dictionary containing key-value pairs for the information items + from the lsb_release command data source of the current OS distribution. + + See `lsb_release command output`_ for details about these information + items. + """ + return _distro.lsb_release_info() + + +def distro_release_info(): + """ + Return a dictionary containing key-value pairs for the information items + from the distro release file data source of the current OS distribution. + + See `distro release file`_ for details about these information items. 
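# --- Editor's sketch (not part of the vendored module): consuming the
# consolidated info() dictionary documented above. Assumes the module is
# importable as plain "distro" (its PyPI name); in this layout it is vendored
# under pip._vendor. Printed values depend on the host and are examples only.
import distro

snapshot = distro.info(pretty=True, best=True)
print(snapshot['id'])                       # e.g. 'rhel'
print(snapshot['version_parts']['major'])   # e.g. '7'
print(distro.like())                        # e.g. 'fedora' on CentOS/RHEL
print(distro.codename())                    # e.g. 'Maipo'
# --- end of editor's sketch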
+ """ + return _distro.distro_release_info() + + +def uname_info(): + """ + Return a dictionary containing key-value pairs for the information items + from the distro release file data source of the current OS distribution. + """ + return _distro.uname_info() + + +def os_release_attr(attribute): + """ + Return a single named information item from the os-release file data source + of the current OS distribution. + + Parameters: + + * ``attribute`` (string): Key of the information item. + + Returns: + + * (string): Value of the information item, if the item exists. + The empty string, if the item does not exist. + + See `os-release file`_ for details about these information items. + """ + return _distro.os_release_attr(attribute) + + +def lsb_release_attr(attribute): + """ + Return a single named information item from the lsb_release command output + data source of the current OS distribution. + + Parameters: + + * ``attribute`` (string): Key of the information item. + + Returns: + + * (string): Value of the information item, if the item exists. + The empty string, if the item does not exist. + + See `lsb_release command output`_ for details about these information + items. + """ + return _distro.lsb_release_attr(attribute) + + +def distro_release_attr(attribute): + """ + Return a single named information item from the distro release file + data source of the current OS distribution. + + Parameters: + + * ``attribute`` (string): Key of the information item. + + Returns: + + * (string): Value of the information item, if the item exists. + The empty string, if the item does not exist. + + See `distro release file`_ for details about these information items. + """ + return _distro.distro_release_attr(attribute) + + +def uname_attr(attribute): + """ + Return a single named information item from the distro release file + data source of the current OS distribution. + + Parameters: + + * ``attribute`` (string): Key of the information item. + + Returns: + + * (string): Value of the information item, if the item exists. + The empty string, if the item does not exist. + """ + return _distro.uname_attr(attribute) + + +class cached_property(object): + """A version of @property which caches the value. On access, it calls the + underlying function and sets the value in `__dict__` so future accesses + will not re-call the property. + """ + def __init__(self, f): + self._fname = f.__name__ + self._f = f + + def __get__(self, obj, owner): + assert obj is not None, 'call {} on an instance'.format(self._fname) + ret = obj.__dict__[self._fname] = self._f(obj) + return ret + + +class LinuxDistribution(object): + """ + Provides information about a OS distribution. + + This package creates a private module-global instance of this class with + default initialization arguments, that is used by the + `consolidated accessor functions`_ and `single source accessor functions`_. + By using default initialization arguments, that module-global instance + returns data about the current OS distribution (i.e. the distro this + package runs on). + + Normally, it is not necessary to create additional instances of this class. + However, in situations where control is needed over the exact data sources + that are used, instances of this class can be created with a specific + distro release file, or a specific os-release file, or without invoking the + lsb_release command. 
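# --- Editor's sketch (not part of the vendored module): the behaviour of the
# cached_property descriptor defined above. The wrapped function runs once per
# instance; afterwards the value cached in __dict__ shadows the descriptor.
# Assumes cached_property is in scope (e.g. "from distro import
# cached_property" when using the PyPI release of this module).
class _Demo(object):
    calls = 0

    @cached_property
    def expensive(self):
        _Demo.calls += 1
        return 42

d = _Demo()
assert d.expensive == 42 and d.expensive == 42
assert _Demo.calls == 1            # computed only once
assert 'expensive' in d.__dict__   # cached on the instance
# --- end of editor's sketch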
+ """ + + def __init__(self, + include_lsb=True, + os_release_file='', + distro_release_file='', + include_uname=True): + """ + The initialization method of this class gathers information from the + available data sources, and stores that in private instance attributes. + Subsequent access to the information items uses these private instance + attributes, so that the data sources are read only once. + + Parameters: + + * ``include_lsb`` (bool): Controls whether the + `lsb_release command output`_ is included as a data source. + + If the lsb_release command is not available in the program execution + path, the data source for the lsb_release command will be empty. + + * ``os_release_file`` (string): The path name of the + `os-release file`_ that is to be used as a data source. + + An empty string (the default) will cause the default path name to + be used (see `os-release file`_ for details). + + If the specified or defaulted os-release file does not exist, the + data source for the os-release file will be empty. + + * ``distro_release_file`` (string): The path name of the + `distro release file`_ that is to be used as a data source. + + An empty string (the default) will cause a default search algorithm + to be used (see `distro release file`_ for details). + + If the specified distro release file does not exist, or if no default + distro release file can be found, the data source for the distro + release file will be empty. + + * ``include_name`` (bool): Controls whether uname command output is + included as a data source. If the uname command is not available in + the program execution path the data source for the uname command will + be empty. + + Public instance attributes: + + * ``os_release_file`` (string): The path name of the + `os-release file`_ that is actually used as a data source. The + empty string if no distro release file is used as a data source. + + * ``distro_release_file`` (string): The path name of the + `distro release file`_ that is actually used as a data source. The + empty string if no distro release file is used as a data source. + + * ``include_lsb`` (bool): The result of the ``include_lsb`` parameter. + This controls whether the lsb information will be loaded. + + * ``include_uname`` (bool): The result of the ``include_uname`` + parameter. This controls whether the uname information will + be loaded. + + Raises: + + * :py:exc:`IOError`: Some I/O issue with an os-release file or distro + release file. + + * :py:exc:`subprocess.CalledProcessError`: The lsb_release command had + some issue (other than not being available in the program execution + path). + + * :py:exc:`UnicodeError`: A data source has unexpected characters or + uses an unexpected encoding. 
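# --- Editor's sketch (not part of the vendored module): pinning the
# constructor parameters documented above to explicit data sources. Assumes
# LinuxDistribution is in scope (e.g. "from distro import LinuxDistribution"),
# and the os-release path is a hypothetical test fixture. Since the data
# sources are read lazily via cached_property, the documented IOError /
# subprocess.CalledProcessError / UnicodeError surface on first access rather
# than at construction time.
import subprocess

ld = LinuxDistribution(include_lsb=False,
                       os_release_file='/tmp/fixture-os-release',
                       include_uname=False)
try:
    print(ld.id(), ld.version(best=True))
except (IOError, subprocess.CalledProcessError, UnicodeError) as exc:
    print('could not read distro information:', exc)
# --- end of editor's sketch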
+ """ + self.os_release_file = os_release_file or \ + os.path.join(_UNIXCONFDIR, _OS_RELEASE_BASENAME) + self.distro_release_file = distro_release_file or '' # updated later + self.include_lsb = include_lsb + self.include_uname = include_uname + + def __repr__(self): + """Return repr of all info + """ + return \ + "LinuxDistribution(" \ + "os_release_file={self.os_release_file!r}, " \ + "distro_release_file={self.distro_release_file!r}, " \ + "include_lsb={self.include_lsb!r}, " \ + "include_uname={self.include_uname!r}, " \ + "_os_release_info={self._os_release_info!r}, " \ + "_lsb_release_info={self._lsb_release_info!r}, " \ + "_distro_release_info={self._distro_release_info!r}, " \ + "_uname_info={self._uname_info!r})".format( + self=self) + + def linux_distribution(self, full_distribution_name=True): + """ + Return information about the OS distribution that is compatible + with Python's :func:`platform.linux_distribution`, supporting a subset + of its parameters. + + For details, see :func:`distro.linux_distribution`. + """ + return ( + self.name() if full_distribution_name else self.id(), + self.version(), + self.codename() + ) + + def id(self): + """Return the distro ID of the OS distribution, as a string. + + For details, see :func:`distro.id`. + """ + def normalize(distro_id, table): + distro_id = distro_id.lower().replace(' ', '_') + return table.get(distro_id, distro_id) + + distro_id = self.os_release_attr('id') + if distro_id: + return normalize(distro_id, NORMALIZED_OS_ID) + + distro_id = self.lsb_release_attr('distributor_id') + if distro_id: + return normalize(distro_id, NORMALIZED_LSB_ID) + + distro_id = self.distro_release_attr('id') + if distro_id: + return normalize(distro_id, NORMALIZED_DISTRO_ID) + + distro_id = self.uname_attr('id') + if distro_id: + return normalize(distro_id, NORMALIZED_DISTRO_ID) + + return '' + + def name(self, pretty=False): + """ + Return the name of the OS distribution, as a string. + + For details, see :func:`distro.name`. + """ + name = self.os_release_attr('name') \ + or self.lsb_release_attr('distributor_id') \ + or self.distro_release_attr('name') \ + or self.uname_attr('name') + if pretty: + name = self.os_release_attr('pretty_name') \ + or self.lsb_release_attr('description') + if not name: + name = self.distro_release_attr('name') \ + or self.uname_attr('name') + version = self.version(pretty=True) + if version: + name = name + ' ' + version + return name or '' + + def version(self, pretty=False, best=False): + """ + Return the version of the OS distribution, as a string. + + For details, see :func:`distro.version`. + """ + versions = [ + self.os_release_attr('version_id'), + self.lsb_release_attr('release'), + self.distro_release_attr('version_id'), + self._parse_distro_release_content( + self.os_release_attr('pretty_name')).get('version_id', ''), + self._parse_distro_release_content( + self.lsb_release_attr('description')).get('version_id', ''), + self.uname_attr('release') + ] + version = '' + if best: + # This algorithm uses the last version in priority order that has + # the best precision. If the versions are not in conflict, that + # does not matter; otherwise, using the last one instead of the + # first one might be considered a surprise. 
+ for v in versions: + if v.count(".") > version.count(".") or version == '': + version = v + else: + for v in versions: + if v != '': + version = v + break + if pretty and version and self.codename(): + version = u'{0} ({1})'.format(version, self.codename()) + return version + + def version_parts(self, best=False): + """ + Return the version of the OS distribution, as a tuple of version + numbers. + + For details, see :func:`distro.version_parts`. + """ + version_str = self.version(best=best) + if version_str: + version_regex = re.compile(r'(\d+)\.?(\d+)?\.?(\d+)?') + matches = version_regex.match(version_str) + if matches: + major, minor, build_number = matches.groups() + return major, minor or '', build_number or '' + return '', '', '' + + def major_version(self, best=False): + """ + Return the major version number of the current distribution. + + For details, see :func:`distro.major_version`. + """ + return self.version_parts(best)[0] + + def minor_version(self, best=False): + """ + Return the minor version number of the current distribution. + + For details, see :func:`distro.minor_version`. + """ + return self.version_parts(best)[1] + + def build_number(self, best=False): + """ + Return the build number of the current distribution. + + For details, see :func:`distro.build_number`. + """ + return self.version_parts(best)[2] + + def like(self): + """ + Return the IDs of distributions that are like the OS distribution. + + For details, see :func:`distro.like`. + """ + return self.os_release_attr('id_like') or '' + + def codename(self): + """ + Return the codename of the OS distribution. + + For details, see :func:`distro.codename`. + """ + try: + # Handle os_release specially since distros might purposefully set + # this to empty string to have no codename + return self._os_release_info['codename'] + except KeyError: + return self.lsb_release_attr('codename') \ + or self.distro_release_attr('codename') \ + or '' + + def info(self, pretty=False, best=False): + """ + Return certain machine-readable information about the OS + distribution. + + For details, see :func:`distro.info`. + """ + return dict( + id=self.id(), + version=self.version(pretty, best), + version_parts=dict( + major=self.major_version(best), + minor=self.minor_version(best), + build_number=self.build_number(best) + ), + like=self.like(), + codename=self.codename(), + ) + + def os_release_info(self): + """ + Return a dictionary containing key-value pairs for the information + items from the os-release file data source of the OS distribution. + + For details, see :func:`distro.os_release_info`. + """ + return self._os_release_info + + def lsb_release_info(self): + """ + Return a dictionary containing key-value pairs for the information + items from the lsb_release command data source of the OS + distribution. + + For details, see :func:`distro.lsb_release_info`. + """ + return self._lsb_release_info + + def distro_release_info(self): + """ + Return a dictionary containing key-value pairs for the information + items from the distro release file data source of the OS + distribution. + + For details, see :func:`distro.distro_release_info`. + """ + return self._distro_release_info + + def uname_info(self): + """ + Return a dictionary containing key-value pairs for the information + items from the uname command data source of the OS distribution. + + For details, see :func:`distro.uname_info`. 
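# --- Editor's sketch (not part of the vendored module): the "best" selection
# rule used by version() above, isolated. Among the candidate version strings
# gathered from the different data sources, the one with the most
# dot-separated precision is kept.
def pick_best_version(candidates):
    version = ''
    for v in candidates:
        if v.count('.') > version.count('.') or version == '':
            version = v
    return version

assert pick_best_version(['8', '', '8.2']) == '8.2'          # more precise wins
assert pick_best_version(['7.1.1503', '7.1']) == '7.1.1503'  # earlier value kept
                                                             # unless a later one
                                                             # is more precise
# --- end of editor's sketch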
+ """ + return self._uname_info + + def os_release_attr(self, attribute): + """ + Return a single named information item from the os-release file data + source of the OS distribution. + + For details, see :func:`distro.os_release_attr`. + """ + return self._os_release_info.get(attribute, '') + + def lsb_release_attr(self, attribute): + """ + Return a single named information item from the lsb_release command + output data source of the OS distribution. + + For details, see :func:`distro.lsb_release_attr`. + """ + return self._lsb_release_info.get(attribute, '') + + def distro_release_attr(self, attribute): + """ + Return a single named information item from the distro release file + data source of the OS distribution. + + For details, see :func:`distro.distro_release_attr`. + """ + return self._distro_release_info.get(attribute, '') + + def uname_attr(self, attribute): + """ + Return a single named information item from the uname command + output data source of the OS distribution. + + For details, see :func:`distro.uname_release_attr`. + """ + return self._uname_info.get(attribute, '') + + @cached_property + def _os_release_info(self): + """ + Get the information items from the specified os-release file. + + Returns: + A dictionary containing all information items. + """ + if os.path.isfile(self.os_release_file): + with open(self.os_release_file) as release_file: + return self._parse_os_release_content(release_file) + return {} + + @staticmethod + def _parse_os_release_content(lines): + """ + Parse the lines of an os-release file. + + Parameters: + + * lines: Iterable through the lines in the os-release file. + Each line must be a unicode string or a UTF-8 encoded byte + string. + + Returns: + A dictionary containing all information items. + """ + props = {} + lexer = shlex.shlex(lines, posix=True) + lexer.whitespace_split = True + + # The shlex module defines its `wordchars` variable using literals, + # making it dependent on the encoding of the Python source file. + # In Python 2.6 and 2.7, the shlex source file is encoded in + # 'iso-8859-1', and the `wordchars` variable is defined as a byte + # string. This causes a UnicodeDecodeError to be raised when the + # parsed content is a unicode object. The following fix resolves that + # (... but it should be fixed in shlex...): + if sys.version_info[0] == 2 and isinstance(lexer.wordchars, bytes): + lexer.wordchars = lexer.wordchars.decode('iso-8859-1') + + tokens = list(lexer) + for token in tokens: + # At this point, all shell-like parsing has been done (i.e. + # comments processed, quotes and backslash escape sequences + # processed, multi-line values assembled, trailing newlines + # stripped, etc.), so the tokens are now either: + # * variable assignments: var=value + # * commands or their arguments (not allowed in os-release) + if '=' in token: + k, v = token.split('=', 1) + if isinstance(v, bytes): + v = v.decode('utf-8') + props[k.lower()] = v + else: + # Ignore any tokens that are not variable assignments + pass + + if 'version_codename' in props: + # os-release added a version_codename field. Use that in + # preference to anything else Note that some distros purposefully + # do not have code names. 
They should be setting + # version_codename="" + props['codename'] = props['version_codename'] + elif 'ubuntu_codename' in props: + # Same as above but a non-standard field name used on older Ubuntus + props['codename'] = props['ubuntu_codename'] + elif 'version' in props: + # If there is no version_codename, parse it from the version + codename = re.search(r'(\(\D+\))|,(\s+)?\D+', props['version']) + if codename: + codename = codename.group() + codename = codename.strip('()') + codename = codename.strip(',') + codename = codename.strip() + # codename appears within paranthese. + props['codename'] = codename + + return props + + @cached_property + def _lsb_release_info(self): + """ + Get the information items from the lsb_release command output. + + Returns: + A dictionary containing all information items. + """ + if not self.include_lsb: + return {} + with open(os.devnull, 'w') as devnull: + try: + cmd = ('lsb_release', '-a') + stdout = subprocess.check_output(cmd, stderr=devnull) + except OSError: # Command not found + return {} + content = stdout.decode(sys.getfilesystemencoding()).splitlines() + return self._parse_lsb_release_content(content) + + @staticmethod + def _parse_lsb_release_content(lines): + """ + Parse the output of the lsb_release command. + + Parameters: + + * lines: Iterable through the lines of the lsb_release output. + Each line must be a unicode string or a UTF-8 encoded byte + string. + + Returns: + A dictionary containing all information items. + """ + props = {} + for line in lines: + kv = line.strip('\n').split(':', 1) + if len(kv) != 2: + # Ignore lines without colon. + continue + k, v = kv + props.update({k.replace(' ', '_').lower(): v.strip()}) + return props + + @cached_property + def _uname_info(self): + with open(os.devnull, 'w') as devnull: + try: + cmd = ('uname', '-rs') + stdout = subprocess.check_output(cmd, stderr=devnull) + except OSError: + return {} + content = stdout.decode(sys.getfilesystemencoding()).splitlines() + return self._parse_uname_content(content) + + @staticmethod + def _parse_uname_content(lines): + props = {} + match = re.search(r'^([^\s]+)\s+([\d\.]+)', lines[0].strip()) + if match: + name, version = match.groups() + + # This is to prevent the Linux kernel version from + # appearing as the 'best' version on otherwise + # identifiable distributions. + if name == 'Linux': + return {} + props['id'] = name.lower() + props['name'] = name + props['release'] = version + return props + + @cached_property + def _distro_release_info(self): + """ + Get the information items from the specified distro release file. + + Returns: + A dictionary containing all information items. + """ + if self.distro_release_file: + # If it was specified, we use it and parse what we can, even if + # its file name or content does not match the expected pattern. + distro_info = self._parse_distro_release_file( + self.distro_release_file) + basename = os.path.basename(self.distro_release_file) + # The file name pattern for user-specified distro release files + # is somewhat more tolerant (compared to when searching for the + # file), because we want to use what was specified as best as + # possible. 
+ match = _DISTRO_RELEASE_BASENAME_PATTERN.match(basename) + if 'name' in distro_info \ + and 'cloudlinux' in distro_info['name'].lower(): + distro_info['id'] = 'cloudlinux' + elif match: + distro_info['id'] = match.group(1) + return distro_info + else: + try: + basenames = os.listdir(_UNIXCONFDIR) + # We sort for repeatability in cases where there are multiple + # distro specific files; e.g. CentOS, Oracle, Enterprise all + # containing `redhat-release` on top of their own. + basenames.sort() + except OSError: + # This may occur when /etc is not readable but we can't be + # sure about the *-release files. Check common entries of + # /etc for information. If they turn out to not be there the + # error is handled in `_parse_distro_release_file()`. + basenames = ['SuSE-release', + 'arch-release', + 'base-release', + 'centos-release', + 'fedora-release', + 'gentoo-release', + 'mageia-release', + 'mandrake-release', + 'mandriva-release', + 'mandrivalinux-release', + 'manjaro-release', + 'oracle-release', + 'redhat-release', + 'sl-release', + 'slackware-version'] + for basename in basenames: + if basename in _DISTRO_RELEASE_IGNORE_BASENAMES: + continue + match = _DISTRO_RELEASE_BASENAME_PATTERN.match(basename) + if match: + filepath = os.path.join(_UNIXCONFDIR, basename) + distro_info = self._parse_distro_release_file(filepath) + if 'name' in distro_info: + # The name is always present if the pattern matches + self.distro_release_file = filepath + distro_info['id'] = match.group(1) + if 'cloudlinux' in distro_info['name'].lower(): + distro_info['id'] = 'cloudlinux' + return distro_info + return {} + + def _parse_distro_release_file(self, filepath): + """ + Parse a distro release file. + + Parameters: + + * filepath: Path name of the distro release file. + + Returns: + A dictionary containing all information items. + """ + try: + with open(filepath) as fp: + # Only parse the first line. For instance, on SLES there + # are multiple lines. We don't want them... + return self._parse_distro_release_content(fp.readline()) + except (OSError, IOError): + # Ignore not being able to read a specific, seemingly version + # related file. + # See https://github.com/nir0s/distro/issues/162 + return {} + + @staticmethod + def _parse_distro_release_content(line): + """ + Parse a line from a distro release file. + + Parameters: + * line: Line from the distro release file. Must be a unicode string + or a UTF-8 encoded byte string. + + Returns: + A dictionary containing all information items. 
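# --- Editor's sketch (not part of the vendored module): feeding a typical
# distro release line to the static parser defined just below. The expected
# dictionary follows from the reversed-pattern match that the method performs.
# Assumes LinuxDistribution is in scope (e.g. "from distro import
# LinuxDistribution" with the PyPI release).
parsed = LinuxDistribution._parse_distro_release_content(
    'CentOS Linux release 7.1.1503 (Core)')
assert parsed == {'name': 'CentOS Linux',
                  'version_id': '7.1.1503',
                  'codename': 'Core'}
# --- end of editor's sketch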
+ """ + if isinstance(line, bytes): + line = line.decode('utf-8') + matches = _DISTRO_RELEASE_CONTENT_REVERSED_PATTERN.match( + line.strip()[::-1]) + distro_info = {} + if matches: + # regexp ensures non-None + distro_info['name'] = matches.group(3)[::-1] + if matches.group(2): + distro_info['version_id'] = matches.group(2)[::-1] + if matches.group(1): + distro_info['codename'] = matches.group(1)[::-1] + elif line: + distro_info['name'] = line.strip() + return distro_info + + +_distro = LinuxDistribution() + + +def main(): + logger = logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) + logger.addHandler(logging.StreamHandler(sys.stdout)) + + parser = argparse.ArgumentParser(description="OS distro info tool") + parser.add_argument( + '--json', + '-j', + help="Output in machine readable format", + action="store_true") + args = parser.parse_args() + + if args.json: + logger.info(json.dumps(info(), indent=4, sort_keys=True)) + else: + logger.info('Name: %s', name(pretty=True)) + distribution_version = version(pretty=True) + logger.info('Version: %s', distribution_version) + distribution_codename = codename() + logger.info('Codename: %s', distribution_codename) + + +if __name__ == '__main__': + main() diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__init__.py new file mode 100644 index 0000000..0491234 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__init__.py @@ -0,0 +1,35 @@ +""" +HTML parsing library based on the `WHATWG HTML specification +`_. The parser is designed to be compatible with +existing HTML found in the wild and implements well-defined error recovery that +is largely compatible with modern desktop web browsers. + +Example usage:: + + from pip._vendor import html5lib + with open("my_document.html", "rb") as f: + tree = html5lib.parse(f) + +For convenience, this module re-exports the following names: + +* :func:`~.html5parser.parse` +* :func:`~.html5parser.parseFragment` +* :class:`~.html5parser.HTMLParser` +* :func:`~.treebuilders.getTreeBuilder` +* :func:`~.treewalkers.getTreeWalker` +* :func:`~.serializer.serialize` +""" + +from __future__ import absolute_import, division, unicode_literals + +from .html5parser import HTMLParser, parse, parseFragment +from .treebuilders import getTreeBuilder +from .treewalkers import getTreeWalker +from .serializer import serialize + +__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", + "getTreeWalker", "serialize"] + +# this has to be at the top level, see how setup.py parses this +#: Distribution version number. 
+__version__ = "1.0.1" diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..2cc66c2 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_ihatexml.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_ihatexml.cpython-37.pyc new file mode 100644 index 0000000..74232e2 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_ihatexml.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_inputstream.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_inputstream.cpython-37.pyc new file mode 100644 index 0000000..7a48bff Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_inputstream.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_tokenizer.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_tokenizer.cpython-37.pyc new file mode 100644 index 0000000..5715fa6 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_tokenizer.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_utils.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_utils.cpython-37.pyc new file mode 100644 index 0000000..b6141b6 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_utils.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/constants.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/constants.cpython-37.pyc new file mode 100644 index 0000000..4a37df9 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/constants.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/html5parser.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/html5parser.cpython-37.pyc new file mode 100644 index 0000000..44cd430 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/html5parser.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/serializer.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/serializer.cpython-37.pyc new file mode 100644 index 0000000..1aa0394 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/serializer.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_ihatexml.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_ihatexml.py new file mode 100644 index 0000000..4c77717 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_ihatexml.py @@ -0,0 +1,288 @@ +from __future__ import absolute_import, division, unicode_literals + +import re +import warnings + +from .constants import DataLossWarning + +baseChar = """ 
+[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | +[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | +[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | +[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | +[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | +[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | +[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | +[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | +[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | +[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | +[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | +[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | +[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | +[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | +[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | +[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | +[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | +[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | +[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | +[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | +[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | +[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | +[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | +[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | +[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | +[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | +[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | +[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | +[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | +[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | +#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | +#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | +#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | +[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | +[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | +#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | +[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | +[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | +[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | +[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | +[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | +#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | +[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | +[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | +[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | +[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]""" + +ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]""" + +combiningCharacter = """ +[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | +[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | +[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | +[#x06E0-#x06E4] | [#x06E7-#x06E8] 
| [#x06EA-#x06ED] | [#x0901-#x0903] | +#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | +[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | +[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | +#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | +[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | +[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | +#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | +[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | +[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | +[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | +[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | +[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | +#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | +[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | +#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | +[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | +[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | +#x3099 | #x309A""" + +digit = """ +[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | +[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | +[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | +[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]""" + +extender = """ +#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | +#[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]""" + +letter = " | ".join([baseChar, ideographic]) + +# Without the +name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter, + extender]) +nameFirst = " | ".join([letter, "_"]) + +reChar = re.compile(r"#x([\d|A-F]{4,4})") +reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]") + + +def charStringToList(chars): + charRanges = [item.strip() for item in chars.split(" | ")] + rv = [] + for item in charRanges: + foundMatch = False + for regexp in (reChar, reCharRange): + match = regexp.match(item) + if match is not None: + rv.append([hexToInt(item) for item in match.groups()]) + if len(rv[-1]) == 1: + rv[-1] = rv[-1] * 2 + foundMatch = True + break + if not foundMatch: + assert len(item) == 1 + + rv.append([ord(item)] * 2) + rv = normaliseCharList(rv) + return rv + + +def normaliseCharList(charList): + charList = sorted(charList) + for item in charList: + assert item[1] >= item[0] + rv = [] + i = 0 + while i < len(charList): + j = 1 + rv.append(charList[i]) + while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1: + rv[-1][1] = charList[i + j][1] + j += 1 + i += j + return rv + +# We don't really support characters above the BMP :( +max_unicode = int("FFFF", 16) + + +def missingRanges(charList): + rv = [] + if charList[0] != 0: + rv.append([0, charList[0][0] - 1]) + for i, item in enumerate(charList[:-1]): + rv.append([item[1] + 1, charList[i + 1][0] - 1]) + if charList[-1][1] != max_unicode: + rv.append([charList[-1][1] + 1, max_unicode]) + return rv + + +def listToRegexpStr(charList): + rv = [] + for item in charList: + if item[0] == item[1]: + rv.append(escapeRegexp(chr(item[0]))) + else: + rv.append(escapeRegexp(chr(item[0])) + "-" + + escapeRegexp(chr(item[1]))) + return "[%s]" % "".join(rv) + + +def hexToInt(hex_str): + return int(hex_str, 16) + + +def escapeRegexp(string): + specialCharacters = (".", "^", "$", "*", 
"+", "?", "{", "}", + "[", "]", "|", "(", ")", "-") + for char in specialCharacters: + string = string.replace(char, "\\" + char) + + return string + +# output from the above +nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa + +nonXmlNameFirstBMPRegexp = 
re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa + +# Simpler things +nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]") + + +class InfosetFilter(object): + replacementRegexp = re.compile(r"U[\dA-F]{5,5}") + + def __init__(self, + dropXmlnsLocalName=False, + dropXmlnsAttrNs=False, + preventDoubleDashComments=False, + preventDashAtCommentEnd=False, + replaceFormFeedCharacters=True, + preventSingleQuotePubid=False): + + self.dropXmlnsLocalName = dropXmlnsLocalName + self.dropXmlnsAttrNs = dropXmlnsAttrNs + + self.preventDoubleDashComments = preventDoubleDashComments + self.preventDashAtCommentEnd = preventDashAtCommentEnd + + self.replaceFormFeedCharacters = replaceFormFeedCharacters + + self.preventSingleQuotePubid = preventSingleQuotePubid + + self.replaceCache = {} + + def coerceAttribute(self, name, namespace=None): + if self.dropXmlnsLocalName and name.startswith("xmlns:"): + warnings.warn("Attributes cannot begin with xmlns", DataLossWarning) + return None + elif (self.dropXmlnsAttrNs and + namespace == "http://www.w3.org/2000/xmlns/"): + warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning) + return None + else: + return self.toXmlName(name) + + def coerceElement(self, name): + return self.toXmlName(name) + + def coerceComment(self, data): + if self.preventDoubleDashComments: + while "--" in data: + warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) + data = data.replace("--", "- -") + if data.endswith("-"): + warnings.warn("Comments cannot end in 
a dash", DataLossWarning) + data += " " + return data + + def coerceCharacters(self, data): + if self.replaceFormFeedCharacters: + for _ in range(data.count("\x0C")): + warnings.warn("Text cannot contain U+000C", DataLossWarning) + data = data.replace("\x0C", " ") + # Other non-xml characters + return data + + def coercePubid(self, data): + dataOutput = data + for char in nonPubidCharRegexp.findall(data): + warnings.warn("Coercing non-XML pubid", DataLossWarning) + replacement = self.getReplacementCharacter(char) + dataOutput = dataOutput.replace(char, replacement) + if self.preventSingleQuotePubid and dataOutput.find("'") >= 0: + warnings.warn("Pubid cannot contain single quote", DataLossWarning) + dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'")) + return dataOutput + + def toXmlName(self, name): + nameFirst = name[0] + nameRest = name[1:] + m = nonXmlNameFirstBMPRegexp.match(nameFirst) + if m: + warnings.warn("Coercing non-XML name", DataLossWarning) + nameFirstOutput = self.getReplacementCharacter(nameFirst) + else: + nameFirstOutput = nameFirst + + nameRestOutput = nameRest + replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest)) + for char in replaceChars: + warnings.warn("Coercing non-XML name", DataLossWarning) + replacement = self.getReplacementCharacter(char) + nameRestOutput = nameRestOutput.replace(char, replacement) + return nameFirstOutput + nameRestOutput + + def getReplacementCharacter(self, char): + if char in self.replaceCache: + replacement = self.replaceCache[char] + else: + replacement = self.escapeChar(char) + return replacement + + def fromXmlName(self, name): + for item in set(self.replacementRegexp.findall(name)): + name = name.replace(item, self.unescapeChar(item)) + return name + + def escapeChar(self, char): + replacement = "U%05X" % ord(char) + self.replaceCache[char] = replacement + return replacement + + def unescapeChar(self, charcode): + return chr(int(charcode[1:], 16)) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_inputstream.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_inputstream.py new file mode 100644 index 0000000..a65e55f --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_inputstream.py @@ -0,0 +1,923 @@ +from __future__ import absolute_import, division, unicode_literals + +from pip._vendor.six import text_type, binary_type +from pip._vendor.six.moves import http_client, urllib + +import codecs +import re + +from pip._vendor import webencodings + +from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase +from .constants import _ReparseException +from . 
import _utils + +from io import StringIO + +try: + from io import BytesIO +except ImportError: + BytesIO = StringIO + +# Non-unicode versions of constants for use in the pre-parser +spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters]) +asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters]) +asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) +spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) + + +invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa + +if _utils.supports_lone_surrogates: + # Use one extra step of indirection and create surrogates with + # eval. Not using this indirection would introduce an illegal + # unicode literal on platforms not supporting such lone + # surrogates. + assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1 + invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] + + eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used + "]") +else: + invalid_unicode_re = re.compile(invalid_unicode_no_surrogate) + +non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, + 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, + 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, + 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, + 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, + 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, + 0x10FFFE, 0x10FFFF]) + +ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]") + +# Cache for charsUntil() +charsUntilRegEx = {} + + +class BufferedStream(object): + """Buffering for streams that do not have buffering of their own + + The buffer is implemented as a list of chunks on the assumption that + joining many strings will be slow since it is O(n**2) + """ + + def __init__(self, stream): + self.stream = stream + self.buffer = [] + self.position = [-1, 0] # chunk number, offset + + def tell(self): + pos = 0 + for chunk in self.buffer[:self.position[0]]: + pos += len(chunk) + pos += self.position[1] + return pos + + def seek(self, pos): + assert pos <= self._bufferedBytes() + offset = pos + i = 0 + while len(self.buffer[i]) < offset: + offset -= len(self.buffer[i]) + i += 1 + self.position = [i, offset] + + def read(self, bytes): + if not self.buffer: + return self._readStream(bytes) + elif (self.position[0] == len(self.buffer) and + self.position[1] == len(self.buffer[-1])): + return self._readStream(bytes) + else: + return self._readFromBuffer(bytes) + + def _bufferedBytes(self): + return sum([len(item) for item in self.buffer]) + + def _readStream(self, bytes): + data = self.stream.read(bytes) + self.buffer.append(data) + self.position[0] += 1 + self.position[1] = len(data) + return data + + def _readFromBuffer(self, bytes): + remainingBytes = bytes + rv = [] + bufferIndex = self.position[0] + bufferOffset = self.position[1] + while bufferIndex < len(self.buffer) and remainingBytes != 0: + assert remainingBytes > 0 + bufferedData = self.buffer[bufferIndex] + + if remainingBytes <= len(bufferedData) - bufferOffset: + bytesToRead = remainingBytes + 
self.position = [bufferIndex, bufferOffset + bytesToRead] + else: + bytesToRead = len(bufferedData) - bufferOffset + self.position = [bufferIndex, len(bufferedData)] + bufferIndex += 1 + rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead]) + remainingBytes -= bytesToRead + + bufferOffset = 0 + + if remainingBytes: + rv.append(self._readStream(remainingBytes)) + + return b"".join(rv) + + +def HTMLInputStream(source, **kwargs): + # Work around Python bug #20007: read(0) closes the connection. + # http://bugs.python.org/issue20007 + if (isinstance(source, http_client.HTTPResponse) or + # Also check for addinfourl wrapping HTTPResponse + (isinstance(source, urllib.response.addbase) and + isinstance(source.fp, http_client.HTTPResponse))): + isUnicode = False + elif hasattr(source, "read"): + isUnicode = isinstance(source.read(0), text_type) + else: + isUnicode = isinstance(source, text_type) + + if isUnicode: + encodings = [x for x in kwargs if x.endswith("_encoding")] + if encodings: + raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings) + + return HTMLUnicodeInputStream(source, **kwargs) + else: + return HTMLBinaryInputStream(source, **kwargs) + + +class HTMLUnicodeInputStream(object): + """Provides a unicode stream of characters to the HTMLTokenizer. + + This class takes care of character encoding and removing or replacing + incorrect byte-sequences and also provides column and line tracking. + + """ + + _defaultChunkSize = 10240 + + def __init__(self, source): + """Initialises the HTMLInputStream. + + HTMLInputStream(source, [encoding]) -> Normalized stream from source + for use by html5lib. + + source can be either a file-object, local filename or a string. + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element) + + """ + + if not _utils.supports_lone_surrogates: + # Such platforms will have already checked for such + # surrogate errors, so no need to do this checking. + self.reportCharacterErrors = None + elif len("\U0010FFFF") == 1: + self.reportCharacterErrors = self.characterErrorsUCS4 + else: + self.reportCharacterErrors = self.characterErrorsUCS2 + + # List of where new lines occur + self.newLines = [0] + + self.charEncoding = (lookupEncoding("utf-8"), "certain") + self.dataStream = self.openStream(source) + + self.reset() + + def reset(self): + self.chunk = "" + self.chunkSize = 0 + self.chunkOffset = 0 + self.errors = [] + + # number of (complete) lines in previous chunks + self.prevNumLines = 0 + # number of columns in the last line of the previous chunk + self.prevNumCols = 0 + + # Deal with CR LF and surrogates split over chunk boundaries + self._bufferedCharacter = None + + def openStream(self, source): + """Produces a file object from source. + + source can be either a file object, local filename or a string. 
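# --- Editor's sketch (not part of the vendored html5lib sources): exercising
# the unicode input stream documented above. Characters are read one at a
# time, charsUntil() consumes runs cheaply, and (line, column) positions are
# tracked. The import path is the pip-vendored location used in this diff.
from pip._vendor.html5lib._inputstream import HTMLInputStream

stream = HTMLInputStream('<p>\nhi</p>')     # unicode input -> unicode stream
assert stream.char() == '<'
assert stream.charsUntil('\n') == 'p>'      # stop before the newline
assert stream.position() == (1, 3)          # still on line 1, column 3
# --- end of editor's sketch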
+ + """ + # Already a file object + if hasattr(source, 'read'): + stream = source + else: + stream = StringIO(source) + + return stream + + def _position(self, offset): + chunk = self.chunk + nLines = chunk.count('\n', 0, offset) + positionLine = self.prevNumLines + nLines + lastLinePos = chunk.rfind('\n', 0, offset) + if lastLinePos == -1: + positionColumn = self.prevNumCols + offset + else: + positionColumn = offset - (lastLinePos + 1) + return (positionLine, positionColumn) + + def position(self): + """Returns (line, col) of the current position in the stream.""" + line, col = self._position(self.chunkOffset) + return (line + 1, col) + + def char(self): + """ Read one character from the stream or queue if available. Return + EOF when EOF is reached. + """ + # Read a new chunk from the input stream if necessary + if self.chunkOffset >= self.chunkSize: + if not self.readChunk(): + return EOF + + chunkOffset = self.chunkOffset + char = self.chunk[chunkOffset] + self.chunkOffset = chunkOffset + 1 + + return char + + def readChunk(self, chunkSize=None): + if chunkSize is None: + chunkSize = self._defaultChunkSize + + self.prevNumLines, self.prevNumCols = self._position(self.chunkSize) + + self.chunk = "" + self.chunkSize = 0 + self.chunkOffset = 0 + + data = self.dataStream.read(chunkSize) + + # Deal with CR LF and surrogates broken across chunks + if self._bufferedCharacter: + data = self._bufferedCharacter + data + self._bufferedCharacter = None + elif not data: + # We have no more data, bye-bye stream + return False + + if len(data) > 1: + lastv = ord(data[-1]) + if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF: + self._bufferedCharacter = data[-1] + data = data[:-1] + + if self.reportCharacterErrors: + self.reportCharacterErrors(data) + + # Replace invalid characters + data = data.replace("\r\n", "\n") + data = data.replace("\r", "\n") + + self.chunk = data + self.chunkSize = len(data) + + return True + + def characterErrorsUCS4(self, data): + for _ in range(len(invalid_unicode_re.findall(data))): + self.errors.append("invalid-codepoint") + + def characterErrorsUCS2(self, data): + # Someone picked the wrong compile option + # You lose + skip = False + for match in invalid_unicode_re.finditer(data): + if skip: + continue + codepoint = ord(match.group()) + pos = match.start() + # Pretty sure there should be endianness issues here + if _utils.isSurrogatePair(data[pos:pos + 2]): + # We have a surrogate pair! + char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2]) + if char_val in non_bmp_invalid_codepoints: + self.errors.append("invalid-codepoint") + skip = True + elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and + pos == len(data) - 1): + self.errors.append("invalid-codepoint") + else: + skip = False + self.errors.append("invalid-codepoint") + + def charsUntil(self, characters, opposite=False): + """ Returns a string of characters from the stream up to but not + including any character in 'characters' or EOF. 'characters' must be + a container that supports the 'in' method and iteration over its + characters. 
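# --- Editor's sketch (not part of the vendored html5lib sources): the newline
# normalization performed by readChunk() above. CR LF pairs and bare CRs both
# become LF, so downstream tokenizing only ever sees '\n'.
def normalize_newlines(data):
    data = data.replace('\r\n', '\n')
    return data.replace('\r', '\n')

assert normalize_newlines('a\r\nb\rc\n') == 'a\nb\nc\n'
# --- end of editor's sketch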
+ """ + + # Use a cache of regexps to find the required characters + try: + chars = charsUntilRegEx[(characters, opposite)] + except KeyError: + if __debug__: + for c in characters: + assert(ord(c) < 128) + regex = "".join(["\\x%02x" % ord(c) for c in characters]) + if not opposite: + regex = "^%s" % regex + chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex) + + rv = [] + + while True: + # Find the longest matching prefix + m = chars.match(self.chunk, self.chunkOffset) + if m is None: + # If nothing matched, and it wasn't because we ran out of chunk, + # then stop + if self.chunkOffset != self.chunkSize: + break + else: + end = m.end() + # If not the whole chunk matched, return everything + # up to the part that didn't match + if end != self.chunkSize: + rv.append(self.chunk[self.chunkOffset:end]) + self.chunkOffset = end + break + # If the whole remainder of the chunk matched, + # use it all and read the next chunk + rv.append(self.chunk[self.chunkOffset:]) + if not self.readChunk(): + # Reached EOF + break + + r = "".join(rv) + return r + + def unget(self, char): + # Only one character is allowed to be ungotten at once - it must + # be consumed again before any further call to unget + if char is not None: + if self.chunkOffset == 0: + # unget is called quite rarely, so it's a good idea to do + # more work here if it saves a bit of work in the frequently + # called char and charsUntil. + # So, just prepend the ungotten character onto the current + # chunk: + self.chunk = char + self.chunk + self.chunkSize += 1 + else: + self.chunkOffset -= 1 + assert self.chunk[self.chunkOffset] == char + + +class HTMLBinaryInputStream(HTMLUnicodeInputStream): + """Provides a unicode stream of characters to the HTMLTokenizer. + + This class takes care of character encoding and removing or replacing + incorrect byte-sequences and also provides column and line tracking. + + """ + + def __init__(self, source, override_encoding=None, transport_encoding=None, + same_origin_parent_encoding=None, likely_encoding=None, + default_encoding="windows-1252", useChardet=True): + """Initialises the HTMLInputStream. + + HTMLInputStream(source, [encoding]) -> Normalized stream from source + for use by html5lib. + + source can be either a file-object, local filename or a string. + + The optional encoding parameter must be a string that indicates + the encoding. 
If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element) + + """ + # Raw Stream - for unicode objects this will encode to utf-8 and set + # self.charEncoding as appropriate + self.rawStream = self.openStream(source) + + HTMLUnicodeInputStream.__init__(self, self.rawStream) + + # Encoding Information + # Number of bytes to use when looking for a meta element with + # encoding information + self.numBytesMeta = 1024 + # Number of bytes to use when using detecting encoding using chardet + self.numBytesChardet = 100 + # Things from args + self.override_encoding = override_encoding + self.transport_encoding = transport_encoding + self.same_origin_parent_encoding = same_origin_parent_encoding + self.likely_encoding = likely_encoding + self.default_encoding = default_encoding + + # Determine encoding + self.charEncoding = self.determineEncoding(useChardet) + assert self.charEncoding[0] is not None + + # Call superclass + self.reset() + + def reset(self): + self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace') + HTMLUnicodeInputStream.reset(self) + + def openStream(self, source): + """Produces a file object from source. + + source can be either a file object, local filename or a string. + + """ + # Already a file object + if hasattr(source, 'read'): + stream = source + else: + stream = BytesIO(source) + + try: + stream.seek(stream.tell()) + except: # pylint:disable=bare-except + stream = BufferedStream(stream) + + return stream + + def determineEncoding(self, chardet=True): + # BOMs take precedence over everything + # This will also read past the BOM if present + charEncoding = self.detectBOM(), "certain" + if charEncoding[0] is not None: + return charEncoding + + # If we've been overriden, we've been overriden + charEncoding = lookupEncoding(self.override_encoding), "certain" + if charEncoding[0] is not None: + return charEncoding + + # Now check the transport layer + charEncoding = lookupEncoding(self.transport_encoding), "certain" + if charEncoding[0] is not None: + return charEncoding + + # Look for meta elements with encoding information + charEncoding = self.detectEncodingMeta(), "tentative" + if charEncoding[0] is not None: + return charEncoding + + # Parent document encoding + charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative" + if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"): + return charEncoding + + # "likely" encoding + charEncoding = lookupEncoding(self.likely_encoding), "tentative" + if charEncoding[0] is not None: + return charEncoding + + # Guess with chardet, if available + if chardet: + try: + from pip._vendor.chardet.universaldetector import UniversalDetector + except ImportError: + pass + else: + buffers = [] + detector = UniversalDetector() + while not detector.done: + buffer = self.rawStream.read(self.numBytesChardet) + assert isinstance(buffer, bytes) + if not buffer: + break + buffers.append(buffer) + detector.feed(buffer) + detector.close() + encoding = lookupEncoding(detector.result['encoding']) + self.rawStream.seek(0) + if encoding is not None: + return encoding, "tentative" + + # Try the default encoding + charEncoding = lookupEncoding(self.default_encoding), "tentative" + if charEncoding[0] is not None: + return charEncoding + + # Fallback to html5lib's default if even that hasn't worked + return lookupEncoding("windows-1252"), "tentative" + + def changeEncoding(self, newEncoding): + assert self.charEncoding[1] != 
"certain" + newEncoding = lookupEncoding(newEncoding) + if newEncoding is None: + return + if newEncoding.name in ("utf-16be", "utf-16le"): + newEncoding = lookupEncoding("utf-8") + assert newEncoding is not None + elif newEncoding == self.charEncoding[0]: + self.charEncoding = (self.charEncoding[0], "certain") + else: + self.rawStream.seek(0) + self.charEncoding = (newEncoding, "certain") + self.reset() + raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding)) + + def detectBOM(self): + """Attempts to detect at BOM at the start of the stream. If + an encoding can be determined from the BOM return the name of the + encoding otherwise return None""" + bomDict = { + codecs.BOM_UTF8: 'utf-8', + codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be', + codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be' + } + + # Go to beginning of file and read in 4 bytes + string = self.rawStream.read(4) + assert isinstance(string, bytes) + + # Try detecting the BOM using bytes from the string + encoding = bomDict.get(string[:3]) # UTF-8 + seek = 3 + if not encoding: + # Need to detect UTF-32 before UTF-16 + encoding = bomDict.get(string) # UTF-32 + seek = 4 + if not encoding: + encoding = bomDict.get(string[:2]) # UTF-16 + seek = 2 + + # Set the read position past the BOM if one was found, otherwise + # set it to the start of the stream + if encoding: + self.rawStream.seek(seek) + return lookupEncoding(encoding) + else: + self.rawStream.seek(0) + return None + + def detectEncodingMeta(self): + """Report the encoding declared by the meta element + """ + buffer = self.rawStream.read(self.numBytesMeta) + assert isinstance(buffer, bytes) + parser = EncodingParser(buffer) + self.rawStream.seek(0) + encoding = parser.getEncoding() + + if encoding is not None and encoding.name in ("utf-16be", "utf-16le"): + encoding = lookupEncoding("utf-8") + + return encoding + + +class EncodingBytes(bytes): + """String-like object with an associated position and various extra methods + If the position is ever greater than the string length then an exception is + raised""" + def __new__(self, value): + assert isinstance(value, bytes) + return bytes.__new__(self, value.lower()) + + def __init__(self, value): + # pylint:disable=unused-argument + self._position = -1 + + def __iter__(self): + return self + + def __next__(self): + p = self._position = self._position + 1 + if p >= len(self): + raise StopIteration + elif p < 0: + raise TypeError + return self[p:p + 1] + + def next(self): + # Py2 compat + return self.__next__() + + def previous(self): + p = self._position + if p >= len(self): + raise StopIteration + elif p < 0: + raise TypeError + self._position = p = p - 1 + return self[p:p + 1] + + def setPosition(self, position): + if self._position >= len(self): + raise StopIteration + self._position = position + + def getPosition(self): + if self._position >= len(self): + raise StopIteration + if self._position >= 0: + return self._position + else: + return None + + position = property(getPosition, setPosition) + + def getCurrentByte(self): + return self[self.position:self.position + 1] + + currentByte = property(getCurrentByte) + + def skip(self, chars=spaceCharactersBytes): + """Skip past a list of characters""" + p = self.position # use property for the error-checking + while p < len(self): + c = self[p:p + 1] + if c not in chars: + self._position = p + return c + p += 1 + self._position = p + return None + + def skipUntil(self, chars): + p = self.position + while p 
< len(self):
+            c = self[p:p + 1]
+            if c in chars:
+                self._position = p
+                return c
+            p += 1
+        self._position = p
+        return None
+
+    def matchBytes(self, bytes):
+        """Look for a sequence of bytes at the start of a string. If the bytes
+        are found return True and advance the position to the byte after the
+        match. Otherwise return False and leave the position alone"""
+        p = self.position
+        data = self[p:p + len(bytes)]
+        rv = data.startswith(bytes)
+        if rv:
+            self.position += len(bytes)
+        return rv
+
+    def jumpTo(self, bytes):
+        """Look for the next sequence of bytes matching a given sequence. If
+        a match is found advance the position to the last byte of the match"""
+        newPosition = self[self.position:].find(bytes)
+        if newPosition > -1:
+            # XXX: This is ugly, but I can't see a nicer way to fix this.
+            if self._position == -1:
+                self._position = 0
+            self._position += (newPosition + len(bytes) - 1)
+            return True
+        else:
+            raise StopIteration
+
+
+class EncodingParser(object):
+    """Mini parser for detecting character encoding from meta elements"""
+
+    def __init__(self, data):
+        """string - the data to work on for encoding detection"""
+        self.data = EncodingBytes(data)
+        self.encoding = None
+
+    def getEncoding(self):
+        methodDispatch = (
+            (b"<!--", self.handleComment),
+            (b"<meta", self.handleMeta),
+            (b"</", self.handlePossibleEndTag),
+            (b"<!", self.handleOther),
+            (b"<?", self.handleOther),
+            (b"<", self.handlePossibleStartTag))
+        for _ in self.data:
+            keepParsing = True
+            for key, method in methodDispatch:
+                if self.data.matchBytes(key):
+                    try:
+                        keepParsing = method()
+                        break
+                    except StopIteration:
+                        keepParsing = False
+                        break
+            if not keepParsing:
+                break
+
+        return self.encoding
+
+    def handleComment(self):
+        """Skip over comments"""
+        return self.data.jumpTo(b"-->")
+
+    def handleMeta(self):
+        if self.data.currentByte not in spaceCharactersBytes:
+            # if we have <meta not followed by a space so just keep
+            # looking as if it wasn't a meta start tag
+            return True
+
+        # We have a valid meta element we want to search for attributes
+        hasPragma = False
+        pendingEncoding = None
+        while True:
+            # Try to find the next attribute after the current position
+            attr = self.getAttribute()
+            if attr is None:
+                return True
+            else:
+                if attr[0] == b"http-equiv":
+                    hasPragma = attr[1] == b"content-type"
+                    if hasPragma and pendingEncoding is not None:
+                        self.encoding = pendingEncoding
+                        return False
+                elif attr[0] == b"charset":
+                    tentativeEncoding = attr[1]
+                    codec = lookupEncoding(tentativeEncoding)
+                    if codec is not None:
+                        self.encoding = codec
+                        return False
+                elif attr[0] == b"content":
+                    contentParser = ContentAttrParser(EncodingBytes(attr[1]))
+                    tentativeEncoding = contentParser.parse()
+                    if tentativeEncoding is not None:
+                        codec = lookupEncoding(tentativeEncoding)
+                        if codec is not None:
+                            if hasPragma:
+                                self.encoding = codec
+                                return False
+                            else:
+                                pendingEncoding = codec
+
+    def handlePossibleStartTag(self):
+        return self.handlePossibleTag(False)
+
+    def handlePossibleEndTag(self):
+        next(self.data)
+        return self.handlePossibleTag(True)
+
+    def handlePossibleTag(self, endTag):
+        data = self.data
+        if data.currentByte not in asciiLettersBytes:
+            # If the next byte is not an ascii letter either ignore this
+            # fragment (possible start tag case) or treat it according to
+            # handleOther
+            if endTag:
+                data.previous()
+                self.handleOther()
+            return True
+
+        c = data.skipUntil(spacesAngleBrackets)
+        if c == b"<":
+            # return to the first step in the overall "two step" algorithm
+            # reprocessing the < byte
+            data.previous()
+        else:
+            # Move to the first attribute on the tag
+            data.jumpTo(b">")
+        return True
+
+    def handleOther(self):
+        return self.data.jumpTo(b">")
+
+    def getAttribute(self):
+        """Return a name,value pair for the next attribute in the stream,
+        if one is found, or None"""
+        data = self.data
+        # Step 1 (skip chars)
+        c = data.skip(spaceCharactersBytes | frozenset([b"/"]))
+        assert c is None or len(c) == 1
+        # Step 2
+        if c in (b">", None):
+            return None
+        # Step 3
+        attrName = []
+        attrValue = []
+        # Step 4 attribute name
+        while True:
+            if c == b"=" and attrName:
+                break
+            elif c in spaceCharactersBytes:
+                # Step 6!
+ c = data.skip() + break + elif c in (b"/", b">"): + return b"".join(attrName), b"" + elif c in asciiUppercaseBytes: + attrName.append(c.lower()) + elif c is None: + return None + else: + attrName.append(c) + # Step 5 + c = next(data) + # Step 7 + if c != b"=": + data.previous() + return b"".join(attrName), b"" + # Step 8 + next(data) + # Step 9 + c = data.skip() + # Step 10 + if c in (b"'", b'"'): + # 10.1 + quoteChar = c + while True: + # 10.2 + c = next(data) + # 10.3 + if c == quoteChar: + next(data) + return b"".join(attrName), b"".join(attrValue) + # 10.4 + elif c in asciiUppercaseBytes: + attrValue.append(c.lower()) + # 10.5 + else: + attrValue.append(c) + elif c == b">": + return b"".join(attrName), b"" + elif c in asciiUppercaseBytes: + attrValue.append(c.lower()) + elif c is None: + return None + else: + attrValue.append(c) + # Step 11 + while True: + c = next(data) + if c in spacesAngleBrackets: + return b"".join(attrName), b"".join(attrValue) + elif c in asciiUppercaseBytes: + attrValue.append(c.lower()) + elif c is None: + return None + else: + attrValue.append(c) + + +class ContentAttrParser(object): + def __init__(self, data): + assert isinstance(data, bytes) + self.data = data + + def parse(self): + try: + # Check if the attr name is charset + # otherwise return + self.data.jumpTo(b"charset") + self.data.position += 1 + self.data.skip() + if not self.data.currentByte == b"=": + # If there is no = sign keep looking for attrs + return None + self.data.position += 1 + self.data.skip() + # Look for an encoding between matching quote marks + if self.data.currentByte in (b'"', b"'"): + quoteMark = self.data.currentByte + self.data.position += 1 + oldPosition = self.data.position + if self.data.jumpTo(quoteMark): + return self.data[oldPosition:self.data.position] + else: + return None + else: + # Unquoted value + oldPosition = self.data.position + try: + self.data.skipUntil(spaceCharactersBytes) + return self.data[oldPosition:self.data.position] + except StopIteration: + # Return the whole remaining value + return self.data[oldPosition:] + except StopIteration: + return None + + +def lookupEncoding(encoding): + """Return the python codec name corresponding to an encoding or None if the + string doesn't correspond to a valid encoding.""" + if isinstance(encoding, binary_type): + try: + encoding = encoding.decode("ascii") + except UnicodeDecodeError: + return None + + if encoding is not None: + try: + return webencodings.lookup(encoding) + except AttributeError: + return None + else: + return None diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_tokenizer.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_tokenizer.py new file mode 100644 index 0000000..178f6e7 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_tokenizer.py @@ -0,0 +1,1721 @@ +from __future__ import absolute_import, division, unicode_literals + +from pip._vendor.six import unichr as chr + +from collections import deque + +from .constants import spaceCharacters +from .constants import entities +from .constants import asciiLetters, asciiUpper2Lower +from .constants import digits, hexDigits, EOF +from .constants import tokenTypes, tagTokenTypes +from .constants import replacementCharacters + +from ._inputstream import HTMLInputStream + +from ._trie import Trie + +entitiesTrie = Trie(entities) + + +class HTMLTokenizer(object): + """ This class takes care of tokenizing HTML. + + * self.currentToken + Holds the token that is currently being processed. 
+ + * self.state + Holds a reference to the method to be invoked... XXX + + * self.stream + Points to HTMLInputStream object. + """ + + def __init__(self, stream, parser=None, **kwargs): + + self.stream = HTMLInputStream(stream, **kwargs) + self.parser = parser + + # Setup the initial tokenizer state + self.escapeFlag = False + self.lastFourChars = [] + self.state = self.dataState + self.escape = False + + # The current token being created + self.currentToken = None + super(HTMLTokenizer, self).__init__() + + def __iter__(self): + """ This is where the magic happens. + + We do our usually processing through the states and when we have a token + to return we yield the token which pauses processing until the next token + is requested. + """ + self.tokenQueue = deque([]) + # Start processing. When EOF is reached self.state will return False + # instead of True and the loop will terminate. + while self.state(): + while self.stream.errors: + yield {"type": tokenTypes["ParseError"], "data": self.stream.errors.pop(0)} + while self.tokenQueue: + yield self.tokenQueue.popleft() + + def consumeNumberEntity(self, isHex): + """This function returns either U+FFFD or the character based on the + decimal or hexadecimal representation. It also discards ";" if present. + If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked. + """ + + allowed = digits + radix = 10 + if isHex: + allowed = hexDigits + radix = 16 + + charStack = [] + + # Consume all the characters that are in range while making sure we + # don't hit an EOF. + c = self.stream.char() + while c in allowed and c is not EOF: + charStack.append(c) + c = self.stream.char() + + # Convert the set of characters consumed to an int. + charAsInt = int("".join(charStack), radix) + + # Certain characters get replaced with others + if charAsInt in replacementCharacters: + char = replacementCharacters[charAsInt] + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "illegal-codepoint-for-numeric-entity", + "datavars": {"charAsInt": charAsInt}}) + elif ((0xD800 <= charAsInt <= 0xDFFF) or + (charAsInt > 0x10FFFF)): + char = "\uFFFD" + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "illegal-codepoint-for-numeric-entity", + "datavars": {"charAsInt": charAsInt}}) + else: + # Should speed up this check somehow (e.g. move the set to a constant) + if ((0x0001 <= charAsInt <= 0x0008) or + (0x000E <= charAsInt <= 0x001F) or + (0x007F <= charAsInt <= 0x009F) or + (0xFDD0 <= charAsInt <= 0xFDEF) or + charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, + 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, + 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, + 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, + 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, + 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, + 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, + 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, + 0xFFFFF, 0x10FFFE, 0x10FFFF])): + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": + "illegal-codepoint-for-numeric-entity", + "datavars": {"charAsInt": charAsInt}}) + try: + # Try/except needed as UCS-2 Python builds' unichar only works + # within the BMP. + char = chr(charAsInt) + except ValueError: + v = charAsInt - 0x10000 + char = chr(0xD800 | (v >> 10)) + chr(0xDC00 | (v & 0x3FF)) + + # Discard the ; if present. Otherwise, put it back on the queue and + # invoke parseError on parser. 
+ if c != ";": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "numeric-entity-without-semicolon"}) + self.stream.unget(c) + + return char + + def consumeEntity(self, allowedChar=None, fromAttribute=False): + # Initialise to the default output for when no entity is matched + output = "&" + + charStack = [self.stream.char()] + if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or + (allowedChar is not None and allowedChar == charStack[0])): + self.stream.unget(charStack[0]) + + elif charStack[0] == "#": + # Read the next character to see if it's hex or decimal + hex = False + charStack.append(self.stream.char()) + if charStack[-1] in ("x", "X"): + hex = True + charStack.append(self.stream.char()) + + # charStack[-1] should be the first digit + if (hex and charStack[-1] in hexDigits) \ + or (not hex and charStack[-1] in digits): + # At least one digit found, so consume the whole number + self.stream.unget(charStack[-1]) + output = self.consumeNumberEntity(hex) + else: + # No digits found + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "expected-numeric-entity"}) + self.stream.unget(charStack.pop()) + output = "&" + "".join(charStack) + + else: + # At this point in the process might have named entity. Entities + # are stored in the global variable "entities". + # + # Consume characters and compare to these to a substring of the + # entity names in the list until the substring no longer matches. + while (charStack[-1] is not EOF): + if not entitiesTrie.has_keys_with_prefix("".join(charStack)): + break + charStack.append(self.stream.char()) + + # At this point we have a string that starts with some characters + # that may match an entity + # Try to find the longest entity the string will match to take care + # of ¬i for instance. + try: + entityName = entitiesTrie.longest_prefix("".join(charStack[:-1])) + entityLength = len(entityName) + except KeyError: + entityName = None + + if entityName is not None: + if entityName[-1] != ";": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "named-entity-without-semicolon"}) + if (entityName[-1] != ";" and fromAttribute and + (charStack[entityLength] in asciiLetters or + charStack[entityLength] in digits or + charStack[entityLength] == "=")): + self.stream.unget(charStack.pop()) + output = "&" + "".join(charStack) + else: + output = entities[entityName] + self.stream.unget(charStack.pop()) + output += "".join(charStack[entityLength:]) + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-named-entity"}) + self.stream.unget(charStack.pop()) + output = "&" + "".join(charStack) + + if fromAttribute: + self.currentToken["data"][-1][1] += output + else: + if output in spaceCharacters: + tokenType = "SpaceCharacters" + else: + tokenType = "Characters" + self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output}) + + def processEntityInAttribute(self, allowedChar): + """This method replaces the need for "entityInAttributeValueState". + """ + self.consumeEntity(allowedChar=allowedChar, fromAttribute=True) + + def emitCurrentToken(self): + """This method is a generic handler for emitting the tags. It also sets + the state to "data" because that's what's needed after a token has been + emitted. 
+ """ + token = self.currentToken + # Add token to the queue to be yielded + if (token["type"] in tagTokenTypes): + token["name"] = token["name"].translate(asciiUpper2Lower) + if token["type"] == tokenTypes["EndTag"]: + if token["data"]: + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "attributes-in-end-tag"}) + if token["selfClosing"]: + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "self-closing-flag-on-end-tag"}) + self.tokenQueue.append(token) + self.state = self.dataState + + # Below are the various tokenizer states worked out. + def dataState(self): + data = self.stream.char() + if data == "&": + self.state = self.entityDataState + elif data == "<": + self.state = self.tagOpenState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\u0000"}) + elif data is EOF: + # Tokenization ends. + return False + elif data in spaceCharacters: + # Directly after emitting a token you switch back to the "data + # state". At that point spaceCharacters are important so they are + # emitted separately. + self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": + data + self.stream.charsUntil(spaceCharacters, True)}) + # No need to update lastFourChars here, since the first space will + # have already been appended to lastFourChars and will have broken + # any sequences + else: + chars = self.stream.charsUntil(("&", "<", "\u0000")) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + chars}) + return True + + def entityDataState(self): + self.consumeEntity() + self.state = self.dataState + return True + + def rcdataState(self): + data = self.stream.char() + if data == "&": + self.state = self.characterReferenceInRcdata + elif data == "<": + self.state = self.rcdataLessThanSignState + elif data == EOF: + # Tokenization ends. + return False + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + elif data in spaceCharacters: + # Directly after emitting a token you switch back to the "data + # state". At that point spaceCharacters are important so they are + # emitted separately. + self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": + data + self.stream.charsUntil(spaceCharacters, True)}) + # No need to update lastFourChars here, since the first space will + # have already been appended to lastFourChars and will have broken + # any sequences + else: + chars = self.stream.charsUntil(("&", "<", "\u0000")) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + chars}) + return True + + def characterReferenceInRcdata(self): + self.consumeEntity() + self.state = self.rcdataState + return True + + def rawtextState(self): + data = self.stream.char() + if data == "<": + self.state = self.rawtextLessThanSignState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + elif data == EOF: + # Tokenization ends. 
+ return False + else: + chars = self.stream.charsUntil(("<", "\u0000")) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + chars}) + return True + + def scriptDataState(self): + data = self.stream.char() + if data == "<": + self.state = self.scriptDataLessThanSignState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + elif data == EOF: + # Tokenization ends. + return False + else: + chars = self.stream.charsUntil(("<", "\u0000")) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + chars}) + return True + + def plaintextState(self): + data = self.stream.char() + if data == EOF: + # Tokenization ends. + return False + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + self.stream.charsUntil("\u0000")}) + return True + + def tagOpenState(self): + data = self.stream.char() + if data == "!": + self.state = self.markupDeclarationOpenState + elif data == "/": + self.state = self.closeTagOpenState + elif data in asciiLetters: + self.currentToken = {"type": tokenTypes["StartTag"], + "name": data, "data": [], + "selfClosing": False, + "selfClosingAcknowledged": False} + self.state = self.tagNameState + elif data == ">": + # XXX In theory it could be something besides a tag name. But + # do we really care? + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-tag-name-but-got-right-bracket"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"}) + self.state = self.dataState + elif data == "?": + # XXX In theory it could be something besides a tag name. But + # do we really care? 
+ self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-tag-name-but-got-question-mark"}) + self.stream.unget(data) + self.state = self.bogusCommentState + else: + # XXX + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-tag-name"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) + self.stream.unget(data) + self.state = self.dataState + return True + + def closeTagOpenState(self): + data = self.stream.char() + if data in asciiLetters: + self.currentToken = {"type": tokenTypes["EndTag"], "name": data, + "data": [], "selfClosing": False} + self.state = self.tagNameState + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-closing-tag-but-got-right-bracket"}) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-closing-tag-but-got-eof"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "": + self.emitCurrentToken() + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-tag-name"}) + self.state = self.dataState + elif data == "/": + self.state = self.selfClosingStartTagState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["name"] += "\uFFFD" + else: + self.currentToken["name"] += data + # (Don't use charsUntil here, because tag names are + # very short and it's faster to not do anything fancy) + return True + + def rcdataLessThanSignState(self): + data = self.stream.char() + if data == "/": + self.temporaryBuffer = "" + self.state = self.rcdataEndTagOpenState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) + self.stream.unget(data) + self.state = self.rcdataState + return True + + def rcdataEndTagOpenState(self): + data = self.stream.char() + if data in asciiLetters: + self.temporaryBuffer += data + self.state = self.rcdataEndTagNameState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "" and appropriate: + self.currentToken = {"type": tokenTypes["EndTag"], + "name": self.temporaryBuffer, + "data": [], "selfClosing": False} + self.emitCurrentToken() + self.state = self.dataState + elif data in asciiLetters: + self.temporaryBuffer += data + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "" and appropriate: + self.currentToken = {"type": tokenTypes["EndTag"], + "name": self.temporaryBuffer, + "data": [], "selfClosing": False} + self.emitCurrentToken() + self.state = self.dataState + elif data in asciiLetters: + self.temporaryBuffer += data + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "" and appropriate: + self.currentToken = {"type": tokenTypes["EndTag"], + "name": self.temporaryBuffer, + "data": [], "selfClosing": False} + self.emitCurrentToken() + self.state = self.dataState + elif data in asciiLetters: + self.temporaryBuffer += data + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) + self.state = self.scriptDataState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + self.state = self.scriptDataEscapedState + elif data == EOF: + self.state = self.dataState + else: + 
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) + self.state = self.scriptDataEscapedState + return True + + def scriptDataEscapedLessThanSignState(self): + data = self.stream.char() + if data == "/": + self.temporaryBuffer = "" + self.state = self.scriptDataEscapedEndTagOpenState + elif data in asciiLetters: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<" + data}) + self.temporaryBuffer = data + self.state = self.scriptDataDoubleEscapeStartState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) + self.stream.unget(data) + self.state = self.scriptDataEscapedState + return True + + def scriptDataEscapedEndTagOpenState(self): + data = self.stream.char() + if data in asciiLetters: + self.temporaryBuffer = data + self.state = self.scriptDataEscapedEndTagNameState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "" and appropriate: + self.currentToken = {"type": tokenTypes["EndTag"], + "name": self.temporaryBuffer, + "data": [], "selfClosing": False} + self.emitCurrentToken() + self.state = self.dataState + elif data in asciiLetters: + self.temporaryBuffer += data + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": ""))): + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) + if self.temporaryBuffer.lower() == "script": + self.state = self.scriptDataDoubleEscapedState + else: + self.state = self.scriptDataEscapedState + elif data in asciiLetters: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) + self.temporaryBuffer += data + else: + self.stream.unget(data) + self.state = self.scriptDataEscapedState + return True + + def scriptDataDoubleEscapedState(self): + data = self.stream.char() + if data == "-": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) + self.state = self.scriptDataDoubleEscapedDashState + elif data == "<": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) + self.state = self.scriptDataDoubleEscapedLessThanSignState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + elif data == EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-script-in-script"}) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) + return True + + def scriptDataDoubleEscapedDashState(self): + data = self.stream.char() + if data == "-": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) + self.state = self.scriptDataDoubleEscapedDashDashState + elif data == "<": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) + self.state = self.scriptDataDoubleEscapedLessThanSignState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + self.state = self.scriptDataDoubleEscapedState + elif data == EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-script-in-script"}) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) + self.state = self.scriptDataDoubleEscapedState + return True + + def scriptDataDoubleEscapedDashDashState(self): + data = self.stream.char() + if data == "-": + 
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) + elif data == "<": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) + self.state = self.scriptDataDoubleEscapedLessThanSignState + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) + self.state = self.scriptDataState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + self.state = self.scriptDataDoubleEscapedState + elif data == EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-script-in-script"}) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) + self.state = self.scriptDataDoubleEscapedState + return True + + def scriptDataDoubleEscapedLessThanSignState(self): + data = self.stream.char() + if data == "/": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "/"}) + self.temporaryBuffer = "" + self.state = self.scriptDataDoubleEscapeEndState + else: + self.stream.unget(data) + self.state = self.scriptDataDoubleEscapedState + return True + + def scriptDataDoubleEscapeEndState(self): + data = self.stream.char() + if data in (spaceCharacters | frozenset(("/", ">"))): + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) + if self.temporaryBuffer.lower() == "script": + self.state = self.scriptDataEscapedState + else: + self.state = self.scriptDataDoubleEscapedState + elif data in asciiLetters: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) + self.temporaryBuffer += data + else: + self.stream.unget(data) + self.state = self.scriptDataDoubleEscapedState + return True + + def beforeAttributeNameState(self): + data = self.stream.char() + if data in spaceCharacters: + self.stream.charsUntil(spaceCharacters, True) + elif data in asciiLetters: + self.currentToken["data"].append([data, ""]) + self.state = self.attributeNameState + elif data == ">": + self.emitCurrentToken() + elif data == "/": + self.state = self.selfClosingStartTagState + elif data in ("'", '"', "=", "<"): + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "invalid-character-in-attribute-name"}) + self.currentToken["data"].append([data, ""]) + self.state = self.attributeNameState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"].append(["\uFFFD", ""]) + self.state = self.attributeNameState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-attribute-name-but-got-eof"}) + self.state = self.dataState + else: + self.currentToken["data"].append([data, ""]) + self.state = self.attributeNameState + return True + + def attributeNameState(self): + data = self.stream.char() + leavingThisState = True + emitToken = False + if data == "=": + self.state = self.beforeAttributeValueState + elif data in asciiLetters: + self.currentToken["data"][-1][0] += data +\ + self.stream.charsUntil(asciiLetters, True) + leavingThisState = False + elif data == ">": + # XXX If we emit here the attributes are converted to a dict + # without being checked and when the code below runs we error + # because data is a dict not a list + emitToken = True + elif data in spaceCharacters: + self.state = self.afterAttributeNameState + elif data == "/": + self.state = 
self.selfClosingStartTagState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"][-1][0] += "\uFFFD" + leavingThisState = False + elif data in ("'", '"', "<"): + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": + "invalid-character-in-attribute-name"}) + self.currentToken["data"][-1][0] += data + leavingThisState = False + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "eof-in-attribute-name"}) + self.state = self.dataState + else: + self.currentToken["data"][-1][0] += data + leavingThisState = False + + if leavingThisState: + # Attributes are not dropped at this stage. That happens when the + # start tag token is emitted so values can still be safely appended + # to attributes, but we do want to report the parse error in time. + self.currentToken["data"][-1][0] = ( + self.currentToken["data"][-1][0].translate(asciiUpper2Lower)) + for name, _ in self.currentToken["data"][:-1]: + if self.currentToken["data"][-1][0] == name: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "duplicate-attribute"}) + break + # XXX Fix for above XXX + if emitToken: + self.emitCurrentToken() + return True + + def afterAttributeNameState(self): + data = self.stream.char() + if data in spaceCharacters: + self.stream.charsUntil(spaceCharacters, True) + elif data == "=": + self.state = self.beforeAttributeValueState + elif data == ">": + self.emitCurrentToken() + elif data in asciiLetters: + self.currentToken["data"].append([data, ""]) + self.state = self.attributeNameState + elif data == "/": + self.state = self.selfClosingStartTagState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"].append(["\uFFFD", ""]) + self.state = self.attributeNameState + elif data in ("'", '"', "<"): + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "invalid-character-after-attribute-name"}) + self.currentToken["data"].append([data, ""]) + self.state = self.attributeNameState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-end-of-tag-but-got-eof"}) + self.state = self.dataState + else: + self.currentToken["data"].append([data, ""]) + self.state = self.attributeNameState + return True + + def beforeAttributeValueState(self): + data = self.stream.char() + if data in spaceCharacters: + self.stream.charsUntil(spaceCharacters, True) + elif data == "\"": + self.state = self.attributeValueDoubleQuotedState + elif data == "&": + self.state = self.attributeValueUnQuotedState + self.stream.unget(data) + elif data == "'": + self.state = self.attributeValueSingleQuotedState + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-attribute-value-but-got-right-bracket"}) + self.emitCurrentToken() + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"][-1][1] += "\uFFFD" + self.state = self.attributeValueUnQuotedState + elif data in ("=", "<", "`"): + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "equals-in-unquoted-attribute-value"}) + self.currentToken["data"][-1][1] += data + self.state = self.attributeValueUnQuotedState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-attribute-value-but-got-eof"}) + self.state 
= self.dataState + else: + self.currentToken["data"][-1][1] += data + self.state = self.attributeValueUnQuotedState + return True + + def attributeValueDoubleQuotedState(self): + data = self.stream.char() + if data == "\"": + self.state = self.afterAttributeValueState + elif data == "&": + self.processEntityInAttribute('"') + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"][-1][1] += "\uFFFD" + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-attribute-value-double-quote"}) + self.state = self.dataState + else: + self.currentToken["data"][-1][1] += data +\ + self.stream.charsUntil(("\"", "&", "\u0000")) + return True + + def attributeValueSingleQuotedState(self): + data = self.stream.char() + if data == "'": + self.state = self.afterAttributeValueState + elif data == "&": + self.processEntityInAttribute("'") + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"][-1][1] += "\uFFFD" + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-attribute-value-single-quote"}) + self.state = self.dataState + else: + self.currentToken["data"][-1][1] += data +\ + self.stream.charsUntil(("'", "&", "\u0000")) + return True + + def attributeValueUnQuotedState(self): + data = self.stream.char() + if data in spaceCharacters: + self.state = self.beforeAttributeNameState + elif data == "&": + self.processEntityInAttribute(">") + elif data == ">": + self.emitCurrentToken() + elif data in ('"', "'", "=", "<", "`"): + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-character-in-unquoted-attribute-value"}) + self.currentToken["data"][-1][1] += data + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"][-1][1] += "\uFFFD" + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-attribute-value-no-quotes"}) + self.state = self.dataState + else: + self.currentToken["data"][-1][1] += data + self.stream.charsUntil( + frozenset(("&", ">", '"', "'", "=", "<", "`", "\u0000")) | spaceCharacters) + return True + + def afterAttributeValueState(self): + data = self.stream.char() + if data in spaceCharacters: + self.state = self.beforeAttributeNameState + elif data == ">": + self.emitCurrentToken() + elif data == "/": + self.state = self.selfClosingStartTagState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-EOF-after-attribute-value"}) + self.stream.unget(data) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-character-after-attribute-value"}) + self.stream.unget(data) + self.state = self.beforeAttributeNameState + return True + + def selfClosingStartTagState(self): + data = self.stream.char() + if data == ">": + self.currentToken["selfClosing"] = True + self.emitCurrentToken() + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": + "unexpected-EOF-after-solidus-in-tag"}) + self.stream.unget(data) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-character-after-solidus-in-tag"}) + self.stream.unget(data) + self.state = self.beforeAttributeNameState + return True + + 
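For orientation while reading these states: beforeAttributeNameState through selfClosingStartTagState accumulate [name, value] pairs on the current StartTag token, and emitCurrentToken pushes the finished token onto tokenQueue for __iter__ to yield. A minimal sketch of how that plays out, not part of the vendored files and assuming the pip._vendor.html5lib package added by this diff is importable (the input fragment is invented for illustration):

    # Sketch: drive the vendored tokenizer directly on a string and print
    # the tokens it emits. StartTag tokens carry their attributes as a
    # list of [name, value] pairs built by the attribute*State methods.
    from pip._vendor.html5lib._tokenizer import HTMLTokenizer
    from pip._vendor.html5lib.constants import tokenTypes

    typeNames = {v: k for k, v in tokenTypes.items()}  # int code -> readable name

    for token in HTMLTokenizer('<a href="/home" id=x>hi</a>'):
        print(typeNames[token["type"]], token.get("name"), token.get("data"))

In normal use the HTMLParser in the same vendored package consumes these tokens itself; the sketch is only a reading aid for the state methods in this file.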
def bogusCommentState(self): + # Make a new comment token and give it as value all the characters + # until the first > or EOF (charsUntil checks for EOF automatically) + # and emit it. + data = self.stream.charsUntil(">") + data = data.replace("\u0000", "\uFFFD") + self.tokenQueue.append( + {"type": tokenTypes["Comment"], "data": data}) + + # Eat the character directly after the bogus comment which is either a + # ">" or an EOF. + self.stream.char() + self.state = self.dataState + return True + + def markupDeclarationOpenState(self): + charStack = [self.stream.char()] + if charStack[-1] == "-": + charStack.append(self.stream.char()) + if charStack[-1] == "-": + self.currentToken = {"type": tokenTypes["Comment"], "data": ""} + self.state = self.commentStartState + return True + elif charStack[-1] in ('d', 'D'): + matched = True + for expected in (('o', 'O'), ('c', 'C'), ('t', 'T'), + ('y', 'Y'), ('p', 'P'), ('e', 'E')): + charStack.append(self.stream.char()) + if charStack[-1] not in expected: + matched = False + break + if matched: + self.currentToken = {"type": tokenTypes["Doctype"], + "name": "", + "publicId": None, "systemId": None, + "correct": True} + self.state = self.doctypeState + return True + elif (charStack[-1] == "[" and + self.parser is not None and + self.parser.tree.openElements and + self.parser.tree.openElements[-1].namespace != self.parser.tree.defaultNamespace): + matched = True + for expected in ["C", "D", "A", "T", "A", "["]: + charStack.append(self.stream.char()) + if charStack[-1] != expected: + matched = False + break + if matched: + self.state = self.cdataSectionState + return True + + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-dashes-or-doctype"}) + + while charStack: + self.stream.unget(charStack.pop()) + self.state = self.bogusCommentState + return True + + def commentStartState(self): + data = self.stream.char() + if data == "-": + self.state = self.commentStartDashState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"] += "\uFFFD" + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "incorrect-comment"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-comment"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["data"] += data + self.state = self.commentState + return True + + def commentStartDashState(self): + data = self.stream.char() + if data == "-": + self.state = self.commentEndState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"] += "-\uFFFD" + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "incorrect-comment"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-comment"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["data"] += "-" + data + self.state = self.commentState + return True + + def commentState(self): + data = self.stream.char() + if data == "-": + self.state = self.commentEndDashState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": 
"invalid-codepoint"}) + self.currentToken["data"] += "\uFFFD" + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "eof-in-comment"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["data"] += data + \ + self.stream.charsUntil(("-", "\u0000")) + return True + + def commentEndDashState(self): + data = self.stream.char() + if data == "-": + self.state = self.commentEndState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"] += "-\uFFFD" + self.state = self.commentState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-comment-end-dash"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["data"] += "-" + data + self.state = self.commentState + return True + + def commentEndState(self): + data = self.stream.char() + if data == ">": + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"] += "--\uFFFD" + self.state = self.commentState + elif data == "!": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-bang-after-double-dash-in-comment"}) + self.state = self.commentEndBangState + elif data == "-": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-dash-after-double-dash-in-comment"}) + self.currentToken["data"] += data + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-comment-double-dash"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + # XXX + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-comment"}) + self.currentToken["data"] += "--" + data + self.state = self.commentState + return True + + def commentEndBangState(self): + data = self.stream.char() + if data == ">": + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data == "-": + self.currentToken["data"] += "--!" + self.state = self.commentEndDashState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"] += "--!\uFFFD" + self.state = self.commentState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-comment-end-bang-state"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["data"] += "--!" 
+ data + self.state = self.commentState + return True + + def doctypeState(self): + data = self.stream.char() + if data in spaceCharacters: + self.state = self.beforeDoctypeNameState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-doctype-name-but-got-eof"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "need-space-after-doctype"}) + self.stream.unget(data) + self.state = self.beforeDoctypeNameState + return True + + def beforeDoctypeNameState(self): + data = self.stream.char() + if data in spaceCharacters: + pass + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-doctype-name-but-got-right-bracket"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["name"] = "\uFFFD" + self.state = self.doctypeNameState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-doctype-name-but-got-eof"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["name"] = data + self.state = self.doctypeNameState + return True + + def doctypeNameState(self): + data = self.stream.char() + if data in spaceCharacters: + self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) + self.state = self.afterDoctypeNameState + elif data == ">": + self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["name"] += "\uFFFD" + self.state = self.doctypeNameState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype-name"}) + self.currentToken["correct"] = False + self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["name"] += data + return True + + def afterDoctypeNameState(self): + data = self.stream.char() + if data in spaceCharacters: + pass + elif data == ">": + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.currentToken["correct"] = False + self.stream.unget(data) + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + if data in ("p", "P"): + matched = True + for expected in (("u", "U"), ("b", "B"), ("l", "L"), + ("i", "I"), ("c", "C")): + data = self.stream.char() + if data not in expected: + matched = False + break + if matched: + self.state = self.afterDoctypePublicKeywordState + return True + elif data in ("s", "S"): + matched = True + for expected in (("y", "Y"), ("s", "S"), ("t", "T"), + ("e", "E"), ("m", "M")): + data = self.stream.char() + if data not in expected: + matched = False + break + if matched: + self.state = self.afterDoctypeSystemKeywordState + return True + + # All the characters read before the current 'data' will be + # [a-zA-Z], 
so they're garbage in the bogus doctype and can be + # discarded; only the latest character might be '>' or EOF + # and needs to be ungetted + self.stream.unget(data) + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-space-or-right-bracket-in-doctype", "datavars": + {"data": data}}) + self.currentToken["correct"] = False + self.state = self.bogusDoctypeState + + return True + + def afterDoctypePublicKeywordState(self): + data = self.stream.char() + if data in spaceCharacters: + self.state = self.beforeDoctypePublicIdentifierState + elif data in ("'", '"'): + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.stream.unget(data) + self.state = self.beforeDoctypePublicIdentifierState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.stream.unget(data) + self.state = self.beforeDoctypePublicIdentifierState + return True + + def beforeDoctypePublicIdentifierState(self): + data = self.stream.char() + if data in spaceCharacters: + pass + elif data == "\"": + self.currentToken["publicId"] = "" + self.state = self.doctypePublicIdentifierDoubleQuotedState + elif data == "'": + self.currentToken["publicId"] = "" + self.state = self.doctypePublicIdentifierSingleQuotedState + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-end-of-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.currentToken["correct"] = False + self.state = self.bogusDoctypeState + return True + + def doctypePublicIdentifierDoubleQuotedState(self): + data = self.stream.char() + if data == "\"": + self.state = self.afterDoctypePublicIdentifierState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["publicId"] += "\uFFFD" + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-end-of-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["publicId"] += data + return True + + def doctypePublicIdentifierSingleQuotedState(self): + data = self.stream.char() + if data == "'": + self.state = self.afterDoctypePublicIdentifierState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["publicId"] += "\uFFFD" + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-end-of-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": 
tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["publicId"] += data + return True + + def afterDoctypePublicIdentifierState(self): + data = self.stream.char() + if data in spaceCharacters: + self.state = self.betweenDoctypePublicAndSystemIdentifiersState + elif data == ">": + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data == '"': + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.currentToken["systemId"] = "" + self.state = self.doctypeSystemIdentifierDoubleQuotedState + elif data == "'": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.currentToken["systemId"] = "" + self.state = self.doctypeSystemIdentifierSingleQuotedState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.currentToken["correct"] = False + self.state = self.bogusDoctypeState + return True + + def betweenDoctypePublicAndSystemIdentifiersState(self): + data = self.stream.char() + if data in spaceCharacters: + pass + elif data == ">": + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data == '"': + self.currentToken["systemId"] = "" + self.state = self.doctypeSystemIdentifierDoubleQuotedState + elif data == "'": + self.currentToken["systemId"] = "" + self.state = self.doctypeSystemIdentifierSingleQuotedState + elif data == EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.currentToken["correct"] = False + self.state = self.bogusDoctypeState + return True + + def afterDoctypeSystemKeywordState(self): + data = self.stream.char() + if data in spaceCharacters: + self.state = self.beforeDoctypeSystemIdentifierState + elif data in ("'", '"'): + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.stream.unget(data) + self.state = self.beforeDoctypeSystemIdentifierState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.stream.unget(data) + self.state = self.beforeDoctypeSystemIdentifierState + return True + + def beforeDoctypeSystemIdentifierState(self): + data = self.stream.char() + if data in spaceCharacters: + pass + elif data == "\"": + self.currentToken["systemId"] = "" + self.state = self.doctypeSystemIdentifierDoubleQuotedState + elif data == "'": + self.currentToken["systemId"] = "" + self.state = self.doctypeSystemIdentifierSingleQuotedState + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + 
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.currentToken["correct"] = False + self.state = self.bogusDoctypeState + return True + + def doctypeSystemIdentifierDoubleQuotedState(self): + data = self.stream.char() + if data == "\"": + self.state = self.afterDoctypeSystemIdentifierState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["systemId"] += "\uFFFD" + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-end-of-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["systemId"] += data + return True + + def doctypeSystemIdentifierSingleQuotedState(self): + data = self.stream.char() + if data == "'": + self.state = self.afterDoctypeSystemIdentifierState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["systemId"] += "\uFFFD" + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-end-of-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["systemId"] += data + return True + + def afterDoctypeSystemIdentifierState(self): + data = self.stream.char() + if data in spaceCharacters: + pass + elif data == ">": + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.state = self.bogusDoctypeState + return True + + def bogusDoctypeState(self): + data = self.stream.char() + if data == ">": + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + # XXX EMIT + self.stream.unget(data) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + pass + return True + + def cdataSectionState(self): + data = [] + while True: + data.append(self.stream.charsUntil("]")) + data.append(self.stream.charsUntil(">")) + char = self.stream.char() + if char == EOF: + break + else: + assert char == ">" + if data[-1][-2:] == "]]": + data[-1] = data[-1][:-2] + break + else: + data.append(char) + + data = "".join(data) # pylint:disable=redefined-variable-type + # Deal with null here rather than in the parser + nullCount = data.count("\u0000") + if nullCount > 0: + for _ in range(nullCount): + self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+ "data": "invalid-codepoint"}) + data = data.replace("\u0000", "\uFFFD") + if data: + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": data}) + self.state = self.dataState + return True diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__init__.py new file mode 100644 index 0000000..a5ba4bf --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__init__.py @@ -0,0 +1,14 @@ +from __future__ import absolute_import, division, unicode_literals + +from .py import Trie as PyTrie + +Trie = PyTrie + +# pylint:disable=wrong-import-position +try: + from .datrie import Trie as DATrie +except ImportError: + pass +else: + Trie = DATrie +# pylint:enable=wrong-import-position diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..e3ca47b Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/_base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/_base.cpython-37.pyc new file mode 100644 index 0000000..2860b6d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/_base.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/datrie.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/datrie.cpython-37.pyc new file mode 100644 index 0000000..0ea41f8 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/datrie.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/py.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/py.cpython-37.pyc new file mode 100644 index 0000000..b87592c Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/py.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/_base.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/_base.py new file mode 100644 index 0000000..6b71975 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/_base.py @@ -0,0 +1,40 @@ +from __future__ import absolute_import, division, unicode_literals + +try: + from collections.abc import Mapping +except ImportError: # Python 2.7 + from collections import Mapping + + +class Trie(Mapping): + """Abstract base class for tries""" + + def keys(self, prefix=None): + # pylint:disable=arguments-differ + keys = super(Trie, self).keys() + + if prefix is None: + return set(keys) + + return {x for x in keys if x.startswith(prefix)} + + def has_keys_with_prefix(self, prefix): + for key in self.keys(): + if key.startswith(prefix): + return True + + return False + + def longest_prefix(self, prefix): + if prefix in self: + return prefix + + for i in range(1, len(prefix) + 1): + if prefix[:-i] in self: + return prefix[:-i] + + raise KeyError(prefix) + + def longest_prefix_item(self, prefix): + lprefix = self.longest_prefix(prefix) + return (lprefix, 
self[lprefix]) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/datrie.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/datrie.py new file mode 100644 index 0000000..e2e5f86 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/datrie.py @@ -0,0 +1,44 @@ +from __future__ import absolute_import, division, unicode_literals + +from datrie import Trie as DATrie +from pip._vendor.six import text_type + +from ._base import Trie as ABCTrie + + +class Trie(ABCTrie): + def __init__(self, data): + chars = set() + for key in data.keys(): + if not isinstance(key, text_type): + raise TypeError("All keys must be strings") + for char in key: + chars.add(char) + + self._data = DATrie("".join(chars)) + for key, value in data.items(): + self._data[key] = value + + def __contains__(self, key): + return key in self._data + + def __len__(self): + return len(self._data) + + def __iter__(self): + raise NotImplementedError() + + def __getitem__(self, key): + return self._data[key] + + def keys(self, prefix=None): + return self._data.keys(prefix) + + def has_keys_with_prefix(self, prefix): + return self._data.has_keys_with_prefix(prefix) + + def longest_prefix(self, prefix): + return self._data.longest_prefix(prefix) + + def longest_prefix_item(self, prefix): + return self._data.longest_prefix_item(prefix) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/py.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/py.py new file mode 100644 index 0000000..c178b21 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/py.py @@ -0,0 +1,67 @@ +from __future__ import absolute_import, division, unicode_literals +from pip._vendor.six import text_type + +from bisect import bisect_left + +from ._base import Trie as ABCTrie + + +class Trie(ABCTrie): + def __init__(self, data): + if not all(isinstance(x, text_type) for x in data.keys()): + raise TypeError("All keys must be strings") + + self._data = data + self._keys = sorted(data.keys()) + self._cachestr = "" + self._cachepoints = (0, len(data)) + + def __contains__(self, key): + return key in self._data + + def __len__(self): + return len(self._data) + + def __iter__(self): + return iter(self._data) + + def __getitem__(self, key): + return self._data[key] + + def keys(self, prefix=None): + if prefix is None or prefix == "" or not self._keys: + return set(self._keys) + + if prefix.startswith(self._cachestr): + lo, hi = self._cachepoints + start = i = bisect_left(self._keys, prefix, lo, hi) + else: + start = i = bisect_left(self._keys, prefix) + + keys = set() + if start == len(self._keys): + return keys + + while self._keys[i].startswith(prefix): + keys.add(self._keys[i]) + i += 1 + + self._cachestr = prefix + self._cachepoints = (start, i) + + return keys + + def has_keys_with_prefix(self, prefix): + if prefix in self._data: + return True + + if prefix.startswith(self._cachestr): + lo, hi = self._cachepoints + i = bisect_left(self._keys, prefix, lo, hi) + else: + i = bisect_left(self._keys, prefix) + + if i == len(self._keys): + return False + + return self._keys[i].startswith(prefix) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_utils.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_utils.py new file mode 100644 index 0000000..0703afb --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/_utils.py @@ -0,0 +1,124 @@ +from __future__ import absolute_import, 
division, unicode_literals + +from types import ModuleType + +from pip._vendor.six import text_type + +try: + import xml.etree.cElementTree as default_etree +except ImportError: + import xml.etree.ElementTree as default_etree + + +__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", + "surrogatePairToCodepoint", "moduleFactoryFactory", + "supports_lone_surrogates"] + + +# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be +# caught by the below test. In general this would be any platform +# using UTF-16 as its encoding of unicode strings, such as +# Jython. This is because UTF-16 itself is based on the use of such +# surrogates, and there is no mechanism to further escape such +# escapes. +try: + _x = eval('"\\uD800"') # pylint:disable=eval-used + if not isinstance(_x, text_type): + # We need this with u"" because of http://bugs.jython.org/issue2039 + _x = eval('u"\\uD800"') # pylint:disable=eval-used + assert isinstance(_x, text_type) +except: # pylint:disable=bare-except + supports_lone_surrogates = False +else: + supports_lone_surrogates = True + + +class MethodDispatcher(dict): + """Dict with 2 special properties: + + On initiation, keys that are lists, sets or tuples are converted to + multiple keys so accessing any one of the items in the original + list-like object returns the matching value + + md = MethodDispatcher({("foo", "bar"):"baz"}) + md["foo"] == "baz" + + A default value which can be set through the default attribute. + """ + + def __init__(self, items=()): + # Using _dictEntries instead of directly assigning to self is about + # twice as fast. Please do careful performance testing before changing + # anything here. + _dictEntries = [] + for name, value in items: + if isinstance(name, (list, tuple, frozenset, set)): + for item in name: + _dictEntries.append((item, value)) + else: + _dictEntries.append((name, value)) + dict.__init__(self, _dictEntries) + assert len(self) == len(_dictEntries) + self.default = None + + def __getitem__(self, key): + return dict.get(self, key, self.default) + + +# Some utility functions to deal with weirdness around UCS2 vs UCS4 +# python builds + +def isSurrogatePair(data): + return (len(data) == 2 and + ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and + ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF) + + +def surrogatePairToCodepoint(data): + char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 + + (ord(data[1]) - 0xDC00)) + return char_val + +# Module Factory Factory (no, this isn't Java, I know) +# Here to stop this being duplicated all over the place. 
+ + +def moduleFactoryFactory(factory): + moduleCache = {} + + def moduleFactory(baseModule, *args, **kwargs): + if isinstance(ModuleType.__name__, type("")): + name = "_%s_factory" % baseModule.__name__ + else: + name = b"_%s_factory" % baseModule.__name__ + + kwargs_tuple = tuple(kwargs.items()) + + try: + return moduleCache[name][args][kwargs_tuple] + except KeyError: + mod = ModuleType(name) + objs = factory(baseModule, *args, **kwargs) + mod.__dict__.update(objs) + if "name" not in moduleCache: + moduleCache[name] = {} + if "args" not in moduleCache[name]: + moduleCache[name][args] = {} + if "kwargs" not in moduleCache[name][args]: + moduleCache[name][args][kwargs_tuple] = {} + moduleCache[name][args][kwargs_tuple] = mod + return mod + + return moduleFactory + + +def memoize(func): + cache = {} + + def wrapped(*args, **kwargs): + key = (tuple(args), tuple(kwargs.items())) + if key not in cache: + cache[key] = func(*args, **kwargs) + return cache[key] + + return wrapped diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/constants.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/constants.py new file mode 100644 index 0000000..1ff8041 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/constants.py @@ -0,0 +1,2947 @@ +from __future__ import absolute_import, division, unicode_literals + +import string + +EOF = None + +E = { + "null-character": + "Null character in input stream, replaced with U+FFFD.", + "invalid-codepoint": + "Invalid codepoint in stream.", + "incorrectly-placed-solidus": + "Solidus (/) incorrectly placed in tag.", + "incorrect-cr-newline-entity": + "Incorrect CR newline entity, replaced with LF.", + "illegal-windows-1252-entity": + "Entity used with illegal number (windows-1252 reference).", + "cant-convert-numeric-entity": + "Numeric entity couldn't be converted to character " + "(codepoint U+%(charAsInt)08x).", + "illegal-codepoint-for-numeric-entity": + "Numeric entity represents an illegal codepoint: " + "U+%(charAsInt)08x.", + "numeric-entity-without-semicolon": + "Numeric entity didn't end with ';'.", + "expected-numeric-entity-but-got-eof": + "Numeric entity expected. Got end of file instead.", + "expected-numeric-entity": + "Numeric entity expected but none found.", + "named-entity-without-semicolon": + "Named entity didn't end with ';'.", + "expected-named-entity": + "Named entity expected. Got none.", + "attributes-in-end-tag": + "End tag contains unexpected attributes.", + 'self-closing-flag-on-end-tag': + "End tag contains unexpected self-closing flag.", + "expected-tag-name-but-got-right-bracket": + "Expected tag name. Got '>' instead.", + "expected-tag-name-but-got-question-mark": + "Expected tag name. Got '?' instead. (HTML doesn't " + "support processing instructions.)", + "expected-tag-name": + "Expected tag name. Got something else instead", + "expected-closing-tag-but-got-right-bracket": + "Expected closing tag. Got '>' instead. Ignoring ''.", + "expected-closing-tag-but-got-eof": + "Expected closing tag. Unexpected end of file.", + "expected-closing-tag-but-got-char": + "Expected closing tag. Unexpected character '%(data)s' found.", + "eof-in-tag-name": + "Unexpected end of file in the tag name.", + "expected-attribute-name-but-got-eof": + "Unexpected end of file. 
Expected attribute name instead.", + "eof-in-attribute-name": + "Unexpected end of file in attribute name.", + "invalid-character-in-attribute-name": + "Invalid character in attribute name", + "duplicate-attribute": + "Dropped duplicate attribute on tag.", + "expected-end-of-tag-name-but-got-eof": + "Unexpected end of file. Expected = or end of tag.", + "expected-attribute-value-but-got-eof": + "Unexpected end of file. Expected attribute value.", + "expected-attribute-value-but-got-right-bracket": + "Expected attribute value. Got '>' instead.", + 'equals-in-unquoted-attribute-value': + "Unexpected = in unquoted attribute", + 'unexpected-character-in-unquoted-attribute-value': + "Unexpected character in unquoted attribute", + "invalid-character-after-attribute-name": + "Unexpected character after attribute name.", + "unexpected-character-after-attribute-value": + "Unexpected character after attribute value.", + "eof-in-attribute-value-double-quote": + "Unexpected end of file in attribute value (\").", + "eof-in-attribute-value-single-quote": + "Unexpected end of file in attribute value (').", + "eof-in-attribute-value-no-quotes": + "Unexpected end of file in attribute value.", + "unexpected-EOF-after-solidus-in-tag": + "Unexpected end of file in tag. Expected >", + "unexpected-character-after-solidus-in-tag": + "Unexpected character after / in tag. Expected >", + "expected-dashes-or-doctype": + "Expected '--' or 'DOCTYPE'. Not found.", + "unexpected-bang-after-double-dash-in-comment": + "Unexpected ! after -- in comment", + "unexpected-space-after-double-dash-in-comment": + "Unexpected space after -- in comment", + "incorrect-comment": + "Incorrect comment.", + "eof-in-comment": + "Unexpected end of file in comment.", + "eof-in-comment-end-dash": + "Unexpected end of file in comment (-)", + "unexpected-dash-after-double-dash-in-comment": + "Unexpected '-' after '--' found in comment.", + "eof-in-comment-double-dash": + "Unexpected end of file in comment (--).", + "eof-in-comment-end-space-state": + "Unexpected end of file in comment.", + "eof-in-comment-end-bang-state": + "Unexpected end of file in comment.", + "unexpected-char-in-comment": + "Unexpected character in comment found.", + "need-space-after-doctype": + "No space after literal string 'DOCTYPE'.", + "expected-doctype-name-but-got-right-bracket": + "Unexpected > character. Expected DOCTYPE name.", + "expected-doctype-name-but-got-eof": + "Unexpected end of file. Expected DOCTYPE name.", + "eof-in-doctype-name": + "Unexpected end of file in DOCTYPE name.", + "eof-in-doctype": + "Unexpected end of file in DOCTYPE.", + "expected-space-or-right-bracket-in-doctype": + "Expected space or '>'. Got '%(data)s'", + "unexpected-end-of-doctype": + "Unexpected end of DOCTYPE.", + "unexpected-char-in-doctype": + "Unexpected character in DOCTYPE.", + "eof-in-innerhtml": + "XXX innerHTML EOF", + "unexpected-doctype": + "Unexpected DOCTYPE. Ignored.", + "non-html-root": + "html needs to be the first start tag.", + "expected-doctype-but-got-eof": + "Unexpected End of file. Expected DOCTYPE.", + "unknown-doctype": + "Erroneous DOCTYPE.", + "expected-doctype-but-got-chars": + "Unexpected non-space characters. Expected DOCTYPE.", + "expected-doctype-but-got-start-tag": + "Unexpected start tag (%(name)s). Expected DOCTYPE.", + "expected-doctype-but-got-end-tag": + "Unexpected end tag (%(name)s). 
Expected DOCTYPE.", + "end-tag-after-implied-root": + "Unexpected end tag (%(name)s) after the (implied) root element.", + "expected-named-closing-tag-but-got-eof": + "Unexpected end of file. Expected end tag (%(name)s).", + "two-heads-are-not-better-than-one": + "Unexpected start tag head in existing head. Ignored.", + "unexpected-end-tag": + "Unexpected end tag (%(name)s). Ignored.", + "unexpected-start-tag-out-of-my-head": + "Unexpected start tag (%(name)s) that can be in head. Moved.", + "unexpected-start-tag": + "Unexpected start tag (%(name)s).", + "missing-end-tag": + "Missing end tag (%(name)s).", + "missing-end-tags": + "Missing end tags (%(name)s).", + "unexpected-start-tag-implies-end-tag": + "Unexpected start tag (%(startName)s) " + "implies end tag (%(endName)s).", + "unexpected-start-tag-treated-as": + "Unexpected start tag (%(originalName)s). Treated as %(newName)s.", + "deprecated-tag": + "Unexpected start tag %(name)s. Don't use it!", + "unexpected-start-tag-ignored": + "Unexpected start tag %(name)s. Ignored.", + "expected-one-end-tag-but-got-another": + "Unexpected end tag (%(gotName)s). " + "Missing end tag (%(expectedName)s).", + "end-tag-too-early": + "End tag (%(name)s) seen too early. Expected other end tag.", + "end-tag-too-early-named": + "Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).", + "end-tag-too-early-ignored": + "End tag (%(name)s) seen too early. Ignored.", + "adoption-agency-1.1": + "End tag (%(name)s) violates step 1, " + "paragraph 1 of the adoption agency algorithm.", + "adoption-agency-1.2": + "End tag (%(name)s) violates step 1, " + "paragraph 2 of the adoption agency algorithm.", + "adoption-agency-1.3": + "End tag (%(name)s) violates step 1, " + "paragraph 3 of the adoption agency algorithm.", + "adoption-agency-4.4": + "End tag (%(name)s) violates step 4, " + "paragraph 4 of the adoption agency algorithm.", + "unexpected-end-tag-treated-as": + "Unexpected end tag (%(originalName)s). Treated as %(newName)s.", + "no-end-tag": + "This element (%(name)s) has no end tag.", + "unexpected-implied-end-tag-in-table": + "Unexpected implied end tag (%(name)s) in the table phase.", + "unexpected-implied-end-tag-in-table-body": + "Unexpected implied end tag (%(name)s) in the table body phase.", + "unexpected-char-implies-table-voodoo": + "Unexpected non-space characters in " + "table context caused voodoo mode.", + "unexpected-hidden-input-in-table": + "Unexpected input with type hidden in table context.", + "unexpected-form-in-table": + "Unexpected form in table context.", + "unexpected-start-tag-implies-table-voodoo": + "Unexpected start tag (%(name)s) in " + "table context caused voodoo mode.", + "unexpected-end-tag-implies-table-voodoo": + "Unexpected end tag (%(name)s) in " + "table context caused voodoo mode.", + "unexpected-cell-in-table-body": + "Unexpected table cell start tag (%(name)s) " + "in the table body phase.", + "unexpected-cell-end-tag": + "Got table cell end tag (%(name)s) " + "while required end tags are missing.", + "unexpected-end-tag-in-table-body": + "Unexpected end tag (%(name)s) in the table body phase. Ignored.", + "unexpected-implied-end-tag-in-table-row": + "Unexpected implied end tag (%(name)s) in the table row phase.", + "unexpected-end-tag-in-table-row": + "Unexpected end tag (%(name)s) in the table row phase. 
Ignored.", + "unexpected-select-in-select": + "Unexpected select start tag in the select phase " + "treated as select end tag.", + "unexpected-input-in-select": + "Unexpected input start tag in the select phase.", + "unexpected-start-tag-in-select": + "Unexpected start tag token (%(name)s in the select phase. " + "Ignored.", + "unexpected-end-tag-in-select": + "Unexpected end tag (%(name)s) in the select phase. Ignored.", + "unexpected-table-element-start-tag-in-select-in-table": + "Unexpected table element start tag (%(name)s) in the select in table phase.", + "unexpected-table-element-end-tag-in-select-in-table": + "Unexpected table element end tag (%(name)s) in the select in table phase.", + "unexpected-char-after-body": + "Unexpected non-space characters in the after body phase.", + "unexpected-start-tag-after-body": + "Unexpected start tag token (%(name)s)" + " in the after body phase.", + "unexpected-end-tag-after-body": + "Unexpected end tag token (%(name)s)" + " in the after body phase.", + "unexpected-char-in-frameset": + "Unexpected characters in the frameset phase. Characters ignored.", + "unexpected-start-tag-in-frameset": + "Unexpected start tag token (%(name)s)" + " in the frameset phase. Ignored.", + "unexpected-frameset-in-frameset-innerhtml": + "Unexpected end tag token (frameset) " + "in the frameset phase (innerHTML).", + "unexpected-end-tag-in-frameset": + "Unexpected end tag token (%(name)s)" + " in the frameset phase. Ignored.", + "unexpected-char-after-frameset": + "Unexpected non-space characters in the " + "after frameset phase. Ignored.", + "unexpected-start-tag-after-frameset": + "Unexpected start tag (%(name)s)" + " in the after frameset phase. Ignored.", + "unexpected-end-tag-after-frameset": + "Unexpected end tag (%(name)s)" + " in the after frameset phase. Ignored.", + "unexpected-end-tag-after-body-innerhtml": + "Unexpected end tag after body(innerHtml)", + "expected-eof-but-got-char": + "Unexpected non-space characters. Expected end of file.", + "expected-eof-but-got-start-tag": + "Unexpected start tag (%(name)s)" + ". Expected end of file.", + "expected-eof-but-got-end-tag": + "Unexpected end tag (%(name)s)" + ". Expected end of file.", + "eof-in-table": + "Unexpected end of file. Expected table content.", + "eof-in-select": + "Unexpected end of file. Expected select content.", + "eof-in-frameset": + "Unexpected end of file. Expected frameset content.", + "eof-in-script-in-script": + "Unexpected end of file. Expected script content.", + "eof-in-foreign-lands": + "Unexpected end of file. Expected foreign content", + "non-void-element-with-trailing-solidus": + "Trailing solidus not allowed on element %(name)s", + "unexpected-html-element-in-foreign-content": + "Element %(name)s not allowed in a non-html context", + "unexpected-end-tag-before-html": + "Unexpected end tag (%(name)s) before html.", + "unexpected-inhead-noscript-tag": + "Element %(name)s not allowed in a inhead-noscript context", + "eof-in-head-noscript": + "Unexpected end of file. Expected inhead-noscript content", + "char-in-head-noscript": + "Unexpected non-space character. 
Expected inhead-noscript content", + "XXX-undefined-error": + "Undefined error (this sucks and should be fixed)", +} + +namespaces = { + "html": "http://www.w3.org/1999/xhtml", + "mathml": "http://www.w3.org/1998/Math/MathML", + "svg": "http://www.w3.org/2000/svg", + "xlink": "http://www.w3.org/1999/xlink", + "xml": "http://www.w3.org/XML/1998/namespace", + "xmlns": "http://www.w3.org/2000/xmlns/" +} + +scopingElements = frozenset([ + (namespaces["html"], "applet"), + (namespaces["html"], "caption"), + (namespaces["html"], "html"), + (namespaces["html"], "marquee"), + (namespaces["html"], "object"), + (namespaces["html"], "table"), + (namespaces["html"], "td"), + (namespaces["html"], "th"), + (namespaces["mathml"], "mi"), + (namespaces["mathml"], "mo"), + (namespaces["mathml"], "mn"), + (namespaces["mathml"], "ms"), + (namespaces["mathml"], "mtext"), + (namespaces["mathml"], "annotation-xml"), + (namespaces["svg"], "foreignObject"), + (namespaces["svg"], "desc"), + (namespaces["svg"], "title"), +]) + +formattingElements = frozenset([ + (namespaces["html"], "a"), + (namespaces["html"], "b"), + (namespaces["html"], "big"), + (namespaces["html"], "code"), + (namespaces["html"], "em"), + (namespaces["html"], "font"), + (namespaces["html"], "i"), + (namespaces["html"], "nobr"), + (namespaces["html"], "s"), + (namespaces["html"], "small"), + (namespaces["html"], "strike"), + (namespaces["html"], "strong"), + (namespaces["html"], "tt"), + (namespaces["html"], "u") +]) + +specialElements = frozenset([ + (namespaces["html"], "address"), + (namespaces["html"], "applet"), + (namespaces["html"], "area"), + (namespaces["html"], "article"), + (namespaces["html"], "aside"), + (namespaces["html"], "base"), + (namespaces["html"], "basefont"), + (namespaces["html"], "bgsound"), + (namespaces["html"], "blockquote"), + (namespaces["html"], "body"), + (namespaces["html"], "br"), + (namespaces["html"], "button"), + (namespaces["html"], "caption"), + (namespaces["html"], "center"), + (namespaces["html"], "col"), + (namespaces["html"], "colgroup"), + (namespaces["html"], "command"), + (namespaces["html"], "dd"), + (namespaces["html"], "details"), + (namespaces["html"], "dir"), + (namespaces["html"], "div"), + (namespaces["html"], "dl"), + (namespaces["html"], "dt"), + (namespaces["html"], "embed"), + (namespaces["html"], "fieldset"), + (namespaces["html"], "figure"), + (namespaces["html"], "footer"), + (namespaces["html"], "form"), + (namespaces["html"], "frame"), + (namespaces["html"], "frameset"), + (namespaces["html"], "h1"), + (namespaces["html"], "h2"), + (namespaces["html"], "h3"), + (namespaces["html"], "h4"), + (namespaces["html"], "h5"), + (namespaces["html"], "h6"), + (namespaces["html"], "head"), + (namespaces["html"], "header"), + (namespaces["html"], "hr"), + (namespaces["html"], "html"), + (namespaces["html"], "iframe"), + # Note that image is commented out in the spec as "this isn't an + # element that can end up on the stack, so it doesn't matter," + (namespaces["html"], "image"), + (namespaces["html"], "img"), + (namespaces["html"], "input"), + (namespaces["html"], "isindex"), + (namespaces["html"], "li"), + (namespaces["html"], "link"), + (namespaces["html"], "listing"), + (namespaces["html"], "marquee"), + (namespaces["html"], "menu"), + (namespaces["html"], "meta"), + (namespaces["html"], "nav"), + (namespaces["html"], "noembed"), + (namespaces["html"], "noframes"), + (namespaces["html"], "noscript"), + (namespaces["html"], "object"), + (namespaces["html"], "ol"), + (namespaces["html"], "p"), 
+ (namespaces["html"], "param"), + (namespaces["html"], "plaintext"), + (namespaces["html"], "pre"), + (namespaces["html"], "script"), + (namespaces["html"], "section"), + (namespaces["html"], "select"), + (namespaces["html"], "style"), + (namespaces["html"], "table"), + (namespaces["html"], "tbody"), + (namespaces["html"], "td"), + (namespaces["html"], "textarea"), + (namespaces["html"], "tfoot"), + (namespaces["html"], "th"), + (namespaces["html"], "thead"), + (namespaces["html"], "title"), + (namespaces["html"], "tr"), + (namespaces["html"], "ul"), + (namespaces["html"], "wbr"), + (namespaces["html"], "xmp"), + (namespaces["svg"], "foreignObject") +]) + +htmlIntegrationPointElements = frozenset([ + (namespaces["mathml"], "annotation-xml"), + (namespaces["svg"], "foreignObject"), + (namespaces["svg"], "desc"), + (namespaces["svg"], "title") +]) + +mathmlTextIntegrationPointElements = frozenset([ + (namespaces["mathml"], "mi"), + (namespaces["mathml"], "mo"), + (namespaces["mathml"], "mn"), + (namespaces["mathml"], "ms"), + (namespaces["mathml"], "mtext") +]) + +adjustSVGAttributes = { + "attributename": "attributeName", + "attributetype": "attributeType", + "basefrequency": "baseFrequency", + "baseprofile": "baseProfile", + "calcmode": "calcMode", + "clippathunits": "clipPathUnits", + "contentscripttype": "contentScriptType", + "contentstyletype": "contentStyleType", + "diffuseconstant": "diffuseConstant", + "edgemode": "edgeMode", + "externalresourcesrequired": "externalResourcesRequired", + "filterres": "filterRes", + "filterunits": "filterUnits", + "glyphref": "glyphRef", + "gradienttransform": "gradientTransform", + "gradientunits": "gradientUnits", + "kernelmatrix": "kernelMatrix", + "kernelunitlength": "kernelUnitLength", + "keypoints": "keyPoints", + "keysplines": "keySplines", + "keytimes": "keyTimes", + "lengthadjust": "lengthAdjust", + "limitingconeangle": "limitingConeAngle", + "markerheight": "markerHeight", + "markerunits": "markerUnits", + "markerwidth": "markerWidth", + "maskcontentunits": "maskContentUnits", + "maskunits": "maskUnits", + "numoctaves": "numOctaves", + "pathlength": "pathLength", + "patterncontentunits": "patternContentUnits", + "patterntransform": "patternTransform", + "patternunits": "patternUnits", + "pointsatx": "pointsAtX", + "pointsaty": "pointsAtY", + "pointsatz": "pointsAtZ", + "preservealpha": "preserveAlpha", + "preserveaspectratio": "preserveAspectRatio", + "primitiveunits": "primitiveUnits", + "refx": "refX", + "refy": "refY", + "repeatcount": "repeatCount", + "repeatdur": "repeatDur", + "requiredextensions": "requiredExtensions", + "requiredfeatures": "requiredFeatures", + "specularconstant": "specularConstant", + "specularexponent": "specularExponent", + "spreadmethod": "spreadMethod", + "startoffset": "startOffset", + "stddeviation": "stdDeviation", + "stitchtiles": "stitchTiles", + "surfacescale": "surfaceScale", + "systemlanguage": "systemLanguage", + "tablevalues": "tableValues", + "targetx": "targetX", + "targety": "targetY", + "textlength": "textLength", + "viewbox": "viewBox", + "viewtarget": "viewTarget", + "xchannelselector": "xChannelSelector", + "ychannelselector": "yChannelSelector", + "zoomandpan": "zoomAndPan" +} + +adjustMathMLAttributes = {"definitionurl": "definitionURL"} + +adjustForeignAttributes = { + "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]), + "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]), + "xlink:href": ("xlink", "href", namespaces["xlink"]), + "xlink:role": ("xlink", "role", 
namespaces["xlink"]), + "xlink:show": ("xlink", "show", namespaces["xlink"]), + "xlink:title": ("xlink", "title", namespaces["xlink"]), + "xlink:type": ("xlink", "type", namespaces["xlink"]), + "xml:base": ("xml", "base", namespaces["xml"]), + "xml:lang": ("xml", "lang", namespaces["xml"]), + "xml:space": ("xml", "space", namespaces["xml"]), + "xmlns": (None, "xmlns", namespaces["xmlns"]), + "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"]) +} + +unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in + adjustForeignAttributes.items()]) + +spaceCharacters = frozenset([ + "\t", + "\n", + "\u000C", + " ", + "\r" +]) + +tableInsertModeElements = frozenset([ + "table", + "tbody", + "tfoot", + "thead", + "tr" +]) + +asciiLowercase = frozenset(string.ascii_lowercase) +asciiUppercase = frozenset(string.ascii_uppercase) +asciiLetters = frozenset(string.ascii_letters) +digits = frozenset(string.digits) +hexDigits = frozenset(string.hexdigits) + +asciiUpper2Lower = dict([(ord(c), ord(c.lower())) + for c in string.ascii_uppercase]) + +# Heading elements need to be ordered +headingElements = ( + "h1", + "h2", + "h3", + "h4", + "h5", + "h6" +) + +voidElements = frozenset([ + "base", + "command", + "event-source", + "link", + "meta", + "hr", + "br", + "img", + "embed", + "param", + "area", + "col", + "input", + "source", + "track" +]) + +cdataElements = frozenset(['title', 'textarea']) + +rcdataElements = frozenset([ + 'style', + 'script', + 'xmp', + 'iframe', + 'noembed', + 'noframes', + 'noscript' +]) + +booleanAttributes = { + "": frozenset(["irrelevant", "itemscope"]), + "style": frozenset(["scoped"]), + "img": frozenset(["ismap"]), + "audio": frozenset(["autoplay", "controls"]), + "video": frozenset(["autoplay", "controls"]), + "script": frozenset(["defer", "async"]), + "details": frozenset(["open"]), + "datagrid": frozenset(["multiple", "disabled"]), + "command": frozenset(["hidden", "disabled", "checked", "default"]), + "hr": frozenset(["noshade"]), + "menu": frozenset(["autosubmit"]), + "fieldset": frozenset(["disabled", "readonly"]), + "option": frozenset(["disabled", "readonly", "selected"]), + "optgroup": frozenset(["disabled", "readonly"]), + "button": frozenset(["disabled", "autofocus"]), + "input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]), + "select": frozenset(["disabled", "readonly", "autofocus", "multiple"]), + "output": frozenset(["disabled", "readonly"]), + "iframe": frozenset(["seamless"]), +} + +# entitiesWindows1252 has to be _ordered_ and needs to have an index. It +# therefore can't be a frozenset. 
+entitiesWindows1252 = ( + 8364, # 0x80 0x20AC EURO SIGN + 65533, # 0x81 UNDEFINED + 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK + 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK + 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK + 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS + 8224, # 0x86 0x2020 DAGGER + 8225, # 0x87 0x2021 DOUBLE DAGGER + 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT + 8240, # 0x89 0x2030 PER MILLE SIGN + 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON + 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE + 65533, # 0x8D UNDEFINED + 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON + 65533, # 0x8F UNDEFINED + 65533, # 0x90 UNDEFINED + 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK + 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK + 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK + 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK + 8226, # 0x95 0x2022 BULLET + 8211, # 0x96 0x2013 EN DASH + 8212, # 0x97 0x2014 EM DASH + 732, # 0x98 0x02DC SMALL TILDE + 8482, # 0x99 0x2122 TRADE MARK SIGN + 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON + 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE + 65533, # 0x9D UNDEFINED + 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON + 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS +) + +xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;']) + +entities = { + "AElig": "\xc6", + "AElig;": "\xc6", + "AMP": "&", + "AMP;": "&", + "Aacute": "\xc1", + "Aacute;": "\xc1", + "Abreve;": "\u0102", + "Acirc": "\xc2", + "Acirc;": "\xc2", + "Acy;": "\u0410", + "Afr;": "\U0001d504", + "Agrave": "\xc0", + "Agrave;": "\xc0", + "Alpha;": "\u0391", + "Amacr;": "\u0100", + "And;": "\u2a53", + "Aogon;": "\u0104", + "Aopf;": "\U0001d538", + "ApplyFunction;": "\u2061", + "Aring": "\xc5", + "Aring;": "\xc5", + "Ascr;": "\U0001d49c", + "Assign;": "\u2254", + "Atilde": "\xc3", + "Atilde;": "\xc3", + "Auml": "\xc4", + "Auml;": "\xc4", + "Backslash;": "\u2216", + "Barv;": "\u2ae7", + "Barwed;": "\u2306", + "Bcy;": "\u0411", + "Because;": "\u2235", + "Bernoullis;": "\u212c", + "Beta;": "\u0392", + "Bfr;": "\U0001d505", + "Bopf;": "\U0001d539", + "Breve;": "\u02d8", + "Bscr;": "\u212c", + "Bumpeq;": "\u224e", + "CHcy;": "\u0427", + "COPY": "\xa9", + "COPY;": "\xa9", + "Cacute;": "\u0106", + "Cap;": "\u22d2", + "CapitalDifferentialD;": "\u2145", + "Cayleys;": "\u212d", + "Ccaron;": "\u010c", + "Ccedil": "\xc7", + "Ccedil;": "\xc7", + "Ccirc;": "\u0108", + "Cconint;": "\u2230", + "Cdot;": "\u010a", + "Cedilla;": "\xb8", + "CenterDot;": "\xb7", + "Cfr;": "\u212d", + "Chi;": "\u03a7", + "CircleDot;": "\u2299", + "CircleMinus;": "\u2296", + "CirclePlus;": "\u2295", + "CircleTimes;": "\u2297", + "ClockwiseContourIntegral;": "\u2232", + "CloseCurlyDoubleQuote;": "\u201d", + "CloseCurlyQuote;": "\u2019", + "Colon;": "\u2237", + "Colone;": "\u2a74", + "Congruent;": "\u2261", + "Conint;": "\u222f", + "ContourIntegral;": "\u222e", + "Copf;": "\u2102", + "Coproduct;": "\u2210", + "CounterClockwiseContourIntegral;": "\u2233", + "Cross;": "\u2a2f", + "Cscr;": "\U0001d49e", + "Cup;": "\u22d3", + "CupCap;": "\u224d", + "DD;": "\u2145", + "DDotrahd;": "\u2911", + "DJcy;": "\u0402", + "DScy;": "\u0405", + "DZcy;": "\u040f", + "Dagger;": "\u2021", + "Darr;": "\u21a1", + "Dashv;": "\u2ae4", + "Dcaron;": "\u010e", + "Dcy;": "\u0414", + "Del;": "\u2207", + "Delta;": "\u0394", + "Dfr;": "\U0001d507", + "DiacriticalAcute;": "\xb4", + 
"DiacriticalDot;": "\u02d9", + "DiacriticalDoubleAcute;": "\u02dd", + "DiacriticalGrave;": "`", + "DiacriticalTilde;": "\u02dc", + "Diamond;": "\u22c4", + "DifferentialD;": "\u2146", + "Dopf;": "\U0001d53b", + "Dot;": "\xa8", + "DotDot;": "\u20dc", + "DotEqual;": "\u2250", + "DoubleContourIntegral;": "\u222f", + "DoubleDot;": "\xa8", + "DoubleDownArrow;": "\u21d3", + "DoubleLeftArrow;": "\u21d0", + "DoubleLeftRightArrow;": "\u21d4", + "DoubleLeftTee;": "\u2ae4", + "DoubleLongLeftArrow;": "\u27f8", + "DoubleLongLeftRightArrow;": "\u27fa", + "DoubleLongRightArrow;": "\u27f9", + "DoubleRightArrow;": "\u21d2", + "DoubleRightTee;": "\u22a8", + "DoubleUpArrow;": "\u21d1", + "DoubleUpDownArrow;": "\u21d5", + "DoubleVerticalBar;": "\u2225", + "DownArrow;": "\u2193", + "DownArrowBar;": "\u2913", + "DownArrowUpArrow;": "\u21f5", + "DownBreve;": "\u0311", + "DownLeftRightVector;": "\u2950", + "DownLeftTeeVector;": "\u295e", + "DownLeftVector;": "\u21bd", + "DownLeftVectorBar;": "\u2956", + "DownRightTeeVector;": "\u295f", + "DownRightVector;": "\u21c1", + "DownRightVectorBar;": "\u2957", + "DownTee;": "\u22a4", + "DownTeeArrow;": "\u21a7", + "Downarrow;": "\u21d3", + "Dscr;": "\U0001d49f", + "Dstrok;": "\u0110", + "ENG;": "\u014a", + "ETH": "\xd0", + "ETH;": "\xd0", + "Eacute": "\xc9", + "Eacute;": "\xc9", + "Ecaron;": "\u011a", + "Ecirc": "\xca", + "Ecirc;": "\xca", + "Ecy;": "\u042d", + "Edot;": "\u0116", + "Efr;": "\U0001d508", + "Egrave": "\xc8", + "Egrave;": "\xc8", + "Element;": "\u2208", + "Emacr;": "\u0112", + "EmptySmallSquare;": "\u25fb", + "EmptyVerySmallSquare;": "\u25ab", + "Eogon;": "\u0118", + "Eopf;": "\U0001d53c", + "Epsilon;": "\u0395", + "Equal;": "\u2a75", + "EqualTilde;": "\u2242", + "Equilibrium;": "\u21cc", + "Escr;": "\u2130", + "Esim;": "\u2a73", + "Eta;": "\u0397", + "Euml": "\xcb", + "Euml;": "\xcb", + "Exists;": "\u2203", + "ExponentialE;": "\u2147", + "Fcy;": "\u0424", + "Ffr;": "\U0001d509", + "FilledSmallSquare;": "\u25fc", + "FilledVerySmallSquare;": "\u25aa", + "Fopf;": "\U0001d53d", + "ForAll;": "\u2200", + "Fouriertrf;": "\u2131", + "Fscr;": "\u2131", + "GJcy;": "\u0403", + "GT": ">", + "GT;": ">", + "Gamma;": "\u0393", + "Gammad;": "\u03dc", + "Gbreve;": "\u011e", + "Gcedil;": "\u0122", + "Gcirc;": "\u011c", + "Gcy;": "\u0413", + "Gdot;": "\u0120", + "Gfr;": "\U0001d50a", + "Gg;": "\u22d9", + "Gopf;": "\U0001d53e", + "GreaterEqual;": "\u2265", + "GreaterEqualLess;": "\u22db", + "GreaterFullEqual;": "\u2267", + "GreaterGreater;": "\u2aa2", + "GreaterLess;": "\u2277", + "GreaterSlantEqual;": "\u2a7e", + "GreaterTilde;": "\u2273", + "Gscr;": "\U0001d4a2", + "Gt;": "\u226b", + "HARDcy;": "\u042a", + "Hacek;": "\u02c7", + "Hat;": "^", + "Hcirc;": "\u0124", + "Hfr;": "\u210c", + "HilbertSpace;": "\u210b", + "Hopf;": "\u210d", + "HorizontalLine;": "\u2500", + "Hscr;": "\u210b", + "Hstrok;": "\u0126", + "HumpDownHump;": "\u224e", + "HumpEqual;": "\u224f", + "IEcy;": "\u0415", + "IJlig;": "\u0132", + "IOcy;": "\u0401", + "Iacute": "\xcd", + "Iacute;": "\xcd", + "Icirc": "\xce", + "Icirc;": "\xce", + "Icy;": "\u0418", + "Idot;": "\u0130", + "Ifr;": "\u2111", + "Igrave": "\xcc", + "Igrave;": "\xcc", + "Im;": "\u2111", + "Imacr;": "\u012a", + "ImaginaryI;": "\u2148", + "Implies;": "\u21d2", + "Int;": "\u222c", + "Integral;": "\u222b", + "Intersection;": "\u22c2", + "InvisibleComma;": "\u2063", + "InvisibleTimes;": "\u2062", + "Iogon;": "\u012e", + "Iopf;": "\U0001d540", + "Iota;": "\u0399", + "Iscr;": "\u2110", + "Itilde;": "\u0128", + "Iukcy;": "\u0406", + "Iuml": "\xcf", + 
"Iuml;": "\xcf", + "Jcirc;": "\u0134", + "Jcy;": "\u0419", + "Jfr;": "\U0001d50d", + "Jopf;": "\U0001d541", + "Jscr;": "\U0001d4a5", + "Jsercy;": "\u0408", + "Jukcy;": "\u0404", + "KHcy;": "\u0425", + "KJcy;": "\u040c", + "Kappa;": "\u039a", + "Kcedil;": "\u0136", + "Kcy;": "\u041a", + "Kfr;": "\U0001d50e", + "Kopf;": "\U0001d542", + "Kscr;": "\U0001d4a6", + "LJcy;": "\u0409", + "LT": "<", + "LT;": "<", + "Lacute;": "\u0139", + "Lambda;": "\u039b", + "Lang;": "\u27ea", + "Laplacetrf;": "\u2112", + "Larr;": "\u219e", + "Lcaron;": "\u013d", + "Lcedil;": "\u013b", + "Lcy;": "\u041b", + "LeftAngleBracket;": "\u27e8", + "LeftArrow;": "\u2190", + "LeftArrowBar;": "\u21e4", + "LeftArrowRightArrow;": "\u21c6", + "LeftCeiling;": "\u2308", + "LeftDoubleBracket;": "\u27e6", + "LeftDownTeeVector;": "\u2961", + "LeftDownVector;": "\u21c3", + "LeftDownVectorBar;": "\u2959", + "LeftFloor;": "\u230a", + "LeftRightArrow;": "\u2194", + "LeftRightVector;": "\u294e", + "LeftTee;": "\u22a3", + "LeftTeeArrow;": "\u21a4", + "LeftTeeVector;": "\u295a", + "LeftTriangle;": "\u22b2", + "LeftTriangleBar;": "\u29cf", + "LeftTriangleEqual;": "\u22b4", + "LeftUpDownVector;": "\u2951", + "LeftUpTeeVector;": "\u2960", + "LeftUpVector;": "\u21bf", + "LeftUpVectorBar;": "\u2958", + "LeftVector;": "\u21bc", + "LeftVectorBar;": "\u2952", + "Leftarrow;": "\u21d0", + "Leftrightarrow;": "\u21d4", + "LessEqualGreater;": "\u22da", + "LessFullEqual;": "\u2266", + "LessGreater;": "\u2276", + "LessLess;": "\u2aa1", + "LessSlantEqual;": "\u2a7d", + "LessTilde;": "\u2272", + "Lfr;": "\U0001d50f", + "Ll;": "\u22d8", + "Lleftarrow;": "\u21da", + "Lmidot;": "\u013f", + "LongLeftArrow;": "\u27f5", + "LongLeftRightArrow;": "\u27f7", + "LongRightArrow;": "\u27f6", + "Longleftarrow;": "\u27f8", + "Longleftrightarrow;": "\u27fa", + "Longrightarrow;": "\u27f9", + "Lopf;": "\U0001d543", + "LowerLeftArrow;": "\u2199", + "LowerRightArrow;": "\u2198", + "Lscr;": "\u2112", + "Lsh;": "\u21b0", + "Lstrok;": "\u0141", + "Lt;": "\u226a", + "Map;": "\u2905", + "Mcy;": "\u041c", + "MediumSpace;": "\u205f", + "Mellintrf;": "\u2133", + "Mfr;": "\U0001d510", + "MinusPlus;": "\u2213", + "Mopf;": "\U0001d544", + "Mscr;": "\u2133", + "Mu;": "\u039c", + "NJcy;": "\u040a", + "Nacute;": "\u0143", + "Ncaron;": "\u0147", + "Ncedil;": "\u0145", + "Ncy;": "\u041d", + "NegativeMediumSpace;": "\u200b", + "NegativeThickSpace;": "\u200b", + "NegativeThinSpace;": "\u200b", + "NegativeVeryThinSpace;": "\u200b", + "NestedGreaterGreater;": "\u226b", + "NestedLessLess;": "\u226a", + "NewLine;": "\n", + "Nfr;": "\U0001d511", + "NoBreak;": "\u2060", + "NonBreakingSpace;": "\xa0", + "Nopf;": "\u2115", + "Not;": "\u2aec", + "NotCongruent;": "\u2262", + "NotCupCap;": "\u226d", + "NotDoubleVerticalBar;": "\u2226", + "NotElement;": "\u2209", + "NotEqual;": "\u2260", + "NotEqualTilde;": "\u2242\u0338", + "NotExists;": "\u2204", + "NotGreater;": "\u226f", + "NotGreaterEqual;": "\u2271", + "NotGreaterFullEqual;": "\u2267\u0338", + "NotGreaterGreater;": "\u226b\u0338", + "NotGreaterLess;": "\u2279", + "NotGreaterSlantEqual;": "\u2a7e\u0338", + "NotGreaterTilde;": "\u2275", + "NotHumpDownHump;": "\u224e\u0338", + "NotHumpEqual;": "\u224f\u0338", + "NotLeftTriangle;": "\u22ea", + "NotLeftTriangleBar;": "\u29cf\u0338", + "NotLeftTriangleEqual;": "\u22ec", + "NotLess;": "\u226e", + "NotLessEqual;": "\u2270", + "NotLessGreater;": "\u2278", + "NotLessLess;": "\u226a\u0338", + "NotLessSlantEqual;": "\u2a7d\u0338", + "NotLessTilde;": "\u2274", + "NotNestedGreaterGreater;": "\u2aa2\u0338", + 
"NotNestedLessLess;": "\u2aa1\u0338", + "NotPrecedes;": "\u2280", + "NotPrecedesEqual;": "\u2aaf\u0338", + "NotPrecedesSlantEqual;": "\u22e0", + "NotReverseElement;": "\u220c", + "NotRightTriangle;": "\u22eb", + "NotRightTriangleBar;": "\u29d0\u0338", + "NotRightTriangleEqual;": "\u22ed", + "NotSquareSubset;": "\u228f\u0338", + "NotSquareSubsetEqual;": "\u22e2", + "NotSquareSuperset;": "\u2290\u0338", + "NotSquareSupersetEqual;": "\u22e3", + "NotSubset;": "\u2282\u20d2", + "NotSubsetEqual;": "\u2288", + "NotSucceeds;": "\u2281", + "NotSucceedsEqual;": "\u2ab0\u0338", + "NotSucceedsSlantEqual;": "\u22e1", + "NotSucceedsTilde;": "\u227f\u0338", + "NotSuperset;": "\u2283\u20d2", + "NotSupersetEqual;": "\u2289", + "NotTilde;": "\u2241", + "NotTildeEqual;": "\u2244", + "NotTildeFullEqual;": "\u2247", + "NotTildeTilde;": "\u2249", + "NotVerticalBar;": "\u2224", + "Nscr;": "\U0001d4a9", + "Ntilde": "\xd1", + "Ntilde;": "\xd1", + "Nu;": "\u039d", + "OElig;": "\u0152", + "Oacute": "\xd3", + "Oacute;": "\xd3", + "Ocirc": "\xd4", + "Ocirc;": "\xd4", + "Ocy;": "\u041e", + "Odblac;": "\u0150", + "Ofr;": "\U0001d512", + "Ograve": "\xd2", + "Ograve;": "\xd2", + "Omacr;": "\u014c", + "Omega;": "\u03a9", + "Omicron;": "\u039f", + "Oopf;": "\U0001d546", + "OpenCurlyDoubleQuote;": "\u201c", + "OpenCurlyQuote;": "\u2018", + "Or;": "\u2a54", + "Oscr;": "\U0001d4aa", + "Oslash": "\xd8", + "Oslash;": "\xd8", + "Otilde": "\xd5", + "Otilde;": "\xd5", + "Otimes;": "\u2a37", + "Ouml": "\xd6", + "Ouml;": "\xd6", + "OverBar;": "\u203e", + "OverBrace;": "\u23de", + "OverBracket;": "\u23b4", + "OverParenthesis;": "\u23dc", + "PartialD;": "\u2202", + "Pcy;": "\u041f", + "Pfr;": "\U0001d513", + "Phi;": "\u03a6", + "Pi;": "\u03a0", + "PlusMinus;": "\xb1", + "Poincareplane;": "\u210c", + "Popf;": "\u2119", + "Pr;": "\u2abb", + "Precedes;": "\u227a", + "PrecedesEqual;": "\u2aaf", + "PrecedesSlantEqual;": "\u227c", + "PrecedesTilde;": "\u227e", + "Prime;": "\u2033", + "Product;": "\u220f", + "Proportion;": "\u2237", + "Proportional;": "\u221d", + "Pscr;": "\U0001d4ab", + "Psi;": "\u03a8", + "QUOT": "\"", + "QUOT;": "\"", + "Qfr;": "\U0001d514", + "Qopf;": "\u211a", + "Qscr;": "\U0001d4ac", + "RBarr;": "\u2910", + "REG": "\xae", + "REG;": "\xae", + "Racute;": "\u0154", + "Rang;": "\u27eb", + "Rarr;": "\u21a0", + "Rarrtl;": "\u2916", + "Rcaron;": "\u0158", + "Rcedil;": "\u0156", + "Rcy;": "\u0420", + "Re;": "\u211c", + "ReverseElement;": "\u220b", + "ReverseEquilibrium;": "\u21cb", + "ReverseUpEquilibrium;": "\u296f", + "Rfr;": "\u211c", + "Rho;": "\u03a1", + "RightAngleBracket;": "\u27e9", + "RightArrow;": "\u2192", + "RightArrowBar;": "\u21e5", + "RightArrowLeftArrow;": "\u21c4", + "RightCeiling;": "\u2309", + "RightDoubleBracket;": "\u27e7", + "RightDownTeeVector;": "\u295d", + "RightDownVector;": "\u21c2", + "RightDownVectorBar;": "\u2955", + "RightFloor;": "\u230b", + "RightTee;": "\u22a2", + "RightTeeArrow;": "\u21a6", + "RightTeeVector;": "\u295b", + "RightTriangle;": "\u22b3", + "RightTriangleBar;": "\u29d0", + "RightTriangleEqual;": "\u22b5", + "RightUpDownVector;": "\u294f", + "RightUpTeeVector;": "\u295c", + "RightUpVector;": "\u21be", + "RightUpVectorBar;": "\u2954", + "RightVector;": "\u21c0", + "RightVectorBar;": "\u2953", + "Rightarrow;": "\u21d2", + "Ropf;": "\u211d", + "RoundImplies;": "\u2970", + "Rrightarrow;": "\u21db", + "Rscr;": "\u211b", + "Rsh;": "\u21b1", + "RuleDelayed;": "\u29f4", + "SHCHcy;": "\u0429", + "SHcy;": "\u0428", + "SOFTcy;": "\u042c", + "Sacute;": "\u015a", + "Sc;": "\u2abc", + "Scaron;": 
"\u0160", + "Scedil;": "\u015e", + "Scirc;": "\u015c", + "Scy;": "\u0421", + "Sfr;": "\U0001d516", + "ShortDownArrow;": "\u2193", + "ShortLeftArrow;": "\u2190", + "ShortRightArrow;": "\u2192", + "ShortUpArrow;": "\u2191", + "Sigma;": "\u03a3", + "SmallCircle;": "\u2218", + "Sopf;": "\U0001d54a", + "Sqrt;": "\u221a", + "Square;": "\u25a1", + "SquareIntersection;": "\u2293", + "SquareSubset;": "\u228f", + "SquareSubsetEqual;": "\u2291", + "SquareSuperset;": "\u2290", + "SquareSupersetEqual;": "\u2292", + "SquareUnion;": "\u2294", + "Sscr;": "\U0001d4ae", + "Star;": "\u22c6", + "Sub;": "\u22d0", + "Subset;": "\u22d0", + "SubsetEqual;": "\u2286", + "Succeeds;": "\u227b", + "SucceedsEqual;": "\u2ab0", + "SucceedsSlantEqual;": "\u227d", + "SucceedsTilde;": "\u227f", + "SuchThat;": "\u220b", + "Sum;": "\u2211", + "Sup;": "\u22d1", + "Superset;": "\u2283", + "SupersetEqual;": "\u2287", + "Supset;": "\u22d1", + "THORN": "\xde", + "THORN;": "\xde", + "TRADE;": "\u2122", + "TSHcy;": "\u040b", + "TScy;": "\u0426", + "Tab;": "\t", + "Tau;": "\u03a4", + "Tcaron;": "\u0164", + "Tcedil;": "\u0162", + "Tcy;": "\u0422", + "Tfr;": "\U0001d517", + "Therefore;": "\u2234", + "Theta;": "\u0398", + "ThickSpace;": "\u205f\u200a", + "ThinSpace;": "\u2009", + "Tilde;": "\u223c", + "TildeEqual;": "\u2243", + "TildeFullEqual;": "\u2245", + "TildeTilde;": "\u2248", + "Topf;": "\U0001d54b", + "TripleDot;": "\u20db", + "Tscr;": "\U0001d4af", + "Tstrok;": "\u0166", + "Uacute": "\xda", + "Uacute;": "\xda", + "Uarr;": "\u219f", + "Uarrocir;": "\u2949", + "Ubrcy;": "\u040e", + "Ubreve;": "\u016c", + "Ucirc": "\xdb", + "Ucirc;": "\xdb", + "Ucy;": "\u0423", + "Udblac;": "\u0170", + "Ufr;": "\U0001d518", + "Ugrave": "\xd9", + "Ugrave;": "\xd9", + "Umacr;": "\u016a", + "UnderBar;": "_", + "UnderBrace;": "\u23df", + "UnderBracket;": "\u23b5", + "UnderParenthesis;": "\u23dd", + "Union;": "\u22c3", + "UnionPlus;": "\u228e", + "Uogon;": "\u0172", + "Uopf;": "\U0001d54c", + "UpArrow;": "\u2191", + "UpArrowBar;": "\u2912", + "UpArrowDownArrow;": "\u21c5", + "UpDownArrow;": "\u2195", + "UpEquilibrium;": "\u296e", + "UpTee;": "\u22a5", + "UpTeeArrow;": "\u21a5", + "Uparrow;": "\u21d1", + "Updownarrow;": "\u21d5", + "UpperLeftArrow;": "\u2196", + "UpperRightArrow;": "\u2197", + "Upsi;": "\u03d2", + "Upsilon;": "\u03a5", + "Uring;": "\u016e", + "Uscr;": "\U0001d4b0", + "Utilde;": "\u0168", + "Uuml": "\xdc", + "Uuml;": "\xdc", + "VDash;": "\u22ab", + "Vbar;": "\u2aeb", + "Vcy;": "\u0412", + "Vdash;": "\u22a9", + "Vdashl;": "\u2ae6", + "Vee;": "\u22c1", + "Verbar;": "\u2016", + "Vert;": "\u2016", + "VerticalBar;": "\u2223", + "VerticalLine;": "|", + "VerticalSeparator;": "\u2758", + "VerticalTilde;": "\u2240", + "VeryThinSpace;": "\u200a", + "Vfr;": "\U0001d519", + "Vopf;": "\U0001d54d", + "Vscr;": "\U0001d4b1", + "Vvdash;": "\u22aa", + "Wcirc;": "\u0174", + "Wedge;": "\u22c0", + "Wfr;": "\U0001d51a", + "Wopf;": "\U0001d54e", + "Wscr;": "\U0001d4b2", + "Xfr;": "\U0001d51b", + "Xi;": "\u039e", + "Xopf;": "\U0001d54f", + "Xscr;": "\U0001d4b3", + "YAcy;": "\u042f", + "YIcy;": "\u0407", + "YUcy;": "\u042e", + "Yacute": "\xdd", + "Yacute;": "\xdd", + "Ycirc;": "\u0176", + "Ycy;": "\u042b", + "Yfr;": "\U0001d51c", + "Yopf;": "\U0001d550", + "Yscr;": "\U0001d4b4", + "Yuml;": "\u0178", + "ZHcy;": "\u0416", + "Zacute;": "\u0179", + "Zcaron;": "\u017d", + "Zcy;": "\u0417", + "Zdot;": "\u017b", + "ZeroWidthSpace;": "\u200b", + "Zeta;": "\u0396", + "Zfr;": "\u2128", + "Zopf;": "\u2124", + "Zscr;": "\U0001d4b5", + "aacute": "\xe1", + "aacute;": "\xe1", 
+ "abreve;": "\u0103", + "ac;": "\u223e", + "acE;": "\u223e\u0333", + "acd;": "\u223f", + "acirc": "\xe2", + "acirc;": "\xe2", + "acute": "\xb4", + "acute;": "\xb4", + "acy;": "\u0430", + "aelig": "\xe6", + "aelig;": "\xe6", + "af;": "\u2061", + "afr;": "\U0001d51e", + "agrave": "\xe0", + "agrave;": "\xe0", + "alefsym;": "\u2135", + "aleph;": "\u2135", + "alpha;": "\u03b1", + "amacr;": "\u0101", + "amalg;": "\u2a3f", + "amp": "&", + "amp;": "&", + "and;": "\u2227", + "andand;": "\u2a55", + "andd;": "\u2a5c", + "andslope;": "\u2a58", + "andv;": "\u2a5a", + "ang;": "\u2220", + "ange;": "\u29a4", + "angle;": "\u2220", + "angmsd;": "\u2221", + "angmsdaa;": "\u29a8", + "angmsdab;": "\u29a9", + "angmsdac;": "\u29aa", + "angmsdad;": "\u29ab", + "angmsdae;": "\u29ac", + "angmsdaf;": "\u29ad", + "angmsdag;": "\u29ae", + "angmsdah;": "\u29af", + "angrt;": "\u221f", + "angrtvb;": "\u22be", + "angrtvbd;": "\u299d", + "angsph;": "\u2222", + "angst;": "\xc5", + "angzarr;": "\u237c", + "aogon;": "\u0105", + "aopf;": "\U0001d552", + "ap;": "\u2248", + "apE;": "\u2a70", + "apacir;": "\u2a6f", + "ape;": "\u224a", + "apid;": "\u224b", + "apos;": "'", + "approx;": "\u2248", + "approxeq;": "\u224a", + "aring": "\xe5", + "aring;": "\xe5", + "ascr;": "\U0001d4b6", + "ast;": "*", + "asymp;": "\u2248", + "asympeq;": "\u224d", + "atilde": "\xe3", + "atilde;": "\xe3", + "auml": "\xe4", + "auml;": "\xe4", + "awconint;": "\u2233", + "awint;": "\u2a11", + "bNot;": "\u2aed", + "backcong;": "\u224c", + "backepsilon;": "\u03f6", + "backprime;": "\u2035", + "backsim;": "\u223d", + "backsimeq;": "\u22cd", + "barvee;": "\u22bd", + "barwed;": "\u2305", + "barwedge;": "\u2305", + "bbrk;": "\u23b5", + "bbrktbrk;": "\u23b6", + "bcong;": "\u224c", + "bcy;": "\u0431", + "bdquo;": "\u201e", + "becaus;": "\u2235", + "because;": "\u2235", + "bemptyv;": "\u29b0", + "bepsi;": "\u03f6", + "bernou;": "\u212c", + "beta;": "\u03b2", + "beth;": "\u2136", + "between;": "\u226c", + "bfr;": "\U0001d51f", + "bigcap;": "\u22c2", + "bigcirc;": "\u25ef", + "bigcup;": "\u22c3", + "bigodot;": "\u2a00", + "bigoplus;": "\u2a01", + "bigotimes;": "\u2a02", + "bigsqcup;": "\u2a06", + "bigstar;": "\u2605", + "bigtriangledown;": "\u25bd", + "bigtriangleup;": "\u25b3", + "biguplus;": "\u2a04", + "bigvee;": "\u22c1", + "bigwedge;": "\u22c0", + "bkarow;": "\u290d", + "blacklozenge;": "\u29eb", + "blacksquare;": "\u25aa", + "blacktriangle;": "\u25b4", + "blacktriangledown;": "\u25be", + "blacktriangleleft;": "\u25c2", + "blacktriangleright;": "\u25b8", + "blank;": "\u2423", + "blk12;": "\u2592", + "blk14;": "\u2591", + "blk34;": "\u2593", + "block;": "\u2588", + "bne;": "=\u20e5", + "bnequiv;": "\u2261\u20e5", + "bnot;": "\u2310", + "bopf;": "\U0001d553", + "bot;": "\u22a5", + "bottom;": "\u22a5", + "bowtie;": "\u22c8", + "boxDL;": "\u2557", + "boxDR;": "\u2554", + "boxDl;": "\u2556", + "boxDr;": "\u2553", + "boxH;": "\u2550", + "boxHD;": "\u2566", + "boxHU;": "\u2569", + "boxHd;": "\u2564", + "boxHu;": "\u2567", + "boxUL;": "\u255d", + "boxUR;": "\u255a", + "boxUl;": "\u255c", + "boxUr;": "\u2559", + "boxV;": "\u2551", + "boxVH;": "\u256c", + "boxVL;": "\u2563", + "boxVR;": "\u2560", + "boxVh;": "\u256b", + "boxVl;": "\u2562", + "boxVr;": "\u255f", + "boxbox;": "\u29c9", + "boxdL;": "\u2555", + "boxdR;": "\u2552", + "boxdl;": "\u2510", + "boxdr;": "\u250c", + "boxh;": "\u2500", + "boxhD;": "\u2565", + "boxhU;": "\u2568", + "boxhd;": "\u252c", + "boxhu;": "\u2534", + "boxminus;": "\u229f", + "boxplus;": "\u229e", + "boxtimes;": "\u22a0", + "boxuL;": "\u255b", 
+ "boxuR;": "\u2558", + "boxul;": "\u2518", + "boxur;": "\u2514", + "boxv;": "\u2502", + "boxvH;": "\u256a", + "boxvL;": "\u2561", + "boxvR;": "\u255e", + "boxvh;": "\u253c", + "boxvl;": "\u2524", + "boxvr;": "\u251c", + "bprime;": "\u2035", + "breve;": "\u02d8", + "brvbar": "\xa6", + "brvbar;": "\xa6", + "bscr;": "\U0001d4b7", + "bsemi;": "\u204f", + "bsim;": "\u223d", + "bsime;": "\u22cd", + "bsol;": "\\", + "bsolb;": "\u29c5", + "bsolhsub;": "\u27c8", + "bull;": "\u2022", + "bullet;": "\u2022", + "bump;": "\u224e", + "bumpE;": "\u2aae", + "bumpe;": "\u224f", + "bumpeq;": "\u224f", + "cacute;": "\u0107", + "cap;": "\u2229", + "capand;": "\u2a44", + "capbrcup;": "\u2a49", + "capcap;": "\u2a4b", + "capcup;": "\u2a47", + "capdot;": "\u2a40", + "caps;": "\u2229\ufe00", + "caret;": "\u2041", + "caron;": "\u02c7", + "ccaps;": "\u2a4d", + "ccaron;": "\u010d", + "ccedil": "\xe7", + "ccedil;": "\xe7", + "ccirc;": "\u0109", + "ccups;": "\u2a4c", + "ccupssm;": "\u2a50", + "cdot;": "\u010b", + "cedil": "\xb8", + "cedil;": "\xb8", + "cemptyv;": "\u29b2", + "cent": "\xa2", + "cent;": "\xa2", + "centerdot;": "\xb7", + "cfr;": "\U0001d520", + "chcy;": "\u0447", + "check;": "\u2713", + "checkmark;": "\u2713", + "chi;": "\u03c7", + "cir;": "\u25cb", + "cirE;": "\u29c3", + "circ;": "\u02c6", + "circeq;": "\u2257", + "circlearrowleft;": "\u21ba", + "circlearrowright;": "\u21bb", + "circledR;": "\xae", + "circledS;": "\u24c8", + "circledast;": "\u229b", + "circledcirc;": "\u229a", + "circleddash;": "\u229d", + "cire;": "\u2257", + "cirfnint;": "\u2a10", + "cirmid;": "\u2aef", + "cirscir;": "\u29c2", + "clubs;": "\u2663", + "clubsuit;": "\u2663", + "colon;": ":", + "colone;": "\u2254", + "coloneq;": "\u2254", + "comma;": ",", + "commat;": "@", + "comp;": "\u2201", + "compfn;": "\u2218", + "complement;": "\u2201", + "complexes;": "\u2102", + "cong;": "\u2245", + "congdot;": "\u2a6d", + "conint;": "\u222e", + "copf;": "\U0001d554", + "coprod;": "\u2210", + "copy": "\xa9", + "copy;": "\xa9", + "copysr;": "\u2117", + "crarr;": "\u21b5", + "cross;": "\u2717", + "cscr;": "\U0001d4b8", + "csub;": "\u2acf", + "csube;": "\u2ad1", + "csup;": "\u2ad0", + "csupe;": "\u2ad2", + "ctdot;": "\u22ef", + "cudarrl;": "\u2938", + "cudarrr;": "\u2935", + "cuepr;": "\u22de", + "cuesc;": "\u22df", + "cularr;": "\u21b6", + "cularrp;": "\u293d", + "cup;": "\u222a", + "cupbrcap;": "\u2a48", + "cupcap;": "\u2a46", + "cupcup;": "\u2a4a", + "cupdot;": "\u228d", + "cupor;": "\u2a45", + "cups;": "\u222a\ufe00", + "curarr;": "\u21b7", + "curarrm;": "\u293c", + "curlyeqprec;": "\u22de", + "curlyeqsucc;": "\u22df", + "curlyvee;": "\u22ce", + "curlywedge;": "\u22cf", + "curren": "\xa4", + "curren;": "\xa4", + "curvearrowleft;": "\u21b6", + "curvearrowright;": "\u21b7", + "cuvee;": "\u22ce", + "cuwed;": "\u22cf", + "cwconint;": "\u2232", + "cwint;": "\u2231", + "cylcty;": "\u232d", + "dArr;": "\u21d3", + "dHar;": "\u2965", + "dagger;": "\u2020", + "daleth;": "\u2138", + "darr;": "\u2193", + "dash;": "\u2010", + "dashv;": "\u22a3", + "dbkarow;": "\u290f", + "dblac;": "\u02dd", + "dcaron;": "\u010f", + "dcy;": "\u0434", + "dd;": "\u2146", + "ddagger;": "\u2021", + "ddarr;": "\u21ca", + "ddotseq;": "\u2a77", + "deg": "\xb0", + "deg;": "\xb0", + "delta;": "\u03b4", + "demptyv;": "\u29b1", + "dfisht;": "\u297f", + "dfr;": "\U0001d521", + "dharl;": "\u21c3", + "dharr;": "\u21c2", + "diam;": "\u22c4", + "diamond;": "\u22c4", + "diamondsuit;": "\u2666", + "diams;": "\u2666", + "die;": "\xa8", + "digamma;": "\u03dd", + "disin;": "\u22f2", + "div;": 
"\xf7", + "divide": "\xf7", + "divide;": "\xf7", + "divideontimes;": "\u22c7", + "divonx;": "\u22c7", + "djcy;": "\u0452", + "dlcorn;": "\u231e", + "dlcrop;": "\u230d", + "dollar;": "$", + "dopf;": "\U0001d555", + "dot;": "\u02d9", + "doteq;": "\u2250", + "doteqdot;": "\u2251", + "dotminus;": "\u2238", + "dotplus;": "\u2214", + "dotsquare;": "\u22a1", + "doublebarwedge;": "\u2306", + "downarrow;": "\u2193", + "downdownarrows;": "\u21ca", + "downharpoonleft;": "\u21c3", + "downharpoonright;": "\u21c2", + "drbkarow;": "\u2910", + "drcorn;": "\u231f", + "drcrop;": "\u230c", + "dscr;": "\U0001d4b9", + "dscy;": "\u0455", + "dsol;": "\u29f6", + "dstrok;": "\u0111", + "dtdot;": "\u22f1", + "dtri;": "\u25bf", + "dtrif;": "\u25be", + "duarr;": "\u21f5", + "duhar;": "\u296f", + "dwangle;": "\u29a6", + "dzcy;": "\u045f", + "dzigrarr;": "\u27ff", + "eDDot;": "\u2a77", + "eDot;": "\u2251", + "eacute": "\xe9", + "eacute;": "\xe9", + "easter;": "\u2a6e", + "ecaron;": "\u011b", + "ecir;": "\u2256", + "ecirc": "\xea", + "ecirc;": "\xea", + "ecolon;": "\u2255", + "ecy;": "\u044d", + "edot;": "\u0117", + "ee;": "\u2147", + "efDot;": "\u2252", + "efr;": "\U0001d522", + "eg;": "\u2a9a", + "egrave": "\xe8", + "egrave;": "\xe8", + "egs;": "\u2a96", + "egsdot;": "\u2a98", + "el;": "\u2a99", + "elinters;": "\u23e7", + "ell;": "\u2113", + "els;": "\u2a95", + "elsdot;": "\u2a97", + "emacr;": "\u0113", + "empty;": "\u2205", + "emptyset;": "\u2205", + "emptyv;": "\u2205", + "emsp13;": "\u2004", + "emsp14;": "\u2005", + "emsp;": "\u2003", + "eng;": "\u014b", + "ensp;": "\u2002", + "eogon;": "\u0119", + "eopf;": "\U0001d556", + "epar;": "\u22d5", + "eparsl;": "\u29e3", + "eplus;": "\u2a71", + "epsi;": "\u03b5", + "epsilon;": "\u03b5", + "epsiv;": "\u03f5", + "eqcirc;": "\u2256", + "eqcolon;": "\u2255", + "eqsim;": "\u2242", + "eqslantgtr;": "\u2a96", + "eqslantless;": "\u2a95", + "equals;": "=", + "equest;": "\u225f", + "equiv;": "\u2261", + "equivDD;": "\u2a78", + "eqvparsl;": "\u29e5", + "erDot;": "\u2253", + "erarr;": "\u2971", + "escr;": "\u212f", + "esdot;": "\u2250", + "esim;": "\u2242", + "eta;": "\u03b7", + "eth": "\xf0", + "eth;": "\xf0", + "euml": "\xeb", + "euml;": "\xeb", + "euro;": "\u20ac", + "excl;": "!", + "exist;": "\u2203", + "expectation;": "\u2130", + "exponentiale;": "\u2147", + "fallingdotseq;": "\u2252", + "fcy;": "\u0444", + "female;": "\u2640", + "ffilig;": "\ufb03", + "fflig;": "\ufb00", + "ffllig;": "\ufb04", + "ffr;": "\U0001d523", + "filig;": "\ufb01", + "fjlig;": "fj", + "flat;": "\u266d", + "fllig;": "\ufb02", + "fltns;": "\u25b1", + "fnof;": "\u0192", + "fopf;": "\U0001d557", + "forall;": "\u2200", + "fork;": "\u22d4", + "forkv;": "\u2ad9", + "fpartint;": "\u2a0d", + "frac12": "\xbd", + "frac12;": "\xbd", + "frac13;": "\u2153", + "frac14": "\xbc", + "frac14;": "\xbc", + "frac15;": "\u2155", + "frac16;": "\u2159", + "frac18;": "\u215b", + "frac23;": "\u2154", + "frac25;": "\u2156", + "frac34": "\xbe", + "frac34;": "\xbe", + "frac35;": "\u2157", + "frac38;": "\u215c", + "frac45;": "\u2158", + "frac56;": "\u215a", + "frac58;": "\u215d", + "frac78;": "\u215e", + "frasl;": "\u2044", + "frown;": "\u2322", + "fscr;": "\U0001d4bb", + "gE;": "\u2267", + "gEl;": "\u2a8c", + "gacute;": "\u01f5", + "gamma;": "\u03b3", + "gammad;": "\u03dd", + "gap;": "\u2a86", + "gbreve;": "\u011f", + "gcirc;": "\u011d", + "gcy;": "\u0433", + "gdot;": "\u0121", + "ge;": "\u2265", + "gel;": "\u22db", + "geq;": "\u2265", + "geqq;": "\u2267", + "geqslant;": "\u2a7e", + "ges;": "\u2a7e", + "gescc;": "\u2aa9", + 
"gesdot;": "\u2a80", + "gesdoto;": "\u2a82", + "gesdotol;": "\u2a84", + "gesl;": "\u22db\ufe00", + "gesles;": "\u2a94", + "gfr;": "\U0001d524", + "gg;": "\u226b", + "ggg;": "\u22d9", + "gimel;": "\u2137", + "gjcy;": "\u0453", + "gl;": "\u2277", + "glE;": "\u2a92", + "gla;": "\u2aa5", + "glj;": "\u2aa4", + "gnE;": "\u2269", + "gnap;": "\u2a8a", + "gnapprox;": "\u2a8a", + "gne;": "\u2a88", + "gneq;": "\u2a88", + "gneqq;": "\u2269", + "gnsim;": "\u22e7", + "gopf;": "\U0001d558", + "grave;": "`", + "gscr;": "\u210a", + "gsim;": "\u2273", + "gsime;": "\u2a8e", + "gsiml;": "\u2a90", + "gt": ">", + "gt;": ">", + "gtcc;": "\u2aa7", + "gtcir;": "\u2a7a", + "gtdot;": "\u22d7", + "gtlPar;": "\u2995", + "gtquest;": "\u2a7c", + "gtrapprox;": "\u2a86", + "gtrarr;": "\u2978", + "gtrdot;": "\u22d7", + "gtreqless;": "\u22db", + "gtreqqless;": "\u2a8c", + "gtrless;": "\u2277", + "gtrsim;": "\u2273", + "gvertneqq;": "\u2269\ufe00", + "gvnE;": "\u2269\ufe00", + "hArr;": "\u21d4", + "hairsp;": "\u200a", + "half;": "\xbd", + "hamilt;": "\u210b", + "hardcy;": "\u044a", + "harr;": "\u2194", + "harrcir;": "\u2948", + "harrw;": "\u21ad", + "hbar;": "\u210f", + "hcirc;": "\u0125", + "hearts;": "\u2665", + "heartsuit;": "\u2665", + "hellip;": "\u2026", + "hercon;": "\u22b9", + "hfr;": "\U0001d525", + "hksearow;": "\u2925", + "hkswarow;": "\u2926", + "hoarr;": "\u21ff", + "homtht;": "\u223b", + "hookleftarrow;": "\u21a9", + "hookrightarrow;": "\u21aa", + "hopf;": "\U0001d559", + "horbar;": "\u2015", + "hscr;": "\U0001d4bd", + "hslash;": "\u210f", + "hstrok;": "\u0127", + "hybull;": "\u2043", + "hyphen;": "\u2010", + "iacute": "\xed", + "iacute;": "\xed", + "ic;": "\u2063", + "icirc": "\xee", + "icirc;": "\xee", + "icy;": "\u0438", + "iecy;": "\u0435", + "iexcl": "\xa1", + "iexcl;": "\xa1", + "iff;": "\u21d4", + "ifr;": "\U0001d526", + "igrave": "\xec", + "igrave;": "\xec", + "ii;": "\u2148", + "iiiint;": "\u2a0c", + "iiint;": "\u222d", + "iinfin;": "\u29dc", + "iiota;": "\u2129", + "ijlig;": "\u0133", + "imacr;": "\u012b", + "image;": "\u2111", + "imagline;": "\u2110", + "imagpart;": "\u2111", + "imath;": "\u0131", + "imof;": "\u22b7", + "imped;": "\u01b5", + "in;": "\u2208", + "incare;": "\u2105", + "infin;": "\u221e", + "infintie;": "\u29dd", + "inodot;": "\u0131", + "int;": "\u222b", + "intcal;": "\u22ba", + "integers;": "\u2124", + "intercal;": "\u22ba", + "intlarhk;": "\u2a17", + "intprod;": "\u2a3c", + "iocy;": "\u0451", + "iogon;": "\u012f", + "iopf;": "\U0001d55a", + "iota;": "\u03b9", + "iprod;": "\u2a3c", + "iquest": "\xbf", + "iquest;": "\xbf", + "iscr;": "\U0001d4be", + "isin;": "\u2208", + "isinE;": "\u22f9", + "isindot;": "\u22f5", + "isins;": "\u22f4", + "isinsv;": "\u22f3", + "isinv;": "\u2208", + "it;": "\u2062", + "itilde;": "\u0129", + "iukcy;": "\u0456", + "iuml": "\xef", + "iuml;": "\xef", + "jcirc;": "\u0135", + "jcy;": "\u0439", + "jfr;": "\U0001d527", + "jmath;": "\u0237", + "jopf;": "\U0001d55b", + "jscr;": "\U0001d4bf", + "jsercy;": "\u0458", + "jukcy;": "\u0454", + "kappa;": "\u03ba", + "kappav;": "\u03f0", + "kcedil;": "\u0137", + "kcy;": "\u043a", + "kfr;": "\U0001d528", + "kgreen;": "\u0138", + "khcy;": "\u0445", + "kjcy;": "\u045c", + "kopf;": "\U0001d55c", + "kscr;": "\U0001d4c0", + "lAarr;": "\u21da", + "lArr;": "\u21d0", + "lAtail;": "\u291b", + "lBarr;": "\u290e", + "lE;": "\u2266", + "lEg;": "\u2a8b", + "lHar;": "\u2962", + "lacute;": "\u013a", + "laemptyv;": "\u29b4", + "lagran;": "\u2112", + "lambda;": "\u03bb", + "lang;": "\u27e8", + "langd;": "\u2991", + "langle;": "\u27e8", + 
"lap;": "\u2a85", + "laquo": "\xab", + "laquo;": "\xab", + "larr;": "\u2190", + "larrb;": "\u21e4", + "larrbfs;": "\u291f", + "larrfs;": "\u291d", + "larrhk;": "\u21a9", + "larrlp;": "\u21ab", + "larrpl;": "\u2939", + "larrsim;": "\u2973", + "larrtl;": "\u21a2", + "lat;": "\u2aab", + "latail;": "\u2919", + "late;": "\u2aad", + "lates;": "\u2aad\ufe00", + "lbarr;": "\u290c", + "lbbrk;": "\u2772", + "lbrace;": "{", + "lbrack;": "[", + "lbrke;": "\u298b", + "lbrksld;": "\u298f", + "lbrkslu;": "\u298d", + "lcaron;": "\u013e", + "lcedil;": "\u013c", + "lceil;": "\u2308", + "lcub;": "{", + "lcy;": "\u043b", + "ldca;": "\u2936", + "ldquo;": "\u201c", + "ldquor;": "\u201e", + "ldrdhar;": "\u2967", + "ldrushar;": "\u294b", + "ldsh;": "\u21b2", + "le;": "\u2264", + "leftarrow;": "\u2190", + "leftarrowtail;": "\u21a2", + "leftharpoondown;": "\u21bd", + "leftharpoonup;": "\u21bc", + "leftleftarrows;": "\u21c7", + "leftrightarrow;": "\u2194", + "leftrightarrows;": "\u21c6", + "leftrightharpoons;": "\u21cb", + "leftrightsquigarrow;": "\u21ad", + "leftthreetimes;": "\u22cb", + "leg;": "\u22da", + "leq;": "\u2264", + "leqq;": "\u2266", + "leqslant;": "\u2a7d", + "les;": "\u2a7d", + "lescc;": "\u2aa8", + "lesdot;": "\u2a7f", + "lesdoto;": "\u2a81", + "lesdotor;": "\u2a83", + "lesg;": "\u22da\ufe00", + "lesges;": "\u2a93", + "lessapprox;": "\u2a85", + "lessdot;": "\u22d6", + "lesseqgtr;": "\u22da", + "lesseqqgtr;": "\u2a8b", + "lessgtr;": "\u2276", + "lesssim;": "\u2272", + "lfisht;": "\u297c", + "lfloor;": "\u230a", + "lfr;": "\U0001d529", + "lg;": "\u2276", + "lgE;": "\u2a91", + "lhard;": "\u21bd", + "lharu;": "\u21bc", + "lharul;": "\u296a", + "lhblk;": "\u2584", + "ljcy;": "\u0459", + "ll;": "\u226a", + "llarr;": "\u21c7", + "llcorner;": "\u231e", + "llhard;": "\u296b", + "lltri;": "\u25fa", + "lmidot;": "\u0140", + "lmoust;": "\u23b0", + "lmoustache;": "\u23b0", + "lnE;": "\u2268", + "lnap;": "\u2a89", + "lnapprox;": "\u2a89", + "lne;": "\u2a87", + "lneq;": "\u2a87", + "lneqq;": "\u2268", + "lnsim;": "\u22e6", + "loang;": "\u27ec", + "loarr;": "\u21fd", + "lobrk;": "\u27e6", + "longleftarrow;": "\u27f5", + "longleftrightarrow;": "\u27f7", + "longmapsto;": "\u27fc", + "longrightarrow;": "\u27f6", + "looparrowleft;": "\u21ab", + "looparrowright;": "\u21ac", + "lopar;": "\u2985", + "lopf;": "\U0001d55d", + "loplus;": "\u2a2d", + "lotimes;": "\u2a34", + "lowast;": "\u2217", + "lowbar;": "_", + "loz;": "\u25ca", + "lozenge;": "\u25ca", + "lozf;": "\u29eb", + "lpar;": "(", + "lparlt;": "\u2993", + "lrarr;": "\u21c6", + "lrcorner;": "\u231f", + "lrhar;": "\u21cb", + "lrhard;": "\u296d", + "lrm;": "\u200e", + "lrtri;": "\u22bf", + "lsaquo;": "\u2039", + "lscr;": "\U0001d4c1", + "lsh;": "\u21b0", + "lsim;": "\u2272", + "lsime;": "\u2a8d", + "lsimg;": "\u2a8f", + "lsqb;": "[", + "lsquo;": "\u2018", + "lsquor;": "\u201a", + "lstrok;": "\u0142", + "lt": "<", + "lt;": "<", + "ltcc;": "\u2aa6", + "ltcir;": "\u2a79", + "ltdot;": "\u22d6", + "lthree;": "\u22cb", + "ltimes;": "\u22c9", + "ltlarr;": "\u2976", + "ltquest;": "\u2a7b", + "ltrPar;": "\u2996", + "ltri;": "\u25c3", + "ltrie;": "\u22b4", + "ltrif;": "\u25c2", + "lurdshar;": "\u294a", + "luruhar;": "\u2966", + "lvertneqq;": "\u2268\ufe00", + "lvnE;": "\u2268\ufe00", + "mDDot;": "\u223a", + "macr": "\xaf", + "macr;": "\xaf", + "male;": "\u2642", + "malt;": "\u2720", + "maltese;": "\u2720", + "map;": "\u21a6", + "mapsto;": "\u21a6", + "mapstodown;": "\u21a7", + "mapstoleft;": "\u21a4", + "mapstoup;": "\u21a5", + "marker;": "\u25ae", + "mcomma;": "\u2a29", + 
"mcy;": "\u043c", + "mdash;": "\u2014", + "measuredangle;": "\u2221", + "mfr;": "\U0001d52a", + "mho;": "\u2127", + "micro": "\xb5", + "micro;": "\xb5", + "mid;": "\u2223", + "midast;": "*", + "midcir;": "\u2af0", + "middot": "\xb7", + "middot;": "\xb7", + "minus;": "\u2212", + "minusb;": "\u229f", + "minusd;": "\u2238", + "minusdu;": "\u2a2a", + "mlcp;": "\u2adb", + "mldr;": "\u2026", + "mnplus;": "\u2213", + "models;": "\u22a7", + "mopf;": "\U0001d55e", + "mp;": "\u2213", + "mscr;": "\U0001d4c2", + "mstpos;": "\u223e", + "mu;": "\u03bc", + "multimap;": "\u22b8", + "mumap;": "\u22b8", + "nGg;": "\u22d9\u0338", + "nGt;": "\u226b\u20d2", + "nGtv;": "\u226b\u0338", + "nLeftarrow;": "\u21cd", + "nLeftrightarrow;": "\u21ce", + "nLl;": "\u22d8\u0338", + "nLt;": "\u226a\u20d2", + "nLtv;": "\u226a\u0338", + "nRightarrow;": "\u21cf", + "nVDash;": "\u22af", + "nVdash;": "\u22ae", + "nabla;": "\u2207", + "nacute;": "\u0144", + "nang;": "\u2220\u20d2", + "nap;": "\u2249", + "napE;": "\u2a70\u0338", + "napid;": "\u224b\u0338", + "napos;": "\u0149", + "napprox;": "\u2249", + "natur;": "\u266e", + "natural;": "\u266e", + "naturals;": "\u2115", + "nbsp": "\xa0", + "nbsp;": "\xa0", + "nbump;": "\u224e\u0338", + "nbumpe;": "\u224f\u0338", + "ncap;": "\u2a43", + "ncaron;": "\u0148", + "ncedil;": "\u0146", + "ncong;": "\u2247", + "ncongdot;": "\u2a6d\u0338", + "ncup;": "\u2a42", + "ncy;": "\u043d", + "ndash;": "\u2013", + "ne;": "\u2260", + "neArr;": "\u21d7", + "nearhk;": "\u2924", + "nearr;": "\u2197", + "nearrow;": "\u2197", + "nedot;": "\u2250\u0338", + "nequiv;": "\u2262", + "nesear;": "\u2928", + "nesim;": "\u2242\u0338", + "nexist;": "\u2204", + "nexists;": "\u2204", + "nfr;": "\U0001d52b", + "ngE;": "\u2267\u0338", + "nge;": "\u2271", + "ngeq;": "\u2271", + "ngeqq;": "\u2267\u0338", + "ngeqslant;": "\u2a7e\u0338", + "nges;": "\u2a7e\u0338", + "ngsim;": "\u2275", + "ngt;": "\u226f", + "ngtr;": "\u226f", + "nhArr;": "\u21ce", + "nharr;": "\u21ae", + "nhpar;": "\u2af2", + "ni;": "\u220b", + "nis;": "\u22fc", + "nisd;": "\u22fa", + "niv;": "\u220b", + "njcy;": "\u045a", + "nlArr;": "\u21cd", + "nlE;": "\u2266\u0338", + "nlarr;": "\u219a", + "nldr;": "\u2025", + "nle;": "\u2270", + "nleftarrow;": "\u219a", + "nleftrightarrow;": "\u21ae", + "nleq;": "\u2270", + "nleqq;": "\u2266\u0338", + "nleqslant;": "\u2a7d\u0338", + "nles;": "\u2a7d\u0338", + "nless;": "\u226e", + "nlsim;": "\u2274", + "nlt;": "\u226e", + "nltri;": "\u22ea", + "nltrie;": "\u22ec", + "nmid;": "\u2224", + "nopf;": "\U0001d55f", + "not": "\xac", + "not;": "\xac", + "notin;": "\u2209", + "notinE;": "\u22f9\u0338", + "notindot;": "\u22f5\u0338", + "notinva;": "\u2209", + "notinvb;": "\u22f7", + "notinvc;": "\u22f6", + "notni;": "\u220c", + "notniva;": "\u220c", + "notnivb;": "\u22fe", + "notnivc;": "\u22fd", + "npar;": "\u2226", + "nparallel;": "\u2226", + "nparsl;": "\u2afd\u20e5", + "npart;": "\u2202\u0338", + "npolint;": "\u2a14", + "npr;": "\u2280", + "nprcue;": "\u22e0", + "npre;": "\u2aaf\u0338", + "nprec;": "\u2280", + "npreceq;": "\u2aaf\u0338", + "nrArr;": "\u21cf", + "nrarr;": "\u219b", + "nrarrc;": "\u2933\u0338", + "nrarrw;": "\u219d\u0338", + "nrightarrow;": "\u219b", + "nrtri;": "\u22eb", + "nrtrie;": "\u22ed", + "nsc;": "\u2281", + "nsccue;": "\u22e1", + "nsce;": "\u2ab0\u0338", + "nscr;": "\U0001d4c3", + "nshortmid;": "\u2224", + "nshortparallel;": "\u2226", + "nsim;": "\u2241", + "nsime;": "\u2244", + "nsimeq;": "\u2244", + "nsmid;": "\u2224", + "nspar;": "\u2226", + "nsqsube;": "\u22e2", + "nsqsupe;": "\u22e3", + "nsub;": 
"\u2284", + "nsubE;": "\u2ac5\u0338", + "nsube;": "\u2288", + "nsubset;": "\u2282\u20d2", + "nsubseteq;": "\u2288", + "nsubseteqq;": "\u2ac5\u0338", + "nsucc;": "\u2281", + "nsucceq;": "\u2ab0\u0338", + "nsup;": "\u2285", + "nsupE;": "\u2ac6\u0338", + "nsupe;": "\u2289", + "nsupset;": "\u2283\u20d2", + "nsupseteq;": "\u2289", + "nsupseteqq;": "\u2ac6\u0338", + "ntgl;": "\u2279", + "ntilde": "\xf1", + "ntilde;": "\xf1", + "ntlg;": "\u2278", + "ntriangleleft;": "\u22ea", + "ntrianglelefteq;": "\u22ec", + "ntriangleright;": "\u22eb", + "ntrianglerighteq;": "\u22ed", + "nu;": "\u03bd", + "num;": "#", + "numero;": "\u2116", + "numsp;": "\u2007", + "nvDash;": "\u22ad", + "nvHarr;": "\u2904", + "nvap;": "\u224d\u20d2", + "nvdash;": "\u22ac", + "nvge;": "\u2265\u20d2", + "nvgt;": ">\u20d2", + "nvinfin;": "\u29de", + "nvlArr;": "\u2902", + "nvle;": "\u2264\u20d2", + "nvlt;": "<\u20d2", + "nvltrie;": "\u22b4\u20d2", + "nvrArr;": "\u2903", + "nvrtrie;": "\u22b5\u20d2", + "nvsim;": "\u223c\u20d2", + "nwArr;": "\u21d6", + "nwarhk;": "\u2923", + "nwarr;": "\u2196", + "nwarrow;": "\u2196", + "nwnear;": "\u2927", + "oS;": "\u24c8", + "oacute": "\xf3", + "oacute;": "\xf3", + "oast;": "\u229b", + "ocir;": "\u229a", + "ocirc": "\xf4", + "ocirc;": "\xf4", + "ocy;": "\u043e", + "odash;": "\u229d", + "odblac;": "\u0151", + "odiv;": "\u2a38", + "odot;": "\u2299", + "odsold;": "\u29bc", + "oelig;": "\u0153", + "ofcir;": "\u29bf", + "ofr;": "\U0001d52c", + "ogon;": "\u02db", + "ograve": "\xf2", + "ograve;": "\xf2", + "ogt;": "\u29c1", + "ohbar;": "\u29b5", + "ohm;": "\u03a9", + "oint;": "\u222e", + "olarr;": "\u21ba", + "olcir;": "\u29be", + "olcross;": "\u29bb", + "oline;": "\u203e", + "olt;": "\u29c0", + "omacr;": "\u014d", + "omega;": "\u03c9", + "omicron;": "\u03bf", + "omid;": "\u29b6", + "ominus;": "\u2296", + "oopf;": "\U0001d560", + "opar;": "\u29b7", + "operp;": "\u29b9", + "oplus;": "\u2295", + "or;": "\u2228", + "orarr;": "\u21bb", + "ord;": "\u2a5d", + "order;": "\u2134", + "orderof;": "\u2134", + "ordf": "\xaa", + "ordf;": "\xaa", + "ordm": "\xba", + "ordm;": "\xba", + "origof;": "\u22b6", + "oror;": "\u2a56", + "orslope;": "\u2a57", + "orv;": "\u2a5b", + "oscr;": "\u2134", + "oslash": "\xf8", + "oslash;": "\xf8", + "osol;": "\u2298", + "otilde": "\xf5", + "otilde;": "\xf5", + "otimes;": "\u2297", + "otimesas;": "\u2a36", + "ouml": "\xf6", + "ouml;": "\xf6", + "ovbar;": "\u233d", + "par;": "\u2225", + "para": "\xb6", + "para;": "\xb6", + "parallel;": "\u2225", + "parsim;": "\u2af3", + "parsl;": "\u2afd", + "part;": "\u2202", + "pcy;": "\u043f", + "percnt;": "%", + "period;": ".", + "permil;": "\u2030", + "perp;": "\u22a5", + "pertenk;": "\u2031", + "pfr;": "\U0001d52d", + "phi;": "\u03c6", + "phiv;": "\u03d5", + "phmmat;": "\u2133", + "phone;": "\u260e", + "pi;": "\u03c0", + "pitchfork;": "\u22d4", + "piv;": "\u03d6", + "planck;": "\u210f", + "planckh;": "\u210e", + "plankv;": "\u210f", + "plus;": "+", + "plusacir;": "\u2a23", + "plusb;": "\u229e", + "pluscir;": "\u2a22", + "plusdo;": "\u2214", + "plusdu;": "\u2a25", + "pluse;": "\u2a72", + "plusmn": "\xb1", + "plusmn;": "\xb1", + "plussim;": "\u2a26", + "plustwo;": "\u2a27", + "pm;": "\xb1", + "pointint;": "\u2a15", + "popf;": "\U0001d561", + "pound": "\xa3", + "pound;": "\xa3", + "pr;": "\u227a", + "prE;": "\u2ab3", + "prap;": "\u2ab7", + "prcue;": "\u227c", + "pre;": "\u2aaf", + "prec;": "\u227a", + "precapprox;": "\u2ab7", + "preccurlyeq;": "\u227c", + "preceq;": "\u2aaf", + "precnapprox;": "\u2ab9", + "precneqq;": "\u2ab5", + "precnsim;": 
"\u22e8", + "precsim;": "\u227e", + "prime;": "\u2032", + "primes;": "\u2119", + "prnE;": "\u2ab5", + "prnap;": "\u2ab9", + "prnsim;": "\u22e8", + "prod;": "\u220f", + "profalar;": "\u232e", + "profline;": "\u2312", + "profsurf;": "\u2313", + "prop;": "\u221d", + "propto;": "\u221d", + "prsim;": "\u227e", + "prurel;": "\u22b0", + "pscr;": "\U0001d4c5", + "psi;": "\u03c8", + "puncsp;": "\u2008", + "qfr;": "\U0001d52e", + "qint;": "\u2a0c", + "qopf;": "\U0001d562", + "qprime;": "\u2057", + "qscr;": "\U0001d4c6", + "quaternions;": "\u210d", + "quatint;": "\u2a16", + "quest;": "?", + "questeq;": "\u225f", + "quot": "\"", + "quot;": "\"", + "rAarr;": "\u21db", + "rArr;": "\u21d2", + "rAtail;": "\u291c", + "rBarr;": "\u290f", + "rHar;": "\u2964", + "race;": "\u223d\u0331", + "racute;": "\u0155", + "radic;": "\u221a", + "raemptyv;": "\u29b3", + "rang;": "\u27e9", + "rangd;": "\u2992", + "range;": "\u29a5", + "rangle;": "\u27e9", + "raquo": "\xbb", + "raquo;": "\xbb", + "rarr;": "\u2192", + "rarrap;": "\u2975", + "rarrb;": "\u21e5", + "rarrbfs;": "\u2920", + "rarrc;": "\u2933", + "rarrfs;": "\u291e", + "rarrhk;": "\u21aa", + "rarrlp;": "\u21ac", + "rarrpl;": "\u2945", + "rarrsim;": "\u2974", + "rarrtl;": "\u21a3", + "rarrw;": "\u219d", + "ratail;": "\u291a", + "ratio;": "\u2236", + "rationals;": "\u211a", + "rbarr;": "\u290d", + "rbbrk;": "\u2773", + "rbrace;": "}", + "rbrack;": "]", + "rbrke;": "\u298c", + "rbrksld;": "\u298e", + "rbrkslu;": "\u2990", + "rcaron;": "\u0159", + "rcedil;": "\u0157", + "rceil;": "\u2309", + "rcub;": "}", + "rcy;": "\u0440", + "rdca;": "\u2937", + "rdldhar;": "\u2969", + "rdquo;": "\u201d", + "rdquor;": "\u201d", + "rdsh;": "\u21b3", + "real;": "\u211c", + "realine;": "\u211b", + "realpart;": "\u211c", + "reals;": "\u211d", + "rect;": "\u25ad", + "reg": "\xae", + "reg;": "\xae", + "rfisht;": "\u297d", + "rfloor;": "\u230b", + "rfr;": "\U0001d52f", + "rhard;": "\u21c1", + "rharu;": "\u21c0", + "rharul;": "\u296c", + "rho;": "\u03c1", + "rhov;": "\u03f1", + "rightarrow;": "\u2192", + "rightarrowtail;": "\u21a3", + "rightharpoondown;": "\u21c1", + "rightharpoonup;": "\u21c0", + "rightleftarrows;": "\u21c4", + "rightleftharpoons;": "\u21cc", + "rightrightarrows;": "\u21c9", + "rightsquigarrow;": "\u219d", + "rightthreetimes;": "\u22cc", + "ring;": "\u02da", + "risingdotseq;": "\u2253", + "rlarr;": "\u21c4", + "rlhar;": "\u21cc", + "rlm;": "\u200f", + "rmoust;": "\u23b1", + "rmoustache;": "\u23b1", + "rnmid;": "\u2aee", + "roang;": "\u27ed", + "roarr;": "\u21fe", + "robrk;": "\u27e7", + "ropar;": "\u2986", + "ropf;": "\U0001d563", + "roplus;": "\u2a2e", + "rotimes;": "\u2a35", + "rpar;": ")", + "rpargt;": "\u2994", + "rppolint;": "\u2a12", + "rrarr;": "\u21c9", + "rsaquo;": "\u203a", + "rscr;": "\U0001d4c7", + "rsh;": "\u21b1", + "rsqb;": "]", + "rsquo;": "\u2019", + "rsquor;": "\u2019", + "rthree;": "\u22cc", + "rtimes;": "\u22ca", + "rtri;": "\u25b9", + "rtrie;": "\u22b5", + "rtrif;": "\u25b8", + "rtriltri;": "\u29ce", + "ruluhar;": "\u2968", + "rx;": "\u211e", + "sacute;": "\u015b", + "sbquo;": "\u201a", + "sc;": "\u227b", + "scE;": "\u2ab4", + "scap;": "\u2ab8", + "scaron;": "\u0161", + "sccue;": "\u227d", + "sce;": "\u2ab0", + "scedil;": "\u015f", + "scirc;": "\u015d", + "scnE;": "\u2ab6", + "scnap;": "\u2aba", + "scnsim;": "\u22e9", + "scpolint;": "\u2a13", + "scsim;": "\u227f", + "scy;": "\u0441", + "sdot;": "\u22c5", + "sdotb;": "\u22a1", + "sdote;": "\u2a66", + "seArr;": "\u21d8", + "searhk;": "\u2925", + "searr;": "\u2198", + "searrow;": "\u2198", + "sect": 
"\xa7", + "sect;": "\xa7", + "semi;": ";", + "seswar;": "\u2929", + "setminus;": "\u2216", + "setmn;": "\u2216", + "sext;": "\u2736", + "sfr;": "\U0001d530", + "sfrown;": "\u2322", + "sharp;": "\u266f", + "shchcy;": "\u0449", + "shcy;": "\u0448", + "shortmid;": "\u2223", + "shortparallel;": "\u2225", + "shy": "\xad", + "shy;": "\xad", + "sigma;": "\u03c3", + "sigmaf;": "\u03c2", + "sigmav;": "\u03c2", + "sim;": "\u223c", + "simdot;": "\u2a6a", + "sime;": "\u2243", + "simeq;": "\u2243", + "simg;": "\u2a9e", + "simgE;": "\u2aa0", + "siml;": "\u2a9d", + "simlE;": "\u2a9f", + "simne;": "\u2246", + "simplus;": "\u2a24", + "simrarr;": "\u2972", + "slarr;": "\u2190", + "smallsetminus;": "\u2216", + "smashp;": "\u2a33", + "smeparsl;": "\u29e4", + "smid;": "\u2223", + "smile;": "\u2323", + "smt;": "\u2aaa", + "smte;": "\u2aac", + "smtes;": "\u2aac\ufe00", + "softcy;": "\u044c", + "sol;": "/", + "solb;": "\u29c4", + "solbar;": "\u233f", + "sopf;": "\U0001d564", + "spades;": "\u2660", + "spadesuit;": "\u2660", + "spar;": "\u2225", + "sqcap;": "\u2293", + "sqcaps;": "\u2293\ufe00", + "sqcup;": "\u2294", + "sqcups;": "\u2294\ufe00", + "sqsub;": "\u228f", + "sqsube;": "\u2291", + "sqsubset;": "\u228f", + "sqsubseteq;": "\u2291", + "sqsup;": "\u2290", + "sqsupe;": "\u2292", + "sqsupset;": "\u2290", + "sqsupseteq;": "\u2292", + "squ;": "\u25a1", + "square;": "\u25a1", + "squarf;": "\u25aa", + "squf;": "\u25aa", + "srarr;": "\u2192", + "sscr;": "\U0001d4c8", + "ssetmn;": "\u2216", + "ssmile;": "\u2323", + "sstarf;": "\u22c6", + "star;": "\u2606", + "starf;": "\u2605", + "straightepsilon;": "\u03f5", + "straightphi;": "\u03d5", + "strns;": "\xaf", + "sub;": "\u2282", + "subE;": "\u2ac5", + "subdot;": "\u2abd", + "sube;": "\u2286", + "subedot;": "\u2ac3", + "submult;": "\u2ac1", + "subnE;": "\u2acb", + "subne;": "\u228a", + "subplus;": "\u2abf", + "subrarr;": "\u2979", + "subset;": "\u2282", + "subseteq;": "\u2286", + "subseteqq;": "\u2ac5", + "subsetneq;": "\u228a", + "subsetneqq;": "\u2acb", + "subsim;": "\u2ac7", + "subsub;": "\u2ad5", + "subsup;": "\u2ad3", + "succ;": "\u227b", + "succapprox;": "\u2ab8", + "succcurlyeq;": "\u227d", + "succeq;": "\u2ab0", + "succnapprox;": "\u2aba", + "succneqq;": "\u2ab6", + "succnsim;": "\u22e9", + "succsim;": "\u227f", + "sum;": "\u2211", + "sung;": "\u266a", + "sup1": "\xb9", + "sup1;": "\xb9", + "sup2": "\xb2", + "sup2;": "\xb2", + "sup3": "\xb3", + "sup3;": "\xb3", + "sup;": "\u2283", + "supE;": "\u2ac6", + "supdot;": "\u2abe", + "supdsub;": "\u2ad8", + "supe;": "\u2287", + "supedot;": "\u2ac4", + "suphsol;": "\u27c9", + "suphsub;": "\u2ad7", + "suplarr;": "\u297b", + "supmult;": "\u2ac2", + "supnE;": "\u2acc", + "supne;": "\u228b", + "supplus;": "\u2ac0", + "supset;": "\u2283", + "supseteq;": "\u2287", + "supseteqq;": "\u2ac6", + "supsetneq;": "\u228b", + "supsetneqq;": "\u2acc", + "supsim;": "\u2ac8", + "supsub;": "\u2ad4", + "supsup;": "\u2ad6", + "swArr;": "\u21d9", + "swarhk;": "\u2926", + "swarr;": "\u2199", + "swarrow;": "\u2199", + "swnwar;": "\u292a", + "szlig": "\xdf", + "szlig;": "\xdf", + "target;": "\u2316", + "tau;": "\u03c4", + "tbrk;": "\u23b4", + "tcaron;": "\u0165", + "tcedil;": "\u0163", + "tcy;": "\u0442", + "tdot;": "\u20db", + "telrec;": "\u2315", + "tfr;": "\U0001d531", + "there4;": "\u2234", + "therefore;": "\u2234", + "theta;": "\u03b8", + "thetasym;": "\u03d1", + "thetav;": "\u03d1", + "thickapprox;": "\u2248", + "thicksim;": "\u223c", + "thinsp;": "\u2009", + "thkap;": "\u2248", + "thksim;": "\u223c", + "thorn": "\xfe", + "thorn;": "\xfe", 
+ "tilde;": "\u02dc", + "times": "\xd7", + "times;": "\xd7", + "timesb;": "\u22a0", + "timesbar;": "\u2a31", + "timesd;": "\u2a30", + "tint;": "\u222d", + "toea;": "\u2928", + "top;": "\u22a4", + "topbot;": "\u2336", + "topcir;": "\u2af1", + "topf;": "\U0001d565", + "topfork;": "\u2ada", + "tosa;": "\u2929", + "tprime;": "\u2034", + "trade;": "\u2122", + "triangle;": "\u25b5", + "triangledown;": "\u25bf", + "triangleleft;": "\u25c3", + "trianglelefteq;": "\u22b4", + "triangleq;": "\u225c", + "triangleright;": "\u25b9", + "trianglerighteq;": "\u22b5", + "tridot;": "\u25ec", + "trie;": "\u225c", + "triminus;": "\u2a3a", + "triplus;": "\u2a39", + "trisb;": "\u29cd", + "tritime;": "\u2a3b", + "trpezium;": "\u23e2", + "tscr;": "\U0001d4c9", + "tscy;": "\u0446", + "tshcy;": "\u045b", + "tstrok;": "\u0167", + "twixt;": "\u226c", + "twoheadleftarrow;": "\u219e", + "twoheadrightarrow;": "\u21a0", + "uArr;": "\u21d1", + "uHar;": "\u2963", + "uacute": "\xfa", + "uacute;": "\xfa", + "uarr;": "\u2191", + "ubrcy;": "\u045e", + "ubreve;": "\u016d", + "ucirc": "\xfb", + "ucirc;": "\xfb", + "ucy;": "\u0443", + "udarr;": "\u21c5", + "udblac;": "\u0171", + "udhar;": "\u296e", + "ufisht;": "\u297e", + "ufr;": "\U0001d532", + "ugrave": "\xf9", + "ugrave;": "\xf9", + "uharl;": "\u21bf", + "uharr;": "\u21be", + "uhblk;": "\u2580", + "ulcorn;": "\u231c", + "ulcorner;": "\u231c", + "ulcrop;": "\u230f", + "ultri;": "\u25f8", + "umacr;": "\u016b", + "uml": "\xa8", + "uml;": "\xa8", + "uogon;": "\u0173", + "uopf;": "\U0001d566", + "uparrow;": "\u2191", + "updownarrow;": "\u2195", + "upharpoonleft;": "\u21bf", + "upharpoonright;": "\u21be", + "uplus;": "\u228e", + "upsi;": "\u03c5", + "upsih;": "\u03d2", + "upsilon;": "\u03c5", + "upuparrows;": "\u21c8", + "urcorn;": "\u231d", + "urcorner;": "\u231d", + "urcrop;": "\u230e", + "uring;": "\u016f", + "urtri;": "\u25f9", + "uscr;": "\U0001d4ca", + "utdot;": "\u22f0", + "utilde;": "\u0169", + "utri;": "\u25b5", + "utrif;": "\u25b4", + "uuarr;": "\u21c8", + "uuml": "\xfc", + "uuml;": "\xfc", + "uwangle;": "\u29a7", + "vArr;": "\u21d5", + "vBar;": "\u2ae8", + "vBarv;": "\u2ae9", + "vDash;": "\u22a8", + "vangrt;": "\u299c", + "varepsilon;": "\u03f5", + "varkappa;": "\u03f0", + "varnothing;": "\u2205", + "varphi;": "\u03d5", + "varpi;": "\u03d6", + "varpropto;": "\u221d", + "varr;": "\u2195", + "varrho;": "\u03f1", + "varsigma;": "\u03c2", + "varsubsetneq;": "\u228a\ufe00", + "varsubsetneqq;": "\u2acb\ufe00", + "varsupsetneq;": "\u228b\ufe00", + "varsupsetneqq;": "\u2acc\ufe00", + "vartheta;": "\u03d1", + "vartriangleleft;": "\u22b2", + "vartriangleright;": "\u22b3", + "vcy;": "\u0432", + "vdash;": "\u22a2", + "vee;": "\u2228", + "veebar;": "\u22bb", + "veeeq;": "\u225a", + "vellip;": "\u22ee", + "verbar;": "|", + "vert;": "|", + "vfr;": "\U0001d533", + "vltri;": "\u22b2", + "vnsub;": "\u2282\u20d2", + "vnsup;": "\u2283\u20d2", + "vopf;": "\U0001d567", + "vprop;": "\u221d", + "vrtri;": "\u22b3", + "vscr;": "\U0001d4cb", + "vsubnE;": "\u2acb\ufe00", + "vsubne;": "\u228a\ufe00", + "vsupnE;": "\u2acc\ufe00", + "vsupne;": "\u228b\ufe00", + "vzigzag;": "\u299a", + "wcirc;": "\u0175", + "wedbar;": "\u2a5f", + "wedge;": "\u2227", + "wedgeq;": "\u2259", + "weierp;": "\u2118", + "wfr;": "\U0001d534", + "wopf;": "\U0001d568", + "wp;": "\u2118", + "wr;": "\u2240", + "wreath;": "\u2240", + "wscr;": "\U0001d4cc", + "xcap;": "\u22c2", + "xcirc;": "\u25ef", + "xcup;": "\u22c3", + "xdtri;": "\u25bd", + "xfr;": "\U0001d535", + "xhArr;": "\u27fa", + "xharr;": "\u27f7", + "xi;": "\u03be", + 
"xlArr;": "\u27f8", + "xlarr;": "\u27f5", + "xmap;": "\u27fc", + "xnis;": "\u22fb", + "xodot;": "\u2a00", + "xopf;": "\U0001d569", + "xoplus;": "\u2a01", + "xotime;": "\u2a02", + "xrArr;": "\u27f9", + "xrarr;": "\u27f6", + "xscr;": "\U0001d4cd", + "xsqcup;": "\u2a06", + "xuplus;": "\u2a04", + "xutri;": "\u25b3", + "xvee;": "\u22c1", + "xwedge;": "\u22c0", + "yacute": "\xfd", + "yacute;": "\xfd", + "yacy;": "\u044f", + "ycirc;": "\u0177", + "ycy;": "\u044b", + "yen": "\xa5", + "yen;": "\xa5", + "yfr;": "\U0001d536", + "yicy;": "\u0457", + "yopf;": "\U0001d56a", + "yscr;": "\U0001d4ce", + "yucy;": "\u044e", + "yuml": "\xff", + "yuml;": "\xff", + "zacute;": "\u017a", + "zcaron;": "\u017e", + "zcy;": "\u0437", + "zdot;": "\u017c", + "zeetrf;": "\u2128", + "zeta;": "\u03b6", + "zfr;": "\U0001d537", + "zhcy;": "\u0436", + "zigrarr;": "\u21dd", + "zopf;": "\U0001d56b", + "zscr;": "\U0001d4cf", + "zwj;": "\u200d", + "zwnj;": "\u200c", +} + +replacementCharacters = { + 0x0: "\uFFFD", + 0x0d: "\u000D", + 0x80: "\u20AC", + 0x81: "\u0081", + 0x82: "\u201A", + 0x83: "\u0192", + 0x84: "\u201E", + 0x85: "\u2026", + 0x86: "\u2020", + 0x87: "\u2021", + 0x88: "\u02C6", + 0x89: "\u2030", + 0x8A: "\u0160", + 0x8B: "\u2039", + 0x8C: "\u0152", + 0x8D: "\u008D", + 0x8E: "\u017D", + 0x8F: "\u008F", + 0x90: "\u0090", + 0x91: "\u2018", + 0x92: "\u2019", + 0x93: "\u201C", + 0x94: "\u201D", + 0x95: "\u2022", + 0x96: "\u2013", + 0x97: "\u2014", + 0x98: "\u02DC", + 0x99: "\u2122", + 0x9A: "\u0161", + 0x9B: "\u203A", + 0x9C: "\u0153", + 0x9D: "\u009D", + 0x9E: "\u017E", + 0x9F: "\u0178", +} + +tokenTypes = { + "Doctype": 0, + "Characters": 1, + "SpaceCharacters": 2, + "StartTag": 3, + "EndTag": 4, + "EmptyTag": 5, + "Comment": 6, + "ParseError": 7 +} + +tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"], + tokenTypes["EmptyTag"]]) + + +prefixes = dict([(v, k) for k, v in namespaces.items()]) +prefixes["http://www.w3.org/1998/Math/MathML"] = "math" + + +class DataLossWarning(UserWarning): + """Raised when the current tree is unable to represent the input data""" + pass + + +class _ReparseException(Exception): + pass diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__init__.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/__init__.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..d7eaf0d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/__init__.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/alphabeticalattributes.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/alphabeticalattributes.cpython-37.pyc new file mode 100644 index 0000000..7cc5480 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/alphabeticalattributes.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/base.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/base.cpython-37.pyc new file mode 100644 index 0000000..3233ff0 Binary files /dev/null and 
b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/base.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/inject_meta_charset.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/inject_meta_charset.cpython-37.pyc new file mode 100644 index 0000000..6e8c564 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/inject_meta_charset.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/lint.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/lint.cpython-37.pyc new file mode 100644 index 0000000..41d744d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/lint.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/optionaltags.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/optionaltags.cpython-37.pyc new file mode 100644 index 0000000..d1fd76d Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/optionaltags.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/sanitizer.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/sanitizer.cpython-37.pyc new file mode 100644 index 0000000..ab1e602 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/sanitizer.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/whitespace.cpython-37.pyc b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/whitespace.cpython-37.pyc new file mode 100644 index 0000000..93751d5 Binary files /dev/null and b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/whitespace.cpython-37.pyc differ diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py new file mode 100644 index 0000000..5ba926e --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py @@ -0,0 +1,29 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import base + +from collections import OrderedDict + + +def _attr_key(attr): + """Return an appropriate key for an attribute for sorting + + Attributes have a namespace that can be either ``None`` or a string. We + can't compare the two because they're different types, so we convert + ``None`` to an empty string first. 
+ + """ + return (attr[0][0] or ''), attr[0][1] + + +class Filter(base.Filter): + """Alphabetizes attributes for elements""" + def __iter__(self): + for token in base.Filter.__iter__(self): + if token["type"] in ("StartTag", "EmptyTag"): + attrs = OrderedDict() + for name, value in sorted(token["data"].items(), + key=_attr_key): + attrs[name] = value + token["data"] = attrs + yield token diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/base.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/base.py new file mode 100644 index 0000000..c7dbaed --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/base.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import, division, unicode_literals + + +class Filter(object): + def __init__(self, source): + self.source = source + + def __iter__(self): + return iter(self.source) + + def __getattr__(self, name): + return getattr(self.source, name) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py new file mode 100644 index 0000000..aefb5c8 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py @@ -0,0 +1,73 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import base + + +class Filter(base.Filter): + """Injects ``<meta charset=ENCODING>`` tag into head of document""" + def __init__(self, source, encoding): + """Creates a Filter + + :arg source: the source token stream + + :arg encoding: the encoding to set + + """ + base.Filter.__init__(self, source) + self.encoding = encoding + + def __iter__(self): + state = "pre_head" + meta_found = (self.encoding is None) + pending = [] + + for token in base.Filter.__iter__(self): + type = token["type"] + if type == "StartTag": + if token["name"].lower() == "head": + state = "in_head" + + elif type == "EmptyTag": + if token["name"].lower() == "meta": + # replace charset with actual encoding + has_http_equiv_content_type = False + for (namespace, name), value in token["data"].items(): + if namespace is not None: + continue + elif name.lower() == 'charset': + token["data"][(namespace, name)] = self.encoding + meta_found = True + break + elif name == 'http-equiv' and value.lower() == 'content-type': + has_http_equiv_content_type = True + else: + if has_http_equiv_content_type and (None, "content") in token["data"]: + token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding + meta_found = True + + elif token["name"].lower() == "head" and not meta_found: + # insert meta into empty head + yield {"type": "StartTag", "name": "head", + "data": token["data"]} + yield {"type": "EmptyTag", "name": "meta", + "data": {(None, "charset"): self.encoding}} + yield {"type": "EndTag", "name": "head"} + meta_found = True + continue + + elif type == "EndTag": + if token["name"].lower() == "head" and pending: + # insert meta into head (if necessary) and flush pending queue + yield pending.pop(0) + if not meta_found: + yield {"type": "EmptyTag", "name": "meta", + "data": {(None, "charset"): self.encoding}} + while pending: + yield pending.pop(0) + meta_found = True + state = "post_head" + + if state == "in_head": + pending.append(token) + else: + yield token diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/lint.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/lint.py new file mode 100644 
index 0000000..fcc07ee --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/lint.py @@ -0,0 +1,93 @@ +from __future__ import absolute_import, division, unicode_literals + +from pip._vendor.six import text_type + +from . import base +from ..constants import namespaces, voidElements + +from ..constants import spaceCharacters +spaceCharacters = "".join(spaceCharacters) + + +class Filter(base.Filter): + """Lints the token stream for errors + + If it finds any errors, it'll raise an ``AssertionError``. + + """ + def __init__(self, source, require_matching_tags=True): + """Creates a Filter + + :arg source: the source token stream + + :arg require_matching_tags: whether or not to require matching tags + + """ + super(Filter, self).__init__(source) + self.require_matching_tags = require_matching_tags + + def __iter__(self): + open_elements = [] + for token in base.Filter.__iter__(self): + type = token["type"] + if type in ("StartTag", "EmptyTag"): + namespace = token["namespace"] + name = token["name"] + assert namespace is None or isinstance(namespace, text_type) + assert namespace != "" + assert isinstance(name, text_type) + assert name != "" + assert isinstance(token["data"], dict) + if (not namespace or namespace == namespaces["html"]) and name in voidElements: + assert type == "EmptyTag" + else: + assert type == "StartTag" + if type == "StartTag" and self.require_matching_tags: + open_elements.append((namespace, name)) + for (namespace, name), value in token["data"].items(): + assert namespace is None or isinstance(namespace, text_type) + assert namespace != "" + assert isinstance(name, text_type) + assert name != "" + assert isinstance(value, text_type) + + elif type == "EndTag": + namespace = token["namespace"] + name = token["name"] + assert namespace is None or isinstance(namespace, text_type) + assert namespace != "" + assert isinstance(name, text_type) + assert name != "" + if (not namespace or namespace == namespaces["html"]) and name in voidElements: + assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name} + elif self.require_matching_tags: + start = open_elements.pop() + assert start == (namespace, name) + + elif type == "Comment": + data = token["data"] + assert isinstance(data, text_type) + + elif type in ("Characters", "SpaceCharacters"): + data = token["data"] + assert isinstance(data, text_type) + assert data != "" + if type == "SpaceCharacters": + assert data.strip(spaceCharacters) == "" + + elif type == "Doctype": + name = token["name"] + assert name is None or isinstance(name, text_type) + assert token["publicId"] is None or isinstance(name, text_type) + assert token["systemId"] is None or isinstance(name, text_type) + + elif type == "Entity": + assert isinstance(token["name"], text_type) + + elif type == "SerializerError": + assert isinstance(token["data"], text_type) + + else: + assert False, "Unknown token type: %(type)s" % {"type": type} + + yield token diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/optionaltags.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/optionaltags.py new file mode 100644 index 0000000..4a86501 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/optionaltags.py @@ -0,0 +1,207 @@ +from __future__ import absolute_import, division, unicode_literals + +from . 
import base + + +class Filter(base.Filter): + """Removes optional tags from the token stream""" + def slider(self): + previous1 = previous2 = None + for token in self.source: + if previous1 is not None: + yield previous2, previous1, token + previous2 = previous1 + previous1 = token + if previous1 is not None: + yield previous2, previous1, None + + def __iter__(self): + for previous, token, next in self.slider(): + type = token["type"] + if type == "StartTag": + if (token["data"] or + not self.is_optional_start(token["name"], previous, next)): + yield token + elif type == "EndTag": + if not self.is_optional_end(token["name"], next): + yield token + else: + yield token + + def is_optional_start(self, tagname, previous, next): + type = next and next["type"] or None + if tagname in 'html': + # An html element's start tag may be omitted if the first thing + # inside the html element is not a space character or a comment. + return type not in ("Comment", "SpaceCharacters") + elif tagname == 'head': + # A head element's start tag may be omitted if the first thing + # inside the head element is an element. + # XXX: we also omit the start tag if the head element is empty + if type in ("StartTag", "EmptyTag"): + return True + elif type == "EndTag": + return next["name"] == "head" + elif tagname == 'body': + # A body element's start tag may be omitted if the first thing + # inside the body element is not a space character or a comment, + # except if the first thing inside the body element is a script + # or style element and the node immediately preceding the body + # element is a head element whose end tag has been omitted. + if type in ("Comment", "SpaceCharacters"): + return False + elif type == "StartTag": + # XXX: we do not look at the preceding event, so we never omit + # the body element's start tag if it's followed by a script or + # a style element. + return next["name"] not in ('script', 'style') + else: + return True + elif tagname == 'colgroup': + # A colgroup element's start tag may be omitted if the first thing + # inside the colgroup element is a col element, and if the element + # is not immediately preceded by another colgroup element whose + # end tag has been omitted. + if type in ("StartTag", "EmptyTag"): + # XXX: we do not look at the preceding event, so instead we never + # omit the colgroup element's end tag when it is immediately + # followed by another colgroup element. See is_optional_end. + return next["name"] == "col" + else: + return False + elif tagname == 'tbody': + # A tbody element's start tag may be omitted if the first thing + # inside the tbody element is a tr element, and if the element is + # not immediately preceded by a tbody, thead, or tfoot element + # whose end tag has been omitted. + if type == "StartTag": + # omit the thead and tfoot elements' end tag when they are + # immediately followed by a tbody element. See is_optional_end. + if previous and previous['type'] == 'EndTag' and \ + previous['name'] in ('tbody', 'thead', 'tfoot'): + return False + return next["name"] == 'tr' + else: + return False + return False + + def is_optional_end(self, tagname, next): + type = next and next["type"] or None + if tagname in ('html', 'head', 'body'): + # An html element's end tag may be omitted if the html element + # is not immediately followed by a space character or a comment. 
+ return type not in ("Comment", "SpaceCharacters") + elif tagname in ('li', 'optgroup', 'tr'): + # A li element's end tag may be omitted if the li element is + # immediately followed by another li element or if there is + # no more content in the parent element. + # An optgroup element's end tag may be omitted if the optgroup + # element is immediately followed by another optgroup element, + # or if there is no more content in the parent element. + # A tr element's end tag may be omitted if the tr element is + # immediately followed by another tr element, or if there is + # no more content in the parent element. + if type == "StartTag": + return next["name"] == tagname + else: + return type == "EndTag" or type is None + elif tagname in ('dt', 'dd'): + # A dt element's end tag may be omitted if the dt element is + # immediately followed by another dt element or a dd element. + # A dd element's end tag may be omitted if the dd element is + # immediately followed by another dd element or a dt element, + # or if there is no more content in the parent element. + if type == "StartTag": + return next["name"] in ('dt', 'dd') + elif tagname == 'dd': + return type == "EndTag" or type is None + else: + return False + elif tagname == 'p': + # A p element's end tag may be omitted if the p element is + # immediately followed by an address, article, aside, + # blockquote, datagrid, dialog, dir, div, dl, fieldset, + # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu, + # nav, ol, p, pre, section, table, or ul, element, or if + # there is no more content in the parent element. + if type in ("StartTag", "EmptyTag"): + return next["name"] in ('address', 'article', 'aside', + 'blockquote', 'datagrid', 'dialog', + 'dir', 'div', 'dl', 'fieldset', 'footer', + 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', + 'header', 'hr', 'menu', 'nav', 'ol', + 'p', 'pre', 'section', 'table', 'ul') + else: + return type == "EndTag" or type is None + elif tagname == 'option': + # An option element's end tag may be omitted if the option + # element is immediately followed by another option element, + # or if it is immediately followed by an optgroup + # element, or if there is no more content in the parent + # element. + if type == "StartTag": + return next["name"] in ('option', 'optgroup') + else: + return type == "EndTag" or type is None + elif tagname in ('rt', 'rp'): + # An rt element's end tag may be omitted if the rt element is + # immediately followed by an rt or rp element, or if there is + # no more content in the parent element. + # An rp element's end tag may be omitted if the rp element is + # immediately followed by an rt or rp element, or if there is + # no more content in the parent element. + if type == "StartTag": + return next["name"] in ('rt', 'rp') + else: + return type == "EndTag" or type is None + elif tagname == 'colgroup': + # A colgroup element's end tag may be omitted if the colgroup + # element is not immediately followed by a space character or + # a comment. + if type in ("Comment", "SpaceCharacters"): + return False + elif type == "StartTag": + # XXX: we also look for an immediately following colgroup + # element. See is_optional_start. + return next["name"] != 'colgroup' + else: + return True + elif tagname in ('thead', 'tbody'): + # A thead element's end tag may be omitted if the thead element + # is immediately followed by a tbody or tfoot element. 
+ # A tbody element's end tag may be omitted if the tbody element + # is immediately followed by a tbody or tfoot element, or if + # there is no more content in the parent element. + # A tfoot element's end tag may be omitted if the tfoot element + # is immediately followed by a tbody element, or if there is no + # more content in the parent element. + # XXX: we never omit the end tag when the following element is + # a tbody. See is_optional_start. + if type == "StartTag": + return next["name"] in ['tbody', 'tfoot'] + elif tagname == 'tbody': + return type == "EndTag" or type is None + else: + return False + elif tagname == 'tfoot': + # A tfoot element's end tag may be omitted if the tfoot element + # is immediately followed by a tbody element, or if there is no + # more content in the parent element. + # XXX: we never omit the end tag when the following element is + # a tbody. See is_optional_start. + if type == "StartTag": + return next["name"] == 'tbody' + else: + return type == "EndTag" or type is None + elif tagname in ('td', 'th'): + # A td element's end tag may be omitted if the td element is + # immediately followed by a td or th element, or if there is + # no more content in the parent element. + # A th element's end tag may be omitted if the th element is + # immediately followed by a td or th element, or if there is + # no more content in the parent element. + if type == "StartTag": + return next["name"] in ('td', 'th') + else: + return type == "EndTag" or type is None + return False diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/sanitizer.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/sanitizer.py new file mode 100644 index 0000000..af8e77b --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/sanitizer.py @@ -0,0 +1,896 @@ +from __future__ import absolute_import, division, unicode_literals + +import re +from xml.sax.saxutils import escape, unescape + +from pip._vendor.six.moves import urllib_parse as urlparse + +from . 
import base +from ..constants import namespaces, prefixes + +__all__ = ["Filter"] + + +allowed_elements = frozenset(( + (namespaces['html'], 'a'), + (namespaces['html'], 'abbr'), + (namespaces['html'], 'acronym'), + (namespaces['html'], 'address'), + (namespaces['html'], 'area'), + (namespaces['html'], 'article'), + (namespaces['html'], 'aside'), + (namespaces['html'], 'audio'), + (namespaces['html'], 'b'), + (namespaces['html'], 'big'), + (namespaces['html'], 'blockquote'), + (namespaces['html'], 'br'), + (namespaces['html'], 'button'), + (namespaces['html'], 'canvas'), + (namespaces['html'], 'caption'), + (namespaces['html'], 'center'), + (namespaces['html'], 'cite'), + (namespaces['html'], 'code'), + (namespaces['html'], 'col'), + (namespaces['html'], 'colgroup'), + (namespaces['html'], 'command'), + (namespaces['html'], 'datagrid'), + (namespaces['html'], 'datalist'), + (namespaces['html'], 'dd'), + (namespaces['html'], 'del'), + (namespaces['html'], 'details'), + (namespaces['html'], 'dfn'), + (namespaces['html'], 'dialog'), + (namespaces['html'], 'dir'), + (namespaces['html'], 'div'), + (namespaces['html'], 'dl'), + (namespaces['html'], 'dt'), + (namespaces['html'], 'em'), + (namespaces['html'], 'event-source'), + (namespaces['html'], 'fieldset'), + (namespaces['html'], 'figcaption'), + (namespaces['html'], 'figure'), + (namespaces['html'], 'footer'), + (namespaces['html'], 'font'), + (namespaces['html'], 'form'), + (namespaces['html'], 'header'), + (namespaces['html'], 'h1'), + (namespaces['html'], 'h2'), + (namespaces['html'], 'h3'), + (namespaces['html'], 'h4'), + (namespaces['html'], 'h5'), + (namespaces['html'], 'h6'), + (namespaces['html'], 'hr'), + (namespaces['html'], 'i'), + (namespaces['html'], 'img'), + (namespaces['html'], 'input'), + (namespaces['html'], 'ins'), + (namespaces['html'], 'keygen'), + (namespaces['html'], 'kbd'), + (namespaces['html'], 'label'), + (namespaces['html'], 'legend'), + (namespaces['html'], 'li'), + (namespaces['html'], 'm'), + (namespaces['html'], 'map'), + (namespaces['html'], 'menu'), + (namespaces['html'], 'meter'), + (namespaces['html'], 'multicol'), + (namespaces['html'], 'nav'), + (namespaces['html'], 'nextid'), + (namespaces['html'], 'ol'), + (namespaces['html'], 'output'), + (namespaces['html'], 'optgroup'), + (namespaces['html'], 'option'), + (namespaces['html'], 'p'), + (namespaces['html'], 'pre'), + (namespaces['html'], 'progress'), + (namespaces['html'], 'q'), + (namespaces['html'], 's'), + (namespaces['html'], 'samp'), + (namespaces['html'], 'section'), + (namespaces['html'], 'select'), + (namespaces['html'], 'small'), + (namespaces['html'], 'sound'), + (namespaces['html'], 'source'), + (namespaces['html'], 'spacer'), + (namespaces['html'], 'span'), + (namespaces['html'], 'strike'), + (namespaces['html'], 'strong'), + (namespaces['html'], 'sub'), + (namespaces['html'], 'sup'), + (namespaces['html'], 'table'), + (namespaces['html'], 'tbody'), + (namespaces['html'], 'td'), + (namespaces['html'], 'textarea'), + (namespaces['html'], 'time'), + (namespaces['html'], 'tfoot'), + (namespaces['html'], 'th'), + (namespaces['html'], 'thead'), + (namespaces['html'], 'tr'), + (namespaces['html'], 'tt'), + (namespaces['html'], 'u'), + (namespaces['html'], 'ul'), + (namespaces['html'], 'var'), + (namespaces['html'], 'video'), + (namespaces['mathml'], 'maction'), + (namespaces['mathml'], 'math'), + (namespaces['mathml'], 'merror'), + (namespaces['mathml'], 'mfrac'), + (namespaces['mathml'], 'mi'), + (namespaces['mathml'], 'mmultiscripts'), + 
(namespaces['mathml'], 'mn'), + (namespaces['mathml'], 'mo'), + (namespaces['mathml'], 'mover'), + (namespaces['mathml'], 'mpadded'), + (namespaces['mathml'], 'mphantom'), + (namespaces['mathml'], 'mprescripts'), + (namespaces['mathml'], 'mroot'), + (namespaces['mathml'], 'mrow'), + (namespaces['mathml'], 'mspace'), + (namespaces['mathml'], 'msqrt'), + (namespaces['mathml'], 'mstyle'), + (namespaces['mathml'], 'msub'), + (namespaces['mathml'], 'msubsup'), + (namespaces['mathml'], 'msup'), + (namespaces['mathml'], 'mtable'), + (namespaces['mathml'], 'mtd'), + (namespaces['mathml'], 'mtext'), + (namespaces['mathml'], 'mtr'), + (namespaces['mathml'], 'munder'), + (namespaces['mathml'], 'munderover'), + (namespaces['mathml'], 'none'), + (namespaces['svg'], 'a'), + (namespaces['svg'], 'animate'), + (namespaces['svg'], 'animateColor'), + (namespaces['svg'], 'animateMotion'), + (namespaces['svg'], 'animateTransform'), + (namespaces['svg'], 'clipPath'), + (namespaces['svg'], 'circle'), + (namespaces['svg'], 'defs'), + (namespaces['svg'], 'desc'), + (namespaces['svg'], 'ellipse'), + (namespaces['svg'], 'font-face'), + (namespaces['svg'], 'font-face-name'), + (namespaces['svg'], 'font-face-src'), + (namespaces['svg'], 'g'), + (namespaces['svg'], 'glyph'), + (namespaces['svg'], 'hkern'), + (namespaces['svg'], 'linearGradient'), + (namespaces['svg'], 'line'), + (namespaces['svg'], 'marker'), + (namespaces['svg'], 'metadata'), + (namespaces['svg'], 'missing-glyph'), + (namespaces['svg'], 'mpath'), + (namespaces['svg'], 'path'), + (namespaces['svg'], 'polygon'), + (namespaces['svg'], 'polyline'), + (namespaces['svg'], 'radialGradient'), + (namespaces['svg'], 'rect'), + (namespaces['svg'], 'set'), + (namespaces['svg'], 'stop'), + (namespaces['svg'], 'svg'), + (namespaces['svg'], 'switch'), + (namespaces['svg'], 'text'), + (namespaces['svg'], 'title'), + (namespaces['svg'], 'tspan'), + (namespaces['svg'], 'use'), +)) + +allowed_attributes = frozenset(( + # HTML attributes + (None, 'abbr'), + (None, 'accept'), + (None, 'accept-charset'), + (None, 'accesskey'), + (None, 'action'), + (None, 'align'), + (None, 'alt'), + (None, 'autocomplete'), + (None, 'autofocus'), + (None, 'axis'), + (None, 'background'), + (None, 'balance'), + (None, 'bgcolor'), + (None, 'bgproperties'), + (None, 'border'), + (None, 'bordercolor'), + (None, 'bordercolordark'), + (None, 'bordercolorlight'), + (None, 'bottompadding'), + (None, 'cellpadding'), + (None, 'cellspacing'), + (None, 'ch'), + (None, 'challenge'), + (None, 'char'), + (None, 'charoff'), + (None, 'choff'), + (None, 'charset'), + (None, 'checked'), + (None, 'cite'), + (None, 'class'), + (None, 'clear'), + (None, 'color'), + (None, 'cols'), + (None, 'colspan'), + (None, 'compact'), + (None, 'contenteditable'), + (None, 'controls'), + (None, 'coords'), + (None, 'data'), + (None, 'datafld'), + (None, 'datapagesize'), + (None, 'datasrc'), + (None, 'datetime'), + (None, 'default'), + (None, 'delay'), + (None, 'dir'), + (None, 'disabled'), + (None, 'draggable'), + (None, 'dynsrc'), + (None, 'enctype'), + (None, 'end'), + (None, 'face'), + (None, 'for'), + (None, 'form'), + (None, 'frame'), + (None, 'galleryimg'), + (None, 'gutter'), + (None, 'headers'), + (None, 'height'), + (None, 'hidefocus'), + (None, 'hidden'), + (None, 'high'), + (None, 'href'), + (None, 'hreflang'), + (None, 'hspace'), + (None, 'icon'), + (None, 'id'), + (None, 'inputmode'), + (None, 'ismap'), + (None, 'keytype'), + (None, 'label'), + (None, 'leftspacing'), + (None, 'lang'), + (None, 'list'), + (None, 
'longdesc'), + (None, 'loop'), + (None, 'loopcount'), + (None, 'loopend'), + (None, 'loopstart'), + (None, 'low'), + (None, 'lowsrc'), + (None, 'max'), + (None, 'maxlength'), + (None, 'media'), + (None, 'method'), + (None, 'min'), + (None, 'multiple'), + (None, 'name'), + (None, 'nohref'), + (None, 'noshade'), + (None, 'nowrap'), + (None, 'open'), + (None, 'optimum'), + (None, 'pattern'), + (None, 'ping'), + (None, 'point-size'), + (None, 'poster'), + (None, 'pqg'), + (None, 'preload'), + (None, 'prompt'), + (None, 'radiogroup'), + (None, 'readonly'), + (None, 'rel'), + (None, 'repeat-max'), + (None, 'repeat-min'), + (None, 'replace'), + (None, 'required'), + (None, 'rev'), + (None, 'rightspacing'), + (None, 'rows'), + (None, 'rowspan'), + (None, 'rules'), + (None, 'scope'), + (None, 'selected'), + (None, 'shape'), + (None, 'size'), + (None, 'span'), + (None, 'src'), + (None, 'start'), + (None, 'step'), + (None, 'style'), + (None, 'summary'), + (None, 'suppress'), + (None, 'tabindex'), + (None, 'target'), + (None, 'template'), + (None, 'title'), + (None, 'toppadding'), + (None, 'type'), + (None, 'unselectable'), + (None, 'usemap'), + (None, 'urn'), + (None, 'valign'), + (None, 'value'), + (None, 'variable'), + (None, 'volume'), + (None, 'vspace'), + (None, 'vrml'), + (None, 'width'), + (None, 'wrap'), + (namespaces['xml'], 'lang'), + # MathML attributes + (None, 'actiontype'), + (None, 'align'), + (None, 'columnalign'), + (None, 'columnalign'), + (None, 'columnalign'), + (None, 'columnlines'), + (None, 'columnspacing'), + (None, 'columnspan'), + (None, 'depth'), + (None, 'display'), + (None, 'displaystyle'), + (None, 'equalcolumns'), + (None, 'equalrows'), + (None, 'fence'), + (None, 'fontstyle'), + (None, 'fontweight'), + (None, 'frame'), + (None, 'height'), + (None, 'linethickness'), + (None, 'lspace'), + (None, 'mathbackground'), + (None, 'mathcolor'), + (None, 'mathvariant'), + (None, 'mathvariant'), + (None, 'maxsize'), + (None, 'minsize'), + (None, 'other'), + (None, 'rowalign'), + (None, 'rowalign'), + (None, 'rowalign'), + (None, 'rowlines'), + (None, 'rowspacing'), + (None, 'rowspan'), + (None, 'rspace'), + (None, 'scriptlevel'), + (None, 'selection'), + (None, 'separator'), + (None, 'stretchy'), + (None, 'width'), + (None, 'width'), + (namespaces['xlink'], 'href'), + (namespaces['xlink'], 'show'), + (namespaces['xlink'], 'type'), + # SVG attributes + (None, 'accent-height'), + (None, 'accumulate'), + (None, 'additive'), + (None, 'alphabetic'), + (None, 'arabic-form'), + (None, 'ascent'), + (None, 'attributeName'), + (None, 'attributeType'), + (None, 'baseProfile'), + (None, 'bbox'), + (None, 'begin'), + (None, 'by'), + (None, 'calcMode'), + (None, 'cap-height'), + (None, 'class'), + (None, 'clip-path'), + (None, 'color'), + (None, 'color-rendering'), + (None, 'content'), + (None, 'cx'), + (None, 'cy'), + (None, 'd'), + (None, 'dx'), + (None, 'dy'), + (None, 'descent'), + (None, 'display'), + (None, 'dur'), + (None, 'end'), + (None, 'fill'), + (None, 'fill-opacity'), + (None, 'fill-rule'), + (None, 'font-family'), + (None, 'font-size'), + (None, 'font-stretch'), + (None, 'font-style'), + (None, 'font-variant'), + (None, 'font-weight'), + (None, 'from'), + (None, 'fx'), + (None, 'fy'), + (None, 'g1'), + (None, 'g2'), + (None, 'glyph-name'), + (None, 'gradientUnits'), + (None, 'hanging'), + (None, 'height'), + (None, 'horiz-adv-x'), + (None, 'horiz-origin-x'), + (None, 'id'), + (None, 'ideographic'), + (None, 'k'), + (None, 'keyPoints'), + (None, 'keySplines'), + (None, 'keyTimes'), 
+ (None, 'lang'), + (None, 'marker-end'), + (None, 'marker-mid'), + (None, 'marker-start'), + (None, 'markerHeight'), + (None, 'markerUnits'), + (None, 'markerWidth'), + (None, 'mathematical'), + (None, 'max'), + (None, 'min'), + (None, 'name'), + (None, 'offset'), + (None, 'opacity'), + (None, 'orient'), + (None, 'origin'), + (None, 'overline-position'), + (None, 'overline-thickness'), + (None, 'panose-1'), + (None, 'path'), + (None, 'pathLength'), + (None, 'points'), + (None, 'preserveAspectRatio'), + (None, 'r'), + (None, 'refX'), + (None, 'refY'), + (None, 'repeatCount'), + (None, 'repeatDur'), + (None, 'requiredExtensions'), + (None, 'requiredFeatures'), + (None, 'restart'), + (None, 'rotate'), + (None, 'rx'), + (None, 'ry'), + (None, 'slope'), + (None, 'stemh'), + (None, 'stemv'), + (None, 'stop-color'), + (None, 'stop-opacity'), + (None, 'strikethrough-position'), + (None, 'strikethrough-thickness'), + (None, 'stroke'), + (None, 'stroke-dasharray'), + (None, 'stroke-dashoffset'), + (None, 'stroke-linecap'), + (None, 'stroke-linejoin'), + (None, 'stroke-miterlimit'), + (None, 'stroke-opacity'), + (None, 'stroke-width'), + (None, 'systemLanguage'), + (None, 'target'), + (None, 'text-anchor'), + (None, 'to'), + (None, 'transform'), + (None, 'type'), + (None, 'u1'), + (None, 'u2'), + (None, 'underline-position'), + (None, 'underline-thickness'), + (None, 'unicode'), + (None, 'unicode-range'), + (None, 'units-per-em'), + (None, 'values'), + (None, 'version'), + (None, 'viewBox'), + (None, 'visibility'), + (None, 'width'), + (None, 'widths'), + (None, 'x'), + (None, 'x-height'), + (None, 'x1'), + (None, 'x2'), + (namespaces['xlink'], 'actuate'), + (namespaces['xlink'], 'arcrole'), + (namespaces['xlink'], 'href'), + (namespaces['xlink'], 'role'), + (namespaces['xlink'], 'show'), + (namespaces['xlink'], 'title'), + (namespaces['xlink'], 'type'), + (namespaces['xml'], 'base'), + (namespaces['xml'], 'lang'), + (namespaces['xml'], 'space'), + (None, 'y'), + (None, 'y1'), + (None, 'y2'), + (None, 'zoomAndPan'), +)) + +attr_val_is_uri = frozenset(( + (None, 'href'), + (None, 'src'), + (None, 'cite'), + (None, 'action'), + (None, 'longdesc'), + (None, 'poster'), + (None, 'background'), + (None, 'datasrc'), + (None, 'dynsrc'), + (None, 'lowsrc'), + (None, 'ping'), + (namespaces['xlink'], 'href'), + (namespaces['xml'], 'base'), +)) + +svg_attr_val_allows_ref = frozenset(( + (None, 'clip-path'), + (None, 'color-profile'), + (None, 'cursor'), + (None, 'fill'), + (None, 'filter'), + (None, 'marker'), + (None, 'marker-start'), + (None, 'marker-mid'), + (None, 'marker-end'), + (None, 'mask'), + (None, 'stroke'), +)) + +svg_allow_local_href = frozenset(( + (None, 'altGlyph'), + (None, 'animate'), + (None, 'animateColor'), + (None, 'animateMotion'), + (None, 'animateTransform'), + (None, 'cursor'), + (None, 'feImage'), + (None, 'filter'), + (None, 'linearGradient'), + (None, 'pattern'), + (None, 'radialGradient'), + (None, 'textpath'), + (None, 'tref'), + (None, 'set'), + (None, 'use') +)) + +allowed_css_properties = frozenset(( + 'azimuth', + 'background-color', + 'border-bottom-color', + 'border-collapse', + 'border-color', + 'border-left-color', + 'border-right-color', + 'border-top-color', + 'clear', + 'color', + 'cursor', + 'direction', + 'display', + 'elevation', + 'float', + 'font', + 'font-family', + 'font-size', + 'font-style', + 'font-variant', + 'font-weight', + 'height', + 'letter-spacing', + 'line-height', + 'overflow', + 'pause', + 'pause-after', + 'pause-before', + 'pitch', + 'pitch-range', 
+ 'richness', + 'speak', + 'speak-header', + 'speak-numeral', + 'speak-punctuation', + 'speech-rate', + 'stress', + 'text-align', + 'text-decoration', + 'text-indent', + 'unicode-bidi', + 'vertical-align', + 'voice-family', + 'volume', + 'white-space', + 'width', +)) + +allowed_css_keywords = frozenset(( + 'auto', + 'aqua', + 'black', + 'block', + 'blue', + 'bold', + 'both', + 'bottom', + 'brown', + 'center', + 'collapse', + 'dashed', + 'dotted', + 'fuchsia', + 'gray', + 'green', + '!important', + 'italic', + 'left', + 'lime', + 'maroon', + 'medium', + 'none', + 'navy', + 'normal', + 'nowrap', + 'olive', + 'pointer', + 'purple', + 'red', + 'right', + 'solid', + 'silver', + 'teal', + 'top', + 'transparent', + 'underline', + 'white', + 'yellow', +)) + +allowed_svg_properties = frozenset(( + 'fill', + 'fill-opacity', + 'fill-rule', + 'stroke', + 'stroke-width', + 'stroke-linecap', + 'stroke-linejoin', + 'stroke-opacity', +)) + +allowed_protocols = frozenset(( + 'ed2k', + 'ftp', + 'http', + 'https', + 'irc', + 'mailto', + 'news', + 'gopher', + 'nntp', + 'telnet', + 'webcal', + 'xmpp', + 'callto', + 'feed', + 'urn', + 'aim', + 'rsync', + 'tag', + 'ssh', + 'sftp', + 'rtsp', + 'afs', + 'data', +)) + +allowed_content_types = frozenset(( + 'image/png', + 'image/jpeg', + 'image/gif', + 'image/webp', + 'image/bmp', + 'text/plain', +)) + + +data_content_type = re.compile(r''' + ^ + # Match a content type / + (?P[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+) + # Match any character set and encoding + (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?) + |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?) + # Assume the rest is data + ,.* + $ + ''', + re.VERBOSE) + + +class Filter(base.Filter): + """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes""" + def __init__(self, + source, + allowed_elements=allowed_elements, + allowed_attributes=allowed_attributes, + allowed_css_properties=allowed_css_properties, + allowed_css_keywords=allowed_css_keywords, + allowed_svg_properties=allowed_svg_properties, + allowed_protocols=allowed_protocols, + allowed_content_types=allowed_content_types, + attr_val_is_uri=attr_val_is_uri, + svg_attr_val_allows_ref=svg_attr_val_allows_ref, + svg_allow_local_href=svg_allow_local_href): + """Creates a Filter + + :arg allowed_elements: set of elements to allow--everything else will + be escaped + + :arg allowed_attributes: set of attributes to allow in + elements--everything else will be stripped + + :arg allowed_css_properties: set of CSS properties to allow--everything + else will be stripped + + :arg allowed_css_keywords: set of CSS keywords to allow--everything + else will be stripped + + :arg allowed_svg_properties: set of SVG properties to allow--everything + else will be removed + + :arg allowed_protocols: set of allowed protocols for URIs + + :arg allowed_content_types: set of allowed content types for ``data`` URIs. 
+ + :arg attr_val_is_uri: set of attributes that have URI values--values + that have a scheme not listed in ``allowed_protocols`` are removed + + :arg svg_attr_val_allows_ref: set of SVG attributes that can have + references + + :arg svg_allow_local_href: set of SVG elements that can have local + hrefs--these are removed + + """ + super(Filter, self).__init__(source) + self.allowed_elements = allowed_elements + self.allowed_attributes = allowed_attributes + self.allowed_css_properties = allowed_css_properties + self.allowed_css_keywords = allowed_css_keywords + self.allowed_svg_properties = allowed_svg_properties + self.allowed_protocols = allowed_protocols + self.allowed_content_types = allowed_content_types + self.attr_val_is_uri = attr_val_is_uri + self.svg_attr_val_allows_ref = svg_attr_val_allows_ref + self.svg_allow_local_href = svg_allow_local_href + + def __iter__(self): + for token in base.Filter.__iter__(self): + token = self.sanitize_token(token) + if token: + yield token + + # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and + # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes + # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and + # ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI + # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are + # allowed. + # + # sanitize_html('') + # => <script> do_nasty_stuff() </script> + # sanitize_html('Click here for $100') + # => Click here for $100 + def sanitize_token(self, token): + + # accommodate filters which use token_type differently + token_type = token["type"] + if token_type in ("StartTag", "EndTag", "EmptyTag"): + name = token["name"] + namespace = token["namespace"] + if ((namespace, name) in self.allowed_elements or + (namespace is None and + (namespaces["html"], name) in self.allowed_elements)): + return self.allowed_token(token) + else: + return self.disallowed_token(token) + elif token_type == "Comment": + pass + else: + return token + + def allowed_token(self, token): + if "data" in token: + attrs = token["data"] + attr_names = set(attrs.keys()) + + # Remove forbidden attributes + for to_remove in (attr_names - self.allowed_attributes): + del token["data"][to_remove] + attr_names.remove(to_remove) + + # Remove attributes with disallowed URL values + for attr in (attr_names & self.attr_val_is_uri): + assert attr in attrs + # I don't have a clue where this regexp comes from or why it matches those + # characters, nor why we call unescape. I just know it's always been here. + # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all + # this will do is remove *more* than it otherwise would. 
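A minimal sketch of how this Filter is typically applied, assuming the upstream html5lib package rather than pip's vendored copy: it wraps a tree-walker token stream, and the allow-lists above decide which elements, attributes, and URL schemes survive (the URL check itself continues just below).

import html5lib
from html5lib import getTreeWalker
from html5lib.filters.sanitizer import Filter as SanitizerFilter
from html5lib.serializer import HTMLSerializer

dirty = html5lib.parseFragment('<a href="javascript:alert(1)">hi</a><script>x()</script>')
stream = SanitizerFilter(getTreeWalker("etree")(dirty))
# The script element is escaped as text and the javascript: href is dropped.
print(HTMLSerializer().render(stream))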
+ val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '', + unescape(attrs[attr])).lower() + # remove replacement characters from unescaped characters + val_unescaped = val_unescaped.replace("\ufffd", "") + try: + uri = urlparse.urlparse(val_unescaped) + except ValueError: + uri = None + del attrs[attr] + if uri and uri.scheme: + if uri.scheme not in self.allowed_protocols: + del attrs[attr] + if uri.scheme == 'data': + m = data_content_type.match(uri.path) + if not m: + del attrs[attr] + elif m.group('content_type') not in self.allowed_content_types: + del attrs[attr] + + for attr in self.svg_attr_val_allows_ref: + if attr in attrs: + attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', + ' ', + unescape(attrs[attr])) + if (token["name"] in self.svg_allow_local_href and + (namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*', + attrs[(namespaces['xlink'], 'href')])): + del attrs[(namespaces['xlink'], 'href')] + if (None, 'style') in attrs: + attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')]) + token["data"] = attrs + return token + + def disallowed_token(self, token): + token_type = token["type"] + if token_type == "EndTag": + token["data"] = "" % token["name"] + elif token["data"]: + assert token_type in ("StartTag", "EmptyTag") + attrs = [] + for (ns, name), v in token["data"].items(): + attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v))) + token["data"] = "<%s%s>" % (token["name"], ''.join(attrs)) + else: + token["data"] = "<%s>" % token["name"] + if token.get("selfClosing"): + token["data"] = token["data"][:-1] + "/>" + + token["type"] = "Characters" + + del token["name"] + return token + + def sanitize_css(self, style): + # disallow urls + style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) + + # gauntlet + if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): + return '' + if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): + return '' + + clean = [] + for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style): + if not value: + continue + if prop.lower() in self.allowed_css_properties: + clean.append(prop + ': ' + value + ';') + elif prop.split('-')[0].lower() in ['background', 'border', 'margin', + 'padding']: + for keyword in value.split(): + if keyword not in self.allowed_css_keywords and \ + not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa + break + else: + clean.append(prop + ': ' + value + ';') + elif prop.lower() in self.allowed_svg_properties: + clean.append(prop + ': ' + value + ';') + + return ' '.join(clean) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/whitespace.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/whitespace.py new file mode 100644 index 0000000..0d12584 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/whitespace.py @@ -0,0 +1,38 @@ +from __future__ import absolute_import, division, unicode_literals + +import re + +from . 
import base +from ..constants import rcdataElements, spaceCharacters +spaceCharacters = "".join(spaceCharacters) + +SPACES_REGEX = re.compile("[%s]+" % spaceCharacters) + + +class Filter(base.Filter): + """Collapses whitespace except in pre, textarea, and script elements""" + spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements)) + + def __iter__(self): + preserve = 0 + for token in base.Filter.__iter__(self): + type = token["type"] + if type == "StartTag" \ + and (preserve or token["name"] in self.spacePreserveElements): + preserve += 1 + + elif type == "EndTag" and preserve: + preserve -= 1 + + elif not preserve and type == "SpaceCharacters" and token["data"]: + # Test on token["data"] above to not introduce spaces where there were not + token["data"] = " " + + elif not preserve and type == "Characters": + token["data"] = collapse_spaces(token["data"]) + + yield token + + +def collapse_spaces(text): + return SPACES_REGEX.sub(' ', text) diff --git a/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/html5parser.py b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/html5parser.py new file mode 100644 index 0000000..ae41a13 --- /dev/null +++ b/venv.bak/lib/python3.7/site-packages/pip/_vendor/html5lib/html5parser.py @@ -0,0 +1,2791 @@ +from __future__ import absolute_import, division, unicode_literals +from pip._vendor.six import with_metaclass, viewkeys + +import types +from collections import OrderedDict + +from . import _inputstream +from . import _tokenizer + +from . import treebuilders +from .treebuilders.base import Marker + +from . import _utils +from .constants import ( + spaceCharacters, asciiUpper2Lower, + specialElements, headingElements, cdataElements, rcdataElements, + tokenTypes, tagTokenTypes, + namespaces, + htmlIntegrationPointElements, mathmlTextIntegrationPointElements, + adjustForeignAttributes as adjustForeignAttributesMap, + adjustMathMLAttributes, adjustSVGAttributes, + E, + _ReparseException +) + + +def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs): + """Parse an HTML document as a string or file-like object into a tree + + :arg doc: the document to parse as a string or file-like object + + :arg treebuilder: the treebuilder to use when parsing + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :returns: parsed tree + + Example: + + >>> from html5lib.html5parser import parse + >>> parse('

    This is a doc

    ') + + + """ + tb = treebuilders.getTreeBuilder(treebuilder) + p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) + return p.parse(doc, **kwargs) + + +def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs): + """Parse an HTML fragment as a string or file-like object into a tree + + :arg doc: the fragment to parse as a string or file-like object + + :arg container: the container context to parse the fragment in + + :arg treebuilder: the treebuilder to use when parsing + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :returns: parsed tree + + Example: + + >>> from html5lib.html5libparser import parseFragment + >>> parseFragment('this is a fragment') + + + """ + tb = treebuilders.getTreeBuilder(treebuilder) + p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) + return p.parseFragment(doc, container=container, **kwargs) + + +def method_decorator_metaclass(function): + class Decorated(type): + def __new__(meta, classname, bases, classDict): + for attributeName, attribute in classDict.items(): + if isinstance(attribute, types.FunctionType): + attribute = function(attribute) + + classDict[attributeName] = attribute + return type.__new__(meta, classname, bases, classDict) + return Decorated + + +class HTMLParser(object): + """HTML parser + + Generates a tree structure from a stream of (possibly malformed) HTML. + + """ + + def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False): + """ + :arg tree: a treebuilder class controlling the type of tree that will be + returned. Built in treebuilders can be accessed through + html5lib.treebuilders.getTreeBuilder(treeType) + + :arg strict: raise an exception when a parse error is encountered + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :arg debug: whether or not to enable debug mode which logs things + + Example: + + >>> from html5lib.html5parser import HTMLParser + >>> parser = HTMLParser() # generates parser with etree builder + >>> parser = HTMLParser('lxml', strict=True) # generates parser with lxml builder which is strict + + """ + + # Raise an exception on the first error encountered + self.strict = strict + + if tree is None: + tree = treebuilders.getTreeBuilder("etree") + self.tree = tree(namespaceHTMLElements) + self.errors = [] + + self.phases = dict([(name, cls(self, self.tree)) for name, cls in + getPhases(debug).items()]) + + def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs): + + self.innerHTMLMode = innerHTML + self.container = container + self.scripting = scripting + self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs) + self.reset() + + try: + self.mainLoop() + except _ReparseException: + self.reset() + self.mainLoop() + + def reset(self): + self.tree.reset() + self.firstStartTag = False + self.errors = [] + self.log = [] # only used with debug mode + # "quirks" / "limited quirks" / "no quirks" + self.compatMode = "no quirks" + + if self.innerHTMLMode: + self.innerHTML = self.container.lower() + + if self.innerHTML in cdataElements: + self.tokenizer.state = self.tokenizer.rcdataState + elif self.innerHTML in rcdataElements: + self.tokenizer.state = self.tokenizer.rawtextState + elif self.innerHTML == 'plaintext': + self.tokenizer.state = self.tokenizer.plaintextState + else: + # state already is data state + # self.tokenizer.state = self.tokenizer.dataState + pass + self.phase = self.phases["beforeHtml"] + 
self.phase.insertHtmlElement() + self.resetInsertionMode() + else: + self.innerHTML = False # pylint:disable=redefined-variable-type + self.phase = self.phases["initial"] + + self.lastPhase = None + + self.beforeRCDataPhase = None + + self.framesetOK = True + + @property + def documentEncoding(self): + """Name of the character encoding that was used to decode the input stream, or + :obj:`None` if that is not determined yet + + """ + if not hasattr(self, 'tokenizer'): + return None + return self.tokenizer.stream.charEncoding[0].name + + def isHTMLIntegrationPoint(self, element): + if (element.name == "annotation-xml" and + element.namespace == namespaces["mathml"]): + return ("encoding" in element.attributes and + element.attributes["encoding"].translate( + asciiUpper2Lower) in + ("text/html", "application/xhtml+xml")) + else: + return (element.namespace, element.name) in htmlIntegrationPointElements + + def isMathMLTextIntegrationPoint(self, element): + return (element.namespace, element.name) in mathmlTextIntegrationPointElements + + def mainLoop(self): + CharactersToken = tokenTypes["Characters"] + SpaceCharactersToken = tokenTypes["SpaceCharacters"] + StartTagToken = tokenTypes["StartTag"] + EndTagToken = tokenTypes["EndTag"] + CommentToken = tokenTypes["Comment"] + DoctypeToken = tokenTypes["Doctype"] + ParseErrorToken = tokenTypes["ParseError"] + + for token in self.normalizedTokens(): + prev_token = None + new_token = token + while new_token is not None: + prev_token = new_token + currentNode = self.tree.openElements[-1] if self.tree.openElements else None + currentNodeNamespace = currentNode.namespace if currentNode else None + currentNodeName = currentNode.name if currentNode else None + + type = new_token["type"] + + if type == ParseErrorToken: + self.parseError(new_token["data"], new_token.get("datavars", {})) + new_token = None + else: + if (len(self.tree.openElements) == 0 or + currentNodeNamespace == self.tree.defaultNamespace or + (self.isMathMLTextIntegrationPoint(currentNode) and + ((type == StartTagToken and + token["name"] not in frozenset(["mglyph", "malignmark"])) or + type in (CharactersToken, SpaceCharactersToken))) or + (currentNodeNamespace == namespaces["mathml"] and + currentNodeName == "annotation-xml" and + type == StartTagToken and + token["name"] == "svg") or + (self.isHTMLIntegrationPoint(currentNode) and + type in (StartTagToken, CharactersToken, SpaceCharactersToken))): + phase = self.phase + else: + phase = self.phases["inForeignContent"] + + if type == CharactersToken: + new_token = phase.processCharacters(new_token) + elif type == SpaceCharactersToken: + new_token = phase.processSpaceCharacters(new_token) + elif type == StartTagToken: + new_token = phase.processStartTag(new_token) + elif type == EndTagToken: + new_token = phase.processEndTag(new_token) + elif type == CommentToken: + new_token = phase.processComment(new_token) + elif type == DoctypeToken: + new_token = phase.processDoctype(new_token) + + if (type == StartTagToken and prev_token["selfClosing"] and + not prev_token["selfClosingAcknowledged"]): + self.parseError("non-void-element-with-trailing-solidus", + {"name": prev_token["name"]}) + + # When the loop finishes it's EOF + reprocess = True + phases = [] + while reprocess: + phases.append(self.phase) + reprocess = self.phase.processEOF() + if reprocess: + assert self.phase not in phases + + def normalizedTokens(self): + for token in self.tokenizer: + yield self.normalizeToken(token) + + def parse(self, stream, *args, **kwargs): + """Parse a 
HTML document into a well-formed tree + + :arg stream: a file-like object or string containing the HTML to be parsed + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element). + + :arg scripting: treat noscript elements as if JavaScript was turned on + + :returns: parsed tree + + Example: + + >>> from html5lib.html5parser import HTMLParser + >>> parser = HTMLParser() + >>> parser.parse('

    This is a doc

    ') + + + """ + self._parse(stream, False, None, *args, **kwargs) + return self.tree.getDocument() + + def parseFragment(self, stream, *args, **kwargs): + """Parse a HTML fragment into a well-formed tree fragment + + :arg container: name of the element we're setting the innerHTML + property if set to None, default to 'div' + + :arg stream: a file-like object or string containing the HTML to be parsed + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element) + + :arg scripting: treat noscript elements as if JavaScript was turned on + + :returns: parsed tree + + Example: + + >>> from html5lib.html5libparser import HTMLParser + >>> parser = HTMLParser() + >>> parser.parseFragment('this is a fragment') + + + """ + self._parse(stream, True, *args, **kwargs) + return self.tree.getFragment() + + def parseError(self, errorcode="XXX-undefined-error", datavars=None): + # XXX The idea is to make errorcode mandatory. + if datavars is None: + datavars = {} + self.errors.append((self.tokenizer.stream.position(), errorcode, datavars)) + if self.strict: + raise ParseError(E[errorcode] % datavars) + + def normalizeToken(self, token): + # HTML5 specific normalizations to the token stream + if token["type"] == tokenTypes["StartTag"]: + raw = token["data"] + token["data"] = OrderedDict(raw) + if len(raw) > len(token["data"]): + # we had some duplicated attribute, fix so first wins + token["data"].update(raw[::-1]) + + return token + + def adjustMathMLAttributes(self, token): + adjust_attributes(token, adjustMathMLAttributes) + + def adjustSVGAttributes(self, token): + adjust_attributes(token, adjustSVGAttributes) + + def adjustForeignAttributes(self, token): + adjust_attributes(token, adjustForeignAttributesMap) + + def reparseTokenNormal(self, token): + # pylint:disable=unused-argument + self.parser.phase() + + def resetInsertionMode(self): + # The name of this method is mostly historical. (It's also used in the + # specification.) 
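The parse()/parseFragment() helpers and the HTMLParser methods documented above can be exercised directly; a short sketch, assuming the upstream html5lib package and the default etree treebuilder:

import html5lib

document = html5lib.parse("<html><body><p>This is a doc</p></body></html>")
fragment = html5lib.parseFragment("this is a fragment", container="div")
print(document.tag)   # e.g. '{http://www.w3.org/1999/xhtml}html'
print(fragment.text)  # e.g. 'this is a fragment'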
+ last = False + newModes = { + "select": "inSelect", + "td": "inCell", + "th": "inCell", + "tr": "inRow", + "tbody": "inTableBody", + "thead": "inTableBody", + "tfoot": "inTableBody", + "caption": "inCaption", + "colgroup": "inColumnGroup", + "table": "inTable", + "head": "inBody", + "body": "inBody", + "frameset": "inFrameset", + "html": "beforeHead" + } + for node in self.tree.openElements[::-1]: + nodeName = node.name + new_phase = None + if node == self.tree.openElements[0]: + assert self.innerHTML + last = True + nodeName = self.innerHTML + # Check for conditions that should only happen in the innerHTML + # case + if nodeName in ("select", "colgroup", "head", "html"): + assert self.innerHTML + + if not last and node.namespace != self.tree.defaultNamespace: + continue + + if nodeName in newModes: + new_phase = self.phases[newModes[nodeName]] + break + elif last: + new_phase = self.phases["inBody"] + break + + self.phase = new_phase + + def parseRCDataRawtext(self, token, contentType): + # Generic RCDATA/RAWTEXT Parsing algorithm + assert contentType in ("RAWTEXT", "RCDATA") + + self.tree.insertElement(token) + + if contentType == "RAWTEXT": + self.tokenizer.state = self.tokenizer.rawtextState + else: + self.tokenizer.state = self.tokenizer.rcdataState + + self.originalPhase = self.phase + + self.phase = self.phases["text"] + + +@_utils.memoize +def getPhases(debug): + def log(function): + """Logger that records which phase processes each token""" + type_names = dict((value, key) for key, value in + tokenTypes.items()) + + def wrapped(self, *args, **kwargs): + if function.__name__.startswith("process") and len(args) > 0: + token = args[0] + try: + info = {"type": type_names[token['type']]} + except: + raise + if token['type'] in tagTokenTypes: + info["name"] = token['name'] + + self.parser.log.append((self.parser.tokenizer.state.__name__, + self.parser.phase.__class__.__name__, + self.__class__.__name__, + function.__name__, + info)) + return function(self, *args, **kwargs) + else: + return function(self, *args, **kwargs) + return wrapped + + def getMetaclass(use_metaclass, metaclass_func): + if use_metaclass: + return method_decorator_metaclass(metaclass_func) + else: + return type + + # pylint:disable=unused-argument + class Phase(with_metaclass(getMetaclass(debug, log))): + """Base class for helper object that implements each phase of processing + """ + + def __init__(self, parser, tree): + self.parser = parser + self.tree = tree + + def processEOF(self): + raise NotImplementedError + + def processComment(self, token): + # For most phases the following is correct. Where it's not it will be + # overridden. + self.tree.insertComment(token, self.tree.openElements[-1]) + + def processDoctype(self, token): + self.parser.parseError("unexpected-doctype") + + def processCharacters(self, token): + self.tree.insertText(token["data"]) + + def processSpaceCharacters(self, token): + self.tree.insertText(token["data"]) + + def processStartTag(self, token): + return self.startTagHandler[token["name"]](token) + + def startTagHtml(self, token): + if not self.parser.firstStartTag and token["name"] == "html": + self.parser.parseError("non-html-root") + # XXX Need a check here to see if the first start tag token emitted is + # this token... If it's not, invoke self.parser.parseError(). 
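A sketch of the debug mode wired through getPhases() and the log wrapper above, assuming the upstream html5lib package: with debug=True each processed token is appended to parser.log together with the tokenizer state and the phase that handled it.

import html5lib

parser = html5lib.HTMLParser(debug=True)
parser.parse("<p>hi</p>")
for state, phase, handler, method, info in parser.log[:5]:
    print(phase, method, info)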
+ for attr, value in token["data"].items(): + if attr not in self.tree.openElements[0].attributes: + self.tree.openElements[0].attributes[attr] = value + self.parser.firstStartTag = False + + def processEndTag(self, token): + return self.endTagHandler[token["name"]](token) + + class InitialPhase(Phase): + def processSpaceCharacters(self, token): + pass + + def processComment(self, token): + self.tree.insertComment(token, self.tree.document) + + def processDoctype(self, token): + name = token["name"] + publicId = token["publicId"] + systemId = token["systemId"] + correct = token["correct"] + + if (name != "html" or publicId is not None or + systemId is not None and systemId != "about:legacy-compat"): + self.parser.parseError("unknown-doctype") + + if publicId is None: + publicId = "" + + self.tree.insertDoctype(token) + + if publicId != "": + publicId = publicId.translate(asciiUpper2Lower) + + if (not correct or token["name"] != "html" or + publicId.startswith( + ("+//silmaril//dtd html pro v0r11 19970101//", + "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", + "-//as//dtd html 3.0 aswedit + extensions//", + "-//ietf//dtd html 2.0 level 1//", + "-//ietf//dtd html 2.0 level 2//", + "-//ietf//dtd html 2.0 strict level 1//", + "-//ietf//dtd html 2.0 strict level 2//", + "-//ietf//dtd html 2.0 strict//", + "-//ietf//dtd html 2.0//", + "-//ietf//dtd html 2.1e//", + "-//ietf//dtd html 3.0//", + "-//ietf//dtd html 3.2 final//", + "-//ietf//dtd html 3.2//", + "-//ietf//dtd html 3//", + "-//ietf//dtd html level 0//", + "-//ietf//dtd html level 1//", + "-//ietf//dtd html level 2//", + "-//ietf//dtd html level 3//", + "-//ietf//dtd html strict level 0//", + "-//ietf//dtd html strict level 1//", + "-//ietf//dtd html strict level 2//", + "-//ietf//dtd html strict level 3//", + "-//ietf//dtd html strict//", + "-//ietf//dtd html//", + "-//metrius//dtd metrius presentational//", + "-//microsoft//dtd internet explorer 2.0 html strict//", + "-//microsoft//dtd internet explorer 2.0 html//", + "-//microsoft//dtd internet explorer 2.0 tables//", + "-//microsoft//dtd internet explorer 3.0 html strict//", + "-//microsoft//dtd internet explorer 3.0 html//", + "-//microsoft//dtd internet explorer 3.0 tables//", + "-//netscape comm. corp.//dtd html//", + "-//netscape comm. 
corp.//dtd strict html//", + "-//o'reilly and associates//dtd html 2.0//", + "-//o'reilly and associates//dtd html extended 1.0//", + "-//o'reilly and associates//dtd html extended relaxed 1.0//", + "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", + "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", + "-//spyglass//dtd html 2.0 extended//", + "-//sq//dtd html 2.0 hotmetal + extensions//", + "-//sun microsystems corp.//dtd hotjava html//", + "-//sun microsystems corp.//dtd hotjava strict html//", + "-//w3c//dtd html 3 1995-03-24//", + "-//w3c//dtd html 3.2 draft//", + "-//w3c//dtd html 3.2 final//", + "-//w3c//dtd html 3.2//", + "-//w3c//dtd html 3.2s draft//", + "-//w3c//dtd html 4.0 frameset//", + "-//w3c//dtd html 4.0 transitional//", + "-//w3c//dtd html experimental 19960712//", + "-//w3c//dtd html experimental 970421//", + "-//w3c//dtd w3 html//", + "-//w3o//dtd w3 html 3.0//", + "-//webtechs//dtd mozilla html 2.0//", + "-//webtechs//dtd mozilla html//")) or + publicId in ("-//w3o//dtd w3 html strict 3.0//en//", + "-/w3c/dtd html 4.0 transitional/en", + "html") or + publicId.startswith( + ("-//w3c//dtd html 4.01 frameset//", + "-//w3c//dtd html 4.01 transitional//")) and + systemId is None or + systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"): + self.parser.compatMode = "quirks" + elif (publicId.startswith( + ("-//w3c//dtd xhtml 1.0 frameset//", + "-//w3c//dtd xhtml 1.0 transitional//")) or + publicId.startswith( + ("-//w3c//dtd html 4.01 frameset//", + "-//w3c//dtd html 4.01 transitional//")) and + systemId is not None): + self.parser.compatMode = "limited quirks" + + self.parser.phase = self.parser.phases["beforeHtml"] + + def anythingElse(self): + self.parser.compatMode = "quirks" + self.parser.phase = self.parser.phases["beforeHtml"] + + def processCharacters(self, token): + self.parser.parseError("expected-doctype-but-got-chars") + self.anythingElse() + return token + + def processStartTag(self, token): + self.parser.parseError("expected-doctype-but-got-start-tag", + {"name": token["name"]}) + self.anythingElse() + return token + + def processEndTag(self, token): + self.parser.parseError("expected-doctype-but-got-end-tag", + {"name": token["name"]}) + self.anythingElse() + return token + + def processEOF(self): + self.parser.parseError("expected-doctype-but-got-eof") + self.anythingElse() + return True + + class BeforeHtmlPhase(Phase): + # helper methods + def insertHtmlElement(self): + self.tree.insertRoot(impliedTagToken("html", "StartTag")) + self.parser.phase = self.parser.phases["beforeHead"] + + # other + def processEOF(self): + self.insertHtmlElement() + return True + + def processComment(self, token): + self.tree.insertComment(token, self.tree.document) + + def processSpaceCharacters(self, token): + pass + + def processCharacters(self, token): + self.insertHtmlElement() + return token + + def processStartTag(self, token): + if token["name"] == "html": + self.parser.firstStartTag = True + self.insertHtmlElement() + return token + + def processEndTag(self, token): + if token["name"] not in ("head", "body", "html", "br"): + self.parser.parseError("unexpected-end-tag-before-html", + {"name": token["name"]}) + else: + self.insertHtmlElement() + return token + + class BeforeHeadPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("head", self.startTagHead) + ]) 
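A sketch of how the public-identifier prefixes listed above select the compatibility mode, assuming the upstream html5lib package; the result is recorded on the parser as compatMode.

import html5lib

parser = html5lib.HTMLParser()
parser.parse('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"><p>x')
print(parser.compatMode)  # expected: "quirks"
parser.parse("<!DOCTYPE html><p>x")
print(parser.compatMode)  # expected: "no quirks"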
+ self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + (("head", "body", "html", "br"), self.endTagImplyHead) + ]) + self.endTagHandler.default = self.endTagOther + + def processEOF(self): + self.startTagHead(impliedTagToken("head", "StartTag")) + return True + + def processSpaceCharacters(self, token): + pass + + def processCharacters(self, token): + self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagHead(self, token): + self.tree.insertElement(token) + self.tree.headPointer = self.tree.openElements[-1] + self.parser.phase = self.parser.phases["inHead"] + + def startTagOther(self, token): + self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def endTagImplyHead(self, token): + self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def endTagOther(self, token): + self.parser.parseError("end-tag-after-implied-root", + {"name": token["name"]}) + + class InHeadPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("title", self.startTagTitle), + (("noframes", "style"), self.startTagNoFramesStyle), + ("noscript", self.startTagNoscript), + ("script", self.startTagScript), + (("base", "basefont", "bgsound", "command", "link"), + self.startTagBaseLinkCommand), + ("meta", self.startTagMeta), + ("head", self.startTagHead) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + ("head", self.endTagHead), + (("br", "html", "body"), self.endTagHtmlBodyBr) + ]) + self.endTagHandler.default = self.endTagOther + + # the real thing + def processEOF(self): + self.anythingElse() + return True + + def processCharacters(self, token): + self.anythingElse() + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagHead(self, token): + self.parser.parseError("two-heads-are-not-better-than-one") + + def startTagBaseLinkCommand(self, token): + self.tree.insertElement(token) + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + + def startTagMeta(self, token): + self.tree.insertElement(token) + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + + attributes = token["data"] + if self.parser.tokenizer.stream.charEncoding[1] == "tentative": + if "charset" in attributes: + self.parser.tokenizer.stream.changeEncoding(attributes["charset"]) + elif ("content" in attributes and + "http-equiv" in attributes and + attributes["http-equiv"].lower() == "content-type"): + # Encoding it as UTF-8 here is a hack, as really we should pass + # the abstract Unicode string, and just use the + # ContentAttrParser on that, but using UTF-8 allows all chars + # to be encoded and as a ASCII-superset works. 
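A sketch of the meta-charset handling performed by startTagMeta (the code continues just below), assuming the upstream html5lib package: when the input is bytes and the encoding is still tentative, a declared charset switches the stream encoding, which is then visible via documentEncoding.

import html5lib

parser = html5lib.HTMLParser()
parser.parse(b'<meta charset="windows-1252"><p>caf\xe9</p>')
print(parser.documentEncoding)  # expected: 'windows-1252'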
+ data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8")) + parser = _inputstream.ContentAttrParser(data) + codec = parser.parse() + self.parser.tokenizer.stream.changeEncoding(codec) + + def startTagTitle(self, token): + self.parser.parseRCDataRawtext(token, "RCDATA") + + def startTagNoFramesStyle(self, token): + # Need to decide whether to implement the scripting-disabled case + self.parser.parseRCDataRawtext(token, "RAWTEXT") + + def startTagNoscript(self, token): + if self.parser.scripting: + self.parser.parseRCDataRawtext(token, "RAWTEXT") + else: + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inHeadNoscript"] + + def startTagScript(self, token): + self.tree.insertElement(token) + self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState + self.parser.originalPhase = self.parser.phase + self.parser.phase = self.parser.phases["text"] + + def startTagOther(self, token): + self.anythingElse() + return token + + def endTagHead(self, token): + node = self.parser.tree.openElements.pop() + assert node.name == "head", "Expected head got %s" % node.name + self.parser.phase = self.parser.phases["afterHead"] + + def endTagHtmlBodyBr(self, token): + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def anythingElse(self): + self.endTagHead(impliedTagToken("head")) + + class InHeadNoscriptPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + (("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand), + (("head", "noscript"), self.startTagHeadNoscript), + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + ("noscript", self.endTagNoscript), + ("br", self.endTagBr), + ]) + self.endTagHandler.default = self.endTagOther + + def processEOF(self): + self.parser.parseError("eof-in-head-noscript") + self.anythingElse() + return True + + def processComment(self, token): + return self.parser.phases["inHead"].processComment(token) + + def processCharacters(self, token): + self.parser.parseError("char-in-head-noscript") + self.anythingElse() + return token + + def processSpaceCharacters(self, token): + return self.parser.phases["inHead"].processSpaceCharacters(token) + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagBaseLinkCommand(self, token): + return self.parser.phases["inHead"].processStartTag(token) + + def startTagHeadNoscript(self, token): + self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) + + def startTagOther(self, token): + self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) + self.anythingElse() + return token + + def endTagNoscript(self, token): + node = self.parser.tree.openElements.pop() + assert node.name == "noscript", "Expected noscript got %s" % node.name + self.parser.phase = self.parser.phases["inHead"] + + def endTagBr(self, token): + self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def anythingElse(self): + # Caller must raise parse error first! 
+ self.endTagNoscript(impliedTagToken("noscript")) + + class AfterHeadPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("body", self.startTagBody), + ("frameset", self.startTagFrameset), + (("base", "basefont", "bgsound", "link", "meta", "noframes", "script", + "style", "title"), + self.startTagFromHead), + ("head", self.startTagHead) + ]) + self.startTagHandler.default = self.startTagOther + self.endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"), + self.endTagHtmlBodyBr)]) + self.endTagHandler.default = self.endTagOther + + def processEOF(self): + self.anythingElse() + return True + + def processCharacters(self, token): + self.anythingElse() + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagBody(self, token): + self.parser.framesetOK = False + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inBody"] + + def startTagFrameset(self, token): + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inFrameset"] + + def startTagFromHead(self, token): + self.parser.parseError("unexpected-start-tag-out-of-my-head", + {"name": token["name"]}) + self.tree.openElements.append(self.tree.headPointer) + self.parser.phases["inHead"].processStartTag(token) + for node in self.tree.openElements[::-1]: + if node.name == "head": + self.tree.openElements.remove(node) + break + + def startTagHead(self, token): + self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) + + def startTagOther(self, token): + self.anythingElse() + return token + + def endTagHtmlBodyBr(self, token): + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def anythingElse(self): + self.tree.insertElement(impliedTagToken("body", "StartTag")) + self.parser.phase = self.parser.phases["inBody"] + self.parser.framesetOK = True + + class InBodyPhase(Phase): + # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody + # the really-really-really-very crazy mode + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + # Set this to the default handler + self.processSpaceCharacters = self.processSpaceCharactersNonPre + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + (("base", "basefont", "bgsound", "command", "link", "meta", + "script", "style", "title"), + self.startTagProcessInHead), + ("body", self.startTagBody), + ("frameset", self.startTagFrameset), + (("address", "article", "aside", "blockquote", "center", "details", + "dir", "div", "dl", "fieldset", "figcaption", "figure", + "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p", + "section", "summary", "ul"), + self.startTagCloseP), + (headingElements, self.startTagHeading), + (("pre", "listing"), self.startTagPreListing), + ("form", self.startTagForm), + (("li", "dd", "dt"), self.startTagListItem), + ("plaintext", self.startTagPlaintext), + ("a", self.startTagA), + (("b", "big", "code", "em", "font", "i", "s", "small", "strike", + "strong", "tt", "u"), self.startTagFormatting), + ("nobr", self.startTagNobr), + ("button", self.startTagButton), + (("applet", "marquee", "object"), self.startTagAppletMarqueeObject), + ("xmp", self.startTagXmp), + ("table", self.startTagTable), + (("area", "br", "embed", "img", "keygen", "wbr"), + 
self.startTagVoidFormatting), + (("param", "source", "track"), self.startTagParamSource), + ("input", self.startTagInput), + ("hr", self.startTagHr), + ("image", self.startTagImage), + ("isindex", self.startTagIsIndex), + ("textarea", self.startTagTextarea), + ("iframe", self.startTagIFrame), + ("noscript", self.startTagNoscript), + (("noembed", "noframes"), self.startTagRawtext), + ("select", self.startTagSelect), + (("rp", "rt"), self.startTagRpRt), + (("option", "optgroup"), self.startTagOpt), + (("math"), self.startTagMath), + (("svg"), self.startTagSvg), + (("caption", "col", "colgroup", "frame", "head", + "tbody", "td", "tfoot", "th", "thead", + "tr"), self.startTagMisplaced) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + ("body", self.endTagBody), + ("html", self.endTagHtml), + (("address", "article", "aside", "blockquote", "button", "center", + "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure", + "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre", + "section", "summary", "ul"), self.endTagBlock), + ("form", self.endTagForm), + ("p", self.endTagP), + (("dd", "dt", "li"), self.endTagListItem), + (headingElements, self.endTagHeading), + (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", + "strike", "strong", "tt", "u"), self.endTagFormatting), + (("applet", "marquee", "object"), self.endTagAppletMarqueeObject), + ("br", self.endTagBr), + ]) + self.endTagHandler.default = self.endTagOther + + def isMatchingFormattingElement(self, node1, node2): + return (node1.name == node2.name and + node1.namespace == node2.namespace and + node1.attributes == node2.attributes) + + # helper + def addFormattingElement(self, token): + self.tree.insertElement(token) + element = self.tree.openElements[-1] + + matchingElements = [] + for node in self.tree.activeFormattingElements[::-1]: + if node is Marker: + break + elif self.isMatchingFormattingElement(node, element): + matchingElements.append(node) + + assert len(matchingElements) <= 3 + if len(matchingElements) == 3: + self.tree.activeFormattingElements.remove(matchingElements[-1]) + self.tree.activeFormattingElements.append(element) + + # the real deal + def processEOF(self): + allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td", + "tfoot", "th", "thead", "tr", "body", + "html")) + for node in self.tree.openElements[::-1]: + if node.name not in allowed_elements: + self.parser.parseError("expected-closing-tag-but-got-eof") + break + # Stop parsing + + def processSpaceCharactersDropNewline(self, token): + # Sometimes (start of
    , , and